src/util.c - external/github.com/FRIGN/libgrapheme - Git at Google

 /* See LICENSE file for copyright and license details. */
 #include <limits.h>
 #include <stdbool.h>
 #include <stddef.h>
 #include <stdint.h>

 #include "../gen/types.h"
 #include "../grapheme.h"
 #include "util.h"

 /*
  * Set up reader r to read src from the beginning: the type, source
  * pointer and length are stored, the offset and the NUL-termination
  * flag are reset and every soft-limit slot is set to SIZE_MAX.
  */
 void
 herodotus_reader_init(HERODOTUS_READER *r, enum herodotus_type type,
                       const void *src, size_t srclen)
 {
 	size_t slot = 0;

 	/* bookkeeping state */
 	r->terminated_by_null = false;
 	r->off = 0;

 	/* caller-supplied description of the source */
 	r->srclen = srclen;
 	r->src = src;
 	r->type = type;

 	while (slot < LEN(r->soft_limit)) {
 		r->soft_limit[slot++] = SIZE_MAX;
 	}
 }

 /*
  * Make dest a "fresh" reader over the part of src that lies at and
  * after src->off: the source pointer is advanced by src->off elements,
  * the offset restarts at 0, and the length and soft limits are reduced
  * by src->off. Lengths and limits equal to SIZE_MAX stay SIZE_MAX.
  */
 void
 herodotus_reader_copy(const HERODOTUS_READER *src, HERODOTUS_READER *dest)
 {
 	size_t slot;

 	dest->type = src->type;

 	/* the element size used for the pointer step depends on the type */
 	switch (src->type) {
 	case HERODOTUS_TYPE_CODEPOINT:
 		dest->src = ((const uint_least32_t *)(src->src)) + src->off;
 		break;
 	default: /* src->type == HERODOTUS_TYPE_UTF8 */
 		dest->src = ((const char *)(src->src)) + src->off;
 		break;
 	}

 	/*
 	 * the subtractions build on the fact that src->srclen and
 	 * src->soft_limit[i] for any i are always larger or equal
 	 * to src->off
 	 */
 	dest->srclen = (src->srclen == SIZE_MAX) ?
 	                       SIZE_MAX : src->srclen - src->off;
 	dest->off = 0;
 	dest->terminated_by_null = src->terminated_by_null;

 	for (slot = 0; slot < LEN(src->soft_limit); slot++) {
 		dest->soft_limit[slot] =
 			(src->soft_limit[slot] == SIZE_MAX) ?
 				src->soft_limit[slot] :
 				src->soft_limit[slot] - src->off;
 	}
 }

 /*
  * Install a new soft limit at count units past the current offset.
  * The limits already stored each move one slot towards the end of
  * the soft_limit array; the one in the last slot is dropped.
  */
 void
 herodotus_reader_push_advance_limit(HERODOTUS_READER *r, size_t count)
 {
 	size_t slot = LEN(r->soft_limit);

 	/* go from the last slot down to slot 1, copying from below */
 	while (--slot > 0) {
 		r->soft_limit[slot] = r->soft_limit[slot - 1];
 	}

 	r->soft_limit[0] = r->off + count;
 }

 /*
  * Remove the soft limit in slot 0. The remaining limits each move one
  * slot towards the front and the last slot is refilled with SIZE_MAX.
  */
 void
 herodotus_reader_pop_limit(HERODOTUS_READER *r)
 {
 	const size_t last = LEN(r->soft_limit) - 1;
 	size_t slot;

 	for (slot = 0; slot < last; slot++) {
 		r->soft_limit[slot] = r->soft_limit[slot + 1];
 	}

 	r->soft_limit[last] = SIZE_MAX;
 }

 /*
  * Ask the word-break function matching the reader's type for the next
  * word break, starting at the reader's current offset and looking no
  * further than the smaller of srclen and soft_limit[0]. The result of
  * that function is returned unchanged.
  */
 size_t
 herodotus_reader_next_word_break(const HERODOTUS_READER *r)
 {
 	const size_t limit = MIN(r->srclen, r->soft_limit[0]);
 	size_t remaining;

 	/*
 	 * A limit of SIZE_MAX is passed on as SIZE_MAX instead of
 	 * SIZE_MAX - r->off, the same way herodotus_reader_copy()
 	 * carries it over, so that the length handed to the callee
 	 * still carries the special value once r->off is non-zero.
 	 */
 	if (limit == SIZE_MAX) {
 		remaining = SIZE_MAX;
 	} else {
 		remaining = limit - r->off;
 	}

 	if (r->type == HERODOTUS_TYPE_CODEPOINT) {
 		return grapheme_next_word_break(
 			(const uint_least32_t *)(r->src) + r->off,
 			remaining);
 	} else { /* r->type == HERODOTUS_TYPE_UTF8 */
 		return grapheme_next_word_break_utf8(
 			(const char *)(r->src) + r->off,
 			remaining);
 	}
 }

 /*
  * Return the length of the next codepoint at the reader's current
  * offset, looking no further than the smaller of srclen and
  * soft_limit[0]. For codepoint arrays this is 1 while the offset is
  * below that limit and 0 otherwise; for UTF-8 it is whatever
  * grapheme_decode_utf8() returns for the remaining bytes.
  */
 size_t
 herodotus_reader_next_codepoint_break(const HERODOTUS_READER *r)
 {
 	const size_t limit = MIN(r->srclen, r->soft_limit[0]);

 	if (r->type == HERODOTUS_TYPE_CODEPOINT) {
 		if (r->off < limit) {
 			return 1;
 		}
 		return 0;
 	}

 	/* r->type == HERODOTUS_TYPE_UTF8 */
 	return grapheme_decode_utf8((const char *)(r->src) + r->off,
 	                            limit - r->off, NULL);
 }

 /* Return the reader's current offset r->off. */
 size_t
 herodotus_reader_number_read(const HERODOTUS_READER *r)
 {
 	const size_t consumed = r->off;

 	return consumed;
 }

 /*
  * Read the codepoint at the reader's current offset into *cp.
  *
  * Returns HERODOTUS_STATUS_END_OF_BUFFER when the reader has no
  * source, has reached srclen, has already seen (or now sees) the NUL
  * of a UTF-8 source whose srclen is SIZE_MAX, or when the UTF-8
  * decoder reports more bytes than are left before the limit.
  * Returns HERODOTUS_STATUS_SOFT_LIMIT_REACHED when the offset has
  * reached soft_limit[0]. Otherwise returns HERODOTUS_STATUS_SUCCESS
  * and, if advance is true, moves the offset past what was read.
  */
 enum herodotus_status
 herodotus_read_codepoint(HERODOTUS_READER *r, bool advance, uint_least32_t *cp)
 {
 	size_t limit, consumed;

 	/* hard end: nothing (more) to read at all */
 	if (r->terminated_by_null || r->off >= r->srclen || r->src == NULL) {
 		*cp = GRAPHEME_INVALID_CODEPOINT;
 		return HERODOTUS_STATUS_END_OF_BUFFER;
 	}

 	/* soft end: the innermost pushed limit has been reached */
 	if (r->off >= r->soft_limit[0]) {
 		*cp = GRAPHEME_INVALID_CODEPOINT;
 		return HERODOTUS_STATUS_SOFT_LIMIT_REACHED;
 	}

 	if (r->type == HERODOTUS_TYPE_CODEPOINT) {
 		/* one array element is one codepoint */
 		*cp = ((const uint_least32_t *)(r->src))[r->off];
 		r->off += advance ? 1 : 0;

 		return HERODOTUS_STATUS_SUCCESS;
 	}

 	/* r->type == HERODOTUS_TYPE_UTF8 */
 	limit = MIN(r->srclen, r->soft_limit[0]);
 	consumed = grapheme_decode_utf8((const char *)r->src + r->off,
 	                                limit - r->off, cp);

 	if (unlikely(r->srclen == SIZE_MAX && *cp == 0)) {
 		/*
 		 * A NUL-byte in a source of length SIZE_MAX: leave the
 		 * offset alone and remember that the buffer ends here
 		 */
 		r->terminated_by_null = true;
 		return HERODOTUS_STATUS_END_OF_BUFFER;
 	}

 	if (r->off + consumed > limit) {
 		/*
 		 * the decoder wants more than we have; terminate here
 		 * instead of returning garbage
 		 */
 		return HERODOTUS_STATUS_END_OF_BUFFER;
 	}

 	/* the new offset is now known not to surpass the limits */
 	if (advance) {
 		r->off += consumed;
 	}

 	return HERODOTUS_STATUS_SUCCESS;
 }

 /*
  * Set up writer w to write into dest (destlen elements) from the
  * beginning. The offset starts at 0 and first_unwritable_offset is
  * set to SIZE_MAX.
  */
 void
 herodotus_writer_init(HERODOTUS_WRITER *w, enum herodotus_type type,
                       void *dest, size_t destlen)
 {
 	/* bookkeeping state */
 	w->first_unwritable_offset = SIZE_MAX;
 	w->off = 0;

 	/* caller-supplied description of the destination */
 	w->destlen = destlen;
 	w->dest = dest;
 	w->type = type;
 }

 /*
  * Write a terminator (0 for codepoint arrays, '\0' for UTF-8) into
  * the destination buffer without changing w->off. Nothing is written
  * when there is no destination buffer or when its length is 0.
  *
  * The terminator goes to the first of these positions that lies
  * inside the buffer: the current offset, the first offset that could
  * not be written, or else the last element of the buffer.
  */
 void
 herodotus_writer_nul_terminate(HERODOTUS_WRITER *w)
 {
 	size_t pos;

 	/*
 	 * With destlen == 0 there is no element that could hold the
 	 * terminator, and destlen - 1 below would wrap to SIZE_MAX.
 	 */
 	if (w->dest == NULL || w->destlen == 0) {
 		return;
 	}

 	if (w->off < w->destlen) {
 		/* We still have space in the buffer. Simply use it */
 		pos = w->off;
 	} else if (w->first_unwritable_offset < w->destlen) {
 		/*
 		 * There is no more space in the buffer. However, the
 		 * first offset that could not be written was noted
 		 * down and lies inside the buffer, so writing bailed
 		 * out there. Terminate at that offset.
 		 */
 		pos = w->first_unwritable_offset;
 	} else {
 		/*
 		 * There is no more space in the buffer and no usable
 		 * unwritable offset, so we are forced to overwrite
 		 * the last element.
 		 */
 		pos = w->destlen - 1;
 	}

 	if (w->type == HERODOTUS_TYPE_CODEPOINT) {
 		((uint_least32_t *)(w->dest))[pos] = 0;
 	} else { /* w->type == HERODOTUS_TYPE_UTF8 */
 		((char *)(w->dest))[pos] = '\0';
 	}

 	/* w->off is not incremented in any case */
 }

 /* Return the writer's current offset w->off. */
 size_t
 herodotus_writer_number_written(const HERODOTUS_WRITER *w)
 {
 	const size_t produced = w->off;

 	return produced;
 }

 /*
  * Append codepoint cp to the writer's destination.
  *
  * The offset always advances by the size of the codepoint (1 for
  * codepoint arrays, the encoded length for UTF-8), even when nothing
  * could be stored; this is what enables truncation detection. For
  * UTF-8 the first offset at which storing failed is remembered in
  * first_unwritable_offset.
  */
 void
 herodotus_write_codepoint(HERODOTUS_WRITER *w, uint_least32_t cp)
 {
 	size_t needed;

 	if (w->type == HERODOTUS_TYPE_CODEPOINT) {
 		/* store only while inside the buffer, but always count */
 		if (w->dest != NULL && w->off < w->destlen) {
 			((uint_least32_t *)(w->dest))[w->off] = cp;
 		}
 		w->off += 1;

 		return;
 	}

 	/* w->type == HERODOTUS_TYPE_UTF8: determine the encoded length */
 	needed = grapheme_encode_utf8(cp, NULL, 0);

 	if (w->dest == NULL || w->off + needed >= w->destlen) {
 		/*
 		 * we cannot write (completely) at this offset; note it
 		 * down if this is the first time that happens
 		 */
 		if (w->first_unwritable_offset == SIZE_MAX) {
 			w->first_unwritable_offset = w->off;
 		}
 	} else {
 		/* we still have enough room in the buffer */
 		grapheme_encode_utf8(cp, (char *)(w->dest) + w->off,
 		                     w->destlen - w->off);
 	}

 	w->off += needed;
 }

 /*
  * Initialize the property window p over the data of reader r.
  *
  * The state pointer, the no_prop value and the three callbacks are
  * stored in p. The mid, raw and skip readers all start as copies of
  * r. Both previous-property pairs are set to no_prop. The two raw
  * next-properties are filled from the first (up to) two codepoints,
  * the two skip next-properties from the first (up to) two codepoints
  * whose property is not skippable; slots that cannot be filled keep
  * no_prop.
  */
 void
 proper_init(const HERODOTUS_READER *r, void *state, uint_least8_t no_prop,
             uint_least8_t (*get_break_prop)(uint_least32_t),
             bool (*is_skippable_prop)(uint_least8_t),
             void (*skip_shift_callback)(uint_least8_t, void *),
             struct proper *p)
 {
 	uint_least32_t cp;
 	uint_least8_t candidate;
 	size_t filled;

 	/* set internal variables */
 	p->state = state;
 	p->no_prop = no_prop;
 	p->get_break_prop = get_break_prop;
 	p->is_skippable_prop = is_skippable_prop;
 	p->skip_shift_callback = skip_shift_callback;

 	/* the mid-reader reflects the position of the viewing-line */
 	herodotus_reader_copy(r, &(p->mid_reader));

 	/* at the start of the buffer nothing lies behind us */
 	p->raw.prev_prop[0] = p->no_prop;
 	p->raw.prev_prop[1] = p->no_prop;
 	p->skip.prev_prop[0] = p->no_prop;
 	p->skip.prev_prop[1] = p->no_prop;

 	/* raw window: take the next two properties as they come */
 	herodotus_reader_copy(r, &(p->raw_reader));
 	p->raw.next_prop[0] = p->no_prop;
 	p->raw.next_prop[1] = p->no_prop;
 	filled = 0;
 	while (filled < 2 &&
 	       herodotus_read_codepoint(&(p->raw_reader), true, &cp) ==
 	       HERODOTUS_STATUS_SUCCESS) {
 		p->raw.next_prop[filled] = p->get_break_prop(cp);
 		filled++;
 	}

 	/* skip window: same, but skippable properties are passed over */
 	herodotus_reader_copy(r, &(p->skip_reader));
 	p->skip.next_prop[0] = p->no_prop;
 	p->skip.next_prop[1] = p->no_prop;
 	filled = 0;
 	while (filled < 2 &&
 	       herodotus_read_codepoint(&(p->skip_reader), true, &cp) ==
 	       HERODOTUS_STATUS_SUCCESS) {
 		candidate = p->get_break_prop(cp);
 		if (p->is_skippable_prop(candidate)) {
 			continue;
 		}
 		p->skip.next_prop[filled] = candidate;
 		filled++;
 	}
 }

 /*
  * Move the viewing-line of p forward by one codepoint.
  *
  * Returns 1 without shifting anything when the property right after
  * the raw viewing-line (p->raw.next_prop[0]) is no_prop, which
  * indicates the end of the buffer. Otherwise the raw window is
  * shifted by one, the mid-reader is advanced and, if the property
  * that just passed the line is not skippable, the skip window is
  * shifted as well and skip_shift_callback is invoked; 0 is returned.
  */
 int
 proper_advance(struct proper *p)
 {
 	uint_least32_t cp;
 	uint_least8_t incoming, candidate;

 	/* read in the next "raw" property; no_prop if nothing is left */
 	incoming = p->no_prop;
 	if (herodotus_read_codepoint(&(p->raw_reader), true, &cp) ==
 	    HERODOTUS_STATUS_SUCCESS) {
 		incoming = p->get_break_prop(cp);
 	}

 	/* nothing is left to move past the raw viewing-line */
 	if (p->raw.next_prop[0] == p->no_prop) {
 		return 1;
 	}

 	/* shift the raw window by one */
 	p->raw.prev_prop[1] = p->raw.prev_prop[0];
 	p->raw.prev_prop[0] = p->raw.next_prop[0];
 	p->raw.next_prop[0] = p->raw.next_prop[1];
 	p->raw.next_prop[1] = incoming;

 	/* advance the middle reader viewing-line */
 	(void)herodotus_read_codepoint(&(p->mid_reader), true, &cp);

 	/*
 	 * A skippable property that passed the raw viewing-line (it
 	 * is now in p->raw.prev_prop[0]) leaves the skip window as
 	 * it is.
 	 */
 	if (p->is_skippable_prop(p->raw.prev_prop[0])) {
 		return 0;
 	}

 	/*
 	 * Otherwise shift the skip window as well. The property that
 	 * passed the raw line is not a no-prop (checked above).
 	 */
 	p->skip.prev_prop[1] = p->skip.prev_prop[0];
 	p->skip.prev_prop[0] = p->skip.next_prop[0];
 	p->skip.next_prop[0] = p->skip.next_prop[1];

 	/* report the property that passed the skip-viewing-line */
 	p->skip_shift_callback(p->skip.prev_prop[0], p->state);

 	/* look for the next non-skippable property to shift in */
 	p->skip.next_prop[1] = p->no_prop;
 	while (herodotus_read_codepoint(&(p->skip_reader), true, &cp) ==
 	       HERODOTUS_STATUS_SUCCESS) {
 		candidate = p->get_break_prop(cp);
 		if (!p->is_skippable_prop(candidate)) {
 			p->skip.next_prop[1] = candidate;
 			break;
 		}
 	}

 	return 0;
 }

 /*
  * Read element offset of the codepoint array str (len elements) into
  * *cp and return 1. If offset is not below len, *cp is set to
  * GRAPHEME_INVALID_CODEPOINT and 0 is returned.
  */
 inline size_t
 get_codepoint(const void *str, size_t len, size_t offset, uint_least32_t *cp)
 {
 	if (offset >= len) {
 		*cp = GRAPHEME_INVALID_CODEPOINT;
 		return 0;
 	}

 	*cp = ((const uint_least32_t *)str)[offset];
 	return 1;
 }

 /*
  * Decode the codepoint starting at byte offset of the UTF-8 buffer
  * str (len bytes) into *cp and return what grapheme_decode_utf8()
  * returned for it.
  *
  * 0 is returned instead when offset is not below len (*cp is then
  * GRAPHEME_INVALID_CODEPOINT) or when len is SIZE_MAX and the decoded
  * codepoint is 0, i.e. the NUL-byte has been reached.
  */
 inline size_t
 get_codepoint_utf8(const void *str, size_t len, size_t offset, uint_least32_t *cp)
 {
 	size_t ret;

 	if (offset >= len) {
 		*cp = GRAPHEME_INVALID_CODEPOINT;
 		return 0;
 	}

 	ret = grapheme_decode_utf8((const char *)str + offset,
 	                           len - offset, cp);

 	/* test the decoded value, not the pointer it was stored through */
 	if (unlikely(len == SIZE_MAX && *cp == 0)) {
 		return 0;
 	}

 	return ret;
 }

 /*
  * Store cp as element offset of the codepoint array str (len
  * elements). Returns 1 when the element was stored or when there is
  * no buffer (str is NULL or len is 0), and 0 when offset is not
  * below len.
  */
 inline size_t
 set_codepoint(uint_least32_t cp, void *str, size_t len, size_t offset)
 {
 	uint_least32_t *out = str;

 	/* without a buffer only the count is reported */
 	if (out == NULL || len == 0) {
 		return 1;
 	}

 	if (offset >= len) {
 		return 0;
 	}

 	out[offset] = cp;
 	return 1;
 }

 /*
  * Encode cp as UTF-8 at byte offset of the buffer str (len bytes) and
  * return what grapheme_encode_utf8() returned. When there is no
  * buffer (str is NULL or len is 0) or offset is not below len, the
  * encoder is called with no buffer at all.
  */
 inline size_t
 set_codepoint_utf8(uint_least32_t cp, void *str, size_t len, size_t offset)
 {
 	if (str != NULL && len != 0 && offset < len) {
 		return grapheme_encode_utf8(cp, (char *)str + offset,
 		                            len - offset);
 	}

 	/* nowhere to store anything */
 	return grapheme_encode_utf8(cp, NULL, 0);
 }
	/* See LICENSE file for copyright and license details. */
	#include <limits.h>
	#include <stdbool.h>
	#include <stddef.h>
	#include <stdint.h>

	#include "../gen/types.h"
	#include "../grapheme.h"
	#include "util.h"

	/*
	 * Set up reader r to read src from the beginning: the type, source
	 * pointer and length are stored, the offset and the NUL-termination
	 * flag are reset and every soft-limit slot is set to SIZE_MAX.
	 */
	void
	herodotus_reader_init(HERODOTUS_READER *r, enum herodotus_type type,
	                      const void *src, size_t srclen)
	{
		size_t slot = 0;

		/* bookkeeping state */
		r->terminated_by_null = false;
		r->off = 0;

		/* caller-supplied description of the source */
		r->srclen = srclen;
		r->src = src;
		r->type = type;

		while (slot < LEN(r->soft_limit)) {
			r->soft_limit[slot++] = SIZE_MAX;
		}
	}

	/*
	 * Make dest a "fresh" reader over the part of src that lies at and
	 * after src->off: the source pointer is advanced by src->off
	 * elements, the offset restarts at 0, and the length and soft limits
	 * are reduced by src->off. Lengths and limits equal to SIZE_MAX stay
	 * SIZE_MAX.
	 */
	void
	herodotus_reader_copy(const HERODOTUS_READER *src, HERODOTUS_READER *dest)
	{
		size_t i;

		/*
		 * we copy such that we have a "fresh" start and build
		 * on the fact that src->soft_limit[i] for any i and src->srclen
		 * are always larger or equal to src->off
		 */
		dest->type = src->type;
		if (src->type == HERODOTUS_TYPE_CODEPOINT) {
			dest->src = ((const uint_least32_t *)(src->src)) + src->off;
		} else { /* src->type == HERODOTUS_TYPE_UTF8 */
			dest->src = ((const char *)(src->src)) + src->off;
		}
		if (src->srclen == SIZE_MAX) {
			dest->srclen = SIZE_MAX;
		} else {
			dest->srclen = src->srclen - src->off;
		}
		dest->off = 0;
		dest->terminated_by_null = src->terminated_by_null;

		for (i = 0; i < LEN(src->soft_limit); i++) {
			if (src->soft_limit[i] == SIZE_MAX) {
				dest->soft_limit[i] = src->soft_limit[i];
			} else {
				dest->soft_limit[i] = src->soft_limit[i] - src->off;
			}
		}
	}

	/*
	 * Install a new soft limit at count units past the current offset.
	 * The limits already stored each move one slot towards the end of
	 * the soft_limit array; the one in the last slot is dropped.
	 */
	void
	herodotus_reader_push_advance_limit(HERODOTUS_READER *r, size_t count)
	{
		size_t slot = LEN(r->soft_limit);

		/* go from the last slot down to slot 1, copying from below */
		while (--slot > 0) {
			r->soft_limit[slot] = r->soft_limit[slot - 1];
		}

		r->soft_limit[0] = r->off + count;
	}

	/*
	 * Remove the soft limit in slot 0. The remaining limits each move one
	 * slot towards the front and the last slot is refilled with SIZE_MAX.
	 */
	void
	herodotus_reader_pop_limit(HERODOTUS_READER *r)
	{
		const size_t last = LEN(r->soft_limit) - 1;
		size_t slot;

		for (slot = 0; slot < last; slot++) {
			r->soft_limit[slot] = r->soft_limit[slot + 1];
		}

		r->soft_limit[last] = SIZE_MAX;
	}

	/*
	 * Ask the word-break function matching the reader's type for the next
	 * word break, starting at the reader's current offset and looking no
	 * further than the smaller of srclen and soft_limit[0]. The result of
	 * that function is returned unchanged.
	 */
	size_t
	herodotus_reader_next_word_break(const HERODOTUS_READER *r)
	{
		const size_t limit = MIN(r->srclen, r->soft_limit[0]);
		size_t remaining;

		/*
		 * A limit of SIZE_MAX is passed on as SIZE_MAX instead of
		 * SIZE_MAX - r->off, the same way herodotus_reader_copy()
		 * carries it over, so that the length handed to the callee
		 * still carries the special value once r->off is non-zero.
		 */
		if (limit == SIZE_MAX) {
			remaining = SIZE_MAX;
		} else {
			remaining = limit - r->off;
		}

		if (r->type == HERODOTUS_TYPE_CODEPOINT) {
			return grapheme_next_word_break(
				(const uint_least32_t *)(r->src) + r->off,
				remaining);
		} else { /* r->type == HERODOTUS_TYPE_UTF8 */
			return grapheme_next_word_break_utf8(
				(const char *)(r->src) + r->off,
				remaining);
		}
	}

	/*
	 * Return the length of the next codepoint at the reader's current
	 * offset, looking no further than the smaller of srclen and
	 * soft_limit[0]. For codepoint arrays this is 1 while the offset is
	 * below that limit and 0 otherwise; for UTF-8 it is whatever
	 * grapheme_decode_utf8() returns for the remaining bytes.
	 */
	size_t
	herodotus_reader_next_codepoint_break(const HERODOTUS_READER *r)
	{
		const size_t limit = MIN(r->srclen, r->soft_limit[0]);

		if (r->type == HERODOTUS_TYPE_CODEPOINT) {
			if (r->off < limit) {
				return 1;
			}
			return 0;
		}

		/* r->type == HERODOTUS_TYPE_UTF8 */
		return grapheme_decode_utf8((const char *)(r->src) + r->off,
		                            limit - r->off, NULL);
	}

	/* Return the reader's current offset r->off. */
	size_t
	herodotus_reader_number_read(const HERODOTUS_READER *r)
	{
		const size_t consumed = r->off;

		return consumed;
	}

	/*
	 * Read the codepoint at the reader's current offset into *cp.
	 *
	 * Returns HERODOTUS_STATUS_END_OF_BUFFER when the reader has no
	 * source, has reached srclen, has already seen (or now sees) the NUL
	 * of a UTF-8 source whose srclen is SIZE_MAX, or when the UTF-8
	 * decoder reports more bytes than are left before the limit.
	 * Returns HERODOTUS_STATUS_SOFT_LIMIT_REACHED when the offset has
	 * reached soft_limit[0]. Otherwise returns HERODOTUS_STATUS_SUCCESS
	 * and, if advance is true, moves the offset past what was read.
	 */
	enum herodotus_status
	herodotus_read_codepoint(HERODOTUS_READER *r, bool advance, uint_least32_t *cp)
	{
		size_t ret;

		if (r->terminated_by_null || r->off >= r->srclen || r->src == NULL) {
			*cp = GRAPHEME_INVALID_CODEPOINT;
			return HERODOTUS_STATUS_END_OF_BUFFER;
		}

		if (r->off >= r->soft_limit[0]) {
			*cp = GRAPHEME_INVALID_CODEPOINT;
			return HERODOTUS_STATUS_SOFT_LIMIT_REACHED;
		}

		if (r->type == HERODOTUS_TYPE_CODEPOINT) {
			/* one array element is one codepoint */
			*cp = ((const uint_least32_t *)(r->src))[r->off];

			if (advance) {
				r->off++;
			}
		} else { /* r->type == HERODOTUS_TYPE_UTF8 */
			ret = grapheme_decode_utf8((const char *)r->src + r->off,
			                           MIN(r->srclen, r->soft_limit[0]) -
			                           r->off, cp);

			if (unlikely(r->srclen == SIZE_MAX && *cp == 0)) {
				/*
				 * We encountered a NUL-byte. Don't increment
				 * offset and return as if the buffer had ended
				 * here all along
				 */
				r->terminated_by_null = true;
				return HERODOTUS_STATUS_END_OF_BUFFER;
			}

			if (r->off + ret > MIN(r->srclen, r->soft_limit[0])) {
				/*
				 * we want more than we have; instead of
				 * returning garbage we terminate here.
				 */
				return HERODOTUS_STATUS_END_OF_BUFFER;
			}

			/*
			 * Increase offset which we now know won't surpass
			 * the limits, unless we got told otherwise
			 */
			if (advance) {
				r->off += ret;
			}
		}

		return HERODOTUS_STATUS_SUCCESS;
	}

	/*
	 * Set up writer w to write into dest (destlen elements) from the
	 * beginning. The offset starts at 0 and first_unwritable_offset is
	 * set to SIZE_MAX.
	 */
	void
	herodotus_writer_init(HERODOTUS_WRITER *w, enum herodotus_type type,
	                      void *dest, size_t destlen)
	{
		/* bookkeeping state */
		w->first_unwritable_offset = SIZE_MAX;
		w->off = 0;

		/* caller-supplied description of the destination */
		w->destlen = destlen;
		w->dest = dest;
		w->type = type;
	}

	/*
	 * Write a terminator (0 for codepoint arrays, '\0' for UTF-8) into
	 * the destination buffer without changing w->off. Nothing is written
	 * when there is no destination buffer or when its length is 0.
	 *
	 * The terminator goes to the first of these positions that lies
	 * inside the buffer: the current offset, the first offset that could
	 * not be written, or else the last element of the buffer.
	 */
	void
	herodotus_writer_nul_terminate(HERODOTUS_WRITER *w)
	{
		size_t pos;

		/*
		 * With destlen == 0 there is no element that could hold the
		 * terminator, and destlen - 1 below would wrap to SIZE_MAX.
		 */
		if (w->dest == NULL || w->destlen == 0) {
			return;
		}

		if (w->off < w->destlen) {
			/* We still have space in the buffer. Simply use it */
			pos = w->off;
		} else if (w->first_unwritable_offset < w->destlen) {
			/*
			 * There is no more space in the buffer. However, the
			 * first offset that could not be written was noted
			 * down and lies inside the buffer, so writing bailed
			 * out there. Terminate at that offset.
			 */
			pos = w->first_unwritable_offset;
		} else {
			/*
			 * There is no more space in the buffer and no usable
			 * unwritable offset, so we are forced to overwrite
			 * the last element.
			 */
			pos = w->destlen - 1;
		}

		if (w->type == HERODOTUS_TYPE_CODEPOINT) {
			((uint_least32_t *)(w->dest))[pos] = 0;
		} else { /* w->type == HERODOTUS_TYPE_UTF8 */
			((char *)(w->dest))[pos] = '\0';
		}

		/* w->off is not incremented in any case */
	}

	/* Return the writer's current offset w->off. */
	size_t
	herodotus_writer_number_written(const HERODOTUS_WRITER *w)
	{
		const size_t produced = w->off;

		return produced;
	}

	/*
	 * Append codepoint cp to the writer's destination.
	 *
	 * The offset always advances by the size of the codepoint (1 for
	 * codepoint arrays, the encoded length for UTF-8), even when nothing
	 * could be stored; this is what enables truncation detection. For
	 * UTF-8 the first offset at which storing failed is remembered in
	 * first_unwritable_offset.
	 */
	void
	herodotus_write_codepoint(HERODOTUS_WRITER *w, uint_least32_t cp)
	{
		size_t needed;

		if (w->type == HERODOTUS_TYPE_CODEPOINT) {
			/* store only while inside the buffer, but always count */
			if (w->dest != NULL && w->off < w->destlen) {
				((uint_least32_t *)(w->dest))[w->off] = cp;
			}
			w->off += 1;

			return;
		}

		/* w->type == HERODOTUS_TYPE_UTF8: determine the encoded length */
		needed = grapheme_encode_utf8(cp, NULL, 0);

		if (w->dest == NULL || w->off + needed >= w->destlen) {
			/*
			 * we cannot write (completely) at this offset; note it
			 * down if this is the first time that happens
			 */
			if (w->first_unwritable_offset == SIZE_MAX) {
				w->first_unwritable_offset = w->off;
			}
		} else {
			/* we still have enough room in the buffer */
			grapheme_encode_utf8(cp, (char *)(w->dest) + w->off,
			                     w->destlen - w->off);
		}

		w->off += needed;
	}

	/*
	 * Initialize the property window p over the data of reader r.
	 *
	 * The state pointer, the no_prop value and the three callbacks are
	 * stored in p. The mid, raw and skip readers all start as copies of
	 * r. Both previous-property pairs are set to no_prop. The two raw
	 * next-properties are filled from the first (up to) two codepoints,
	 * the two skip next-properties from the first (up to) two codepoints
	 * whose property is not skippable; slots that cannot be filled keep
	 * no_prop.
	 */
	void
	proper_init(const HERODOTUS_READER *r, void *state, uint_least8_t no_prop,
	            uint_least8_t (*get_break_prop)(uint_least32_t),
	            bool (*is_skippable_prop)(uint_least8_t),
	            void (*skip_shift_callback)(uint_least8_t, void *),
	            struct proper *p)
	{
		uint_least8_t prop;
		uint_least32_t cp;
		size_t i;

		/* set internal variables */
		p->state = state;
		p->no_prop = no_prop;
		p->get_break_prop = get_break_prop;
		p->is_skippable_prop = is_skippable_prop;
		p->skip_shift_callback = skip_shift_callback;

		/*
		 * Initialize mid-reader, which is basically just there
		 * to reflect the current position of the viewing-line
		 */
		herodotus_reader_copy(r, &(p->mid_reader));

		/*
		 * initialize the previous properties to have no property
		 * (given we are at the start of the buffer)
		 */
		p->raw.prev_prop[1] = p->raw.prev_prop[0] = p->no_prop;
		p->skip.prev_prop[1] = p->skip.prev_prop[0] = p->no_prop;

		/* initialize the raw reader */
		herodotus_reader_copy(r, &(p->raw_reader));

		/* fill in the two next raw properties (after no-initialization) */
		p->raw.next_prop[0] = p->raw.next_prop[1] = p->no_prop;
		for (i = 0; i < 2 && herodotus_read_codepoint(&(p->raw_reader), true, &cp) ==
		     HERODOTUS_STATUS_SUCCESS; ) {
			p->raw.next_prop[i++] = p->get_break_prop(cp);
		}

		/* initialize the skip reader */
		herodotus_reader_copy(r, &(p->skip_reader));

		/* fill in the two next skip properties (after no-initialization) */
		p->skip.next_prop[0] = p->skip.next_prop[1] = p->no_prop;
		for (i = 0; i < 2 && herodotus_read_codepoint(&(p->skip_reader), true, &cp) ==
		     HERODOTUS_STATUS_SUCCESS; ) {
			prop = p->get_break_prop(cp);
			if (!p->is_skippable_prop(prop)) {
				p->skip.next_prop[i++] = prop;
			}
		}
	}

	/*
	 * Move the viewing-line of p forward by one codepoint.
	 *
	 * Returns 1 without shifting anything when the property right after
	 * the raw viewing-line (p->raw.next_prop[0]) is no_prop, which
	 * indicates the end of the buffer. Otherwise the raw window is
	 * shifted by one, the mid-reader is advanced and, if the property
	 * that just passed the line is not skippable, the skip window is
	 * shifted as well and skip_shift_callback is invoked; 0 is returned.
	 */
	int
	proper_advance(struct proper *p)
	{
		uint_least32_t cp;
		uint_least8_t incoming, candidate;

		/* read in the next "raw" property; no_prop if nothing is left */
		incoming = p->no_prop;
		if (herodotus_read_codepoint(&(p->raw_reader), true, &cp) ==
		    HERODOTUS_STATUS_SUCCESS) {
			incoming = p->get_break_prop(cp);
		}

		/* nothing is left to move past the raw viewing-line */
		if (p->raw.next_prop[0] == p->no_prop) {
			return 1;
		}

		/* shift the raw window by one */
		p->raw.prev_prop[1] = p->raw.prev_prop[0];
		p->raw.prev_prop[0] = p->raw.next_prop[0];
		p->raw.next_prop[0] = p->raw.next_prop[1];
		p->raw.next_prop[1] = incoming;

		/* advance the middle reader viewing-line */
		(void)herodotus_read_codepoint(&(p->mid_reader), true, &cp);

		/*
		 * A skippable property that passed the raw viewing-line (it
		 * is now in p->raw.prev_prop[0]) leaves the skip window as
		 * it is.
		 */
		if (p->is_skippable_prop(p->raw.prev_prop[0])) {
			return 0;
		}

		/*
		 * Otherwise shift the skip window as well. The property that
		 * passed the raw line is not a no-prop (checked above).
		 */
		p->skip.prev_prop[1] = p->skip.prev_prop[0];
		p->skip.prev_prop[0] = p->skip.next_prop[0];
		p->skip.next_prop[0] = p->skip.next_prop[1];

		/* report the property that passed the skip-viewing-line */
		p->skip_shift_callback(p->skip.prev_prop[0], p->state);

		/* look for the next non-skippable property to shift in */
		p->skip.next_prop[1] = p->no_prop;
		while (herodotus_read_codepoint(&(p->skip_reader), true, &cp) ==
		       HERODOTUS_STATUS_SUCCESS) {
			candidate = p->get_break_prop(cp);
			if (!p->is_skippable_prop(candidate)) {
				p->skip.next_prop[1] = candidate;
				break;
			}
		}

		return 0;
	}

	/*
	 * Read element offset of the codepoint array str (len elements) into
	 * *cp and return 1. If offset is not below len, *cp is set to
	 * GRAPHEME_INVALID_CODEPOINT and 0 is returned.
	 */
	inline size_t
	get_codepoint(const void *str, size_t len, size_t offset, uint_least32_t *cp)
	{
		if (offset < len) {
			*cp = ((const uint_least32_t *)str)[offset];
			return 1;
		} else {
			*cp = GRAPHEME_INVALID_CODEPOINT;
			return 0;
		}
	}

	/*
	 * Decode the codepoint starting at byte offset of the UTF-8 buffer
	 * str (len bytes) into *cp and return what grapheme_decode_utf8()
	 * returned for it.
	 *
	 * 0 is returned instead when offset is not below len (*cp is then
	 * GRAPHEME_INVALID_CODEPOINT) or when len is SIZE_MAX and the decoded
	 * codepoint is 0, i.e. the NUL-byte has been reached.
	 */
	inline size_t
	get_codepoint_utf8(const void *str, size_t len, size_t offset, uint_least32_t *cp)
	{
		size_t ret;

		if (offset >= len) {
			*cp = GRAPHEME_INVALID_CODEPOINT;
			return 0;
		}

		ret = grapheme_decode_utf8((const char *)str + offset,
		                           len - offset, cp);

		/* test the decoded value, not the pointer it was stored through */
		if (unlikely(len == SIZE_MAX && *cp == 0)) {
			return 0;
		}

		return ret;
	}

	/*
	 * Store cp as element offset of the codepoint array str (len
	 * elements). Returns 1 when the element was stored or when there is
	 * no buffer (str is NULL or len is 0), and 0 when offset is not
	 * below len.
	 */
	inline size_t
	set_codepoint(uint_least32_t cp, void *str, size_t len, size_t offset)
	{
		/* without a buffer only the count is reported */
		if (str == NULL || len == 0) {
			return 1;
		}

		if (offset < len) {
			((uint_least32_t *)str)[offset] = cp;
			return 1;
		} else {
			return 0;
		}
	}

	/*
	 * Encode cp as UTF-8 at byte offset of the buffer str (len bytes) and
	 * return what grapheme_encode_utf8() returned. When there is no
	 * buffer (str is NULL or len is 0) or offset is not below len, the
	 * encoder is called with no buffer at all.
	 */
	inline size_t
	set_codepoint_utf8(uint_least32_t cp, void *str, size_t len, size_t offset)
	{
		if (str == NULL || len == 0) {
			return grapheme_encode_utf8(cp, NULL, 0);
		}

		if (offset < len) {
			return grapheme_encode_utf8(cp, (char *)str + offset,
			                            len - offset);
		} else {
			/* nowhere to store anything */
			return grapheme_encode_utf8(cp, NULL, 0);
		}
	}