/* See LICENSE file for copyright and license details. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "util.h"

#define FILE_EAW   "data/EastAsianWidth.txt"
#define FILE_EMOJI "data/emoji-data.txt"
#define FILE_LINE  "data/LineBreak.txt"

/*
 * Table of all line break classes known to this generator. Each entry
 * names the enum constant (.enumname) and, where the class is taken
 * directly from a data file, that file (.file) and the identifier
 * to match in it (.ucdname).
 *
 * Entries with .file = NULL and .ucdname = NULL are not tied to any
 * data file. BOTH_CN_EXTPICT, CP_WITH_EAW_HWF, OP_WITH_EAW_HWF and
 * TMP_SA_WITH_MN_OR_MC are derived classes that are meant to be
 * assigned by handle_conflict() when two file-based classes overlap
 * on the same code point; TMP_XX is mapped to AL in post_process().
 *
 * All TMP_* classes are intermediate: post_process() folds whatever
 * is left of them into a final class.
 */
static const struct property_spec line_break_property[] = {
	{
		.enumname = "AL",
		.file = FILE_LINE,
		.ucdname = "AL",
	},
	/*
	 * Both extended pictographic and cn are large classes,
	 * but we are only interested in their intersection for LB30b,
	 * so we have the following two temporary classes. At first
	 * the extpict-class is filled, then the cn-class, which leads
	 * to conflicts (that we handle by putting them in the "proper"
	 * class BOTH_CN_EXTPICT). We make use of the fact that there
	 * is no intersection between AL and Cn.
	 *
	 * Any subsequent conflicts are permitted to overwrite
	 * TMP_EXTENDED_PICTOGRAPHIC and TMP_CN, because we don't need
	 * them, and in the final postprocessing we "reset" all
	 * remaining matches (that then didn't fit any of the other
	 * classes) to the generic class AL.
	 */
	{
		.enumname = "TMP_CN",
		.file = FILE_LINE,
		.ucdname = "Cn",
	},
	{
		.enumname = "TMP_EXTENDED_PICTOGRAPHIC",
		.file = FILE_EMOJI,
		.ucdname = "Extended_Pictographic",
	},
	/* end of special block */
	{
		.enumname = "B2",
		.file = FILE_LINE,
		.ucdname = "B2",
	},
	{
		.enumname = "BA",
		.file = FILE_LINE,
		.ucdname = "BA",
	},
	{
		.enumname = "BB",
		.file = FILE_LINE,
		.ucdname = "BB",
	},
	{
		.enumname = "BK",
		.file = FILE_LINE,
		.ucdname = "BK",
	},
	/* derived: code point is both Cn and Extended_Pictographic */
	{
		.enumname = "BOTH_CN_EXTPICT",
		.file = NULL,
		.ucdname = NULL,
	},
	{
		.enumname = "CB",
		.file = FILE_LINE,
		.ucdname = "CB",
	},
	{
		.enumname = "CL",
		.file = FILE_LINE,
		.ucdname = "CL",
	},
	{
		.enumname = "CM",
		.file = FILE_LINE,
		.ucdname = "CM",
	},
	{
		.enumname = "CP_WITHOUT_EAW_HWF",
		.file = FILE_LINE,
		.ucdname = "CP",
	},
	/* derived: CP that also has East Asian Width H, W or F */
	{
		.enumname = "CP_WITH_EAW_HWF",
		.file = NULL,
		.ucdname = NULL,
	},
	{
		.enumname = "CR",
		.file = FILE_LINE,
		.ucdname = "CR",
	},
	{
		.enumname = "EB",
		.file = FILE_LINE,
		.ucdname = "EB",
	},
	{
		.enumname = "EM",
		.file = FILE_LINE,
		.ucdname = "EM",
	},
	{
		.enumname = "EX",
		.file = FILE_LINE,
		.ucdname = "EX",
	},
	{
		.enumname = "GL",
		.file = FILE_LINE,
		.ucdname = "GL",
	},
	{
		.enumname = "H2",
		.file = FILE_LINE,
		.ucdname = "H2",
	},
	{
		.enumname = "H3",
		.file = FILE_LINE,
		.ucdname = "H3",
	},
	{
		.enumname = "HL",
		.file = FILE_LINE,
		.ucdname = "HL",
	},
	{
		.enumname = "HY",
		.file = FILE_LINE,
		.ucdname = "HY",
	},
	{
		.enumname = "ID",
		.file = FILE_LINE,
		.ucdname = "ID",
	},
	{
		.enumname = "IN",
		.file = FILE_LINE,
		.ucdname = "IN",
	},
	{
		.enumname = "IS",
		.file = FILE_LINE,
		.ucdname = "IS",
	},
	{
		.enumname = "JL",
		.file = FILE_LINE,
		.ucdname = "JL",
	},
	{
		.enumname = "JT",
		.file = FILE_LINE,
		.ucdname = "JT",
	},
	{
		.enumname = "JV",
		.file = FILE_LINE,
		.ucdname = "JV",
	},
	{
		.enumname = "LF",
		.file = FILE_LINE,
		.ucdname = "LF",
	},
	{
		.enumname = "NL",
		.file = FILE_LINE,
		.ucdname = "NL",
	},
	{
		.enumname = "NS",
		.file = FILE_LINE,
		.ucdname = "NS",
	},
	{
		.enumname = "NU",
		.file = FILE_LINE,
		.ucdname = "NU",
	},
	{
		.enumname = "OP_WITHOUT_EAW_HWF",
		.file = FILE_LINE,
		.ucdname = "OP",
	},
	/* derived: OP that also has East Asian Width H, W or F */
	{
		.enumname = "OP_WITH_EAW_HWF",
		.file = NULL,
		.ucdname = NULL,
	},
	{
		.enumname = "PO",
		.file = FILE_LINE,
		.ucdname = "PO",
	},
	{
		.enumname = "PR",
		.file = FILE_LINE,
		.ucdname = "PR",
	},
	{
		.enumname = "QU",
		.file = FILE_LINE,
		.ucdname = "QU",
	},
	{
		.enumname = "RI",
		.file = FILE_LINE,
		.ucdname = "RI",
	},
	{
		.enumname = "SP",
		.file = FILE_LINE,
		.ucdname = "SP",
	},
	{
		.enumname = "SY",
		.file = FILE_LINE,
		.ucdname = "SY",
	},
	{
		.enumname = "WJ",
		.file = FILE_LINE,
		.ucdname = "WJ",
	},
	{
		.enumname = "ZW",
		.file = FILE_LINE,
		.ucdname = "ZW",
	},
	{
		.enumname = "ZWJ",
		.file = FILE_LINE,
		.ucdname = "ZWJ",
	},
	{
		.enumname = "TMP_AI",
		.file = FILE_LINE,
		.ucdname = "AI",
	},
	{
		.enumname = "TMP_CJ",
		.file = FILE_LINE,
		.ucdname = "CJ",
	},
	/* not read from a file; post_process() maps it to AL */
	{
		.enumname = "TMP_XX",
		.file = NULL,
		.ucdname = NULL,
	},
	{
		.enumname = "TMP_MN",
		.file = FILE_LINE,
		.ucdname = "Mn",
	},
	{
		.enumname = "TMP_MC",
		.file = FILE_LINE,
		.ucdname = "Mc",
	},
	{
		.enumname = "TMP_SA_WITHOUT_MN_OR_MC",
		.file = FILE_LINE,
		.ucdname = "SA",
	},
	/*
	 * derived: SA that also has General_Category Mn or Mc. It must
	 * not carry a file/ucdname of its own: with the same
	 * (FILE_LINE, "SA") pair as the entry above, every SA line
	 * would match both classes.
	 */
	{
		.enumname = "TMP_SA_WITH_MN_OR_MC",
		.file = NULL,
		.ucdname = NULL,
	},
	{
		.enumname = "TMP_SG",
		.file = FILE_LINE,
		.ucdname = "SG",
	},
	{
		.enumname = "TMP_EAW_H",
		.file = FILE_EAW,
		.ucdname = "H",
	},
	{
		.enumname = "TMP_EAW_W",
		.file = FILE_EAW,
		.ucdname = "W",
	},
	{
		.enumname = "TMP_EAW_F",
		.file = FILE_EAW,
		.ucdname = "F",
	},
};

/*
 * Returns non-zero if name is one of the temporary East Asian Width
 * classes TMP_EAW_H, TMP_EAW_W or TMP_EAW_F.
 */
static int
lb_is_tmp_eaw(const char *name)
{
	return !strcmp(name, "TMP_EAW_H") || !strcmp(name, "TMP_EAW_W") ||
	       !strcmp(name, "TMP_EAW_F");
}

/*
 * Returns non-zero if name is one of the temporary General_Category
 * classes TMP_MN or TMP_MC.
 */
static int
lb_is_tmp_mark(const char *name)
{
	return !strcmp(name, "TMP_MN") || !strcmp(name, "TMP_MC");
}

/*
 * Decides which class a code point gets when two classes (given as
 * indices into line_break_property) apply to it. The code point cp
 * itself is not inspected.
 *
 * NOTE(review): presumably prop1 is the class already recorded and
 * prop2 the newly matched one (the error message prints
 * "prop1 <- prop2"); confirm against the caller in util.c.
 *
 * Resolution rules, checked in this order:
 *  - one side is TMP_EAW_{H,W,F}: CP_WITHOUT_EAW_HWF and
 *    OP_WITHOUT_EAW_HWF are promoted to their *_WITH_EAW_HWF
 *    counterpart, for anything else the EAW class is dropped
 *  - one side is TMP_MN/TMP_MC: TMP_SA_WITHOUT_MN_OR_MC is promoted
 *    to TMP_SA_WITH_MN_OR_MC, for anything else Mn/Mc is dropped
 *  - one side is TMP_CN: together with TMP_EXTENDED_PICTOGRAPHIC it
 *    becomes BOTH_CN_EXTPICT, for anything else Cn is dropped
 *  - one side is TMP_EXTENDED_PICTOGRAPHIC: it is dropped
 *
 * Any other combination is fatal: a diagnostic is written to stderr
 * and the program exits with status 1.
 *
 * Returns the index of the winning class.
 */
static uint_least8_t
handle_conflict(uint_least32_t cp, uint_least8_t prop1, uint_least8_t prop2)
{
	const char *name1 = line_break_property[prop1].enumname;
	const char *name2 = line_break_property[prop2].enumname;
	const char *target = NULL;
	uint_least8_t result = prop2;

	(void)cp;

	if (lb_is_tmp_eaw(name1) || lb_is_tmp_eaw(name2)) {
		if (!strcmp(name1, "CP_WITHOUT_EAW_HWF") ||
		    !strcmp(name2, "CP_WITHOUT_EAW_HWF")) {
			target = "CP_WITH_EAW_HWF";
		} else if (!strcmp(name1, "OP_WITHOUT_EAW_HWF") ||
		           !strcmp(name2, "OP_WITHOUT_EAW_HWF")) {
			target = "OP_WITH_EAW_HWF";
		} else {
			/* ignore EAW for the rest */
			result = lb_is_tmp_eaw(name1) ? prop2 : prop1;
		}
	} else if (lb_is_tmp_mark(name1) || lb_is_tmp_mark(name2)) {
		/*
		 * The class names must carry the TMP_ prefix used in
		 * line_break_property; without it the comparison never
		 * matches and SA marks would never reach the class that
		 * post_process() maps to CM.
		 */
		if (!strcmp(name1, "TMP_SA_WITHOUT_MN_OR_MC") ||
		    !strcmp(name2, "TMP_SA_WITHOUT_MN_OR_MC")) {
			target = "TMP_SA_WITH_MN_OR_MC";
		} else {
			/* ignore Mn and Mc for the rest */
			result = lb_is_tmp_mark(name1) ? prop2 : prop1;
		}
	} else if (!strcmp(name1, "TMP_CN") || !strcmp(name2, "TMP_CN")) {
		if (!strcmp(name1, "TMP_EXTENDED_PICTOGRAPHIC") ||
		    !strcmp(name2, "TMP_EXTENDED_PICTOGRAPHIC")) {
			target = "BOTH_CN_EXTPICT";
		} else {
			/* ignore Cn for all the other properties */
			result = !strcmp(name1, "TMP_CN") ? prop2 : prop1;
		}
	} else if (!strcmp(name1, "TMP_EXTENDED_PICTOGRAPHIC") ||
	           !strcmp(name2, "TMP_EXTENDED_PICTOGRAPHIC")) {
		/*
		 * Neither side can be TMP_CN here (that case is caught by
		 * the branch above), so ignore Extended_Pictographic for
		 * all the other properties.
		 */
		result = !strcmp(name1, "TMP_EXTENDED_PICTOGRAPHIC") ? prop2 :
		                                                       prop1;
	} else {
		fprintf(stderr,
		        "handle_conflict: Cannot handle conflict %s <- %s.\n",
		        name1, name2);
		exit(1);
	}

	if (target) {
		/* translate the derived class name into its table index */
		for (result = 0; result < LEN(line_break_property); result++) {
			if (!strcmp(line_break_property[result].enumname,
			            target)) {
				break;
			}
		}
		if (result == LEN(line_break_property)) {
			fprintf(stderr, "handle_conflict: Internal error.\n");
			exit(1);
		}
	}

	return result;
}

/*
 * Final pass over all 0x110000 code points in prop: every temporary
 * (TMP_*) class still assigned to a code point is replaced by the
 * class the line breaking algorithm uses for it. All other classes
 * are left as they are.
 *
 * The replacement depends only on the class, so it is resolved once
 * per entry of line_break_property into a remap table, which is then
 * applied to each code point.
 *
 * If a replacement class cannot be found in line_break_property, a
 * diagnostic is written to stderr and the program exits with status 1.
 */
static void
post_process(struct properties *prop)
{
	/* post-mapping according to the line breaking algorithm */
	static const struct {
		const char *from; /* temporary class to replace */
		const char *to;   /* class it is replaced with */
	} rule[] = {
		/* LB1: map AI, SG and XX to AL */
		{ "TMP_AI", "AL" },
		{ "TMP_SG", "AL" },
		{ "TMP_XX", "AL" },
		/* LB1: map SA (with General_Category Mn or Mc) to CM */
		{ "TMP_SA_WITH_MN_OR_MC", "CM" },
		/* LB1: map SA (without General_Category Mn or Mc) to AL */
		{ "TMP_SA_WITHOUT_MN_OR_MC", "AL" },
		/* LB1: map CJ to NS */
		{ "TMP_CJ", "NS" },
		/* map all the temporary classes "residue" to AL */
		{ "TMP_CN", "AL" },
		{ "TMP_EXTENDED_PICTOGRAPHIC", "AL" },
		{ "TMP_MN", "AL" },
		{ "TMP_MC", "AL" },
		{ "TMP_EAW_H", "AL" },
		{ "TMP_EAW_W", "AL" },
		{ "TMP_EAW_F", "AL" },
	};
	uint_least8_t remap[LEN(line_break_property)];
	uint_least8_t from, to;
	size_t i, r;

	for (from = 0; from < LEN(line_break_property); from++) {
		/* classes without a rule map to themselves */
		remap[from] = from;

		for (r = 0; r < sizeof(rule) / sizeof(*rule); r++) {
			if (strcmp(line_break_property[from].enumname,
			           rule[r].from)) {
				continue;
			}

			/* look up the index of the replacement class */
			for (to = 0; to < LEN(line_break_property); to++) {
				if (!strcmp(line_break_property[to].enumname,
				            rule[r].to)) {
					break;
				}
			}
			if (to == LEN(line_break_property)) {
				fprintf(stderr,
				        "post_process: Internal error.\n");
				exit(1);
			}

			remap[from] = to;
			break;
		}
	}

	for (i = 0; i < UINT32_C(0x110000); i++) {
		prop[i].property = remap[prop[i].property];
	}
}

/*
 * Entry point of the generator: hands the line break class table and
 * the two callbacks defined in this file to
 * properties_generate_break_property(), under the name "line_break"
 * and with the program name taken from argv[0]. Always returns 0.
 */
int
main(int argc, char *argv[])
{
	char *argv0 = argv[0];

	/* the argument count is not needed */
	(void)argc;

	properties_generate_break_property(line_break_property,
	                                   LEN(line_break_property), NULL,
	                                   handle_conflict, post_process,
	                                   "line_break", argv0);

	return 0;
}
