| /******************************************************************** | 
 |  * COPYRIGHT: | 
 |  * Copyright (C) 2002-2003 IBM, Inc.   All Rights Reserved. | 
 |  * | 
 |  ********************************************************************/ | 
 |  | 
/**
 * This program demos collation-based string searching
 * (the ICU usearch API).
 */
 |  | 
 | const char gHelpString[] = | 
 |     "usage: strsrch [options*] -source source_string -pattern pattern_string\n" | 
 |     "-help            Display this message.\n" | 
 |     "-locale name     ICU locale to use.  Default is en_US\n" | 
 |     "-rules rule      Collation rules file (overrides locale)\n" | 
 |     "-french          French accent ordering\n" | 
 |     "-norm            Normalizing mode on\n" | 
 |     "-shifted         Shifted mode\n" | 
 |     "-lower           Lower case first\n" | 
 |     "-upper           Upper case first\n" | 
 |     "-case            Enable separate case level\n" | 
 |     "-level n         Sort level, 1 to 5, for Primary, Secndary, Tertiary, Quaternary, Identical\n" | 
 | 	"-source string   Source string\n" | 
 | 	"-pattern string  Pattern string to look for in source\n" | 
 | 	"-overlap         Enable searching to be done on overlapping patterns\n" | 
 | 	"-canonical       Enable searching to be done matching canonical equivalent patterns" | 
 |     "Example strsrch -rules \\u0026b\\u003ca -source a\\u0020b\\u0020bc -pattern b\n" | 
 | 	"The format \\uXXXX is supported for the rules and comparison strings\n" | 
 | 	; | 
 |  | 
 | #include <stdio.h> | 
 | #include <string.h> | 
 | #include <stdlib.h> | 
 |  | 
 | #include <unicode/utypes.h> | 
 | #include <unicode/ucol.h> | 
 | #include <unicode/usearch.h> | 
 | #include <unicode/ustring.h> | 
 |  | 
 | /**  | 
 |  * Command line option variables | 
 |  *    These global variables are set according to the options specified | 
 |  *    on the command line by the user. | 
 |  */ | 
 | char * opt_locale      = "en_US"; | 
 | char * opt_rules       = 0; | 
 | UBool  opt_help        = FALSE; | 
 | UBool  opt_norm        = FALSE; | 
 | UBool  opt_french      = FALSE; | 
 | UBool  opt_shifted     = FALSE; | 
 | UBool  opt_lower       = FALSE; | 
 | UBool  opt_upper       = FALSE; | 
 | UBool  opt_case        = FALSE; | 
 | UBool  opt_overlap     = FALSE; | 
 | UBool  opt_canonical   = FALSE; | 
 | int    opt_level       = 0; | 
 | char * opt_source      = "International Components for Unicode"; | 
 | char * opt_pattern     = "Unicode"; | 
 | UCollator * collator   = 0; | 
 | UStringSearch * search = 0; | 
 | UChar rules[100]; | 
 | UChar source[100]; | 
 | UChar pattern[100]; | 
 |  | 
 | /**  | 
 |  * Definitions for the command line options | 
 |  */ | 
 | struct OptSpec { | 
 |     const char *name; | 
 |     enum {FLAG, NUM, STRING} type; | 
 |     void *pVar; | 
 | }; | 
 |  | 
 | OptSpec opts[] = { | 
 |     {"-locale",      OptSpec::STRING, &opt_locale}, | 
 |     {"-rules",       OptSpec::STRING, &opt_rules}, | 
 | 	{"-source",      OptSpec::STRING, &opt_source}, | 
 |     {"-pattern",     OptSpec::STRING, &opt_pattern}, | 
 |     {"-norm",        OptSpec::FLAG,   &opt_norm}, | 
 |     {"-french",      OptSpec::FLAG,   &opt_french}, | 
 |     {"-shifted",     OptSpec::FLAG,   &opt_shifted}, | 
 |     {"-lower",       OptSpec::FLAG,   &opt_lower}, | 
 |     {"-upper",       OptSpec::FLAG,   &opt_upper}, | 
 |     {"-case",        OptSpec::FLAG,   &opt_case}, | 
 |     {"-level",       OptSpec::NUM,    &opt_level}, | 
 | 	{"-overlap",     OptSpec::FLAG,   &opt_overlap}, | 
 | 	{"-canonical",   OptSpec::FLAG,   &opt_canonical}, | 
 |     {"-help",        OptSpec::FLAG,   &opt_help}, | 
 |     {"-?",           OptSpec::FLAG,   &opt_help}, | 
 |     {0, OptSpec::FLAG, 0} | 
 | }; | 
 |  | 
 | /**   | 
 |  * processOptions()  Function to read the command line options. | 
 |  */ | 
 | UBool processOptions(int argc, const char **argv, OptSpec opts[]) | 
 | { | 
 |     for (int argNum = 1; argNum < argc; argNum ++) { | 
 |         const char *pArgName = argv[argNum]; | 
 |         for (OptSpec *pOpt = opts;  pOpt->name != 0; pOpt ++) { | 
 |             if (strcmp(pOpt->name, pArgName) == 0) { | 
 |                 switch (pOpt->type) { | 
 |                 case OptSpec::FLAG: | 
 |                     *(UBool *)(pOpt->pVar) = TRUE; | 
 |                     break; | 
 |                 case OptSpec::STRING: | 
 |                     argNum ++; | 
 |                     if (argNum >= argc) { | 
 |                         fprintf(stderr, "value expected for \"%s\" option.\n",  | 
 | 							    pOpt->name); | 
 |                         return FALSE; | 
 |                     } | 
 |                     *(const char **)(pOpt->pVar) = argv[argNum]; | 
 |                     break; | 
 |                 case OptSpec::NUM: | 
 |                     argNum ++; | 
 |                     if (argNum >= argc) { | 
 |                         fprintf(stderr, "value expected for \"%s\" option.\n",  | 
 | 							    pOpt->name); | 
 |                         return FALSE; | 
 |                     } | 
 |                     char *endp; | 
 |                     int i = strtol(argv[argNum], &endp, 0); | 
 |                     if (endp == argv[argNum]) { | 
 |                         fprintf(stderr,  | 
 | 							    "integer value expected for \"%s\" option.\n",  | 
 | 								pOpt->name); | 
 |                         return FALSE; | 
 |                     } | 
 |                     *(int *)(pOpt->pVar) = i; | 
 |                 } | 
 |                 break; | 
 |             } | 
 |         } | 
 |         if (pOpt->name == 0) | 
 |         { | 
 |             fprintf(stderr, "Unrecognized option \"%s\"\n", pArgName); | 
 |             return FALSE; | 
 |         } | 
 |     } | 
 | 	return TRUE; | 
 | } | 
 |  | 
 | /** | 
 |  * Creates a collator | 
 |  */ | 
 | UBool processCollator() | 
 | { | 
 | 	// Set up an ICU collator | 
 |     UErrorCode status = U_ZERO_ERROR; | 
 |  | 
 |     if (opt_rules != 0) { | 
 | 		u_unescape(opt_rules, rules, 100); | 
 |         collator = ucol_openRules(rules, -1, UCOL_OFF, UCOL_TERTIARY,  | 
 | 			                  NULL, &status); | 
 |     } | 
 |     else { | 
 |         collator = ucol_open(opt_locale, &status); | 
 |     } | 
 | 	if (U_FAILURE(status)) { | 
 |         fprintf(stderr, "Collator creation failed.: %d\n", status); | 
 |         return FALSE; | 
 |     } | 
 |     if (status == U_USING_DEFAULT_WARNING) { | 
 |         fprintf(stderr, "Warning, U_USING_DEFAULT_WARNING for %s\n",  | 
 | 			    opt_locale); | 
 |     } | 
 |     if (status == U_USING_FALLBACK_WARNING) { | 
 |         fprintf(stderr, "Warning, U_USING_FALLBACK_ERROR for %s\n",  | 
 | 			    opt_locale); | 
 |     } | 
 |     if (opt_norm) { | 
 |         ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); | 
 |     } | 
 |     if (opt_french) { | 
 |         ucol_setAttribute(collator, UCOL_FRENCH_COLLATION, UCOL_ON, &status); | 
 |     } | 
 |     if (opt_lower) { | 
 |         ucol_setAttribute(collator, UCOL_CASE_FIRST, UCOL_LOWER_FIRST,  | 
 | 			              &status); | 
 |     } | 
 |     if (opt_upper) { | 
 |         ucol_setAttribute(collator, UCOL_CASE_FIRST, UCOL_UPPER_FIRST,  | 
 | 			              &status); | 
 |     } | 
 |     if (opt_case) { | 
 |         ucol_setAttribute(collator, UCOL_CASE_LEVEL, UCOL_ON, &status); | 
 |     } | 
 |     if (opt_shifted) { | 
 |         ucol_setAttribute(collator, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED,  | 
 | 			              &status); | 
 |     } | 
 |     if (opt_level != 0) { | 
 |         switch (opt_level) { | 
 |         case 1: | 
 |             ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_PRIMARY, &status); | 
 |             break; | 
 |         case 2: | 
 |             ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_SECONDARY,  | 
 | 				              &status); | 
 |             break; | 
 |         case 3: | 
 |             ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_TERTIARY, &status); | 
 |             break; | 
 |         case 4: | 
 |             ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_QUATERNARY,  | 
 | 				              &status); | 
 |             break; | 
 |         case 5: | 
 |             ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_IDENTICAL,  | 
 | 				              &status); | 
 |             break; | 
 |         default: | 
 |             fprintf(stderr, "-level param must be between 1 and 5\n"); | 
 |             return FALSE; | 
 |         } | 
 |     } | 
 |     if (U_FAILURE(status)) { | 
 |         fprintf(stderr, "Collator attribute setting failed.: %d\n", status); | 
 |         return FALSE; | 
 |     } | 
 | 	return TRUE; | 
 | } | 
 |  | 
 | /** | 
 |  * Creates a string search | 
 |  */ | 
 | UBool processStringSearch() | 
 | { | 
 | 	u_unescape(opt_source, source, 100); | 
 | 	u_unescape(opt_pattern, pattern, 100); | 
 | 	UErrorCode status = U_ZERO_ERROR; | 
 | 	search = usearch_openFromCollator(pattern, -1, source, -1, collator, NULL,  | 
 | 		                              &status); | 
 | 	if (U_FAILURE(status)) { | 
 | 		return FALSE; | 
 | 	} | 
 | 	if (opt_overlap == TRUE) { | 
 | 		usearch_setAttribute(search, USEARCH_OVERLAP, USEARCH_ON, &status); | 
 | 	} | 
 | 	if (opt_canonical == TRUE) { | 
 | 		usearch_setAttribute(search, USEARCH_CANONICAL_MATCH, USEARCH_ON,  | 
 | 			                 &status); | 
 | 	} | 
 | 	if (U_FAILURE(status)) { | 
 | 		fprintf(stderr, "Error setting search attributes\n"); | 
 | 		return FALSE; | 
 | 	} | 
 | 	return TRUE; | 
 | } | 
 |  | 
 | UBool findPattern() | 
 | { | 
 | 	UErrorCode status = U_ZERO_ERROR; | 
 | 	int32_t offset = usearch_next(search, &status); | 
 | 	if (offset == USEARCH_DONE) { | 
 | 		fprintf(stdout, "Pattern not found in source\n"); | 
 | 	} | 
 | 	while (offset != USEARCH_DONE) { | 
 | 		fprintf(stdout, "Pattern found at offset %d size %d\n", offset, | 
 | 				usearch_getMatchedLength(search)); | 
 | 		offset = usearch_next(search, &status); | 
 | 	} | 
 | 	if (U_FAILURE(status)) { | 
 | 		fprintf(stderr, "Error in searching for pattern %d\n", status); | 
 | 		return FALSE; | 
 | 	} | 
 | 	fprintf(stdout, "End of search\n"); | 
 | 	return TRUE; | 
 | } | 
 |  | 
 | /**  | 
 |  * Main   --  process command line, read in and pre-process the test file, | 
 |  *            call other functions to do the actual tests. | 
 |  */ | 
 | int main(int argc, const char** argv)  | 
 | { | 
 |     if (processOptions(argc, argv, opts) != TRUE || opt_help) { | 
 |         printf(gHelpString); | 
 |         return -1; | 
 |     } | 
 |  | 
 |     if (processCollator() != TRUE) { | 
 | 		fprintf(stderr, "Error creating collator\n"); | 
 | 		return -1; | 
 | 	} | 
 |  | 
 | 	if (processStringSearch() != TRUE) { | 
 | 		fprintf(stderr, "Error creating string search\n"); | 
 | 		return -1; | 
 | 	} | 
 |  | 
 | 	fprintf(stdout, "Finding pattern %s in source %s\n", opt_pattern,  | 
 | 		    opt_source); | 
 |  | 
 | 	findPattern(); | 
 | 	ucol_close(collator); | 
 | 	usearch_close(search); | 
 | 	return 0; | 
 | } |