|  | /******************************************************************** | 
|  | * COPYRIGHT: | 
|  | * Copyright (C) 2002-2003 IBM, Inc.   All Rights Reserved. | 
|  | * | 
|  | ********************************************************************/ | 
|  |  | 
/**
* This program demos collation-based string searching (ICU usearch)
*/
|  |  | 
/**
 * Usage text shown for -help / -? and whenever option parsing fails.
 * Every line, including the last one, ends with a newline so the text can
 * be written to the terminal as-is.
 */
const char gHelpString[] =
    "usage: strsrch [options*] -source source_string -pattern pattern_string\n"
    "-help            Display this message.\n"
    "-locale name     ICU locale to use.  Default is en_US\n"
    "-rules rule      Collation rules string (overrides locale)\n"
    "-french          French accent ordering\n"
    "-norm            Normalizing mode on\n"
    "-shifted         Shifted mode\n"
    "-lower           Lower case first\n"
    "-upper           Upper case first\n"
    "-case            Enable separate case level\n"
    "-level n         Sort level, 1 to 5, for Primary, Secondary, Tertiary, Quaternary, Identical\n"
    "-source string   Source string\n"
    "-pattern string  Pattern string to look for in source\n"
    "-overlap         Enable searching to be done on overlapping patterns\n"
    "-canonical       Enable searching to be done matching canonical equivalent patterns\n"
    "Example strsrch -rules \\u0026b\\u003ca -source a\\u0020b\\u0020bc -pattern b\n"
    "The format \\uXXXX is supported for the rules and comparison strings\n"
    ;
|  |  | 
|  | #include <stdio.h> | 
|  | #include <string.h> | 
|  | #include <stdlib.h> | 
|  |  | 
|  | #include <unicode/utypes.h> | 
|  | #include <unicode/ucol.h> | 
|  | #include <unicode/usearch.h> | 
|  | #include <unicode/ustring.h> | 
|  |  | 
|  | /** | 
|  | * Command line option variables | 
|  | *    These global variables are set according to the options specified | 
|  | *    on the command line by the user. | 
|  | */ | 
|  | char * opt_locale      = "en_US"; | 
|  | char * opt_rules       = 0; | 
|  | UBool  opt_help        = FALSE; | 
|  | UBool  opt_norm        = FALSE; | 
|  | UBool  opt_french      = FALSE; | 
|  | UBool  opt_shifted     = FALSE; | 
|  | UBool  opt_lower       = FALSE; | 
|  | UBool  opt_upper       = FALSE; | 
|  | UBool  opt_case        = FALSE; | 
|  | UBool  opt_overlap     = FALSE; | 
|  | UBool  opt_canonical   = FALSE; | 
|  | int    opt_level       = 0; | 
|  | char * opt_source      = "International Components for Unicode"; | 
|  | char * opt_pattern     = "Unicode"; | 
|  | UCollator * collator   = 0; | 
|  | UStringSearch * search = 0; | 
|  | UChar rules[100]; | 
|  | UChar source[100]; | 
|  | UChar pattern[100]; | 
|  |  | 
/**
 * One entry of the command line option table.
 *   name  - option text as typed by the user; 0 marks the end of a table
 *   type  - FLAG takes no value, NUM and STRING take the next argument
 *   pVar  - address of the variable that receives the option's value
 */
struct OptSpec {
    const char *name;
    enum {
        FLAG,
        NUM,
        STRING
    } type;
    void *pVar;
};
|  |  | 
|  | OptSpec opts[] = { | 
|  | {"-locale",      OptSpec::STRING, &opt_locale}, | 
|  | {"-rules",       OptSpec::STRING, &opt_rules}, | 
|  | {"-source",      OptSpec::STRING, &opt_source}, | 
|  | {"-pattern",     OptSpec::STRING, &opt_pattern}, | 
|  | {"-norm",        OptSpec::FLAG,   &opt_norm}, | 
|  | {"-french",      OptSpec::FLAG,   &opt_french}, | 
|  | {"-shifted",     OptSpec::FLAG,   &opt_shifted}, | 
|  | {"-lower",       OptSpec::FLAG,   &opt_lower}, | 
|  | {"-upper",       OptSpec::FLAG,   &opt_upper}, | 
|  | {"-case",        OptSpec::FLAG,   &opt_case}, | 
|  | {"-level",       OptSpec::NUM,    &opt_level}, | 
|  | {"-overlap",     OptSpec::FLAG,   &opt_overlap}, | 
|  | {"-canonical",   OptSpec::FLAG,   &opt_canonical}, | 
|  | {"-help",        OptSpec::FLAG,   &opt_help}, | 
|  | {"-?",           OptSpec::FLAG,   &opt_help}, | 
|  | {0, OptSpec::FLAG, 0} | 
|  | }; | 
|  |  | 
|  | /** | 
|  | * processOptions()  Function to read the command line options. | 
|  | */ | 
|  | UBool processOptions(int argc, const char **argv, OptSpec opts[]) | 
|  | { | 
|  | for (int argNum = 1; argNum < argc; argNum ++) { | 
|  | const char *pArgName = argv[argNum]; | 
|  | for (OptSpec *pOpt = opts;  pOpt->name != 0; pOpt ++) { | 
|  | if (strcmp(pOpt->name, pArgName) == 0) { | 
|  | switch (pOpt->type) { | 
|  | case OptSpec::FLAG: | 
|  | *(UBool *)(pOpt->pVar) = TRUE; | 
|  | break; | 
|  | case OptSpec::STRING: | 
|  | argNum ++; | 
|  | if (argNum >= argc) { | 
|  | fprintf(stderr, "value expected for \"%s\" option.\n", | 
|  | pOpt->name); | 
|  | return FALSE; | 
|  | } | 
|  | *(const char **)(pOpt->pVar) = argv[argNum]; | 
|  | break; | 
|  | case OptSpec::NUM: | 
|  | argNum ++; | 
|  | if (argNum >= argc) { | 
|  | fprintf(stderr, "value expected for \"%s\" option.\n", | 
|  | pOpt->name); | 
|  | return FALSE; | 
|  | } | 
|  | char *endp; | 
|  | int i = strtol(argv[argNum], &endp, 0); | 
|  | if (endp == argv[argNum]) { | 
|  | fprintf(stderr, | 
|  | "integer value expected for \"%s\" option.\n", | 
|  | pOpt->name); | 
|  | return FALSE; | 
|  | } | 
|  | *(int *)(pOpt->pVar) = i; | 
|  | } | 
|  | break; | 
|  | } | 
|  | } | 
|  | if (pOpt->name == 0) | 
|  | { | 
|  | fprintf(stderr, "Unrecognized option \"%s\"\n", pArgName); | 
|  | return FALSE; | 
|  | } | 
|  | } | 
|  | return TRUE; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Creates a collator | 
|  | */ | 
|  | UBool processCollator() | 
|  | { | 
|  | // Set up an ICU collator | 
|  | UErrorCode status = U_ZERO_ERROR; | 
|  |  | 
|  | if (opt_rules != 0) { | 
|  | u_unescape(opt_rules, rules, 100); | 
|  | collator = ucol_openRules(rules, -1, UCOL_OFF, UCOL_TERTIARY, | 
|  | NULL, &status); | 
|  | } | 
|  | else { | 
|  | collator = ucol_open(opt_locale, &status); | 
|  | } | 
|  | if (U_FAILURE(status)) { | 
|  | fprintf(stderr, "Collator creation failed.: %d\n", status); | 
|  | return FALSE; | 
|  | } | 
|  | if (status == U_USING_DEFAULT_WARNING) { | 
|  | fprintf(stderr, "Warning, U_USING_DEFAULT_WARNING for %s\n", | 
|  | opt_locale); | 
|  | } | 
|  | if (status == U_USING_FALLBACK_WARNING) { | 
|  | fprintf(stderr, "Warning, U_USING_FALLBACK_ERROR for %s\n", | 
|  | opt_locale); | 
|  | } | 
|  | if (opt_norm) { | 
|  | ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); | 
|  | } | 
|  | if (opt_french) { | 
|  | ucol_setAttribute(collator, UCOL_FRENCH_COLLATION, UCOL_ON, &status); | 
|  | } | 
|  | if (opt_lower) { | 
|  | ucol_setAttribute(collator, UCOL_CASE_FIRST, UCOL_LOWER_FIRST, | 
|  | &status); | 
|  | } | 
|  | if (opt_upper) { | 
|  | ucol_setAttribute(collator, UCOL_CASE_FIRST, UCOL_UPPER_FIRST, | 
|  | &status); | 
|  | } | 
|  | if (opt_case) { | 
|  | ucol_setAttribute(collator, UCOL_CASE_LEVEL, UCOL_ON, &status); | 
|  | } | 
|  | if (opt_shifted) { | 
|  | ucol_setAttribute(collator, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, | 
|  | &status); | 
|  | } | 
|  | if (opt_level != 0) { | 
|  | switch (opt_level) { | 
|  | case 1: | 
|  | ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_PRIMARY, &status); | 
|  | break; | 
|  | case 2: | 
|  | ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_SECONDARY, | 
|  | &status); | 
|  | break; | 
|  | case 3: | 
|  | ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_TERTIARY, &status); | 
|  | break; | 
|  | case 4: | 
|  | ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_QUATERNARY, | 
|  | &status); | 
|  | break; | 
|  | case 5: | 
|  | ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_IDENTICAL, | 
|  | &status); | 
|  | break; | 
|  | default: | 
|  | fprintf(stderr, "-level param must be between 1 and 5\n"); | 
|  | return FALSE; | 
|  | } | 
|  | } | 
|  | if (U_FAILURE(status)) { | 
|  | fprintf(stderr, "Collator attribute setting failed.: %d\n", status); | 
|  | return FALSE; | 
|  | } | 
|  | return TRUE; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Creates a string search | 
|  | */ | 
|  | UBool processStringSearch() | 
|  | { | 
|  | u_unescape(opt_source, source, 100); | 
|  | u_unescape(opt_pattern, pattern, 100); | 
|  | UErrorCode status = U_ZERO_ERROR; | 
|  | search = usearch_openFromCollator(pattern, -1, source, -1, collator, NULL, | 
|  | &status); | 
|  | if (U_FAILURE(status)) { | 
|  | return FALSE; | 
|  | } | 
|  | if (opt_overlap == TRUE) { | 
|  | usearch_setAttribute(search, USEARCH_OVERLAP, USEARCH_ON, &status); | 
|  | } | 
|  | if (opt_canonical == TRUE) { | 
|  | usearch_setAttribute(search, USEARCH_CANONICAL_MATCH, USEARCH_ON, | 
|  | &status); | 
|  | } | 
|  | if (U_FAILURE(status)) { | 
|  | fprintf(stderr, "Error setting search attributes\n"); | 
|  | return FALSE; | 
|  | } | 
|  | return TRUE; | 
|  | } | 
|  |  | 
|  | UBool findPattern() | 
|  | { | 
|  | UErrorCode status = U_ZERO_ERROR; | 
|  | int32_t offset = usearch_next(search, &status); | 
|  | if (offset == USEARCH_DONE) { | 
|  | fprintf(stdout, "Pattern not found in source\n"); | 
|  | } | 
|  | while (offset != USEARCH_DONE) { | 
|  | fprintf(stdout, "Pattern found at offset %d size %d\n", offset, | 
|  | usearch_getMatchedLength(search)); | 
|  | offset = usearch_next(search, &status); | 
|  | } | 
|  | if (U_FAILURE(status)) { | 
|  | fprintf(stderr, "Error in searching for pattern %d\n", status); | 
|  | return FALSE; | 
|  | } | 
|  | fprintf(stdout, "End of search\n"); | 
|  | return TRUE; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Main   --  process command line, read in and pre-process the test file, | 
|  | *            call other functions to do the actual tests. | 
|  | */ | 
|  | int main(int argc, const char** argv) | 
|  | { | 
|  | if (processOptions(argc, argv, opts) != TRUE || opt_help) { | 
|  | printf(gHelpString); | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | if (processCollator() != TRUE) { | 
|  | fprintf(stderr, "Error creating collator\n"); | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | if (processStringSearch() != TRUE) { | 
|  | fprintf(stderr, "Error creating string search\n"); | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | fprintf(stdout, "Finding pattern %s in source %s\n", opt_pattern, | 
|  | opt_source); | 
|  |  | 
|  | findPattern(); | 
|  | ucol_close(collator); | 
|  | usearch_close(search); | 
|  | return 0; | 
|  | } |