|  | // Copyright (C) 2016 and later: Unicode, Inc. and others. | 
|  | // License & terms of use: http://www.unicode.org/copyright.html | 
|  | /* | 
|  | ******************************************************************************* | 
|  | *   Copyright (C) 2003-2014, International Business Machines | 
|  | *   Corporation and others.  All Rights Reserved. | 
|  | ******************************************************************************* | 
|  | * | 
|  | * File prscmnts.cpp | 
|  | * | 
|  | * Modification History: | 
|  | * | 
|  | *   Date          Name        Description | 
|  | *   08/22/2003    ram         Creation. | 
|  | ******************************************************************************* | 
|  | */ | 
|  |  | 
|  | // Safer use of UnicodeString. | 
|  | #ifndef UNISTR_FROM_CHAR_EXPLICIT | 
|  | #   define UNISTR_FROM_CHAR_EXPLICIT explicit | 
|  | #endif | 
|  |  | 
|  | // Less important, but still a good idea. | 
|  | #ifndef UNISTR_FROM_STRING_EXPLICIT | 
|  | #   define UNISTR_FROM_STRING_EXPLICIT explicit | 
|  | #endif | 
|  |  | 
|  | #include "unicode/regex.h" | 
|  | #include "unicode/unistr.h" | 
|  | #include "unicode/parseerr.h" | 
|  | #include "prscmnts.h" | 
|  | #include <stdio.h> | 
|  | #include <stdlib.h> | 
|  |  | 
|  | U_NAMESPACE_USE | 
|  |  | 
|  | #if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when RegularExpressions not available */ | 
|  |  | 
|  | #define MAX_SPLIT_STRINGS 20 | 
|  |  | 
|  | const char *patternStrings[UPC_LIMIT]={ | 
|  | "^translate\\s*(.*)", | 
|  | "^note\\s*(.*)" | 
|  | }; | 
|  |  | 
|  | U_CFUNC int32_t | 
|  | removeText(UChar *source, int32_t srcLen, | 
|  | UnicodeString patString,uint32_t options, | 
|  | UnicodeString replaceText, UErrorCode *status){ | 
|  |  | 
|  | if(status == NULL || U_FAILURE(*status)){ | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | UnicodeString src(source, srcLen); | 
|  |  | 
|  | RegexMatcher    myMatcher(patString, src, options, *status); | 
|  | if(U_FAILURE(*status)){ | 
|  | return 0; | 
|  | } | 
|  | UnicodeString dest; | 
|  |  | 
|  |  | 
|  | dest = myMatcher.replaceAll(replaceText,*status); | 
|  |  | 
|  |  | 
|  | return dest.extract(source, srcLen, *status); | 
|  |  | 
|  | } | 
|  | U_CFUNC int32_t | 
|  | trim(UChar *src, int32_t srcLen, UErrorCode *status){ | 
|  | srcLen = removeText(src, srcLen, UnicodeString("^[ \\r\\n]+ "), 0, UnicodeString(), status); // remove leading new lines | 
|  | srcLen = removeText(src, srcLen, UnicodeString("^\\s+"), 0, UnicodeString(), status); // remove leading spaces | 
|  | srcLen = removeText(src, srcLen, UnicodeString("\\s+$"), 0, UnicodeString(), status); // remvoe trailing spcaes | 
|  | return srcLen; | 
|  | } | 
|  |  | 
|  | U_CFUNC int32_t | 
|  | removeCmtText(UChar* source, int32_t srcLen, UErrorCode* status){ | 
|  | srcLen = trim(source, srcLen, status); | 
|  | UnicodeString patString("^\\s*?\\*\\s*?");  // remove pattern like " * " at the begining of the line | 
|  | srcLen = removeText(source, srcLen, patString, UREGEX_MULTILINE, UnicodeString(), status); | 
|  | return removeText(source, srcLen, UnicodeString("[ \\r\\n]+"), 0, UnicodeString(" "), status);// remove new lines; | 
|  | } | 
|  |  | 
|  | U_CFUNC int32_t | 
|  | getText(const UChar* source, int32_t srcLen, | 
|  | UChar** dest, int32_t destCapacity, | 
|  | UnicodeString patternString, | 
|  | UErrorCode* status){ | 
|  |  | 
|  | if(status == NULL || U_FAILURE(*status)){ | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | UnicodeString     stringArray[MAX_SPLIT_STRINGS]; | 
|  | RegexPattern      *pattern = RegexPattern::compile(UnicodeString("@"), 0, *status); | 
|  | UnicodeString src (source,srcLen); | 
|  |  | 
|  | if (U_FAILURE(*status)) { | 
|  | return 0; | 
|  | } | 
|  | pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status); | 
|  |  | 
|  | RegexMatcher matcher(patternString, UREGEX_DOTALL, *status); | 
|  | if (U_FAILURE(*status)) { | 
|  | return 0; | 
|  | } | 
|  | for(int32_t i=0; i<MAX_SPLIT_STRINGS; i++){ | 
|  | matcher.reset(stringArray[i]); | 
|  | if(matcher.lookingAt(*status)){ | 
|  | UnicodeString out = matcher.group(1, *status); | 
|  |  | 
|  | return out.extract(*dest, destCapacity,*status); | 
|  | } | 
|  | } | 
|  | return 0; | 
|  | } | 
|  |  | 
|  |  | 
|  | #define AT_SIGN  0x0040 | 
|  |  | 
|  | U_CFUNC int32_t | 
|  | getDescription( const UChar* source, int32_t srcLen, | 
|  | UChar** dest, int32_t destCapacity, | 
|  | UErrorCode* status){ | 
|  | if(status == NULL || U_FAILURE(*status)){ | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | UnicodeString     stringArray[MAX_SPLIT_STRINGS]; | 
|  | RegexPattern      *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status); | 
|  | UnicodeString src(source, srcLen); | 
|  |  | 
|  | if (U_FAILURE(*status)) { | 
|  | return 0; | 
|  | } | 
|  | pattern->split(src, stringArray,MAX_SPLIT_STRINGS , *status); | 
|  |  | 
|  | if(stringArray[0].indexOf((UChar)AT_SIGN)==-1){ | 
|  | int32_t destLen =  stringArray[0].extract(*dest, destCapacity, *status); | 
|  | return trim(*dest, destLen, status); | 
|  | } | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | U_CFUNC int32_t | 
|  | getCount(const UChar* source, int32_t srcLen, | 
|  | UParseCommentsOption option, UErrorCode *status){ | 
|  |  | 
|  | if(status == NULL || U_FAILURE(*status)){ | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | UnicodeString     stringArray[MAX_SPLIT_STRINGS]; | 
|  | RegexPattern      *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status); | 
|  | UnicodeString src (source, srcLen); | 
|  |  | 
|  |  | 
|  | if (U_FAILURE(*status)) { | 
|  | return 0; | 
|  | } | 
|  | int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status); | 
|  |  | 
|  | UnicodeString patternString(patternStrings[option]); | 
|  | RegexMatcher matcher(patternString, UREGEX_DOTALL, *status); | 
|  | if (U_FAILURE(*status)) { | 
|  | return 0; | 
|  | } | 
|  | int32_t count = 0; | 
|  | for(int32_t i=0; i<retLen; i++){ | 
|  | matcher.reset(stringArray[i]); | 
|  | if(matcher.lookingAt(*status)){ | 
|  | count++; | 
|  | } | 
|  | } | 
|  | if(option == UPC_TRANSLATE && count > 1){ | 
|  | fprintf(stderr, "Multiple @translate tags cannot be supported.\n"); | 
|  | exit(U_UNSUPPORTED_ERROR); | 
|  | } | 
|  | return count; | 
|  | } | 
|  |  | 
|  | U_CFUNC int32_t | 
|  | getAt(const UChar* source, int32_t srcLen, | 
|  | UChar** dest, int32_t destCapacity, | 
|  | int32_t index, | 
|  | UParseCommentsOption option, | 
|  | UErrorCode* status){ | 
|  |  | 
|  | if(status == NULL || U_FAILURE(*status)){ | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | UnicodeString     stringArray[MAX_SPLIT_STRINGS]; | 
|  | RegexPattern      *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status); | 
|  | UnicodeString src (source, srcLen); | 
|  |  | 
|  |  | 
|  | if (U_FAILURE(*status)) { | 
|  | return 0; | 
|  | } | 
|  | int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status); | 
|  |  | 
|  | UnicodeString patternString(patternStrings[option]); | 
|  | RegexMatcher matcher(patternString, UREGEX_DOTALL, *status); | 
|  | if (U_FAILURE(*status)) { | 
|  | return 0; | 
|  | } | 
|  | int32_t count = 0; | 
|  | for(int32_t i=0; i<retLen; i++){ | 
|  | matcher.reset(stringArray[i]); | 
|  | if(matcher.lookingAt(*status)){ | 
|  | if(count == index){ | 
|  | UnicodeString out = matcher.group(1, *status); | 
|  | return out.extract(*dest, destCapacity,*status); | 
|  | } | 
|  | count++; | 
|  |  | 
|  | } | 
|  | } | 
|  | return 0; | 
|  |  | 
|  | } | 
|  |  | 
|  | U_CFUNC int32_t | 
|  | getTranslate( const UChar* source, int32_t srcLen, | 
|  | UChar** dest, int32_t destCapacity, | 
|  | UErrorCode* status){ | 
|  | UnicodeString     notePatternString("^translate\\s*?(.*)"); | 
|  |  | 
|  | int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternString, status); | 
|  | return trim(*dest, destLen, status); | 
|  | } | 
|  |  | 
|  | U_CFUNC int32_t | 
|  | getNote(const UChar* source, int32_t srcLen, | 
|  | UChar** dest, int32_t destCapacity, | 
|  | UErrorCode* status){ | 
|  |  | 
|  | UnicodeString     notePatternString("^note\\s*?(.*)"); | 
|  | int32_t destLen =  getText(source, srcLen, dest, destCapacity, notePatternString, status); | 
|  | return trim(*dest, destLen, status); | 
|  |  | 
|  | } | 
|  |  | 
|  | #endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */ | 
|  |  |