blob: 34d2a26043c950d26fd9c7a6644e4715d3839964 [file] [log] [blame]
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 2003-2014, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
*
* File prscmnts.cpp
*
* Modification History:
*
* Date Name Description
* 08/22/2003 ram Creation.
*******************************************************************************
*/
// Safer use of UnicodeString.
#ifndef UNISTR_FROM_CHAR_EXPLICIT
# define UNISTR_FROM_CHAR_EXPLICIT explicit
#endif
// Less important, but still a good idea.
#ifndef UNISTR_FROM_STRING_EXPLICIT
# define UNISTR_FROM_STRING_EXPLICIT explicit
#endif
#include "unicode/regex.h"
#include "unicode/unistr.h"
#include "unicode/parseerr.h"
#include "prscmnts.h"
#include <stdio.h>
#include <stdlib.h>
U_NAMESPACE_USE
#if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when RegularExpressions not available */
/* Capacity of the UnicodeString arrays that receive the pieces produced when
 * a comment is split on '@'. */
#define MAX_SPLIT_STRINGS 20
/* Regular expressions used by getCount() and getAt(). The table has UPC_LIMIT
 * slots and is indexed with a UParseCommentsOption value. Each expression is
 * anchored at the start of a piece; capture group 1 holds whatever follows the
 * tag name and any whitespace after it.
 * NOTE(review): the enum is declared elsewhere; presumably slot 0 is
 * UPC_TRANSLATE and slot 1 is UPC_NOTE - confirm against prscmnts.h. */
const char *patternStrings[UPC_LIMIT]={
"^translate\\s*(.*)",
"^note\\s*(.*)"
};
/**
 * Replaces every match of a regular expression inside a UTF-16 buffer and
 * writes the result back into the same buffer.
 *
 * @param source      Buffer holding the input text; also receives the result.
 * @param srcLen      Number of UChars of input in source. The same value is
 *                    used as the capacity when the result is written back.
 * @param patString   Regular expression to search for.
 * @param options     Flags handed to the RegexMatcher constructor.
 * @param replaceText Text substituted for each match.
 * @param status      In/out error code; the function does nothing when it is
 *                    NULL or already holds a failure.
 * @return The value returned by UnicodeString::extract() for the rewritten
 *         text, or 0 when status was NULL/failed on entry or the matcher
 *         could not be constructed.
 */
U_CFUNC int32_t
removeText(UChar *source, int32_t srcLen,
           UnicodeString patString,uint32_t options,
           UnicodeString replaceText, UErrorCode *status){
    if(status == NULL){
        return 0;
    }
    if(U_FAILURE(*status)){
        return 0;
    }

    /* The matcher refers to this string, so it must stay alive while the
     * matcher is in use. */
    UnicodeString input(source, srcLen);
    RegexMatcher matcher(patString, input, options, *status);
    if(U_SUCCESS(*status)){
        UnicodeString replaced = matcher.replaceAll(replaceText, *status);
        return replaced.extract(source, srcLen, *status);
    }
    return 0;
}
/**
 * Strips leading and trailing whitespace from a UTF-16 buffer in place by
 * running three removeText() passes, each replacing its match with nothing.
 *
 * @param src    Buffer to trim; modified in place.
 * @param srcLen Number of UChars in src.
 * @param status In/out error code, forwarded to removeText().
 * @return The length reported by the last removeText() pass.
 */
U_CFUNC int32_t
trim(UChar *src, int32_t srcLen, UErrorCode *status){
    static const char * const passes[] = {
        "^[ \\r\\n]+ ",   /* leading spaces/new lines; note the pattern itself ends in a literal space */
        "^\\s+",          /* any remaining leading whitespace */
        "\\s+$"           /* trailing whitespace */
    };
    const int32_t passCount = static_cast<int32_t>(sizeof(passes) / sizeof(passes[0]));

    int32_t len = srcLen;
    for(int32_t pass = 0; pass < passCount; ++pass){
        /* Each pass works on the length produced by the previous one. */
        len = removeText(src, len, UnicodeString(passes[pass]), 0, UnicodeString(), status);
    }
    return len;
}
/**
 * Cleans up the text of a comment in place: trims it, removes the
 * " * " style decoration at the start of each line, and replaces every run of
 * spaces, carriage returns and line feeds with one space.
 *
 * @param source Buffer holding the comment text; modified in place.
 * @param srcLen Number of UChars in source.
 * @param status In/out error code, forwarded to trim() and removeText().
 * @return The length reported by the final removeText() pass.
 */
U_CFUNC int32_t
removeCmtText(UChar* source, int32_t srcLen, UErrorCode* status){
    int32_t len = trim(source, srcLen, status);

    /* UREGEX_MULTILINE lets '^' match at the start of every line, so the
     * leading-asterisk decoration is removed from each line of the comment. */
    len = removeText(source, len, UnicodeString("^\\s*?\\*\\s*?"), UREGEX_MULTILINE,
                     UnicodeString(), status);

    /* Replace each run of spaces / CR / LF with a single space. */
    len = removeText(source, len, UnicodeString("[ \\r\\n]+"), 0, UnicodeString(" "), status);
    return len;
}
/**
 * Splits a comment on '@' and returns capture group 1 of the first piece
 * whose beginning matches patternString.
 *
 * @param source        Comment text (UTF-16).
 * @param srcLen        Number of UChars in source.
 * @param dest          Pointer to the output buffer pointer. Only *dest is
 *                      read; the pointer itself is not changed.
 * @param destCapacity  Capacity of *dest in UChars.
 * @param patternString Regular expression applied (with UREGEX_DOTALL) to the
 *                      start of each piece; it must define capture group 1.
 * @param status        In/out error code; the function does nothing when it
 *                      is NULL or already holds a failure.
 * @return The value returned by UnicodeString::extract() for the captured
 *         text, or 0 when no piece matches or an error occurred.
 */
U_CFUNC int32_t
getText(const UChar* source, int32_t srcLen,
        UChar** dest, int32_t destCapacity,
        UnicodeString patternString,
        UErrorCode* status){
    if(status == NULL || U_FAILURE(*status)){
        return 0;
    }

    UnicodeString stringArray[MAX_SPLIT_STRINGS];
    RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), 0, *status);
    if (U_FAILURE(*status)) {
        delete pattern; /* deleting a null pointer is a no-op */
        return 0;
    }

    UnicodeString src(source, srcLen);
    int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
    /* The compiled pattern is owned by this function and is only needed for
     * the split; release it here so that no return path below can leak it. */
    delete pattern;

    RegexMatcher matcher(patternString, UREGEX_DOTALL, *status);
    if (U_FAILURE(*status)) {
        return 0;
    }

    /* Only look at the pieces split() actually produced. */
    for(int32_t i=0; i<retLen; i++){
        matcher.reset(stringArray[i]);
        if(matcher.lookingAt(*status)){
            UnicodeString out = matcher.group(1, *status);
            return out.extract(*dest, destCapacity, *status);
        }
    }
    return 0;
}
/* Code point of '@', the character that introduces a tag inside a comment. */
#define AT_SIGN 0x0040
/**
 * Extracts the text that precedes the first '@' of a comment into *dest and
 * trims it.
 *
 * @param source       Comment text (UTF-16).
 * @param srcLen       Number of UChars in source.
 * @param dest         Pointer to the output buffer pointer. Only *dest is
 *                     read; the pointer itself is not changed.
 * @param destCapacity Capacity of *dest in UChars.
 * @param status       In/out error code; the function does nothing when it is
 *                     NULL or already holds a failure.
 * @return The length returned by trim() for the extracted text, or 0 on
 *         error or when the first piece still contains an '@'.
 */
U_CFUNC int32_t
getDescription( const UChar* source, int32_t srcLen,
                UChar** dest, int32_t destCapacity,
                UErrorCode* status){
    if(status == NULL || U_FAILURE(*status)){
        return 0;
    }

    UnicodeString stringArray[MAX_SPLIT_STRINGS];
    RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status);
    if (U_FAILURE(*status)) {
        delete pattern; /* deleting a null pointer is a no-op */
        return 0;
    }

    UnicodeString src(source, srcLen);
    pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
    /* The compiled pattern is owned by this function and is only needed for
     * the split; release it here so that it is not leaked. */
    delete pattern;

    /* Guard kept from the original code: only use the first piece when it
     * holds no '@'. */
    if(stringArray[0].indexOf((UChar)AT_SIGN)==-1){
        int32_t destLen = stringArray[0].extract(*dest, destCapacity, *status);
        return trim(*dest, destLen, status);
    }
    return 0;
}
/**
 * Counts the '@'-separated pieces of a comment whose beginning matches the
 * pattern selected by option.
 *
 * If option is UPC_TRANSLATE and more than one piece matches, an error
 * message is printed to stderr and the process is terminated with exit().
 *
 * @param source Comment text (UTF-16).
 * @param srcLen Number of UChars in source.
 * @param option Index into patternStrings; values outside [0, UPC_LIMIT) set
 *               *status to U_ILLEGAL_ARGUMENT_ERROR.
 * @param status In/out error code; the function does nothing when it is NULL
 *               or already holds a failure.
 * @return Number of matching pieces, or 0 on error.
 */
U_CFUNC int32_t
getCount(const UChar* source, int32_t srcLen,
         UParseCommentsOption option, UErrorCode *status){
    if(status == NULL || U_FAILURE(*status)){
        return 0;
    }

    /* Validate before indexing patternStrings to avoid reading past the table. */
    const int32_t optionIndex = static_cast<int32_t>(option);
    if(optionIndex < 0 || optionIndex >= static_cast<int32_t>(UPC_LIMIT)){
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    UnicodeString stringArray[MAX_SPLIT_STRINGS];
    RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status);
    if (U_FAILURE(*status)) {
        delete pattern; /* deleting a null pointer is a no-op */
        return 0;
    }

    UnicodeString src(source, srcLen);
    int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
    /* The compiled pattern is owned by this function and is only needed for
     * the split; release it here so that no return path below can leak it. */
    delete pattern;

    UnicodeString patternString(patternStrings[optionIndex]);
    RegexMatcher matcher(patternString, UREGEX_DOTALL, *status);
    if (U_FAILURE(*status)) {
        return 0;
    }

    int32_t count = 0;
    for(int32_t i=0; i<retLen; i++){
        matcher.reset(stringArray[i]);
        if(matcher.lookingAt(*status)){
            count++;
        }
    }

    if(option == UPC_TRANSLATE && count > 1){
        fprintf(stderr, "Multiple @translate tags cannot be supported.\n");
        exit(U_UNSUPPORTED_ERROR);
    }
    return count;
}
/**
 * Finds the index-th (0-based) '@'-separated piece of a comment whose
 * beginning matches the pattern selected by option, and copies capture
 * group 1 of that match into *dest.
 *
 * @param source       Comment text (UTF-16).
 * @param srcLen       Number of UChars in source.
 * @param dest         Pointer to the output buffer pointer. Only *dest is
 *                     read; the pointer itself is not changed.
 * @param destCapacity Capacity of *dest in UChars.
 * @param index        Which matching piece to return, counting from 0.
 * @param option       Index into patternStrings; values outside
 *                     [0, UPC_LIMIT) set *status to U_ILLEGAL_ARGUMENT_ERROR.
 * @param status       In/out error code; the function does nothing when it is
 *                     NULL or already holds a failure.
 * @return The value returned by UnicodeString::extract() for the captured
 *         text, or 0 on error or when fewer than index+1 pieces match.
 */
U_CFUNC int32_t
getAt(const UChar* source, int32_t srcLen,
      UChar** dest, int32_t destCapacity,
      int32_t index,
      UParseCommentsOption option,
      UErrorCode* status){
    if(status == NULL || U_FAILURE(*status)){
        return 0;
    }

    /* Validate before indexing patternStrings to avoid reading past the table. */
    const int32_t optionIndex = static_cast<int32_t>(option);
    if(optionIndex < 0 || optionIndex >= static_cast<int32_t>(UPC_LIMIT)){
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    UnicodeString stringArray[MAX_SPLIT_STRINGS];
    RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status);
    if (U_FAILURE(*status)) {
        delete pattern; /* deleting a null pointer is a no-op */
        return 0;
    }

    UnicodeString src(source, srcLen);
    int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
    /* The compiled pattern is owned by this function and is only needed for
     * the split; release it here so that no return path below (including the
     * early return inside the loop) can leak it. */
    delete pattern;

    UnicodeString patternString(patternStrings[optionIndex]);
    RegexMatcher matcher(patternString, UREGEX_DOTALL, *status);
    if (U_FAILURE(*status)) {
        return 0;
    }

    int32_t count = 0;
    for(int32_t i=0; i<retLen; i++){
        matcher.reset(stringArray[i]);
        if(matcher.lookingAt(*status)){
            if(count == index){
                UnicodeString out = matcher.group(1, *status);
                return out.extract(*dest, destCapacity, *status);
            }
            count++;
        }
    }
    return 0;
}
/**
 * Extracts the text of the first piece of a comment that starts with
 * "translate" (after splitting on '@') into *dest and trims it.
 *
 * @param source       Comment text (UTF-16).
 * @param srcLen       Number of UChars in source.
 * @param dest         Pointer to the output buffer pointer; only *dest is read.
 * @param destCapacity Capacity of *dest in UChars.
 * @param status       In/out error code, forwarded to getText() and trim().
 * @return The length returned by trim() for the extracted text.
 */
U_CFUNC int32_t
getTranslate( const UChar* source, int32_t srcLen,
              UChar** dest, int32_t destCapacity,
              UErrorCode* status){
    const UnicodeString translatePattern("^translate\\s*?(.*)");
    /* Extract first, then trim what was written into the caller's buffer. */
    const int32_t extractedLen = getText(source, srcLen, dest, destCapacity,
                                         translatePattern, status);
    return trim(*dest, extractedLen, status);
}
/**
 * Extracts the text of the first piece of a comment that starts with "note"
 * (after splitting on '@') into *dest and trims it.
 *
 * @param source       Comment text (UTF-16).
 * @param srcLen       Number of UChars in source.
 * @param dest         Pointer to the output buffer pointer; only *dest is read.
 * @param destCapacity Capacity of *dest in UChars.
 * @param status       In/out error code, forwarded to getText() and trim().
 * @return The length returned by trim() for the extracted text.
 */
U_CFUNC int32_t
getNote(const UChar* source, int32_t srcLen,
        UChar** dest, int32_t destCapacity,
        UErrorCode* status){
    int32_t rawLen = getText(source, srcLen, dest, destCapacity,
                             UnicodeString("^note\\s*?(.*)"), status);
    /* getText() wrote into *dest; trim that buffer in place. */
    return trim(*dest, rawLen, status);
}
#endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */