blob: dd439ec5a176d8e36d735483e8597d5a3321a97d [file] [log] [blame]
/*
*******************************************************************************
*
* Copyright (C) 1998-2002, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
*
* File parse.c
*
* Modification History:
*
* Date Name Description
* 05/26/99 stephen Creation.
* 02/25/00 weiv Overhaul to write udata
* 5/10/01 Ram removed ustdio dependency
* 06/10/2001 Dominic Ludlam <dom@recoil.org> Rewritten
*******************************************************************************
*/
#include "ucol_imp.h"
#include "parse.h"
#include "errmsg.h"
#include "uhash.h"
#include "cmemory.h"
#include "cstring.h"
#include "read.h"
#include "ustr.h"
#include "reslist.h"
#include "unicode/ustring.h"
#include "unicode/putil.h"
/* Number of tokens to read ahead of the current stream position */
#define MAX_LOOKAHEAD 2
#define U_ICU_UNIDATA "unidata"
#define CR 0x000D
#define LF 0x000A
#define SPACE 0x0020
#define ESCAPE 0x005C
#define HASH 0x0023
#define QUOTE 0x0027
#define STARTCOMMAND 0x005B
#define ENDCOMMAND 0x005D
/* UChar spellings of the resource type keywords that parseResourceType
   recognizes. They are declared here and initialized in initParser via
   U_STRING_INIT, so initParser must run before any parsing. */
U_STRING_DECL(k_type_string, "string", 6);
U_STRING_DECL(k_type_binary, "binary", 6);
U_STRING_DECL(k_type_bin, "bin", 3);
U_STRING_DECL(k_type_table, "table", 5);
U_STRING_DECL(k_type_int, "int", 3);
U_STRING_DECL(k_type_integer, "integer", 7);
U_STRING_DECL(k_type_array, "array", 5);
U_STRING_DECL(k_type_alias, "alias", 5);
U_STRING_DECL(k_type_intvector, "intvector", 9);
U_STRING_DECL(k_type_import, "import", 6);
U_STRING_DECL(k_type_include, "include", 7);
U_STRING_DECL(k_type_reserved, "reserved", 8);
enum EResourceType
{
RT_UNKNOWN,
RT_STRING,
RT_BINARY,
RT_TABLE,
RT_INTEGER,
RT_ARRAY,
RT_ALIAS,
RT_INTVECTOR,
RT_IMPORT,
RT_INCLUDE,
RT_RESERVED
};
/* only used for debugging */
/* Display names, one entry per EResourceType value in declaration order. */
const char *resourceNames[] =
{
"Unknown",
"String",
"Binary",
"Table",
"Integer",
"Array",
"Alias",
"Int vector",
"Import",
"Include",
"Reserved",
};
/* One slot of the lookahead buffer: a token as produced by getNextToken. */
struct Lookahead
{
enum ETokenType type; /* token kind returned by getNextToken */
struct UString value; /* token text; initialized once in initParser and reused on every refill */
uint32_t line; /* line number reported by getNextToken for this token */
};
/* keep in sync with token defines in read.h */
/* Printable token names for diagnostics; indexed by enum ETokenType values
   (see expect, realParseTable and parseCollationElements). */
const char *tokenNames[] =
{
"string", /* A string token, such as "MonthNames" */
"'{'", /* An opening brace character */
"'}'", /* A closing brace character */
"','", /* A comma */
"':'", /* A colon */
"<end of file>", /* End of the file has been reached successfully */
"<error>", /* An error, such as an unterminated quoted string */
};
/* Just to store "TRUE" */
/* Compared against the value of the "Override" entry in parseCollationElements. */
static const UChar trueValue[] = {0x0054, 0x0052, 0x0055, 0x0045, 0x0000};
/* Circular token buffer: MAX_LOOKAHEAD lookahead tokens plus the current one. */
static struct Lookahead lookahead[MAX_LOOKAHEAD + 1];
/* Index in lookahead[] of the token the next getToken call will return. */
static uint32_t lookaheadPosition;
/* Input being tokenized; set by initLookahead. */
static UCHARBUF *buffer;
/* Bundle under construction; created in parse() and passed to the *_open functions. */
static struct SRBRoot *bundle;
/* Directory used to resolve imported/included files; set by parse(), may be NULL. */
static const char *inputdir;
/* Length of inputdir in chars; 0 when inputdir is NULL. */
static uint32_t inputdirLength;
/* Forward declaration: the container parsers below recurse into parseResource. */
static struct SResource *parseResource(char *tag, UErrorCode *status);
void initParser(void)
{
uint32_t i;
U_STRING_INIT(k_type_string, "string", 6);
U_STRING_INIT(k_type_binary, "binary", 6);
U_STRING_INIT(k_type_bin, "bin", 3);
U_STRING_INIT(k_type_table, "table", 5);
U_STRING_INIT(k_type_int, "int", 3);
U_STRING_INIT(k_type_integer, "integer", 7);
U_STRING_INIT(k_type_array, "array", 5);
U_STRING_INIT(k_type_alias, "alias", 5);
U_STRING_INIT(k_type_intvector, "intvector", 9);
U_STRING_INIT(k_type_import, "import", 6);
U_STRING_INIT(k_type_reserved, "reserved", 8);
U_STRING_INIT(k_type_include, "include", 7);
for (i = 0; i < MAX_LOOKAHEAD + 1; i++)
{
ustr_init(&lookahead[i].value);
}
}
/* The nature of the lookahead buffer:
There are MAX_LOOKAHEAD + 1 slots, used as a circular buffer. This provides
MAX_LOOKAHEAD lookahead tokens and a slot for the current token and value.
When getToken is called, the current pointer is moved to the next slot and the
slot that held the previously returned token is refilled with the next token
from the reader by calling getNextToken.
The token values are stored in the slots, which means that a token value doesn't
survive the next call to getToken, i.e.
UString *value;
getToken(&value, NULL, status);
getToken(NULL, NULL, status); bad - value is now a different string
*/
/* Starts tokenizing a new input: remembers buf as the current input, resets the
   line counter and pre-reads the first MAX_LOOKAHEAD tokens into the lookahead
   buffer. Returns early, leaving *status set, if reading a token fails;
   otherwise *status is reset to U_ZERO_ERROR. */
static void
initLookahead(UCHARBUF *buf, UErrorCode *status)
{
    uint32_t slot;

    lookaheadPosition = 0;
    buffer = buf;
    resetLineNumber();

    /* only MAX_LOOKAHEAD slots are primed; the remaining slot is filled by the
       first getToken call */
    for (slot = 0; slot < MAX_LOOKAHEAD; slot++)
    {
        struct Lookahead *entry = &lookahead[slot];

        entry->type = getNextToken(buffer, &entry->value, &entry->line, status);
        if (U_FAILURE(*status))
        {
            return;
        }
    }
    *status = U_ZERO_ERROR;
}
/* Consumes and returns the current token.
   tokenValue (optional) receives a pointer to the token text held in the
   lookahead slot; linenumber (optional) receives the token's line.
   The slot just behind the current one is then refilled from the reader, so a
   value pointer obtained here is overwritten by the next getToken call.
   status is passed through to getNextToken for the refill. */
static enum ETokenType
getToken(struct UString **tokenValue, uint32_t *linenumber, UErrorCode *status)
{
    struct Lookahead *current = &lookahead[lookaheadPosition];
    struct Lookahead *refill = &lookahead[(lookaheadPosition + MAX_LOOKAHEAD) % (MAX_LOOKAHEAD + 1)];
    enum ETokenType result = current->type;

    if (tokenValue != NULL)
    {
        *tokenValue = &current->value;
    }
    if (linenumber != NULL)
    {
        *linenumber = current->line;
    }

    /* advance, then top the buffer up with the next token from the input */
    lookaheadPosition = (lookaheadPosition + 1) % (MAX_LOOKAHEAD + 1);
    refill->type = getNextToken(buffer, &refill->value, &refill->line, status);

    /* printf("getToken, returning %s\n", tokenNames[result]); */
    return result;
}
/* Looks at an upcoming token without consuming it.
   lookaheadCount selects the token: 0 is the one getToken would return next.
   Returns TOK_ERROR if *status already indicates failure, or (setting
   U_INTERNAL_PROGRAM_ERROR) if lookaheadCount is not below MAX_LOOKAHEAD.
   tokenValue and linenumber are optional outputs. */
static enum ETokenType
peekToken(uint32_t lookaheadCount, struct UString **tokenValue, uint32_t *linenumber, UErrorCode *status)
{
    struct Lookahead *slot;

    if (U_FAILURE(*status))
    {
        return TOK_ERROR;
    }
    if (lookaheadCount >= MAX_LOOKAHEAD)
    {
        *status = U_INTERNAL_PROGRAM_ERROR;
        return TOK_ERROR;
    }

    slot = &lookahead[(lookaheadPosition + lookaheadCount) % (MAX_LOOKAHEAD + 1)];
    if (tokenValue != NULL)
    {
        *tokenValue = &slot->value;
    }
    if (linenumber != NULL)
    {
        *linenumber = slot->line;
    }
    return slot->type;
}
/* Consumes the next token and checks that it is of type expectedToken.
   On a match *status is reset to U_ZERO_ERROR; on a mismatch it is set to
   U_INVALID_FORMAT_ERROR and a diagnostic is reported. If *status indicates
   failure after the token was fetched, the function returns without touching
   linenumber. tokenValue and linenumber are optional outputs (see getToken). */
static void
expect(enum ETokenType expectedToken, struct UString **tokenValue, uint32_t *linenumber, UErrorCode *status)
{
    uint32_t tokenLine;
    enum ETokenType actual = getToken(tokenValue, &tokenLine, status);

    if (U_FAILURE(*status))
    {
        return;
    }
    if (linenumber != NULL)
    {
        *linenumber = tokenLine;
    }

    if (actual == expectedToken)
    {
        *status = U_ZERO_ERROR;
        return;
    }

    *status = U_INVALID_FORMAT_ERROR;
    error(tokenLine, "expecting %s, got %s", tokenNames[expectedToken], tokenNames[actual]);
}
/* Consumes a string token and returns its text converted with
   u_UCharsToChars into a newly allocated, NUL-terminated char buffer.
   The caller owns the result and releases it with uprv_free.
   line (optional) receives the token's line number.
   Returns NULL with *status set if the next token is not a string or the
   allocation fails. */
static char *getInvariantString(uint32_t *line, UErrorCode *status)
{
    struct UString *tokenValue;
    char *converted = NULL;
    uint32_t size;

    expect(TOK_STRING, &tokenValue, line, status);
    if (U_FAILURE(*status))
    {
        return NULL;
    }

    /* + 1 so that the terminating NUL is converted as well */
    size = u_strlen(tokenValue->fChars) + 1;
    converted = uprv_malloc(size);
    if (converted != NULL)
    {
        u_UCharsToChars(tokenValue->fChars, converted, size);
    }
    else
    {
        *status = U_MEMORY_ALLOCATION_ERROR;
    }
    return converted;
}
/* Consumes a string token and maps it to a resource type.
   Recognized keywords are the k_type_* strings; "bin"/"binary" and
   "int"/"integer" are synonyms. Returns RT_UNKNOWN with *status set to a
   failure code if the token is not a string or the keyword is unknown. */
static enum EResourceType
parseResourceType(UErrorCode *status)
{
    const struct
    {
        const UChar *keyword;
        enum EResourceType type;
    } knownTypes[] =
    {
        { k_type_string,    RT_STRING    },
        { k_type_array,     RT_ARRAY     },
        { k_type_alias,     RT_ALIAS     },
        { k_type_table,     RT_TABLE     },
        { k_type_binary,    RT_BINARY    },
        { k_type_bin,       RT_BINARY    },
        { k_type_int,       RT_INTEGER   },
        { k_type_integer,   RT_INTEGER   },
        { k_type_intvector, RT_INTVECTOR },
        { k_type_import,    RT_IMPORT    },
        { k_type_include,   RT_INCLUDE   },
        { k_type_reserved,  RT_RESERVED  }
    };
    struct UString *tokenValue;
    char tokenBuffer[1024];
    uint32_t line = 0;
    uint32_t i;

    expect(TOK_STRING, &tokenValue, &line, status);
    if (U_FAILURE(*status))
    {
        return RT_UNKNOWN;
    }
    *status = U_ZERO_ERROR;

    for (i = 0; i < sizeof(knownTypes) / sizeof(knownTypes[0]); i++)
    {
        if (u_strcmp(tokenValue->fChars, knownTypes[i].keyword) == 0)
        {
            return knownTypes[i].type;
        }
    }

    /* no keyword matched: report the offending text */
    u_austrncpy(tokenBuffer, tokenValue->fChars, sizeof(tokenBuffer));
    tokenBuffer[sizeof(tokenBuffer) - 1] = 0;
    *status = U_INVALID_FORMAT_ERROR;
    error(line, "unknown resource type '%s'", tokenBuffer);
    return RT_UNKNOWN;
}
/* Parses the body of a %%UCARULES resource: a string token naming a rules
   file, followed by '}'. The file is looked up as
   [inputdir/]../unidata/<name>, read through a UCHARBUF, and its contents are
   stored as a string resource after the following filtering:
     - text between '[' and ']' (a command) is copied verbatim;
     - an unquoted '#' starts a comment that is skipped up to the line end;
     - backslash escapes are resolved through unescape();
     - spaces, CR and LF outside commands are dropped.
   tag is the resource key, startline is only used for verbose output.
   Returns the new string resource, or NULL on error. *status is set by this
   function or by the helper that failed, except when unescape() reports U_ERR,
   which is propagated as NULL with *status left as unescape() left it. */
static struct SResource *
parseUCARules(char *tag, uint32_t startline, UErrorCode *status)
{
    struct SResource *result = NULL;
    struct UString *tokenValue;
    char filename[256] = { '\0' };
    char cs[128] = { '\0' };
    uint32_t line;
    uint32_t needed;
    int len = 0;
    UBool quoted = FALSE;
    UBool useInputDir;
    UBool failed = FALSE;
    UCHARBUF *ucbuf = NULL;
    UChar32 c = 0;
    const char *cp = NULL;
    UChar *pTarget = NULL;
    UChar *target = NULL;
    UChar *targetLimit = NULL;
    int32_t size = 0;

    expect(TOK_STRING, &tokenValue, &line, status);
    if(isVerbose()){
        printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag,startline);
    }
    if (U_FAILURE(*status))
    {
        return NULL;
    }

    /* the name must fit into cs, including the terminating NUL */
    if (tokenValue->fLength < 0 || tokenValue->fLength >= (int32_t) sizeof(cs))
    {
        *status = U_INVALID_FORMAT_ERROR;
        error(line, "rules file name is too long");
        return NULL;
    }
    /* convert now - tokenValue doesn't survive the expect() below */
    u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
    cs[tokenValue->fLength] = '\0';

    expect(TOK_CLOSE_BRACE, NULL, NULL, status);
    if (U_FAILURE(*status))
    {
        return NULL;
    }

    /* make the filename including the directory; an empty inputdir is treated
       like a missing one so that inputdir[inputdirLength - 1] is never read
       out of bounds */
    useInputDir = (UBool) (inputdir != NULL && inputdirLength > 0);
    needed = (uint32_t) (2 /* ".." */
                         + 2 * uprv_strlen(U_FILE_SEP_STRING)
                         + uprv_strlen(U_ICU_UNIDATA)
                         + uprv_strlen(cs));
    if (useInputDir)
    {
        needed += inputdirLength;
        if (inputdir[inputdirLength - 1] != U_FILE_SEP_CHAR)
        {
            needed += (uint32_t) uprv_strlen(U_FILE_SEP_STRING);
        }
    }
    if (needed >= sizeof(filename))
    {
        *status = U_INVALID_FORMAT_ERROR;
        error(line, "path of rules file %s is too long", cs);
        return NULL;
    }
    if (useInputDir)
    {
        uprv_strcat(filename, inputdir);
        if (inputdir[inputdirLength - 1] != U_FILE_SEP_CHAR)
        {
            uprv_strcat(filename, U_FILE_SEP_STRING);
        }
    }
    uprv_strcat(filename, "..");
    uprv_strcat(filename, U_FILE_SEP_STRING);
    uprv_strcat(filename, U_ICU_UNIDATA);
    uprv_strcat(filename, U_FILE_SEP_STRING);
    uprv_strcat(filename, cs);

    ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
    if (U_FAILURE(*status)) {
        error(line, "An error occured while opening the input file %s\n", filename);
        return NULL;
    }

    /* We allocate more space than actually required
     * since the actual size needed for storing UChars
     * is not known in UTF-8 byte stream
     */
    size = ucbuf_size(ucbuf);
    pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * size);
    if (pTarget == NULL)
    {
        *status = U_MEMORY_ALLOCATION_ERROR;
        ucbuf_close(ucbuf);
        return NULL;
    }
    target = pTarget;
    targetLimit = pTarget + size;

    /* read the rules into the buffer */
    while (target < targetLimit)
    {
        c = ucbuf_getc(ucbuf, status);
        if (c == QUOTE)
        {
            quoted = (UBool) !quoted;
        }

        /* weiv (06/26/2002): adding the following:
         * - preserving spaces in commands [...]
         * - # comments until the end of line
         */
        if (c == STARTCOMMAND)
        {
            /* preserve commands
             * closing bracket will be handled by the
             * append at the end of the loop.
             * Stop at end of input or when the buffer cannot take another
             * (possibly two-unit) code point instead of running forever.
             */
            while (c != ENDCOMMAND && c != U_EOF && (targetLimit - target) >= 2)
            {
                U_APPEND_CHAR32(c, target,len);
                c = ucbuf_getc(ucbuf, status);
            }
            if (c != ENDCOMMAND)
            {
                *status = U_INVALID_FORMAT_ERROR;
                error(line, "unterminated '[' command in rules file %s", filename);
                failed = TRUE;
                break;
            }
        }
        else if (c == HASH && !quoted)
        {
            /* skip comments; a comment on the last line may end at EOF.
               The CR or LF that ends the comment is appended below, as before. */
            while (c != CR && c != LF && c != U_EOF)
            {
                c = ucbuf_getc(ucbuf, status);
            }
        }
        else if (c == ESCAPE)
        {
            c = unescape(ucbuf, status);
            if (c == U_ERR)
            {
                failed = TRUE;
                break;
            }
        }
        else if (c == SPACE || c == CR || c == LF)
        {
            /* ignore spaces carriage returns
             * and line feed unless in the form \uXXXX
             */
            continue;
        }

        if (c == U_EOF)
        {
            break;
        }
        /* a supplementary code point is appended as two UChars */
        if ((targetLimit - target) < ((c > 0xFFFF) ? 2 : 1))
        {
            break;
        }
        /* Append UChar * after dissembling if c > 0xffff*/
        {
            U_APPEND_CHAR32(c, target,len);
        }
    }

    if (!failed)
    {
        result = string_open(bundle, tag, pTarget, target - pTarget, status);
    }
    ucbuf_close(ucbuf);
    uprv_free(pTarget);
    return result;
}
/* Parses the body of a string resource: a string token followed by '}'.
   A resource whose tag is "%%UCARULES" is delegated to parseUCARules.
   tag is the resource key (may be NULL); startline is used for verbose output.
   Returns the new string resource, or NULL on error with *status set. */
static struct SResource *
parseString(char *tag, uint32_t startline, UErrorCode *status)
{
    struct UString *tokenValue;
    struct SResource *result;

    if (tag != NULL && uprv_strcmp(tag, "%%UCARULES") == 0)
    {
        return parseUCARules(tag, startline, status);
    }
    if(isVerbose()){
        printf(" string %s at line %i \n", (tag == NULL) ? "(null)" : tag,startline);
    }

    expect(TOK_STRING, &tokenValue, NULL, status);
    if (U_FAILURE(*status))
    {
        return NULL;
    }

    /* the resource has to be created before the next token is fetched, because
       the text behind tokenValue is overwritten by getToken/expect */
    result = string_open(bundle, tag, tokenValue->fChars, tokenValue->fLength, status);
    expect(TOK_CLOSE_BRACE, NULL, NULL, status);
    if (U_SUCCESS(*status))
    {
        return result;
    }
    string_close(result, status);
    return NULL;
}
/* Parses the body of an alias resource: a string token followed by '}'.
   tag is the resource key (may be NULL); startline is used for verbose output.
   Returns the new alias resource, or NULL on error with *status set. */
static struct SResource *
parseAlias(char *tag, uint32_t startline, UErrorCode *status)
{
    struct UString *tokenValue;
    struct SResource *result;

    expect(TOK_STRING, &tokenValue, NULL, status);
    if(isVerbose()){
        printf(" alias %s at line %i \n", (tag == NULL) ? "(null)" : tag,startline);
    }
    if (U_FAILURE(*status))
    {
        return NULL;
    }

    /* the resource has to be created before the next token is fetched, because
       the text behind tokenValue is overwritten by getToken/expect */
    result = alias_open(bundle, tag, tokenValue->fChars, tokenValue->fLength, status);
    expect(TOK_CLOSE_BRACE, NULL, NULL, status);
    if (U_SUCCESS(*status))
    {
        return result;
    }
    alias_close(result, status);
    return NULL;
}
/* Parses the body of a "CollationElements" table: (name resource)* '}'.
   Entries are handled by key:
     "Version"        - added to the table; its text is parsed into `version`
     "Override"       - added to the table
     "%%CollationBin" - dropped (it is regenerated from "Sequence")
     "Sequence"       - added to the table; unless UCONFIG_NO_COLLATION, a
                        collator is built from the rules and its binary image
                        is added as "%%CollationBin"
   In strict mode a failure to build the collator data is an error.
   tag is the table key; startline is used for diagnostics.
   Returns the table, or NULL on error (the table is closed, *status is set
   except when parseResource returned NULL without a failure code). */
static struct SResource *
parseCollationElements(char *tag, uint32_t startline, UErrorCode *status)
{
    struct SResource *result = NULL;
    struct SResource *member = NULL;
    struct UString *tokenValue;
    enum ETokenType token;
    char subtag[1024];
    /* zeroed so that a "Sequence" seen before "Version" reads a defined value */
    UVersionInfo version = { 0 };
    /* NOTE(review): override is computed below but never read in this function */
    UBool override = FALSE;
    uint32_t line;
    int32_t subtagLength;

    result = table_open(bundle, tag, status);
    if (result == NULL || U_FAILURE(*status))
    {
        return NULL;
    }
    if(isVerbose()){
        printf(" collation elements %s at line %i \n", (tag == NULL) ? "(null)" : tag,startline);
    }

    /* '{' . (name resource)* '}' */
    for (;;)
    {
        token = getToken(&tokenValue, &line, status);
        if (token == TOK_CLOSE_BRACE)
        {
            return result;
        }
        if (token != TOK_STRING)
        {
            table_close(result, status);
            *status = U_INVALID_FORMAT_ERROR;
            if (token == TOK_EOF)
            {
                error(startline, "unterminated table");
            }
            else
            {
                error(line, "Unexpected token %s", tokenNames[token]);
            }
            return NULL;
        }

        /* the key is copied into a fixed-size buffer: reject keys that don't fit */
        subtagLength = u_strlen(tokenValue->fChars);
        if (subtagLength >= (int32_t) sizeof(subtag))
        {
            table_close(result, status);
            *status = U_INVALID_FORMAT_ERROR;
            error(line, "resource key is too long");
            return NULL;
        }
        u_UCharsToChars(tokenValue->fChars, subtag, subtagLength + 1);
        if (U_FAILURE(*status))
        {
            table_close(result, status);
            return NULL;
        }

        member = parseResource(subtag, status);
        /* member is dereferenced below, so NULL is rejected as in realParseTable */
        if (member == NULL || U_FAILURE(*status))
        {
            table_close(result, status);
            return NULL;
        }

        /* NOTE(review): the branches below read member->u.fString without
           checking that the entry really is a string resource - confirm that
           the input format guarantees this. */
        if (uprv_strcmp(subtag, "Version") == 0)
        {
            char ver[40];
            int32_t length = member->u.fString.fLength;
            if (length >= (int32_t) sizeof(ver))
            {
                length = (int32_t) sizeof(ver) - 1;
            }
            u_UCharsToChars(member->u.fString.fChars, ver, length + 1); /* +1 for copying NULL */
            u_versionFromString(version, ver);
            table_add(result, member, line, status);
        }
        else if (uprv_strcmp(subtag, "Override") == 0)
        {
            override = FALSE;
            if (u_strncmp(member->u.fString.fChars, trueValue, u_strlen(trueValue)) == 0)
            {
                override = TRUE;
            }
            table_add(result, member, line, status);
        }
        else if(uprv_strcmp(subtag, "%%CollationBin")==0)
        {
            /* discard duplicate %%CollationBin if any*/
            /* NOTE(review): the parsed member is neither added nor closed here
               (same for keys not listed above) - looks like a leak; confirm
               which close function applies. */
        }
        else if (uprv_strcmp(subtag, "Sequence") == 0)
        {
#if UCONFIG_NO_COLLATION
            warning(line, "Not building collation elements because of UCONFIG_NO_COLLATION, see uconfig.h");
#else
            UErrorCode intStatus = U_ZERO_ERROR;
            /* do the collation elements */
            int32_t len = 0;
            uint8_t *data = NULL;
            UCollator *coll = NULL;
            UParseError parseError;

            /* add sequence */
            table_add(result, member, line, status);
            coll = ucol_openRules(member->u.fString.fChars, member->u.fString.fLength,
                                  UCOL_OFF, UCOL_DEFAULT_STRENGTH,&parseError, &intStatus);
            if (U_SUCCESS(intStatus) && coll != NULL)
            {
                data = ucol_cloneRuleData(coll, &len, &intStatus);
                /* tailoring rules version */
                coll->dataInfo.dataVersion[1] = version[0];
                if (U_SUCCESS(intStatus) && data != NULL)
                {
                    member = bin_open(bundle, "%%CollationBin", len, data, NULL, status);
                    /*table_add(bundle->fRoot, member, line, status);*/
                    table_add(result, member, line, status);
                }
                else
                {
                    warning(line, "could not obtain rules from collator");
                    if(isStrict()){
                        /* handled by the common failure check at the end of the loop */
                        *status = U_INVALID_FORMAT_ERROR;
                    }
                }
                /* released on every path, not just on success */
                uprv_free(data);
                ucol_close(coll);
            }
            else
            {
                warning(line, "%%Collation could not be constructed from CollationElements - check context!");
                if(isStrict()){
                    /* handled by the common failure check at the end of the loop */
                    *status = U_INVALID_FORMAT_ERROR;
                }
            }
#endif
        }

        /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/
        /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/
        if (U_FAILURE(*status))
        {
            table_close(result, status);
            return NULL;
        }
    }

    /* not reached */
    *status = U_INTERNAL_PROGRAM_ERROR;
    return NULL;
}
/* Necessary, because CollationElements requires the bundle->fRoot member to be present which,
if this weren't special-cased, wouldn't be set until the entire file had been processed. */
/* Parses table entries - (name resource)* '}' - into an already opened table.
   table is the table to fill; it is closed by this function on every error.
   tag is only used for verbose output; startline is the line of the table's
   opening brace, used for diagnostics.
   Returns table on success and NULL on error. */
static struct SResource *
realParseTable(struct SResource *table, char *tag, uint32_t startline, UErrorCode *status)
{
    struct SResource *member = NULL;
    struct UString *tokenValue=NULL;
    enum ETokenType token;
    char subtag[1024];
    uint32_t line;
    int32_t subtagLength;
    UBool readToken = FALSE;

    /* '{' . (name resource)* '}' */
    if(isVerbose()){
        printf(" parsing table %s at line %i \n", (tag == NULL) ? "(null)" : tag,startline);
    }
    for (;;)
    {
        token = getToken(&tokenValue, &line, status);
        if (token == TOK_CLOSE_BRACE)
        {
            if (!readToken) {
                warning(startline, "Encountered empty table");
            }
            return table;
        }
        if (token != TOK_STRING)
        {
            table_close(table, status);
            *status = U_INVALID_FORMAT_ERROR;
            if (token == TOK_EOF)
            {
                error(startline, "unterminated table");
            }
            else
            {
                error(line, "unexpected token %s", tokenNames[token]);
            }
            return NULL;
        }

        /* the key is copied into a fixed-size buffer: reject keys that don't fit */
        subtagLength = u_strlen(tokenValue->fChars);
        if (subtagLength >= (int32_t) sizeof(subtag))
        {
            table_close(table, status);
            *status = U_INVALID_FORMAT_ERROR;
            error(line, "resource key is too long");
            return NULL;
        }
        u_UCharsToChars(tokenValue->fChars, subtag, subtagLength + 1);
        if (U_FAILURE(*status))
        {
            error(line, "parse error. Stopped parsing with %s", u_errorName(*status));
            table_close(table, status);
            return NULL;
        }

        member = parseResource(subtag, status);
        if (member == NULL || U_FAILURE(*status))
        {
            error(line, "parse error. Stopped parsing with %s", u_errorName(*status));
            table_close(table, status);
            return NULL;
        }

        table_add(table, member, line, status);
        if (U_FAILURE(*status))
        {
            error(line, "parse error. Stopped parsing with %s", u_errorName(*status));
            table_close(table, status);
            return NULL;
        }
        readToken = TRUE;
    }

    /* not reached */
    *status = U_INTERNAL_PROGRAM_ERROR;
    return NULL;
}
/* Parses the body of a table resource. A table whose tag is
   "CollationElements" is handed to parseCollationElements; any other table is
   opened here and filled by realParseTable.
   Returns the table, or NULL on error. */
static struct SResource *
parseTable(char *tag, uint32_t startline, UErrorCode *status)
{
    struct SResource *table;
    UBool isCollation = (UBool) (tag != NULL && uprv_strcmp(tag, "CollationElements") == 0);

    if (isCollation)
    {
        return parseCollationElements(tag, startline, status);
    }

    if(isVerbose()){
        printf(" table %s at line %i \n", (tag == NULL) ? "(null)" : tag,startline);
    }

    table = table_open(bundle, tag, status);
    if (table != NULL && U_SUCCESS(*status))
    {
        return realParseTable(table, tag, startline, status);
    }
    return NULL;
}
/* Parses the body of an array resource: (resource [','])* '}'.
   A bare string token becomes a string element directly; anything else is
   parsed as a nested resource without a key. An empty array only produces a
   warning.
   Returns the array, or NULL on error (the array is closed first). */
static struct SResource *
parseArray(char *tag, uint32_t startline, UErrorCode *status)
{
    struct SResource *array;
    struct SResource *element = NULL;
    struct UString *tokenValue;
    enum ETokenType next;
    UBool sawElement = FALSE;

    array = array_open(bundle, tag, status);
    if (array == NULL || U_FAILURE(*status))
    {
        return NULL;
    }
    if(isVerbose()){
        printf(" array %s at line %i \n", (tag == NULL) ? "(null)" : tag,startline);
    }

    /* '{' . resource [','] '}' */
    for (;;)
    {
        /* look, but only consume the token if it really closes the array */
        next = peekToken(0, &tokenValue, NULL, status);
        if (next == TOK_CLOSE_BRACE)
        {
            getToken(NULL, NULL, status);
            if (!sawElement) {
                warning(startline, "Encountered empty array");
            }
            return array;
        }
        if (next == TOK_EOF)
        {
            array_close(array, status);
            *status = U_INVALID_FORMAT_ERROR;
            error(startline, "unterminated array");
            return NULL;
        }

        if (next != TOK_STRING)
        {
            element = parseResource(NULL, status);
        }
        else
        {
            /* string arrays are a special case */
            getToken(&tokenValue, NULL, status);
            element = string_open(bundle, NULL, tokenValue->fChars, tokenValue->fLength, status);
        }
        if (element == NULL || U_FAILURE(*status))
        {
            array_close(array, status);
            return NULL;
        }

        array_add(array, element, status);
        if (U_FAILURE(*status))
        {
            array_close(array, status);
            return NULL;
        }

        /* eat optional comma if present */
        if (peekToken(0, NULL, NULL, status) == TOK_COMMA)
        {
            getToken(NULL, NULL, status);
        }
        if (U_FAILURE(*status))
        {
            array_close(array, status);
            return NULL;
        }
        sawElement = TRUE;
    }
}
/* Parses the body of an int vector: (string [','])* '}'.
   Each string is converted with uprv_strtoul (base 0, so decimal, octal and
   hexadecimal notation are accepted) and must be consumed completely;
   otherwise *status is set to U_INVALID_CHAR_FOUND.
   Returns the vector, or NULL on error (the vector is closed first). */
static struct SResource *
parseIntVector(char *tag, uint32_t startline, UErrorCode *status)
{
    struct SResource *vector;
    enum ETokenType next;
    char *text;
    char *parseEnd;
    int32_t number;
    UBool wellFormed;
    UBool sawElement = FALSE;

    vector = intvector_open(bundle, tag, status);
    if (vector == NULL || U_FAILURE(*status))
    {
        return NULL;
    }
    if(isVerbose()){
        printf(" vector %s at line %i \n", (tag == NULL) ? "(null)" : tag,startline);
    }

    /* '{' . string [','] '}' */
    for (;;)
    {
        /* look, but only consume the token if it really closes the vector */
        next = peekToken(0, NULL, NULL, status);
        if (next == TOK_CLOSE_BRACE)
        {
            getToken(NULL, NULL, status);
            if (!sawElement) {
                warning(startline, "Encountered empty int vector");
            }
            return vector;
        }

        text = getInvariantString(NULL, status);
        if (U_FAILURE(*status))
        {
            intvector_close(vector, status);
            return NULL;
        }

        /* the element is valid only if the conversion stopped at the terminating NUL */
        number = uprv_strtoul(text, &parseEnd, 0);
        wellFormed = (UBool) (*parseEnd == '\0');
        uprv_free(text);

        if (wellFormed)
        {
            intvector_add(vector, number, status);
            next = peekToken(0, NULL, NULL, status);
        }
        else
        {
            *status=U_INVALID_CHAR_FOUND;
        }
        if (U_FAILURE(*status))
        {
            intvector_close(vector, status);
            return NULL;
        }

        /* the comma is optional (even though it is required to prevent the reader from concatenating
           consecutive entries) so that a missing comma on the last entry isn't an error */
        if (next == TOK_COMMA)
        {
            getToken(NULL, NULL, status);
        }
        sawElement = TRUE;
    }
}
/* Parses the body of a binary resource: a string of hexadecimal digit pairs
   followed by '}'. Every pair becomes one byte of the resulting binary
   resource. An empty string yields an empty binary resource plus a warning; an
   odd number of characters or a pair that is not fully consumed by
   uprv_strtoul(..., 16) is reported as U_INVALID_CHAR_FOUND.
   tag is the resource key; startline is used for diagnostics.
   Returns the new resource, or NULL on error with *status set. */
static struct SResource *
parseBinary(char *tag, uint32_t startline, UErrorCode *status)
{
    struct SResource *result = NULL;
    uint8_t *value;
    char *string;
    char toConv[3] = {'\0', '\0', '\0'};
    uint32_t count;
    uint32_t i;
    uint32_t line;
    char *stopstring;

    string = getInvariantString(&line, status);
    if (string == NULL || U_FAILURE(*status))
    {
        return NULL;
    }
    expect(TOK_CLOSE_BRACE, NULL, NULL, status);
    if (U_FAILURE(*status))
    {
        uprv_free(string);
        return NULL;
    }
    if(isVerbose()){
        printf(" binary %s at line %i \n", (tag == NULL) ? "(null)" : tag,startline);
    }

    count = uprv_strlen(string);
    if (count == 0)
    {
        result = bin_open(bundle, tag, 0, NULL, "",status);
        warning(startline, "Encountered empty binary tag");
        uprv_free(string);
        return result;
    }
    if ((count % 2) != 0)
    {
        /* every byte needs exactly two digits */
        *status = U_INVALID_CHAR_FOUND;
        uprv_free(string);
        error(line, "Encountered invalid binary string");
        return NULL;
    }

    /* two characters per byte */
    value = uprv_malloc(sizeof(uint8_t) * (count / 2));
    if (value == NULL)
    {
        uprv_free(string);
        *status = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }

    for (i = 0; i < count; i += 2)
    {
        toConv[0] = string[i];
        toConv[1] = string[i + 1];
        value[i >> 1] = (uint8_t) uprv_strtoul(toConv, &stopstring, 16);
        /* the pair is valid only if the conversion consumed both characters */
        if (*stopstring != '\0')
        {
            uprv_free(value);
            uprv_free(string);
            *status=U_INVALID_CHAR_FOUND;
            error(line, "Encountered invalid binary string");
            return NULL;
        }
    }

    result = bin_open(bundle, tag, (i >> 1), value,NULL, status);
    uprv_free(value);
    uprv_free(string);
    return result;
}
/* Parses the body of an integer resource: a string followed by '}'.
   The string is converted with uprv_strtoul (base 0, so decimal, octal and
   hexadecimal notation are accepted) and must be consumed completely;
   otherwise *status is set to U_INVALID_CHAR_FOUND. An empty string produces a
   warning and the value 0.
   Returns the new resource, or NULL on error. */
static struct SResource *
parseInteger(char *tag, uint32_t startline, UErrorCode *status)
{
    struct SResource *result = NULL;
    char *text;
    char *parseEnd;
    int32_t number;

    text = getInvariantString(NULL, status);
    if (text == NULL || U_FAILURE(*status))
    {
        return NULL;
    }
    expect(TOK_CLOSE_BRACE, NULL, NULL, status);
    if (U_FAILURE(*status))
    {
        uprv_free(text);
        return NULL;
    }
    if(isVerbose()){
        printf(" integer %s at line %i \n", (tag == NULL) ? "(null)" : tag,startline);
    }

    if (text[0] == '\0')
    {
        warning(startline, "Encountered empty integer. Default value is 0.");
    }

    /* the text is valid only if the conversion stopped at the terminating NUL */
    number = uprv_strtoul(text, &parseEnd, 0);
    if (*parseEnd != '\0')
    {
        *status=U_INVALID_CHAR_FOUND;
    }
    else
    {
        result = int_open(bundle, tag, number, status);
    }
    uprv_free(text);
    return result;
}
/* Parses the body of an import resource: a file name followed by '}'.
   The file (resolved against inputdir when that is set) is read completely
   and stored as a binary resource; the resolved name, or NULL when there is no
   inputdir, is passed to bin_open as well.
   tag is the resource key; startline is used for verbose output.
   Returns the new resource, or NULL on error with *status set. All temporary
   buffers are released on every path. */
static struct SResource *
parseImport(char *tag, uint32_t startline, UErrorCode *status)
{
    struct SResource *result = NULL;
    FileStream *file;
    int32_t len;
    uint8_t *data;
    char *filename;
    uint32_t line;
    char *fullname = NULL;
    int32_t numRead = 0;

    filename = getInvariantString(&line, status);
    if (filename == NULL || U_FAILURE(*status))
    {
        return NULL;
    }
    expect(TOK_CLOSE_BRACE, NULL, NULL, status);
    if (U_FAILURE(*status))
    {
        uprv_free(filename);
        return NULL;
    }
    if(isVerbose()){
        printf(" import %s at line %i \n", (tag == NULL) ? "(null)" : tag,startline);
    }

    /* Open the input file for reading */
    if (inputdir == NULL)
    {
        file = T_FileStream_open(filename, "rb");
    }
    else
    {
        int32_t count = uprv_strlen(filename);
        /* an empty inputdir gets no separator and must not be indexed at [-1] */
        UBool addSeparator = (UBool) (inputdirLength > 0
                                      && inputdir[inputdirLength - 1] != U_FILE_SEP_CHAR);

        /* room for directory, optional separator, name and the NUL */
        fullname = (char *) uprv_malloc(inputdirLength + count + 2);
        if(fullname == NULL)
        {
            *status = U_MEMORY_ALLOCATION_ERROR;
            uprv_free(filename);
            return NULL;
        }
        uprv_strcpy(fullname, inputdir);
        if (addSeparator)
        {
            fullname[inputdirLength] = U_FILE_SEP_CHAR;
            fullname[inputdirLength + 1] = '\0';
        }
        uprv_strcat(fullname, filename);
        file = T_FileStream_open(fullname, "rb");
    }

    if (file == NULL)
    {
        error(line, "couldn't open input file %s", filename);
        *status = U_FILE_ACCESS_ERROR;
        uprv_free(filename);
        uprv_free(fullname);
        return NULL;
    }

    len = T_FileStream_size(file);
    data = (uint8_t*)uprv_malloc(len * sizeof(uint8_t));
    if(data == NULL)
    {
        *status = U_MEMORY_ALLOCATION_ERROR;
        T_FileStream_close (file);
        uprv_free(filename);
        uprv_free(fullname);
        return NULL;
    }

    numRead = T_FileStream_read (file, data, len);
    T_FileStream_close (file);
    if (numRead != len)
    {
        /* a short read would put uninitialized bytes into the resource */
        error(line, "couldn't read input file %s", filename);
        *status = U_FILE_ACCESS_ERROR;
    }
    else
    {
        result = bin_open(bundle, tag, len, data, fullname, status);
    }

    uprv_free(data);
    uprv_free(filename);
    uprv_free(fullname);
    return result;
}
/* Parses the body of an include resource: a file name followed by '}'.
   The file (resolved against inputdir when that is set) is opened through a
   UCHARBUF and its buffer contents are stored as a string resource.
   tag is the resource key; startline is used for verbose output.
   Returns the new resource, or NULL on error with *status set. All temporary
   buffers are released on every path. */
static struct SResource *
parseInclude(char *tag, uint32_t startline, UErrorCode *status)
{
    struct SResource *result;
    int32_t len=0;
    char *filename;
    uint32_t line;
    const UChar *contents;
    UCHARBUF *ucbuf;
    char *fullname = NULL;
    int32_t count = 0;
    const char* cp = NULL;

    filename = getInvariantString(&line, status);
    /* check before measuring: filename is NULL when the token could not be read */
    if (filename == NULL || U_FAILURE(*status))
    {
        return NULL;
    }
    count = uprv_strlen(filename);

    expect(TOK_CLOSE_BRACE, NULL, NULL, status);
    if (U_FAILURE(*status))
    {
        uprv_free(filename);
        return NULL;
    }
    if(isVerbose()){
        printf(" include %s at line %i \n", (tag == NULL) ? "(null)" : tag,startline);
    }

    /* room for directory, optional separator, name and the NUL */
    fullname = (char *) uprv_malloc(inputdirLength + count + 2);
    if(fullname == NULL)
    {
        *status = U_MEMORY_ALLOCATION_ERROR;
        uprv_free(filename);
        return NULL;
    }

    /* an empty inputdir is treated like a missing one so that
       inputdir[inputdirLength - 1] is never read out of bounds */
    if (inputdir != NULL && inputdirLength > 0)
    {
        uprv_strcpy(fullname, inputdir);
        if (inputdir[inputdirLength - 1] != U_FILE_SEP_CHAR)
        {
            fullname[inputdirLength] = U_FILE_SEP_CHAR;
            fullname[inputdirLength + 1] = '\0';
        }
        uprv_strcat(fullname, filename);
    }
    else
    {
        uprv_strcpy(fullname,filename);
    }

    ucbuf = ucbuf_open(fullname, &cp,getShowWarning(),FALSE,status);
    if (U_FAILURE(*status)) {
        error(line, "couldn't open input file %s\n", filename);
        uprv_free(filename);
        uprv_free(fullname);
        return NULL;
    }

    /* fetch the buffer first: len is an output of ucbuf_getBuffer and must not
       be read in the same argument list that sets it */
    contents = ucbuf_getBuffer(ucbuf,&len,status);
    result = string_open(bundle, tag,(UChar*) contents,len, status);

    /* NOTE(review): closing here assumes string_open copies the text, as
       parseUCARules also frees its buffer right after string_open - confirm */
    ucbuf_close(ucbuf);
    uprv_free(filename);
    uprv_free(fullname);
    return result;
}
/* Works out the type of a resource that has no explicit ":type", using the
   tokens that follow its opening brace (which has already been consumed):
       { {            => array (nested)
       { : or { }     => array
       { string ,     => string array
       { string { / : => table
       { string }     => string
   Nothing is consumed. Returns RT_UNKNOWN with *status set to a failure code
   when the tokens fit none of these patterns or peeking fails. */
static enum EResourceType
guessResourceType(UErrorCode *status)
{
    uint32_t line;
    enum ETokenType token = peekToken(0, NULL, &line, status);

    if (U_FAILURE(*status))
    {
        return RT_UNKNOWN;
    }
    /* Commented by Jing/GCL */
    /* if (token == TOK_OPEN_BRACE || token == TOK_COLON )*/
    if (token == TOK_OPEN_BRACE || token == TOK_COLON ||token ==TOK_CLOSE_BRACE )
    {
        return RT_ARRAY;
    }
    if (token != TOK_STRING)
    {
        *status = U_INVALID_FORMAT_ERROR;
        error(line, "Unexpected token after '{'");
        return RT_UNKNOWN;
    }

    /* a string: the token after it decides */
    token = peekToken(1, NULL, &line, status);
    if (U_FAILURE(*status))
    {
        return RT_UNKNOWN;
    }
    switch (token)
    {
    case TOK_COMMA:
        return RT_ARRAY;
    case TOK_OPEN_BRACE:
        return RT_TABLE;
    case TOK_CLOSE_BRACE:
        return RT_STRING;
    /* added by Jing/GCL to make table work when :table is omitted */
    case TOK_COLON:
        return RT_TABLE;
    default:
        break;
    }
    *status = U_INVALID_FORMAT_ERROR;
    error(line, "Unexpected token after string, expected ',', '{' or '}'");
    return RT_UNKNOWN;
}

/* Parses one resource, starting right after its name:
       name . [ ':' type ] '{' resource '}'
   If a ':' follows, the explicit type is read with parseResourceType;
   otherwise the type is inferred from the lookahead tokens. The matching
   parse* function then consumes the body.
   tag is the resource key (NULL for array elements).
   Returns the parsed resource, or NULL on error with *status set. */
static struct SResource *
parseResource(char *tag, UErrorCode *status)
{
    enum EResourceType resType = RT_UNKNOWN;
    struct UString *tokenValue;
    uint32_t startline;
    enum ETokenType token = getToken(&tokenValue, &startline, status);

    if(isVerbose()){
        printf(" resource %s at line %i \n", (tag == NULL) ? "(null)" : tag,startline);
    }

    if (token == TOK_EOF)
    {
        *status = U_INVALID_FORMAT_ERROR;
        error(startline, "Unexpected EOF encountered");
        return NULL;
    }
    else if (token == TOK_ERROR)
    {
        *status = U_INVALID_FORMAT_ERROR;
        return NULL;
    }
    else if (token == TOK_COLON)
    {
        /* explicit type, then the opening brace */
        resType = parseResourceType(status);
        expect(TOK_OPEN_BRACE, &tokenValue, &startline, status);
        if (U_FAILURE(*status))
        {
            return NULL;
        }
    }
    else if (token != TOK_OPEN_BRACE)
    {
        *status = U_INVALID_FORMAT_ERROR;
        error(startline, "syntax error while reading a resource, expected '{' or ':'");
        return NULL;
    }

    if (resType == RT_UNKNOWN)
    {
        /* No explicit type, so try to work it out. */
        resType = guessResourceType(status);
        if (U_FAILURE(*status))
        {
            return NULL;
        }
        /* printf("Type guessed as %s\n", resourceNames[resType]); */
    }

    /* We should now know what we need to parse next, so call the appropriate parser
       function and return. */
    switch (resType)
    {
    case RT_STRING:
        return parseString(tag, startline, status);
    case RT_TABLE:
        return parseTable(tag, startline, status);
    case RT_ARRAY:
        return parseArray(tag, startline, status);
    case RT_ALIAS:
        return parseAlias(tag, startline, status);
    case RT_BINARY:
        return parseBinary(tag, startline, status);
    case RT_INTEGER:
        return parseInteger(tag, startline, status);
    case RT_IMPORT:
        return parseImport(tag, startline, status);
    case RT_INCLUDE:
        return parseInclude(tag, startline, status);
    case RT_INTVECTOR:
        return parseIntVector(tag, startline, status);
    default:
        *status = U_INTERNAL_PROGRAM_ERROR;
        error(startline, "internal error: unknown resource type found and not handled");
    }
    return NULL;
}
/* Parses a complete resource bundle source:
       locale [ ':' "table" ] '{' (name resource)* '}'
   buf is the input to tokenize; currentInputDir (may be NULL) is remembered
   for resolving imported and included files.
   Returns the new bundle, or NULL on error with *status set; the bundle is
   closed on every error after it was opened. In strict mode, text after the
   closing brace of the bundle is an error, otherwise only a warning. */
struct SRBRoot *
parse(UCHARBUF *buf, const char *currentInputDir, UErrorCode *status)
{
    struct UString *tokenValue;
    uint32_t line;
    /* added by Jing/GCL */
    enum EResourceType bundleType;
    enum ETokenType token;

    initLookahead(buf, status);
    inputdir = currentInputDir;
    inputdirLength = (inputdir != NULL) ? uprv_strlen(inputdir) : 0;

    bundle = bundle_open(status);
    if (bundle == NULL || U_FAILURE(*status))
    {
        return NULL;
    }

    /* the locale name; stop here on failure, since the status is reset below
       once the ':' or '{' has been seen */
    expect(TOK_STRING, &tokenValue, NULL, status);
    if (U_SUCCESS(*status))
    {
        bundle_setlocale(bundle, tokenValue->fChars, status);
    }
    if (U_FAILURE(*status))
    {
        bundle_close(bundle, status);
        return NULL;
    }

    /* The following code is to make Empty bundle work no matter with :table specifer or not */
    token = getToken(NULL, &line, status);
    if (token == TOK_COLON)
    {
        /* explicit type: only "table" is acceptable for the bundle itself */
        *status = U_ZERO_ERROR;
        bundleType = parseResourceType(status);
        if (bundleType == RT_TABLE)
        {
            expect(TOK_OPEN_BRACE, NULL, &line, status);
        }
        else
        {
            *status = U_PARSE_ERROR;
            error(line, "parse error. Stopped parsing with %s", u_errorName(*status));
        }
    }
    else if (token == TOK_OPEN_BRACE)
    {
        *status = U_ZERO_ERROR;
    }
    else
    {
        *status = U_PARSE_ERROR;
        error(line, "parse error, did not find open-brace '{' or colon ':', stopped with %s", u_errorName(*status));
    }
    /* The above is added by Jing/GCL */
    if (U_FAILURE(*status))
    {
        bundle_close(bundle, status);
        return NULL;
    }

    realParseTable(bundle->fRoot, NULL, line, status);
    if (U_FAILURE(*status))
    {
        /* realParseTable has already closed the table */
        bundle->fRoot = NULL;
        bundle_close(bundle, status);
        return NULL;
    }

    if (getToken(NULL, &line, status) != TOK_EOF)
    {
        warning(line, "extraneous text after resource bundle (perhaps unmatched braces)");
        if(isStrict()){
            *status = U_INVALID_FORMAT_ERROR;
            /* release the bundle instead of leaking it */
            bundle_close(bundle, status);
            return NULL;
        }
    }
    return bundle;
}