| /* |
| ****************************************************************************** |
| * Copyright (C) 2005-2007, International Business Machines Corporation and * |
| * others. All Rights Reserved. * |
| ****************************************************************************** |
| */ |
| |
| #include <stdio.h> |
| #include <string.h> |
| #include <stdlib.h> |
| #include <time.h> |
| |
| #include "wbnf.h" |
| |
| // Most of this code is meant to test the test code. It's a self test. |
| // Normally this isn't run. |
| #define TEST_WBNF_TEST 0 |
| |
| /////////////////////////////////////////////////////////// |
| // |
| // Constants and the most basic helper classes |
| // |
| |
| static const char DIGIT_CHAR[] = "0123456789"; |
| static const char WHITE_SPACE[] = {'\t', ' ', '\r', '\n', 0}; |
| static const char ALPHABET[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; |
| static const char SPECIAL[] = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"; |
| |
| static inline UBool isInList(const char c /*in*/, const char list[] /*in*/){ |
| const char * p = list; |
| for (;*p != 0 && *p != c; p++); |
| return *p?TRUE:FALSE; |
| } |
| static inline UBool isDigit(char c) {return isInList(c, DIGIT_CHAR);} |
| static inline UBool isWhiteSpace(char c) {return isInList(c, WHITE_SPACE);} |
| static inline UBool isAlphabet(char c) {return isInList(c, ALPHABET);} |
| static inline UBool isSpecialAsciiChar(char c) {return isInList(c,SPECIAL);} |
| |
| |
| |
| /////////////////////////////////////////////////////////// |
| // |
| // Helper classes |
| // |
| |
| class Buffer_byte{ |
| // Utility class, can be treated as an auto expanded array. no boundary check. |
| |
| typedef char byte; |
| byte * start; |
| byte * current; |
| int buffer_size; // size unit is byte |
| public: |
| inline int content_size(){return current - start;} // size unit is byte |
| |
| private: |
| inline void expand(int add_size = 100){ // size unit is byte |
| int new_size = buffer_size + add_size; |
| |
| int cs_snap = content_size(); |
| start = (byte *) realloc(start, new_size); // may change the value of start |
| current = start + cs_snap; |
| |
| memset(current, 0, add_size); |
| buffer_size = new_size; |
| } |
| |
| inline void expand_to(int size){ |
| int r = size - buffer_size; |
| if (r > 0) { |
| expand(r); // simply expand, no block alignment |
| } |
| } |
| Buffer_byte(const Buffer_byte &); |
| Buffer_byte & operator = (const Buffer_byte &); |
| public: |
| Buffer_byte():start(NULL),current(start),buffer_size(0){ |
| expand(); |
| } |
| ~Buffer_byte(){ |
| free(start); |
| } |
| |
| inline void reset(){ |
| start != NULL ? memset(start, 0, buffer_size) : 0; |
| current = start; |
| } |
| |
| // Using memory copy method to append a C array to buffer, |
| inline void append(const void * c, int size){ // size unit is byte |
| expand_to(content_size() + size) ; |
| memcpy(current, c, size); |
| current = current + size; |
| } |
| |
| byte * buffer(){ |
| return start; |
| } |
| }; |
| |
| /* |
| The class(es) try to work as bulid-in array, so it overloads these two operators |
| operator type *(); |
| type & operator[]; |
| The first is used to auto type convert, the latter is used to select member. |
| |
| A small trick is the class does not overload the address-of operator. This |
| behavior is different from bulid-in array, but it give us the opportunity |
| to get the address of the class itself. |
| */ |
| //template<typename type> |
| // class BUFFER{ |
| // typedef BUFFER name; |
| #define BUFFER(type, name)\ |
| class name {\ |
| private:\ |
| Buffer_byte buf;\ |
| public:\ |
| name & reset() {buf.reset(); return *this;}\ |
| name & append(type c) {buf.append(&c, sizeof(type)); return *this;}\ |
| name & append_array(const type * p, int size) {buf.append(p, sizeof(type)*size); return *this;}\ |
| type & operator [] (int i) { return ((type *) buf.buffer())[i];}\ |
| operator type *(){return (type *) buf.buffer();} \ |
| int content_size(){return buf.content_size() / sizeof(type);}\ |
| } |
| |
| |
| class Pick{ |
| /* The Pick is the basic language generator element*/ |
| public: |
| // generate a string accroding the syntax |
| // Return a null-terminated c-string. The buffer is owned by callee. |
| virtual const char* next() = 0; |
| virtual ~Pick(){}; |
| }; |
| |
| //typedef BUFFER<char> Buffer_char; |
| //typedef BUFFER<int> Buffer_int; |
| //typedef BUFFER<Pick *> Buffer_pPick; |
| BUFFER(char, Buffer_char); |
| BUFFER(int, Buffer_int); |
| BUFFER(Pick *, Buffer_pPick); |
| |
| class SymbolTable{ |
| /* Helper class. |
| * It's a mapping table between 'variable name' and its 'active Pick object' |
| */ |
| private: |
| Buffer_char name_buffer; // var names storage space |
| |
| Buffer_int names; // points to name (offset in name_buffer) |
| Buffer_pPick refs; // points to Pick |
| |
| int get_index(const char *const var_name){ |
| int len = names.content_size(); |
| for (int i=0; i< len; i++){ |
| if (strcmp(var_name, name_buffer + names[i]) == 0){ |
| return i; |
| } |
| } |
| return -1; |
| } |
| |
| public: |
| enum RESULT {EMPTY, NO_VAR, NO_REF, HAS_REF}; |
| |
| RESULT find(const char *const var_name /*[in] c-string*/, Pick * * ref = NULL /*[out] Pick* */){ |
| if (!var_name) return EMPTY; // NULL name |
| |
| int i = get_index(var_name); |
| if (i == -1){ |
| return NO_VAR; // new name |
| } |
| if (!refs[i]){ // exist name, no ref |
| return NO_REF; |
| } else { |
| if (ref) { |
| *ref = refs[i]; |
| } |
| return HAS_REF; // exist name, has ref |
| } |
| } |
| |
| void put(const char *const var_name, Pick *const var_ref = NULL){ |
| int i = get_index(var_name); |
| switch(find(var_name)){ |
| case EMPTY: // NULL name |
| break; |
| case NO_VAR: // new name |
| int offset; |
| offset = name_buffer.content_size(); |
| name_buffer.append_array(var_name, strlen(var_name) + 1); |
| names.append(offset); |
| refs.append(var_ref); |
| break; |
| case NO_REF: // exist name, no ref |
| refs[i] = var_ref; // link definition with variable |
| break; |
| case HAS_REF: // exist name, has ref |
| if (var_ref){ |
| refs[i] = var_ref; |
| } |
| break; |
| default: |
| ; // ASSERT(FALSE); |
| } |
| return; |
| } |
| |
| UBool is_complete(){ |
| int n = names.content_size(); |
| for (int i=0; i<n; ++i){ |
| if (refs[i] == NULL){ |
| return FALSE; |
| } |
| } |
| return TRUE; |
| } |
| |
| void reset(){ |
| names.reset(); |
| name_buffer.reset(); |
| |
| // release memory here |
| int s = refs.content_size(); |
| for (int i=0; i < s; i++){ |
| delete refs[i]; // TOFIX: point alias/recursion problem |
| } |
| refs.reset(); |
| } |
| |
| ~SymbolTable(){ |
| reset(); |
| } |
| }; |
| |
| |
| /* |
| // Document of class Escaper |
| // |
| // ATTENTION: |
| // From http://icu-project.org/userguide/Collate_Customization.html. |
| // We get the precedence of escape/quote operations |
| // |
| // (highest) 1. backslash \ |
| // 2. two single quotes '' |
| // 3. quoting ' ' |
| // |
| // ICU Collation should accept following as the same string. |
| // |
| // 1) 'ab'c _ |
| // 2) a\bc \ |
| // 3) a'b'\c |- They are equal. |
| // 4) abc _/ |
| // |
| // From "two single quotes", we have following deductions |
| // D1. empty quoting is illgal. (obviously) |
| // D2. no contact operation between two quotings |
| // '.''.' is not .. it is .'. |
| // D3. "two single quotes" cannot contact two quoting simultaneously |
| // '..''''.' is not ..'. it is ..''. |
| // NOTICE: |
| // "two single quotes" can contact before one quoting |
| // '''.' is '. |
| // "two single quotes" can literally contact after one quoting |
| // But, from syntax, it's one quoting including a "two single quotes" |
| // '.''' is .' |
| // D4. "two single quotes" cannot solely be included in quoting |
| // '''' is not ' it is '' |
| // NOTICE: These are legal |
| // '.''.' is .'. |
| // '.''' is .' |
| // |
| // dicision |
| // /\ |
| // /__\ |
| // output buffer input buffer |
| // |
| // To make our dicision (within an atom operation) without caring input and output buffer, |
| // following calling pattern (within an atom operation) shall be avoided |
| // |
| // P1 open_quoting() then close_quoting() (direct violation) D1 |
| // P2 close_quoting() then open_quoting() (direct violation) D2 |
| // P3 empty open_quoting() (indirect violation) D1, D4 |
| // P4 empty close_quoting() (indirect violation) D2, D3 |
| // P5 open_quoting() then two single quotes (indirect violation) D4 |
| // P6 close_quoting() then two single quotes (indirect violation) D3 |
| // |
| // two single quotes escaping will not open_ or close_ quoting() |
| // The choice will not lose some quoing forms. |
| // |
| // For open_quoting(), |
| // we may get this form quoting ''' P5 |
| // It may raise a bug ''''x |
| // If we expect |
| // '''.' let the next char open the quoting |
| // '.''.' the quoting is already opened by preceding char |
| // |
| // For close_quoting() |
| // we will get this form quoting '.''' P6 |
| // It may raise a bug '.''''.' |
| // If we expect |
| // '.'''\. let the next char close the quoting |
| // '.''''.' the expectation is wrong! using '.'\''.' instead |
| // |
| // It's a hard work to re-adjust generation opportunity for various escaping form. |
| // We just simply ignore it. |
| */ |
| class Escaper{ |
| public: |
| enum CHOICE {YES, NO, RAND}; |
| enum ESCAPE_FORM {BSLASH_ONLY, QUOTE_ONLY, QUOTE_AND_BSLAH, RAND_ESC}; |
| private: |
| class Bool{ // A wrapper class for CHOICE, to auto adapter UBool class |
| private: |
| const CHOICE tag; |
| public: |
| Bool(CHOICE flag=RAND):tag(flag){} |
| operator UBool() { // conversion operator |
| return tag == RAND ? rand()%2 : tag == YES; |
| //if (tag == RAND){ |
| // return rand()%2 == 1; |
| //} else { |
| // return tag == YES ? TRUE : FALSE; |
| //} |
| } |
| }; |
| public: |
| Escaper(CHOICE escapeLiteral = RAND, |
| CHOICE twoQuotesEscape = RAND, |
| ESCAPE_FORM escapeForm = RAND_ESC): |
| escape_form(escapeForm), |
| escape_literal(escapeLiteral), |
| two_quotes_escape(twoQuotesEscape), |
| is_quoting(FALSE){} |
| private: |
| Buffer_char str; |
| ESCAPE_FORM escape_form; |
| Bool escape_literal; |
| Bool two_quotes_escape; |
| UBool quote_escape; |
| UBool bslash_escape; |
| UBool is_quoting; |
| |
| void set_options(){ |
| ESCAPE_FORM t = escape_form == RAND_ESC ? (ESCAPE_FORM) (rand()%3) : escape_form; |
| switch (t){ |
| case BSLASH_ONLY : |
| bslash_escape = TRUE; quote_escape = FALSE; break; |
| case QUOTE_ONLY: |
| bslash_escape = FALSE;quote_escape = TRUE; break; |
| case QUOTE_AND_BSLAH: |
| bslash_escape = TRUE; quote_escape = TRUE; break; |
| default: |
| ;// error |
| } |
| } |
| |
| void reset(){ |
| str.reset(); |
| is_quoting = FALSE; |
| } |
| |
| inline void open_quoting(){ |
| if(is_quoting){ |
| // do nothing |
| } else { |
| str.append('\''); |
| is_quoting = TRUE; |
| } |
| } |
| inline void close_quoting(){ |
| if(is_quoting){ |
| str.append('\''); |
| is_quoting = FALSE; |
| } else { |
| // do nothing |
| } |
| } |
| |
| // str [in] null-terminated c-string |
| void append(const char * strToAppend){ |
| for(;*strToAppend != 0; strToAppend++){ |
| append(*strToAppend); |
| } |
| } |
| |
| inline void append(const char c){ |
| set_options(); |
| |
| if (c == '\\'){ |
| quote_escape ? open_quoting() : close_quoting(); |
| //bslash_escape always true here |
| str.append('\\'); |
| str.append('\\'); |
| } else if (c == '\''){ |
| if (two_quotes_escape){ // quoted using two single quotes |
| // See documents in anonymous.design |
| str.append('\''); |
| str.append('\''); |
| } else{ |
| quote_escape ? open_quoting() : close_quoting(); |
| //bslash_escape always true here |
| str.append('\\'); |
| str.append('\''); |
| } |
| } else if (isSpecialAsciiChar(c) || isWhiteSpace(c)){ |
| quote_escape ? open_quoting() : close_quoting(); |
| if (bslash_escape) str.append('\\'); |
| str.append(c); |
| } else { //if (isAlphabet(c) || isDigit(c) || TRUE){ // treat others as literal |
| if (escape_literal){ |
| quote_escape ? open_quoting() : close_quoting(); |
| if (bslash_escape) str.append('\\'); |
| str.append(c); |
| } else { |
| close_quoting(); |
| str.append(c); |
| } |
| } |
| } |
| |
| public: |
| // Return a null-terminate c-string. The buffer is owned by callee. |
| char * operator()(const char * literal /*c-string*/){ |
| str.reset(); |
| for(;*literal != 0; literal++){ |
| append(*literal); |
| } |
| close_quoting(); // P4 exception, to close whole quoting |
| return str; |
| } |
| }; |
| |
| class WeightedRand{ |
| // Return a random number in [0, size) |
| // Every number has different chance (aka weight) to be selected. |
| private: |
| Buffer_int weights; |
| double total; |
| WeightedRand(const WeightedRand &); |
| WeightedRand & operator = (const WeightedRand &); |
| public: |
| WeightedRand(Buffer_int * weight_list = NULL, int size = 0){ |
| if ( weight_list == NULL){ |
| for (int i=0; i<size; ++i) weights.append(DEFAULT_WEIGHT); |
| } else { |
| int s = weight_list->content_size(); |
| if (s < size){ |
| weights.append_array( (*weight_list),s); |
| for (int i=s; i<size; ++i) weights.append(DEFAULT_WEIGHT); |
| } else { // s >= size |
| weights.append_array( (*weight_list),size); |
| } |
| } |
| total = 0; |
| int c = weights.content_size(); |
| for (int i=0; i<c; ++i){ |
| total += weights[i]; |
| } |
| } |
| |
| void append(int weight){ |
| weights.append(weight); |
| total += weight; |
| } |
| |
| // Give a random number with the consideration of weight. |
| // Every random number is associated with a weight. |
| // It identifies the chance to be selected, |
| // larger weight has more chance to be selected. |
| // |
| // |
| // ______________________ every slot has equal chance |
| // |
| // [____][_][___][______] each item has different slots, hence different chance |
| // |
| // |
| // The algorithms to generate the number is illustrated by preceding figure. |
| // First, a slot is selected by rand(). Then we translate the slot to corresponding item. |
| // |
| int next(){ |
| // get a random in [0,1] |
| double reference_mark = (double)rand() / (double)RAND_MAX; |
| |
| // get the slot's index, 0 <= mark <= total; |
| double mark = total * reference_mark; |
| |
| // translate the slot to corresponding item |
| int i=0; |
| for (;;){ |
| mark -= weights[i]; // 0 <= mark <= total |
| if (mark <= 0) |
| break; |
| i++; |
| } |
| return i; |
| } |
| }; |
| |
| /////////////////////////////////////////////////////////// |
| // |
| // The parser result nodes |
| // |
| |
| class Literal : public Pick { |
| public: |
| virtual const char* next(){ |
| return str; |
| } |
| Literal(const char * s /*c-string*/){ |
| str.append_array(s, strlen(s) + 1); |
| } |
| private: |
| Buffer_char str; //null-terminated c-string |
| }; |
| |
| class Variable : public Pick { |
| public: |
| Variable(SymbolTable * symbols, const char * varName, Pick * varRef = NULL){ |
| this->var_name.append_array(varName, strlen(varName) + 1); |
| if ((symbol_table = symbols)){ |
| symbol_table->put(varName, varRef); |
| } |
| } |
| |
| operator const char *(){ |
| return var_name; |
| } |
| |
| virtual const char* next(){ |
| if (symbol_table){ |
| Pick * var_ref = NULL; |
| symbol_table->find(var_name, &var_ref); |
| if (var_ref) { |
| return var_ref->next(); |
| } |
| } |
| return ""; // dumb string |
| } |
| private: |
| Buffer_char var_name; |
| SymbolTable * symbol_table; |
| }; |
| |
| class Quote : public Pick{ |
| public: |
| Quote(Pick & base):item(base),e(Escaper::NO, Escaper::NO, Escaper::BSLASH_ONLY){ |
| } |
| virtual const char* next(){ |
| return e(item.next()); |
| } |
| private: |
| Pick & item; |
| Buffer_char str; |
| Escaper e; |
| }; |
| |
| |
| class Morph : public Pick{ |
| /* |
| The difference between morph and an arbitrary random string is that |
| a morph changes slowly. When we build collation rules, for example, |
| it is a much better test if the strings we use are all in the same |
| 'neighborhood'; they share many common characters. |
| */ |
| public: |
| Morph(Pick & base):item(base){} |
| |
| virtual const char* next(){ |
| current.reset(); |
| const char * s = item.next(); |
| current.append_array(s, strlen(s) + 1); |
| if (last.content_size() == 0) { |
| str.reset(); |
| last.reset(); |
| str.append_array(current, current.content_size()); |
| last.append_array(current, current.content_size()); |
| } else { |
| morph(); |
| } |
| return str; |
| } |
| private: |
| Pick & item; |
| Buffer_char str; |
| Buffer_char last; |
| Buffer_char current; |
| |
| char * p_last; |
| char * p_curr; |
| |
| void copy_curr(){ |
| if (*p_curr) { |
| str.append(*p_curr); |
| p_curr++; |
| } |
| } |
| |
| void copy_last(){ |
| if (*p_last) { |
| str.append(*p_last); |
| p_last++; |
| } |
| } |
| |
| // copy 0, 1, or 2 character(s) to str |
| void copy(){ |
| static WeightedRand wr(& Buffer_int().append(DEFAULT_WEIGHT * 10), 5); |
| |
| switch (wr.next()){ |
| case 0: // copy last -- has 10 times chance than others |
| copy_last(); |
| break; |
| case 1: // copy both |
| copy_curr(); |
| copy_last(); |
| break; |
| case 2: // copy both |
| copy_last(); |
| copy_curr(); |
| break; |
| case 3: |
| copy_curr(); |
| break; |
| case 4: // copy nothing |
| break; |
| default: |
| // ASSERT(FALSE); |
| ; |
| } |
| } |
| |
| void morph(void){ |
| int min = strlen(last); |
| int max = strlen(current); |
| if (min > max){ |
| int temp = min; |
| min = max; |
| max = temp; |
| } |
| |
| int len = min + rand()%(max - min + 1); // min + [0, diff] |
| p_curr = current; |
| p_last = last; |
| str.reset(); |
| |
| for (; str.content_size()<len && *p_curr && *p_last;){ |
| copy(); // copy 0, 1, or 2 character(s) to str |
| } |
| |
| if (str.content_size() == len) { |
| str.append(0); |
| final(); |
| return; |
| } |
| |
| if (str.content_size() > len) { // if the last copy copied two characters |
| str[len]=0; |
| final(); |
| return; |
| } |
| |
| // str.content_size() < len |
| if (*p_last) { |
| for (; str.content_size() < len; copy_last()); |
| } else if (*p_curr){ |
| for (; str.content_size() < len; copy_curr()); |
| } |
| |
| int last_len = last.content_size(); |
| for (;str.content_size() < len;){ |
| str.append(last[rand()%last_len]); |
| } |
| str.append(0); |
| final(); |
| } |
| |
| void final(){ |
| last.reset(); |
| last.append_array(current, current.content_size()); |
| } |
| }; |
| |
| class Sequence : public Pick { |
| public: |
| virtual const char* next(){ |
| str.reset(); |
| int s = items.content_size(); |
| for(int i=0; i < s; i++){ |
| const char * t = items[i]->next(); |
| str.append_array(t, strlen(t)); |
| } |
| str.append(0); // terminal null |
| return str; |
| } |
| |
| void append (Pick * node){ |
| items.append(node); |
| } |
| |
| virtual ~Sequence(){ |
| int s = items.content_size(); |
| for(int i=0; i < s; i++){ |
| //How can assure the item is got from heap? |
| //Let's assume it. |
| delete items[i]; // TOFIX: point alias/recursion problem |
| items[i] = NULL; |
| } |
| } |
| private: |
| Buffer_pPick items; |
| Buffer_char str; //null-terminated c-string |
| }; |
| |
| class Repeat : public Pick { |
| private: |
| Pick * item; |
| Buffer_char str; |
| WeightedRand wr; |
| int min; |
| int max; |
| int select_a_count(){ |
| return min + wr.next(); |
| } |
| public: |
| virtual const char* next(){ |
| str.reset(); |
| int c = select_a_count(); |
| for(int i=0; i< c; i++){ |
| const char * t = item->next(); |
| str.append_array(t, strlen(t)); |
| } |
| str.append(0); |
| return str; |
| } |
| |
| Repeat(Pick * base, int minCount =0, int maxCount = 1, Buffer_int * weights = NULL): |
| wr(weights, maxCount-minCount +1) { |
| this->item = base; |
| this->min = minCount; |
| this->max = maxCount; |
| } |
| virtual ~Repeat(){ |
| delete item; // TOFIX: point alias/recursion problem |
| item = NULL; |
| } |
| }; |
| |
| |
| class Alternation : public Pick { |
| public: |
| virtual const char* next(){ |
| str.reset(); |
| int i = wr.next(); |
| const char * t = items[i]->next(); |
| str.append_array(t, strlen(t) + 1); |
| return str; |
| } |
| virtual ~Alternation(){ |
| int s = items.content_size(); |
| for(int i=0; i < s; i++){ |
| delete items[i]; // TOFIX: point alias/recursion problem |
| items[i] = NULL; |
| } |
| } |
| |
| Alternation & append (Pick * node, int weight = DEFAULT_WEIGHT){ |
| items.append(node); |
| wr.append(weight); |
| return *this; |
| } |
| private: |
| Buffer_pPick items; |
| Buffer_char str; // null-terminated c-string |
| WeightedRand wr; |
| }; |
| |
| /////////////////////////////////////////////////////////// |
| // |
| // The parser |
| // |
| |
| enum TokenType {STRING, VAR, NUMBER, STREAM_END, ERROR, QUESTION, STAR, PLUS, LBRACE, RBRACE, LPAR, RPAR, SEMI, EQ, COMMA, BAR, AT, WAVE, PERCENT}; |
| |
| class Scanner{ |
| friend int DumpScanner(Scanner & s, UBool dumb); |
| private: |
| const char * source; |
| const char * working; |
| const char * history; // for debug |
| enum StateType {START, IN_NUM, IN_VAR_FIRST, IN_VAR, IN_QUOTE, IN_QUOTE_BSLASH, IN_BSLASH, IN_STRING, DONE}; |
| StateType state; |
| void terminated(TokenType t){ |
| working--; // return the peeked character |
| tokenType = t; |
| token.append(0); // close buffer |
| state = DONE; |
| } |
| public: |
| // the buffer of "source" is owned by caller |
| Scanner(const char *src/*[in] c-string*/ = NULL):source(src){ |
| working = src; |
| history = working; |
| state = DONE; |
| tokenType = ERROR; |
| } |
| |
| //void setSource(const char *const src /*[in] c-string*/){ |
| // *(&const_cast<const char *>(source)) = src; |
| //} |
| |
| Buffer_char token; |
| TokenType tokenType; |
| |
| TokenType getNextToken(){ |
| token.reset(); |
| state = START; |
| history = working; // for debug |
| while (state != DONE){ |
| char c = *working++; |
| if (c == 0 && state != START){//avoid buffer overflow. for IN_QUOE, IN_ESCAPE |
| terminated(ERROR); |
| break; // while |
| } |
| switch(state){ |
| case START: |
| tokenType = ERROR; |
| switch(c){ |
| case '?' : tokenType = QUESTION; break; |
| case '*' : tokenType = STAR; break; |
| case '+' : tokenType = PLUS; break; |
| case '{' : tokenType = LBRACE; break; |
| case '}' : tokenType = RBRACE; break; |
| case '(' : tokenType = LPAR; break; |
| case ')' : tokenType = RPAR; break; |
| case ';' : tokenType = SEMI; break; |
| case '=' : tokenType = EQ; break; |
| case ',' : tokenType = COMMA; break; |
| case '|' : tokenType = BAR; break; |
| case '@' : tokenType = AT; break; |
| case '~' : tokenType = WAVE; break; |
| case '%' : tokenType = PERCENT; break; |
| case 0 : tokenType = STREAM_END; working-- /*avoid buffer overflow*/; break; |
| } |
| if (tokenType != ERROR){ |
| token.append(c); |
| token.append(0); |
| state = DONE; |
| break; // START |
| } |
| switch(c){ |
| case '$' : state = IN_VAR_FIRST; token.append(c); break; |
| case '\'' : state = IN_QUOTE; break; |
| case '\\' : state = IN_BSLASH; break; |
| default: |
| if (isWhiteSpace(c)){ // state = START; //do nothing |
| } else if (isDigit(c)){ state = IN_NUM; token.append(c); |
| } else if (isAlphabet(c)){ state = IN_STRING; token.append(c); |
| } else {terminated(ERROR);} |
| } |
| break;//START |
| case IN_NUM: |
| if (isDigit(c)){ |
| token.append(c); |
| } else { |
| terminated(NUMBER); |
| } |
| break;//IN_NUM |
| case IN_VAR_FIRST: |
| if (isAlphabet(c)){ |
| token.append(c); |
| state = IN_VAR; |
| } else { |
| terminated(ERROR); |
| } |
| break; // IN_VAR_FISRT |
| case IN_VAR: |
| if (isAlphabet(c) || isDigit(c)){ |
| token.append(c); |
| } else { |
| terminated(VAR); |
| } |
| break;//IN_VAR |
| case IN_STRING: |
| // About the scanner's behavior for STRING, AT, and ESCAPE: |
| // All of them can be contacted with each other. |
| // This means the scanner will eat up as much as possible strings |
| // (STRING, AT, and ESCAPE) at one time, with no regard of their |
| // combining sequence. |
| // |
| if (c == '\''){ |
| state = IN_QUOTE; // the first time we see single quote |
| } else if (c =='\\'){ // back slash character |
| state = IN_BSLASH; |
| } else if (isAlphabet(c) || isDigit(c)){ |
| token.append(c); |
| } else{ |
| terminated(STRING); |
| } |
| break;//IN_STRING |
| case IN_QUOTE: |
| if (c == '\''){ // the second time we see single quote |
| state = IN_STRING; // see document in IN_STRING |
| } else if ( c== '\\') { // backslah escape in quote |
| state = IN_QUOTE_BSLASH; |
| } else { |
| token.append(c); // eat up everything, includes back slash |
| } |
| break;//IN_QUOTE |
| case IN_QUOTE_BSLASH: |
| case IN_BSLASH: |
| switch (c){ |
| case 'n' : token.append('\n'); break; |
| case 'r' : token.append('\r'); break; |
| case 't' : token.append('\t'); break; |
| case '\'' : token.append('\''); break; |
| case '\\' : token.append('\\'); break; |
| default: token.append(c); // unknown escaping, treat it as literal |
| } |
| if (state == IN_BSLASH){ |
| state = IN_STRING; // see document in IN_STRING |
| } else { // state == IN_QUOTE_BSLASH |
| state = IN_QUOTE; |
| } |
| break;//IN_BSLASH |
| case DONE: /* should never happen */ |
| default: |
| working--; |
| tokenType = ERROR; |
| state = DONE; |
| break; |
| }//switch(state) |
| }//while (state != DONE) |
| |
| return tokenType; |
| } |
| };//class Scanner |
| |
| class Parser{ |
| friend UBool TestParser(); |
| friend class TestParserT; |
| friend class LanguageGenerator_impl; |
| private: |
| Scanner s; |
| TokenType & token; |
| int min_max; // for the evil infinite |
| |
| UBool match(TokenType expected){ |
| if (token == expected) { |
| token = s.getNextToken(); |
| return TRUE; |
| } else { |
| //s.dumpCurrentPoint(); |
| return FALSE; |
| } |
| } |
| |
| UBool weight(int & value){ |
| if (token == NUMBER){ |
| int temp = atoi(s.token); |
| match(NUMBER); |
| if (match(PERCENT)){ |
| value = temp; |
| return TRUE; |
| } |
| } |
| return FALSE; |
| } |
| |
| UBool repeat (Pick* &node /*in,out*/){ |
| if (node == NULL) return FALSE; |
| |
| int count = -2; |
| int min = -2; |
| int max = -2; |
| UBool question = FALSE; |
| switch (token){ |
| case QUESTION: |
| match(QUESTION); |
| min = 0; |
| max = 1; |
| count = 2; |
| question = TRUE; |
| break; |
| case STAR: |
| match(STAR); |
| min = 0; |
| max = -1; |
| count = -1; |
| break; |
| case PLUS: |
| match(PLUS); |
| min = 1; |
| max = -1; |
| count = -1; |
| break; |
| case LBRACE: |
| match(LBRACE); |
| if (token != NUMBER){ |
| return FALSE; |
| }else { |
| min = atoi(s.token); |
| match(NUMBER); |
| if (token == RBRACE){ |
| match(RBRACE); |
| max = min; |
| count = 1; |
| } else if (token == COMMA) { |
| match(COMMA); |
| if (token == RBRACE){ |
| match(RBRACE); |
| max = -1; |
| count = -1; |
| } else if (token == NUMBER) { |
| max = atoi(s.token); |
| match(NUMBER); |
| count = max - min + 1; |
| if (!match(RBRACE)) { |
| return FALSE; |
| } |
| } else { |
| return FALSE; |
| } |
| } else { |
| return FALSE; |
| } |
| } |
| break; |
| default: |
| return FALSE; |
| } |
| |
| if (count == -2 || min == -2 || max == -2){ |
| //ASSERT(FALSE); |
| return FALSE; |
| } |
| |
| // eat up following weights |
| Buffer_int weights; |
| int w; |
| while (weight(w)){ |
| weights.append(w); |
| } |
| |
| // for the evil infinite |
| min_max = min_max > min ? min_max : min; |
| min_max = min_max > max ? min_max : max; |
| if (min_max > PSEUDO_INFINIT){ |
| return FALSE; // PSEUDO_INFINIT is less than the real maximum |
| } |
| if (max == -1){ // the evil infinite |
| max = PSEUDO_INFINIT; |
| } |
| // for the strange question mark |
| if (question && weights.content_size() > 0){ |
| Buffer_int w2; |
| w2.append(DEFAULT_WEIGHT - weights[0]).append(weights[0]); |
| node = new Repeat(node,min,max,&w2); |
| return TRUE; |
| } |
| node = new Repeat(node,min,max,&weights); |
| return TRUE; |
| } |
| |
| UBool core(Pick* &node /*out*/){ |
| if (node != NULL) return FALSE; //assert node == NULL |
| |
| switch(token){ |
| case LPAR: |
| match(LPAR); |
| if(defination(node) && match(RPAR)){ |
| return TRUE; |
| } |
| return FALSE; |
| case VAR: |
| node = new Variable(&symbols, s.token); |
| match(VAR); |
| return TRUE; |
| case STRING: |
| node = new Literal(s.token); |
| match(STRING); |
| return TRUE; |
| default: |
| return FALSE; |
| } |
| } |
| UBool modified(Pick* &node /*out*/){ |
| if (node != NULL) return FALSE; //assert node == NULL |
| |
| if (!core(node)) { |
| return FALSE; |
| } |
| |
| for (;;){ |
| switch(token){ |
| case WAVE: |
| match(WAVE); |
| node = new Morph(*node); |
| break; |
| case AT: |
| match(AT); |
| node = new Quote(*node); |
| break; |
| case QUESTION: |
| case STAR: |
| case PLUS: |
| case LBRACE: |
| if (!repeat(node)) return FALSE; |
| break; |
| case SEMI: // rule definiation closed |
| case RPAR: // within parenthesis (core closed) |
| case BAR: // in alternation |
| case NUMBER: // in alternation, with weight |
| case LPAR: // in sequence |
| case VAR: // in sequence |
| case STRING: // in sequence |
| return TRUE; |
| default: |
| return FALSE; |
| } |
| } |
| } |
| |
| |
| UBool sequence_list(Pick* &node /*in,out*/){ |
| if (node == NULL) return FALSE; // assert node != NULL |
| |
| Sequence* seq = new Sequence(); |
| Pick * n = node; |
| |
| while (token == VAR || token == STRING || token == LPAR){ |
| seq->append(n); |
| n = NULL; |
| if (modified(n)){ |
| // go on |
| } else { |
| goto FAIL; |
| } |
| } |
| |
| if (token == SEMI || token == RPAR || token == BAR){ |
| seq->append(n); |
| node = seq; |
| return TRUE; |
| } |
| FAIL: |
| delete seq; |
| return FALSE; |
| |
| } |
| |
| UBool sequence(Pick* &node /*out*/){ |
| if (node != NULL) return FALSE; //assert node == NULL |
| |
| if (!modified(node)) { |
| return FALSE; |
| } |
| |
| if (token == VAR || token == STRING || token == LPAR){ |
| return sequence_list(node); |
| } else { |
| return TRUE; // just a modified |
| } |
| } |
| |
| UBool alternation_list(Pick* &node /*in,out*/){ |
| if (node == NULL) return FALSE; // assert node != NULL |
| |
| Alternation * alt = new Alternation(); |
| Pick * n = node; |
| int w = DEFAULT_WEIGHT; |
| |
| while (token == NUMBER || token == BAR){ |
| if(token == NUMBER) { |
| if (weight(w)){ |
| if (token == BAR){ |
| // the middle item, go on |
| } else { |
| // the last item or encounter error |
| break; //while |
| } |
| } else { |
| goto FAIL; |
| } |
| } // else token == BAR |
| match(BAR); |
| alt->append(n,w); |
| |
| n = NULL; |
| w = DEFAULT_WEIGHT; |
| if (sequence(n)){ |
| // go on |
| } else { |
| goto FAIL; |
| } |
| } |
| |
| if (token == SEMI || token == RPAR) { |
| alt->append(n,w); |
| node = alt; |
| return TRUE; |
| } |
| FAIL: |
| delete alt; |
| return FALSE; |
| } |
| |
| UBool alternation(Pick* &node /*out*/){ |
| if (node != NULL) return FALSE; //assert node == NULL |
| |
| // 'sequence' has higher precedence than 'alternation' |
| if (!sequence(node)){ |
| return FALSE; |
| } |
| |
| if (token == BAR || token == NUMBER){ // find a real alternation1, create it. |
| return alternation_list(node); |
| } else { |
| return TRUE; // just a sequence_old |
| } |
| } |
| |
| |
| UBool defination(Pick* &node /*out*/){ |
| if (node != NULL) return FALSE; //assert node == NULL |
| return alternation(node); |
| } |
| |
| UBool rule(){ |
| if (token == VAR){ |
| Buffer_char name; |
| name.append_array(s.token, strlen(s.token) + 1); |
| match(VAR); |
| |
| if (match(EQ)){ |
| Pick * t = NULL; |
| if(defination(t)){ |
| symbols.put(name, t); |
| return match(SEMI); |
| } |
| } |
| } |
| return FALSE; |
| } |
| public: |
| UBool rules(){ |
| symbols.reset(); |
| token = s.getNextToken(); |
| while (rule()){ |
| } |
| if (token == STREAM_END){ |
| return TRUE; |
| } else { |
| //s.dumpCurrentPoint(); |
| return FALSE; |
| } |
| } |
| |
| public: |
| SymbolTable symbols; |
| |
| Parser(const char *const source):s(source), token(s.tokenType){ |
| min_max = -2; |
| } |
| UBool parse(){ |
| return rules(); |
| } |
| |
| }; // class Parser |
| |
| |
| /////////////////////////////////////////////////////////// |
| // |
| // |
| // |
| |
| int DumpScanner(Scanner & s, UBool dump = TRUE){ |
| int len = strlen(s.source); |
| int error_start_offset = s.history - s.source; |
| if (dump){ |
| printf("\n=================== DumpScanner ================\n"); |
| fwrite(s.source, len, 1, stdout); |
| printf("\n-----parsed-------------------------------------\n"); |
| fwrite(s.source, s.history - s.source, 1, stdout); |
| printf("\n-----current------------------------------------\n"); |
| fwrite(s.history, s.working - s.history, 1, stdout); |
| printf("\n-----unparsed-----------------------------------\n"); |
| fwrite(s.working, (s.source + len - s.working), 1, stdout); |
| printf("\n^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n"); |
| } |
| return error_start_offset; |
| } |
| |
| class LanguageGenerator_impl{ |
| public: |
| LanguageGenerator_impl(const char *const bnf_definition, const char *const top_node) |
| :par(bnf_definition), top_node_name(top_node){ |
| srand((unsigned)time( NULL )); |
| } |
| |
| LanguageGenerator::PARSE_RESULT parseBNF(UBool debug = TRUE){ |
| if (par.parse()){ |
| if (par.symbols.find(top_node_name, &top_node_ref) == SymbolTable::HAS_REF) { |
| if (par.symbols.is_complete()) { |
| return LanguageGenerator::OK; |
| } else { |
| if (debug) printf("The bnf definition is incomplete.\n"); |
| return LanguageGenerator::INCOMPLETE; |
| } |
| } else { |
| if (debug) printf("No top node is found.\n"); |
| return LanguageGenerator::NO_TOP_NODE; |
| } |
| } else { |
| if(debug) { |
| printf("The bnf definition is wrong\n"); |
| DumpScanner(par.s, TRUE); |
| } |
| return LanguageGenerator::BNF_DEF_WRONG; |
| } |
| } |
| const char * next(){ |
| return top_node_ref->next(); |
| } |
| |
| private: |
| Parser par; |
| const char *const top_node_name; |
| Pick * top_node_ref; |
| }; |
| |
| LanguageGenerator::LanguageGenerator():lang_gen(NULL){ |
| } |
| |
| LanguageGenerator::~LanguageGenerator(){ |
| delete lang_gen; |
| } |
| |
| LanguageGenerator::PARSE_RESULT LanguageGenerator::parseBNF(const char *const bnf_definition /*in*/, const char *const top_node/*in*/, UBool debug){ |
| if (lang_gen){ |
| delete lang_gen; |
| } |
| lang_gen = new LanguageGenerator_impl(bnf_definition, top_node); |
| PARSE_RESULT r = lang_gen->parseBNF(debug); |
| if (r != OK){ |
| delete lang_gen; |
| lang_gen = NULL; |
| return r; |
| } else { |
| return r; |
| } |
| } |
| const char *LanguageGenerator::next(){ // Return a null-terminated c-string. The buffer is owned by callee. |
| if (lang_gen){ |
| return lang_gen->next(); |
| }else { |
| return ""; |
| } |
| } |
| |
| /////////////////////////////////////////////////////////// |
| // |
| // The test code for WBNF |
| // |
| |
| #define CALL(fun) \ |
| if (fun()){ \ |
| printf("Pass: " #fun "\n");\ |
| } else { \ |
| printf("FAILED: !!! " #fun " !!!\n"); \ |
| } |
| |
| #define DUMP_R(fun, var, times) \ |
| {printf("\n========= " #fun " =============\n"); \ |
| for (int i=0; i<times; i++) { \ |
| const char * t = var.next();\ |
| fwrite(t,strlen(t),1,stdout); \ |
| printf("\n"); \ |
| } \ |
| printf("^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n");} |
| |
| |
| |
| #if TEST_WBNF_TEST |
| static UBool TestQuote(){ |
| const char *const str = "This ' A !,z| qq [] .new\tline"; |
| //const char *const str_r = "This \\' A '!,'z'|' qq '[]' '.'new\tline"; |
| //// |
| //// :( we must quote our string to following C syntax |
| //// cannot type the literal here, it makes our code rather human unreadable |
| //// very very unconformable! |
| //// |
| ///* |
| //*/ |
| |
| //const char *const s1 = "ab'c"; |
| //const char (* s1_r1) [] = { "ab''c", // ab''c |
| // "ab\\'c", // ab\'c |
| // };// |
| ///* |
| // . '.' \. |
| // .. \.\. '.'\. '.'\. '..' // '.''.' wrong |
| //*/ |
| |
| //const char *const s2 = "a..'.b"; // a..'.b |
| //const char (*s2_r) [] = { "a'..''.'b" // a'..''.'b |
| // ,"a'..\\'.'b" // a'..\'.'b |
| // ,"a'..'\\''.'b" // a'..'\''.'b |
| // };// |
| |
| //const char *const s3 = "a..\\.b"; // a..\.b |
| //const char (*s3_r) [] = { "a'..\\\\.'b" // a'..\\.'b |
| // ,"a'..'\\\\'.'b" // a'..'\\'.'b |
| // };// |
| |
| // // no catact operation, no choice, must be compact |
| |
| srand((unsigned)time( NULL )); |
| |
| //Escaper l(Escaper::NO, Escaper::NO, Escaper::RAND_ESC); |
| Pick *p = new Literal(str); |
| Quote q(*p); |
| |
| DUMP_R(TestQuote, (*p), 1); |
| DUMP_R(TestQuote, q, 20); |
| return FALSE; |
| } |
| static UBool TestLiteral(){ |
| const char * s = "test string99."; |
| Literal n(s); |
| const char * r = n.next(); |
| return strcmp(s,r) == 0; |
| } |
| |
| static UBool TestSequence(){ |
| Sequence seq; |
| seq.append(new Literal("abc ")); |
| seq.append(new Literal(", s")); |
| |
| return strcmp(seq.next(), "abc , s") == 0; |
| } |
| static UBool TestAlternation(){ |
| srand((unsigned)time( NULL )); |
| Alternation alt; |
| alt.append(new Literal("aaa_10%"),10); |
| alt.append(new Literal("bbb_0%"),0); |
| alt.append(new Literal("ccc_10%"),10); |
| alt.append(new Literal("ddddddd_50%"),50); |
| |
| DUMP_R(TestAlternation, alt, 50); |
| |
| return FALSE; |
| } |
| |
| static UBool TestBuffer(){ |
| Buffer_int t; |
| t.append(1).append(0).append(5); |
| int s = t.content_size(); |
| for (int i=0; i<s; ++i){ |
| printf("%d\n", t[i]); |
| } |
| return FALSE; |
| } |
| |
| static UBool TestWeightedRand(){ |
| srand((unsigned)time( NULL )); |
| Buffer_int t; |
| t.append(1).append(0).append(5); |
| WeightedRand wr(&Buffer_int().append(10).append(0).append(50),4); |
| // WeightedRand wr(&t,3); |
| for (int i=0; i< 50; ++i){ |
| printf("%d\n", wr.next()); |
| } |
| return FALSE; |
| } |
| |
| static UBool TestRepeat(){ |
| srand((unsigned)time( NULL )); |
| Repeat rep(new Literal("aaa1-5 "), 1, 5); |
| DUMP_R(TestRepeat, rep, 50); |
| |
| Repeat r2(new Literal("b{1,3}1%0%5% "), 1, 3, &Buffer_int().append(1).append(0).append(5)); |
| DUMP_R(TestRepeat, r2, 50); |
| |
| Repeat r3(new Literal("aaa5-5 "), 5, 5); |
| DUMP_R(TestRepeat, r3, 50); |
| |
| return FALSE; |
| } |
| |
| static UBool TestVariable(){ |
| SymbolTable tab; |
| Pick * value = new Literal("string1"); |
| Variable var1(&tab, "x", value); |
| |
| Variable var2(&tab, "y"); |
| // tab.put(var2, value); // TOFIX: point alias/recursion problem |
| Pick * value2 = new Literal("string2"); |
| tab.put(var2, value2); |
| |
| Pick * value3 = new Literal("string3"); |
| Variable var3(&tab, "z"); |
| tab.put("z", value3); |
| |
| UBool pass; |
| pass = strcmp(var1.next(), value->next()) == 0; |
| pass = pass && strcmp(var2.next(), value2->next()) == 0; |
| pass = pass && strcmp(var3.next(), value3->next()) == 0; |
| return pass; |
| } |
| |
| static UBool TestSymbolTable(){ |
| Literal * n1 = new Literal("string1"); |
| Literal * n2 = new Literal("string2"); |
| SymbolTable t; |
| t.put("abc", n1); |
| t.put("$aaa", n2); |
| // t.put("alias", n1); // TOFIX: point alias/recursion problem |
| t.put("bbb"); |
| |
| UBool pass; |
| pass = t.find(NULL) == SymbolTable::EMPTY; |
| pass = pass && t.find("ccc") == SymbolTable::NO_VAR; |
| pass = pass && t.find("bbb") == SymbolTable::NO_REF; |
| pass = pass && t.find("abc") == SymbolTable::HAS_REF; |
| pass = pass && t.find("$aaa") == SymbolTable::HAS_REF; |
| |
| t.reset(); |
| pass = pass && t.find("abc") == SymbolTable::NO_VAR; |
| return pass; |
| } |
| |
| |
| static UBool TestScanner(void){ |
| //const char str1[] = "$root = $command{0,5} $reset $mostRules{1,20};"; |
| //const char str1_r[][20] = {"$root", "=", "$command", "{", "0", ",", "5", "}", |
| // "$reset", "$mostRules", "{", "1", ",", "20", "}", ";"}; |
| |
| const char str2[] = "$p2 =(\\\\ $s $string $s)? 25%;"; |
| const char str2_r[][20] = {"$p2", "=", "(", "\\", "$s", "$string", "$s", ")", "?", "25", "%", ";"}; |
| |
| const char *str = str2; |
| const char (*str_r)[20] = str2_r; |
| int tokenNum = sizeof(str2_r)/sizeof(char[20]); |
| |
| Scanner t(str); |
| UBool pass = TRUE; |
| t.getNextToken(); |
| int i = 0; |
| while (pass){ |
| if (t.tokenType == STREAM_END){ |
| pass = pass? i == tokenNum : FALSE; |
| break;//while |
| } else if (t.tokenType == ERROR){ |
| pass = FALSE; |
| break;//while |
| } else { |
| pass = strcmp( &(t.token[0]), str_r[i++]) == 0; |
| t.getNextToken(); |
| } |
| } |
| |
| //const char ts[] = "$commandList = '['" |
| //" ( alternate ' ' $alternateOptions" |
| //" | backwards ' 2'" |
| //" | normalization ' ' $onoff " |
| //" | caseLevel ' ' $onoff " |
| //" | hiraganaQ ' ' $onoff" |
| //" | caseFirst ' ' $caseFirstOptions" |
| //" | strength ' ' $strengthOptions" |
| //" ) ']';" ; |
| |
| //Scanner t2(ts); |
| //pass = TRUE; |
| //do { |
| // t2.getNextToken(); |
| // if (t2.tokenType == ERROR){ |
| // DumpScanner(t2); |
| // return FALSE; |
| // } |
| //}while (t.tokenType != STREAM_END); |
| |
| return pass; |
| } |
| |
| class TestParserT { |
| public: |
| UBool operator () (const char *const str, const int exp_error_offset = -1, const UBool dump = TRUE){ |
| Parser par(str); |
| if (par.rules()){ |
| if ( exp_error_offset == -1){ |
| return TRUE; |
| }else { |
| DumpScanner(par.s,dump); |
| return FALSE; |
| } |
| }else { |
| return DumpScanner(par.s, dump) == exp_error_offset; |
| } |
| } |
| }; |
| |
| UBool TestParser(){ |
| TestParserT test; |
| |
| UBool pass = TRUE; |
| pass = pass && test ("$s = ' ' ? 50%;"); |
| pass = pass && test("$x = ($var {1,2}) 3%;"); // legal |
| pass = pass && test("$x = $var {1,2} 3% | b 4%;"); // legal |
| pass = pass && test("$x = $var {1,2} 3%;"); // legal |
| pass = pass && test("$m = $c ? 2% 4% | $r 5% | $n 25%;"); // legal |
| pass = pass && test("$a = b ? 2% | c 5%;"); // legal |
| pass = pass && test("$x = A B 5% C 10% | D;", 8, FALSE); // illegal 5% |
| pass = pass && test("$x = aa 45% | bb 5% cc;", 19, FALSE);// illegal cc |
| pass = pass && test("$x = (b 5%) (c 6%);"); // legal |
| pass = pass && test("$x = (b 5%) c 6%;", 13, FALSE); // illegal 6% |
| pass = pass && test("$x = b 5% (c 6%);", 9, FALSE); // illegal (c 6%) |
| pass = pass && test("$x = b 5% c 6%;", 9, FALSE); // illegal c 6% |
| pass = pass && test("$x = b 5%;"); // legal |
| pass = pass && test("$x = aa 45% | bb 5% cc;", 19, FALSE);// illegal cc |
| pass = pass && test("$x = a | b | c 4% | d 5%;"); // legal |
| pass = pass && test("$s = ' ' ? 50% abc;"); // legal |
| pass = pass && test("$s = a | c d | e f;"); // legal |
| pass = pass && test( "$z = q 0% | p 1% | r 100%;"); // legal How to check parsed tree?? |
| |
| pass = pass && test("$s = ' ' ? 50%;"); |
| pass = pass && test("$relationList = '<' | '<<' | ';' | '<<<' | ',' | '=';"); |
| pass = pass && test("$p1 = ($string $s '|' $s)? 25%;"); |
| pass = pass && test("$p2 = (\\\\ $s $string $s)? 25%;"); |
| pass = pass && test("$rel2 = $p1 $string $s $p2;"); |
| pass = pass && test("$relation = $relationList $s ($rel1 | $rel2) $crlf;"); |
| pass = pass && test("$command = $commandList $crlf;"); |
| pass = pass && test("$reset = '&' $s ($beforeList $s)? 10% ($positionList 100% | $string 10%) $crlf;"); |
| pass = pass && test("$mostRules = $command 1% | $reset 5% | $relation 25%;"); |
| pass = pass && test("$root = $command{0,5} $reset $mostRules{1,20};"); |
| |
| const char collationBNF[] = |
| "$s = ' '? 50%;" |
| "$crlf = '\r\n';" |
| |
| "$alternateOptions = non'-'ignorable | shifted;" |
| "$onoff = on | off;" |
| "$caseFirstOptions = off | upper | lower;" |
| "$strengthOptions = '1' | '2' | '3' | '4' | 'I';" |
| "$commandList = '['" |
| " ( alternate ' ' $alternateOptions" |
| " | backwards ' 2'" |
| " | normalization ' ' $onoff " |
| " | caseLevel ' ' $onoff " |
| " | hiraganaQ ' ' $onoff" |
| " | caseFirst ' ' $caseFirstOptions" |
| " | strength ' ' $strengthOptions" |
| " ) ']';" |
| "$command = $commandList $crlf;" |
| |
| "$ignorableTypes = (tertiary | secondary | primary) ' ' ignorable;" |
| "$allTypes = variable | regular | implicit | trailing | $ignorableTypes;" |
| "$positionList = '[' (first | last) ' ' $allTypes ']';" |
| |
| "$beforeList = '[before ' ('1' | '2' | '3') ']';" |
| |
| "$relationList = (" |
| " '<'" |
| " | '<<'" |
| " | ';'" |
| " | '<<<'" |
| " | ','" |
| " | '='" |
| ");" |
| "$string = $magic;" |
| "$rel1 = '[variable top]' $s;" |
| "$p1 = ($string $s '|' $s)? 25%;" |
| "$p2 = (\\\\ $s $string $s)? 25%;" |
| "$rel2 = $p1 $string $s $p2;" |
| "$relation = $relationList $s ($rel1 | $rel2) $crlf;" |
| |
| "$reset = '&' $s ($beforeList $s)? 10% ($positionList 1% | $string 10%) $crlf;" |
| "$mostRules = $command 1% | $reset 5% | $relation 25%;" |
| "$root = $command{0,5} $reset $mostRules{1,20};" |
| ; |
| |
| pass = pass && test(collationBNF); |
| |
| |
| return pass; |
| } |
| |
| static UBool TestMorph(){ |
| srand((unsigned)time( NULL )); |
| |
| Alternation * alt = new Alternation(); |
| |
| (*alt) |
| .append(new Literal("a")).append(new Literal("b")).append(new Literal("c")) |
| .append(new Literal("d")).append(new Literal("e")).append(new Literal("f")) |
| .append(new Literal("g")).append(new Literal("h")).append(new Literal("i")) |
| .append(new Literal("j")).append(new Literal("k")).append(new Literal("l")) |
| .append(new Literal("m")).append(new Literal("n")).append(new Literal("o")) |
| ; |
| |
| Repeat * rep = new Repeat( alt ,5,5 ); |
| Morph m( *rep); |
| |
| // DUMP_R(TestMorph,(*rep),20); |
| DUMP_R(TestMorph,m,100); |
| |
| return FALSE; |
| } |
| |
| #endif |
| |
| static UBool TestLanguageGenerator(){ |
| //LanguageGenerator g; |
| //const char *const s = "$s = p 0% | q 1%;"; |
| //g.parseBNF(s, "$s"); |
| UBool pass; |
| //= strcmp("q", g.next()) == 0; |
| |
| const char *const def = |
| //"$a = $b;" |
| //"$b = $c;" |
| //"$c = $t;" |
| //"$t = abc $z{1,2};" |
| //"$k = a | b | c | d | e | f | g ;" |
| //"$z = q 0% | p 1% | r 1%;" |
| "$x = a ? 0%;" |
| ; // end of string |
| // const char * s = "abczz"; |
| // |
| // |
| LanguageGenerator g; |
| pass = g.parseBNF(def, "$x",TRUE); |
| //// LanguageGenerator g(collationBNF, "$root", "$magic", new MagicNode()); |
| // |
| if (pass != LanguageGenerator::OK) return FALSE; |
| |
| DUMP_R(TestLanguageGenerator, g, 20); |
| return pass; |
| |
| ////UBool pass = strcmp(s,r) == 0; |
| |
| //if (pass){ |
| // printf("TestRandomLanguageGenerator passed.\n"); |
| //} else { |
| // printf("TestRandomLanguageGenerator FAILED!!!\n"); |
| //} |
| //return pass; |
| } |
| |
| void TestWbnf(void){ |
| srand((unsigned)time( NULL )); |
| |
| //CALL(TestLiteral); |
| //CALL(TestSequence); |
| //CALL(TestSymbolTable); |
| //CALL(TestVariable); |
| |
| //TestRepeat(); |
| //TestAlternation(); |
| //TestMorph(); |
| |
| //TestQuote(); |
| //TestBuffer(); |
| //TestWeightedRand(); |
| |
| //CALL(TestScanner); |
| //CALL(TestParser); |
| CALL(TestLanguageGenerator); |
| } |
| |