main/classes/core/src/com/ibm/icu/impl/data/TokenIterator.java - external/github.com/unicode-org/icu - Git at Google

 /*
 **********************************************************************
 * Copyright (c) 2004-2011, International Business Machines
 * Corporation and others.  All Rights Reserved.
 **********************************************************************
 * Author: Alan Liu
 * Created: March 16 2004
 * Since: ICU 3.0
 **********************************************************************
 */
 package com.ibm.icu.impl.data;

 import java.io.IOException;

 import com.ibm.icu.impl.PatternProps;
 import com.ibm.icu.impl.Utility;
 import com.ibm.icu.text.UTF16;

 /**
  * An iterator class that returns successive string tokens from some
  * source.  String tokens are, in general, separated by Pattern_White_Space
  * in the source text.  Furthermore, they may be delimited by
  * either single or double quotes (opening and closing quotes must
  * match).  Escapes are processed using standard ICU unescaping; this
  * includes an escape that begins an unquoted token.
  */
 public class TokenIterator {

     /** Supplier of the lines that are split into tokens. */
     private final ResourceReader reader;
     /** Line currently being scanned, or null if a new line must be read. */
     private String line;
     /** Scratch buffer that accumulates the text of the current token. */
     private final StringBuffer buf;
     /** Set once the reader has returned null; next() then always returns null. */
     private boolean done;
     /** Offset into 'line' at which the next scan starts. */
     private int pos;
     /** Offset into 'line' at which the most recent scan started. */
     private int lastpos;

     /**
      * Construct an iterator over the tokens returned by the given
      * ResourceReader, ignoring blank lines and comment lines (first
      * non-blank character is '#').  Note that trailing comments on a
      * line, beginning with the first unquoted '#', are recognized.
      * @param r the reader that lines are pulled from via
      * readLineSkippingComments()
      */
     public TokenIterator(ResourceReader r) {
         reader = r;
         line = null;
         done = false;
         buf = new StringBuffer();
         pos = lastpos = -1;
     }

     /**
      * Return the next token from this iterator, or null if the last
      * token has been returned.  Lines are read from the reader only
      * when the current line has no further tokens.
      * @return the next token with quotes removed and escapes resolved,
      * or null when the reader has no more lines
      * @throws IOException propagated from the reader's
      * readLineSkippingComments()
      * @throws RuntimeException if a token contains an invalid escape
      * or an unterminated quote
      */
     public String next() throws IOException {
         if (done) {
             return null;
         }
         for (;;) {
             if (line == null) {
                 line = reader.readLineSkippingComments();
                 if (line == null) {
                     done = true;
                     return null;
                 }
                 pos = 0;
             }
             buf.setLength(0);
             lastpos = pos;
             pos = nextToken(pos);
             if (pos < 0) {
                 // Nothing (more) on this line; fetch another one.
                 line = null;
                 continue;
             }
             return buf.toString();
         }
     }

     /**
      * Return the one-based line number of the line of the last token returned by
      * next(). Should only be called
      * after a call to next(); otherwise the return
      * value is undefined.  The value is obtained from the reader.
      */
     public int getLineNumber() {
         return reader.getLineNumber();
     }

     /**
      * Return a string description of the position of the last token
      * scan: the reader's own position description, followed by ':'
      * and the one-based offset within the line at which the most
      * recent call to next() started scanning (that offset may point
      * at white space preceding the token).
      */
     public String describePosition() {
         return reader.describePosition() + ':' + (lastpos+1);
     }

     /**
      * Read the next token from 'this.line' and append it to
      * 'this.buf'.  Tokens are separated by Pattern_White_Space.  Tokens
      * may also be delimited by double or single quotes.  The closing
      * quote must match the opening quote.  If a '#' is encountered,
      * the rest of the line is ignored, unless it is backslash-escaped
      * or within quotes.  Offsets embedded in the exception messages
      * are zero-based.
      * @param position the offset into the string
      * @return offset to the next character to read from line, or if
      * the end of the line is reached without scanning a valid token,
      * -1
      * @throws RuntimeException if an escape cannot be parsed or a
      * quoted token is not closed before the end of the line
      */
     private int nextToken(int position) {
         position = PatternProps.skipWhiteSpace(line, position);
         if (position == line.length()) {
             return -1;
         }
         int startpos = position;
         char c = line.charAt(position);
         char quote = 0;
         switch (c) {
         case '"':
         case '\'':
             quote = c;
             ++position; // consume the opening quote
             break;
         case '#':
             return -1;
         default:
             // Do not consume the first character here.  The loop below
             // handles it, so that a token starting with a backslash
             // escape (e.g. \u0041 or \#) is unescaped like any other
             // escape instead of being copied literally.
             break;
         }
         int[] posref = null;
         while (position < line.length()) {
             c = line.charAt(position); // 16-bit ok
             if (c == '\\') {
                 if (posref == null) {
                     posref = new int[1];
                 }
                 // unescapeAt expects the offset just past the backslash
                 // and advances posref[0] past the escape sequence.
                 posref[0] = position+1;
                 int c32 = Utility.unescapeAt(line, posref);
                 if (c32 < 0) {
                     throw new RuntimeException("Invalid escape at " +
                                                reader.describePosition() + ':' +
                                                position);
                 }
                 UTF16.append(buf, c32);
                 position = posref[0];
             } else if ((quote != 0 && c == quote) ||
                        (quote == 0 && PatternProps.isWhiteSpace(c))) {
                 // Closing quote or separating white space: consume it.
                 return ++position;
             } else if (quote == 0 && c == '#') {
                 return position; // do NOT increment
             } else {
                 buf.append(c);
                 ++position;
             }
         }
         if (quote != 0) {
             throw new RuntimeException("Unterminated quote at " +
                                        reader.describePosition() + ':' +
                                        startpos);
         }
         return position;
     }
 }
	/*
	**********************************************************************
	* Copyright (c) 2004-2011, International Business Machines
	* Corporation and others. All Rights Reserved.
	**********************************************************************
	* Author: Alan Liu
	* Created: March 16 2004
	* Since: ICU 3.0
	**********************************************************************
	*/
	package com.ibm.icu.impl.data;

	import java.io.IOException;

	import com.ibm.icu.impl.PatternProps;
	import com.ibm.icu.impl.Utility;
	import com.ibm.icu.text.UTF16;

	/**
	* An iterator class that returns successive string tokens from some
	* source. String tokens are, in general, separated by Pattern_White_Space
	* in the source test. Furthermore, they may be delimited by
	* either single or double quotes (opening and closing quotes must
	* match). Escapes are processed using standard ICU unescaping.
	*/
	public class TokenIterator {

	private ResourceReader reader;
	private String line;
	private StringBuffer buf;
	private boolean done;
	private int pos;
	private int lastpos;

	/**
	* Construct an iterator over the tokens returned by the given
	* ResourceReader, ignoring blank lines and comment lines (first
	* non-blank character is '#'). Note that trailing comments on a
	* line, beginning with the first unquoted '#', are recognized.
	*/
	public TokenIterator(ResourceReader r) {
	reader = r;
	line = null;
	done = false;
	buf = new StringBuffer();
	pos = lastpos = -1;
	}

	/**
	* Return the next token from this iterator, or null if the last
	* token has been returned.
	*/
	public String next() throws IOException {
	if (done) {
	return null;
	}
	for (;;) {
	if (line == null) {
	line = reader.readLineSkippingComments();
	if (line == null) {
	done = true;
	return null;
	}
	pos = 0;
	}
	buf.setLength(0);
	lastpos = pos;
	pos = nextToken(pos);
	if (pos < 0) {
	line = null;
	continue;
	}
	return buf.toString();
	}
	}

	/**
	* Return the one-based line number of the line of the last token returned by
	* next(). Should only be called
	* after a call to next(); otherwise the return
	* value is undefined.
	*/
	public int getLineNumber() {
	return reader.getLineNumber();
	}

	/**
	* Return a string description of the position of the last line
	* returned by readLine() or readLineSkippingComments().
	*/
	public String describePosition() {
	return reader.describePosition() + ':' + (lastpos+1);
	}

	/**
	* Read the next token from 'this.line' and append it to
	* 'this.buf'. Tokens are separated by Pattern_White_Space. Tokens
	* may also be delimited by double or single quotes. The closing
	* quote must match the opening quote. If a '#' is encountered,
	* the rest of the line is ignored, unless it is backslash-escaped
	* or within quotes.
	* @param position the offset into the string
	* @return offset to the next character to read from line, or if
	* the end of the line is reached without scanning a valid token,
	* -1
	*/
	private int nextToken(int position) {
	position = PatternProps.skipWhiteSpace(line, position);
	if (position == line.length()) {
	return -1;
	}
	int startpos = position;
	char c = line.charAt(position++);
	char quote = 0;
	switch (c) {
	case '"':
	case '\'':
	quote = c;
	break;
	case '#':
	return -1;
	default:
	buf.append(c);
	break;
	}
	int[] posref = null;
	while (position < line.length()) {
	c = line.charAt(position); // 16-bit ok
	if (c == '\\') {
	if (posref == null) {
	posref = new int[1];
	}
	posref[0] = position+1;
	int c32 = Utility.unescapeAt(line, posref);
	if (c32 < 0) {
	throw new RuntimeException("Invalid escape at " +
	reader.describePosition() + ':' +
	position);
	}
	UTF16.append(buf, c32);
	position = posref[0];
	} else if ((quote != 0 && c == quote) \|\|
	(quote == 0 && PatternProps.isWhiteSpace(c))) {
	return ++position;
	} else if (quote == 0 && c == '#') {
	return position; // do NOT increment
	} else {
	buf.append(c);
	++position;
	}
	}
	if (quote != 0) {
	throw new RuntimeException("Unterminated quote at " +
	reader.describePosition() + ':' +
	startpos);
	}
	return position;
	}
	}