source/i18n/regexcst.txt - external/github.com/unicode-org/icu - Git at Google


 #*****************************************************************************
 #
 #   Copyright (C) 2002-2007, International Business Machines Corporation and others.
 #   All Rights Reserved.
 #
 #*****************************************************************************
 #
 #  file:  regexcst.txt
 #  ICU Regular Expression Parser State Table
 #
 #     This state table is used when reading and parsing a regular expression pattern
 #     The pattern parser uses a state machine; the data in this file define the
 #     state transitions that occur for each input character.
 #
 #     *** This file defines the regex pattern grammar.   This is it.
 #     *** The determination of what is accepted is here.
 #
 #     This file is processed by a perl script "regexcst.pl" to produce initialized C arrays
 #     that are then built with the rule parser.
 #

 #
 # Here is the syntax of the state definitions in this file:
 #
 #
 #StateName:
 #   input-char           n next-state           ^push-state     action
 #   input-char           n next-state           ^push-state     action
 #       |                |   |                      |             |
 #       |                |   |                      |             |--- action to be performed by state machine
 #       |                |   |                      |                  See function RegexCompile::doParseActions()
 #       |                |   |                      |
 #       |                |   |                      |--- Push this named state onto the state stack.
 #       |                |   |                           Later, when next state is specified as "pop",
 #       |                |   |                           the pushed state will become the current state.
 #       |                |   |
 #       |                |   |--- Transition to this state if the current input character matches the input
 #       |                |        character or char class in the left hand column.  "pop" causes the next
 #       |                |        state to be popped from the state stack.
 #       |                |
 #       |                |--- When making the state transition specified on this line, advance to the next
 #       |                     character from the input only if 'n' appears here.
 #       |
 #       |--- Character or named character classes to test for.  If the current character being scanned
 #            matches, perform the actions and go to the state specified on this line.
 #            The input character is tested sequentially, in the order written.  The characters and
 #            character classes tested for do not need to be mutually exclusive.  The first match wins.
 #


 #
 #  start state, scan position is at the beginning of the pattern.
 #               The single default row matches any input, does not consume it
 #               (no 'n'), runs doPatStart and moves on to "term".
 #
 start:
    default                 term                                     doPatStart


 #
 #  term.  At a position where we can accept the start of most items in a pattern.
 #
 #         Literal characters (quoted or rule_char), '.', '^' and '$' are consumed and
 #         followed by expr-quant.  '[' enters set parsing with set-finish pushed as the
 #         state to resume in; '(' and '\' hand off to open-paren and backslash.
 #         ')' runs doCloseParen and pops the state stack.  eof runs doPatFinish without
 #         advancing the input; any other character is reported through doRuleError.
 #
 term:
     quoted               n expr-quant                               doLiteralChar
     rule_char            n expr-quant                               doLiteralChar
     '['                  n set-open       ^set-finish               doSetBegin
     '('                  n open-paren
     '.'                  n expr-quant                               doDotAny
     '^'                  n expr-quant                               doCaret
     '$'                  n expr-quant                               doDollar
     '\'                  n backslash
     '|'                  n  term                                    doOrOperator
     ')'                  n  pop                                     doCloseParen
     eof	                   term                                     doPatFinish
     default                errorDeath                               doRuleError


 #
 #   expr-quant    We've just finished scanning a term, now look for the optional
 #                 trailing quantifier - *, +, ?, *?,  etc.
 #
 #                 A '(' is routed through open-paren-quant (rather than open-paren) so
 #                 that a (?# comment) sitting between a term and its quantifier can be
 #                 skipped.  Anything that does not start a quantifier falls through to
 #                 expr-cont without consuming input.
 #
 expr-quant:
     '*'                  n  quant-star
     '+'                  n  quant-plus
     '?'                  n  quant-opt
     '{'                  n  interval-open                          doIntervalInit
     '('                  n  open-paren-quant
     default                 expr-cont


 #
 #  expr-cont      Expression, continuation.  At a point where additional terms are
 #                                            allowed, but not required.  No Quantifiers
 #
 #                 '|' and ')' are handled exactly as in "term"; every other character
 #                 is left unconsumed and re-examined by "term".
 #
 expr-cont:
     '|'                  n  term                                    doOrOperator
     ')'                  n  pop                                     doCloseParen
     default                 term


 #
 #   open-paren-quant   Special case handling for comments appearing before a quantifier,
 #                        e.g.   x(?#comment )*
 #                      Open parens from expr-quant come here; anything but a (?# comment
 #                      branches into the normal parenthesis sequence as quickly as possible.
 #
 #                      The '(' has already been consumed by expr-quant.  A following '?'
 #                      is consumed here; any other character is passed, unconsumed, to
 #                      open-paren.
 #
 open-paren-quant:
     '?'                  n  open-paren-quant2                      doSuppressComments
     default                 open-paren

 #
 #   open-paren-quant2  Have scanned "(?" after a term.  For "(?#" the comment is eaten by
 #                      paren-comment with expr-quant pushed, so the quantifier search
 #                      resumes once the comment closes.  Anything else is re-examined
 #                      by open-paren-extended.
 #
 open-paren-quant2:
     '#'                  n  paren-comment   ^expr-quant
     default                 open-paren-extended


 #
 #   open-paren    We've got an open paren.  We need to scan further to
 #                 determine what kind of group it is - plain (, (?:, (?>, or whatever.
 #
 #                 Without a following '?' this is a plain capturing group: the next
 #                 character is left unconsumed for "term", and expr-quant is pushed as
 #                 the state to resume in when the matching ')' pops.
 #
 open-paren:
     '?'                  n  open-paren-extended                     doSuppressComments
     default                 term            ^expr-quant             doOpenCaptureParen

 #
 #   open-paren-extended   Have scanned "(?".  The next character selects the construct.
 #                 Look-ahead groups push expr-cont rather than expr-quant, so no
 #                 quantifier is looked for after their closing paren.
 #                 Flag letters and '-' are not consumed here (no 'n'); paren-flag
 #                 sees the same character again.
 #                 "(?(" and "(?{" go to errorDeath via doConditionalExpr / doPerlInline.
 #
 open-paren-extended:
     ':'                  n  term            ^expr-quant             doOpenNonCaptureParen  #  (?:
     '>'                  n  term            ^expr-quant             doOpenAtomicParen      #  (?>
     '='                  n  term            ^expr-cont              doOpenLookAhead        #  (?=
     '!'                  n  term            ^expr-cont              doOpenLookAheadNeg     #  (?!
     '<'                  n  open-paren-lookbehind
     '#'                  n  paren-comment   ^term
     'i'                     paren-flag                              doBeginMatchMode
     'd'                     paren-flag                              doBeginMatchMode
     'm'                     paren-flag                              doBeginMatchMode
     's'                     paren-flag                              doBeginMatchMode
     'u'                     paren-flag                              doBeginMatchMode
     'w'                     paren-flag                              doBeginMatchMode
     'x'                     paren-flag                              doBeginMatchMode
     '-'                     paren-flag                              doBeginMatchMode
     '('                  n  errorDeath                              doConditionalExpr
     '{'                  n  errorDeath                              doPerlInline
     default                 errorDeath                              doBadOpenParenType

 #
 #   open-paren-lookbehind   Have scanned "(?<".  Only '=' and '!' are accepted here;
 #                 like the look-ahead groups, both push expr-cont.
 #
 open-paren-lookbehind:
     '='                  n  term            ^expr-cont              doOpenLookBehind       #  (?<=
     '!'                  n  term            ^expr-cont              doOpenLookBehindNeg    #  (?<!
     default                 errorDeath                              doBadOpenParenType


 #
 #   paren-comment    We've got a (?# ... )  style comment.  Eat pattern text till we get to the ')'
 #                    The closing ')' pops to whichever state was pushed on entry
 #                    (expr-quant from open-paren-quant2, term from open-paren-extended).
 #                    Reaching eof inside the comment runs doMismatchedParenErr.
 #
 paren-comment:
     ')'                  n  pop
     eof		                errorDeath                              doMismatchedParenErr
     default              n  paren-comment

 #
 #  paren-flag    Scanned a (?ismx-ismx  flag setting
 #                The first flag character was left unconsumed by open-paren-extended,
 #                so every flag character (and '-') is consumed here, one per transition.
 #                ')' ends a stand-alone flag setting and returns to term.
 #                ':' opens a group carrying the flags, pushing expr-quant as the
 #                state to resume in after the group closes.
 #
 paren-flag:
     'i'                  n  paren-flag                              doMatchMode
     'd'                  n  paren-flag                              doMatchMode
     'm'                  n  paren-flag                              doMatchMode
     's'                  n  paren-flag                              doMatchMode
     'u'                  n  paren-flag                              doMatchMode
     'w'                  n  paren-flag                              doMatchMode
     'x'                  n  paren-flag                              doMatchMode
     '-'                  n  paren-flag                              doMatchMode
     ')'                  n  term                                    doSetMatchMode
     ':'                  n  term              ^expr-quant           doMatchModeParen
     default                 errorDeath                              doBadModeFlag


 #
 #  quant-star     Scanning a '*' quantifier.  Need to look ahead to decide
 #                 between plain '*', '*?', '*+'
 #                 The '*' itself was consumed by expr-quant; the default row leaves
 #                 the look-ahead character unconsumed.
 #
 quant-star:
      '?'                 n  expr-cont                               doNGStar               #  *?
      '+'                 n  expr-cont                               doPossessiveStar       #  *+
      default                expr-cont                               doStar


 #
 #  quant-plus     Scanning a '+' quantifier.  Need to look ahead to decide
 #                 between plain '+', '+?', '++'
 #                 The '+' itself was consumed by expr-quant; the default row leaves
 #                 the look-ahead character unconsumed.
 #
 quant-plus:
      '?'                 n  expr-cont                               doNGPlus               #  +?
      '+'                 n  expr-cont                               doPossessivePlus       #  ++
      default                expr-cont                               doPlus


 #
 #  quant-opt  Scanning a '?' quantifier.  Need to look ahead to decide
 #                  between plain '?', '??', '?+'
 #                  The '?' itself was consumed by expr-quant; the default row leaves
 #                  the look-ahead character unconsumed.
 #
 quant-opt:
      '?'                 n  expr-cont                               doNGOpt                 #  ??
      '+'                 n  expr-cont                               doPossessiveOpt         #  ?+
      default                expr-cont                               doOpt                   #  ?


 #
 #   Interval         scanning a '{', the opening delimiter for an interval specification
 #                                   {number} or {min, max} or {min,}
 #
 #   interval-open    Only checks that a digit follows the '{'.  The digit is not consumed
 #                    here (no 'n'); interval-lower consumes it.
 #
 interval-open:
     digit_char              interval-lower
     default                 errorDeath                              doIntervalError

 #
 #   interval-lower   Consuming the digits of the lower bound, one per transition.
 #                    ',' moves on to the upper bound; '}' ends a single-number interval.
 #                    NOTE(review): the action name is spelled "doInteval..." (sic).
 #                    It presumably has to match the identifier used by the code that
 #                    implements the actions, so do not respell it here alone.
 #
 interval-lower:
     digit_char           n  interval-lower                          doIntevalLowerDigit
     ','			         n  interval-upper
     '}'                  n  interval-type                           doIntervalSame             # {n}
     default                 errorDeath                              doIntervalError

 #
 #   interval-upper   Have scanned "{min,".  Consumes the digits of the upper bound.
 #                    A '}' is accepted immediately, with no digits, which is the
 #                    {min,} form.
 #
 interval-upper:
     digit_char           n  interval-upper                          doIntervalUpperDigit
     '}'                  n  interval-type
     default                 errorDeath                              doIntervalError

 #
 #   interval-type    Have scanned the closing '}'.  Look ahead for a trailing '?' or '+'
 #                    modifier; the default row leaves the next character unconsumed.
 #
 interval-type:
     '?'                  n  expr-cont                               doNGInterval                # {n,m}?
     '+'                  n  expr-cont                               doPossessiveInterval        # {n,m}+
     default                 expr-cont                               doInterval                  # {n,m}


 #
 #  backslash        #  Backslash.  Figure out which of the \thingies we have encountered.
 #                                  The low level next-char function will have preprocessed
 #                                  some of them already; those won't come here.
 #
 #                   Escapes whose row continues with "term" (\A \B \b \G \Q \Z \z) are not
 #                   followed by a quantifier search; those that continue with expr-quant are.
 #                   The \N, \p and \P rows have no 'n', so the letter is not consumed by
 #                   the state machine (presumably the action scans the rest itself -
 #                   TODO confirm).
 #                   eof directly after the backslash runs doEscapeError; any character
 #                   not listed is taken by the default row (doEscapedLiteralChar).
 #
 backslash:
    'A'                   n  term                                    doBackslashA
    'B'                   n  term                                    doBackslashB
    'b'                   n  term                                    doBackslashb
    'd'                   n  expr-quant                              doBackslashd
    'D'                   n  expr-quant                              doBackslashD
    'G'                   n  term                                    doBackslashG
    'N'                      expr-quant                              doNamedChar      #   \N{NAME}  named char
    'p'                      expr-quant                              doProperty       #   \p{Lu}  style property
    'P'                      expr-quant                              doProperty
    'Q'                   n  term                                    doEnterQuoteMode
    'S'                   n  expr-quant                              doBackslashS
    's'                   n  expr-quant                              doBackslashs
    'W'                   n  expr-quant                              doBackslashW
    'w'                   n  expr-quant                              doBackslashw
    'X'                   n  expr-quant                              doBackslashX
    'Z'                   n  term                                    doBackslashZ
    'z'                   n  term                                    doBackslashz
    digit_char            n  expr-quant                              doBackRef         #  Will scan multiple digits
    eof                      errorDeath                              doEscapeError
    default               n  expr-quant                              doEscapedLiteralChar


 #
 # [set expression] parsing,
 #    All states involved in parsing set expressions have names beginning with "set-"
 #

 #
 #  set-open     Have just scanned the '[' that opens a set (top level or nested).
 #               A leading '^' is consumed and runs doSetNegate.  A leading ':' is not
 #               consumed here; doSetPosixProp runs and set-posix decides what it was.
 #
 set-open:
    '^'                   n  set-open2                               doSetNegate
    ':'                      set-posix                               doSetPosixProp
    default                  set-open2

 #
 #  set-open2    After the '[' and an optional '^'.  A ']' in this position is taken as
 #               a literal (doSetLiteral) rather than as the end of the set.
 #
 set-open2:
    ']'                   n  set-after-lit                           doSetLiteral
    default                  set-start

 #  set-posix:
 #                  scanned a '[:'  If it really is a [:property:], doSetPosixProp will have
 #                  moved the scan to the closing ']'.  If it wasn't a property
 #                  expression, the scan will still be at the opening ':', which should
 #                  be interpreted as a normal set expression.
 #                  In the second case the ':' is left unconsumed for set-start.
 set-posix:
     ']'                  n   pop                                    doSetEnd
     ':'                      set-start
     default                  errorDeath                             doRuleError  # should not be possible.

 #
 #   set-start   after the [ and special case leading characters (^ and/or ]) but before
 #               everything else.   A '-' is literal at this point.
 #
 #               A nested '[' pushes set-after-set as the state to resume in when the
 #               nested set closes.
 #               NOTE(review): unlike set-after-lit / set-after-set / set-after-range there
 #               is no explicit eof row here, so eof is taken by the default row - confirm
 #               that this is intended.
 #
 set-start:
     ']'                  n  pop                                     doSetEnd
     '['                  n  set-open      ^set-after-set            doSetBeginUnion
     '\'                  n  set-escape
     '-'                  n  set-start-dash
     '&'                  n  set-start-amp
     default              n  set-after-lit                           doSetLiteral

 #    set-start-dash    Turn "[--" into a syntax error.
 #                           "[-x" is good, - and x are literals.
 #                      The character after the '-' is not consumed here; on the default
 #                      row doSetAddDash runs and set-after-lit examines that character.
 #
 set-start-dash:
     '-'                     errorDeath                              doRuleError
     default                 set-after-lit                           doSetAddDash

 #    set-start-amp     Turn "[&&" into a syntax error.
 #                           "[&x" is good, & and x are literals.
 #                      The character after the '&' is not consumed here; on the default
 #                      row doSetAddAmp runs and set-after-lit examines that character.
 #
 set-start-amp:
     '&'                     errorDeath                              doRuleError
     default                 set-after-lit                           doSetAddAmp

 #
 #   set-after-lit    The last thing scanned was a literal character within a set.
 #                    Can be followed by anything.  Single '-' or '&' are
 #                    literals in this context, not operators.
 #                    Whether a '-' or '&' is single is decided by set-lit-dash / set-lit-amp.
 #                    eof here means the set was never closed (doSetNoCloseError).
 set-after-lit:
     ']'                  n  pop                                     doSetEnd
     '['                  n  set-open      ^set-after-set            doSetBeginUnion
     '-'                  n  set-lit-dash
     '&'                  n  set-lit-amp
     '\'                  n  set-escape
     eof                     errorDeath                              doSetNoCloseError
     default              n  set-after-lit                           doSetLiteral

 #
 #   set-after-set    The last thing scanned was a complete nested [set], or a \p / \P
 #                    property (set-escape sends those here).  Differs from set-after-lit
 #                    only in where '-' and '&' go: set-set-dash / set-set-amp, which
 #                    also accept a following '[' as an operand.
 #
 set-after-set:
     ']'                  n  pop                                     doSetEnd
     '['                  n  set-open      ^set-after-set            doSetBeginUnion
     '-'                  n  set-set-dash
     '&'                  n  set-set-amp
     '\'                  n  set-escape
     eof                     errorDeath                              doSetNoCloseError
     default              n  set-after-lit                           doSetLiteral

 #
 #   set-after-range  The last thing scanned was a range (a-b) or one of the escapes
 #                    \s \S \w \W \d \D (set-escape sends those here).  Differs from
 #                    set-after-lit only in where '-' and '&' go: set-range-dash /
 #                    set-range-amp.
 #
 set-after-range:
     ']'                  n  pop                                     doSetEnd
     '['                  n  set-open      ^set-after-set            doSetBeginUnion
     '-'                  n  set-range-dash
     '&'                  n  set-range-amp
     '\'                  n  set-escape
     eof                     errorDeath                              doSetNoCloseError
     default              n  set-after-lit                           doSetLiteral


 # set-after-op
 #     After a --  or &&
 #     It is an error to close a set at this point.
 #     NOTE(review): there is no explicit eof row here, so eof is taken by the
 #     default row - confirm that this is intended.
 #
 set-after-op:
     '['                  n  set-open         ^set-after-set         doSetBeginUnion
     ']'                     errorDeath                              doSetOpError
     '\'                  n  set-escape
     default              n  set-after-lit                           doSetLiteral

 #
 #   set-set-amp
 #      Have scanned [[set]&
 #      Could be a '&' intersection operator, if a set follows.
 #      Could be the start of a '&&' operator.
 #      Otherwise is a literal.
 #      In the literal case the following character is not consumed here.
 set-set-amp:
     '['                  n  set-open      ^set-after-set           doSetBeginIntersection1
     '&'                  n  set-after-op                           doSetIntersection2
     default                 set-after-lit                          doSetAddAmp


 # set-lit-amp   Have scanned "[literals&"
 #               Could be a start of "&&" operator or a literal
 #               In [abc&[def]],   the '&' is a literal
 #               In the literal case the following character is not consumed here.
 #
 set-lit-amp:
     '&'                  n  set-after-op                            doSetIntersection2
     default                 set-after-lit                           doSetAddAmp


 #
 #  set-set-dash
 #      Have scanned [set]-
 #      Could be a '-' difference operator, if a [set] follows.
 #      Could be the start of a '--' operator.
 #      Otherwise is a literal.
 #      In the literal case the following character is not consumed here.
 set-set-dash:
     '['                  n  set-open      ^set-after-set           doSetBeginDifference1
     '-'                  n  set-after-op                           doSetDifference2
     default                 set-after-lit                          doSetAddDash


 #
 #  set-range-dash
 #      scanned  a-b-  or \w-
 #         any set or range like item where the trailing single '-' should
 #         be literal, not a set difference operation.
 #         A trailing "--" is still a difference operator.
 set-range-dash:
     '-'                  n  set-after-op                           doSetDifference2
     default                 set-after-lit                          doSetAddDash


 #
 #  set-range-amp
 #      scanned  a-b&  or \w&
 #         The '&' counterpart of set-range-dash: a single '&' is a literal,
 #         "&&" is the intersection operator.
 set-range-amp:
     '&'                  n  set-after-op                           doSetIntersection2
     default                 set-after-lit                          doSetAddAmp


 #  set-lit-dash
 #     Have scanned "[literals-" Could be a range or a -- operator or a literal
 #     In [abc-[def]], the '-' is a literal (confirmed with a Java test)
 #        [abc-\p{xx}  the '-' is an error
 #        [abc-]       the '-' is a literal
 #        [ab-xy]      the '-' is a range
 #
 #     For '[' and ']' the bracket is not consumed here; doSetAddDash runs and
 #     set-after-lit handles the bracket.
 #     NOTE(review): a backslash goes to set-lit-dash-escape, which has no row for
 #     'p' / 'P'.  Confirm that "[abc-\p{xx}" is really rejected as stated above.
 #
 set-lit-dash:
     '-'                  n  set-after-op                            doSetDifference2
     '['                     set-after-lit                           doSetAddDash
     ']'                     set-after-lit                           doSetAddDash
     '\'                  n  set-lit-dash-escape
     default              n  set-after-range                         doSetRange

 # set-lit-dash-escape
 #
 #    scanned "[literal-\"
 #    Could be a range, if the \ introduces an escaped literal char or a named char.
 #    Otherwise it is an error.
 #
 #    The set-valued escapes \s \S \w \W \d \D cannot end a range and run doSetOpError.
 #    \N is not consumed here (no 'n'); doSetNamedRange runs instead.
 #    NOTE(review): 'p' and 'P' are not listed, so \p / \P are taken by the default
 #    row (doSetRange) - confirm against the "[abc-\p{xx}" case documented for
 #    set-lit-dash.
 #
 set-lit-dash-escape:
    's'                      errorDeath                             doSetOpError
    'S'                      errorDeath                             doSetOpError
    'w'                      errorDeath                             doSetOpError
    'W'                      errorDeath                             doSetOpError
    'd'                      errorDeath                             doSetOpError
    'D'                      errorDeath                             doSetOpError
    'N'                      set-after-range                        doSetNamedRange
    default               n  set-after-range                        doSetRange


 #
 #  set-escape
 #       Common back-slash escape processing within set expressions
 #
 #       The follow-on state records what kind of item the escape produced:
 #          \p \P               -> set-after-set    (letter not consumed here)
 #          \N                  -> set-after-lit    (letter not consumed here)
 #          \s \S \w \W \d \D   -> set-after-range
 #          anything else       -> set-after-lit, as an escaped literal
 #
 set-escape:
    'p'                      set-after-set                           doSetProp
    'P'                      set-after-set                           doSetProp
    'N'                      set-after-lit                           doSetNamedChar
    's'                   n  set-after-range                         doSetBackslash_s
    'S'                   n  set-after-range                         doSetBackslash_S
    'w'                   n  set-after-range                         doSetBackslash_w
    'W'                   n  set-after-range                         doSetBackslash_W
    'd'                   n  set-after-range                         doSetBackslash_d
    'D'                   n  set-after-range                         doSetBackslash_D
    default               n  set-after-lit                           doSetLiteralEscaped

 #
 # set-finish
 #     Have just encountered the final ']' that completes a [set], and
 #     arrived here via a pop.  From here, we exit the set parsing world, and go
 #     back to generic regular expression parsing.
 #     This is the state that "term" pushes when it sees the opening '['.
 #     No input is consumed here; doSetFinish runs and expr-quant takes over.
 #
 set-finish:
     default                 expr-quant                              doSetFinish


 #
 # errorDeath.   This state is specified as the next state whenever a syntax error
 #               in the source rules is detected.  Barring bugs, the state machine will never
 #               actually get here, but will stop because of the action associated with the error.
 #               But, just in case, this state asks the state machine to exit.
 #               The single row loops back to errorDeath, consuming input and running doExit.
 errorDeath:
     default              n errorDeath                               doExit

	#*****************************************************************************
	#
	# Copyright (C) 2002-2007, International Business Machines Corporation and others.
	# All Rights Reserved.
	#
	#*****************************************************************************
	#
	# file: regexcst.txt
	# ICU Regular Expression Parser State Table
	#
	# This state table is used when reading and parsing a regular expression pattern
	# The pattern parser uses a state machine; the data in this file define the
	# state transitions that occur for each input character.
	#
	# *** This file defines the regex pattern grammar. This is it.
	# *** The determination of what is accepted is here.
	#
	# This file is processed by a perl script "regexcst.pl" to produce initialized C arrays
	# that are then built with the rule parser.
	#

	#
	# Here is the syntax of the state definitions in this file:
	#
	#
	#StateName:
	#   input-char           n next-state           ^push-state     action
	#   input-char           n next-state           ^push-state     action
	#       |                |   |                      |             |
	#       |                |   |                      |             |--- action to be performed by state machine
	#       |                |   |                      |                  See function RegexCompile::doParseActions()
	#       |                |   |                      |
	#       |                |   |                      |--- Push this named state onto the state stack.
	#       |                |   |                           Later, when next state is specified as "pop",
	#       |                |   |                           the pushed state will become the current state.
	#       |                |   |
	#       |                |   |--- Transition to this state if the current input character matches the input
	#       |                |        character or char class in the left hand column.  "pop" causes the next
	#       |                |        state to be popped from the state stack.
	#       |                |
	#       |                |--- When making the state transition specified on this line, advance to the next
	#       |                     character from the input only if 'n' appears here.
	#       |
	#       |--- Character or named character classes to test for.  If the current character being scanned
	#            matches, perform the actions and go to the state specified on this line.
	#            The input character is tested sequentially, in the order written.  The characters and
	#            character classes tested for do not need to be mutually exclusive.  The first match wins.
	#




	#
	# start state, scan position is at the beginning of the pattern.
	# The single default row matches any input, does not consume it (no 'n'),
	# runs doPatStart and moves on to "term".
	#
	start:
	default term doPatStart




	#
	# term. At a position where we can accept the start of most items in a pattern.
	#
	# Literal characters (quoted or rule_char), '.', '^' and '$' are consumed and
	# followed by expr-quant. '[' enters set parsing with set-finish pushed as the
	# state to resume in; '(' and '\' hand off to open-paren and backslash.
	# '|' (a single-character input, written without any escaping) starts a new
	# alternative. ')' runs doCloseParen and pops the state stack. eof runs
	# doPatFinish without advancing the input; any other character is reported
	# through doRuleError.
	#
	term:
	quoted n expr-quant doLiteralChar
	rule_char n expr-quant doLiteralChar
	'[' n set-open ^set-finish doSetBegin
	'(' n open-paren
	'.' n expr-quant doDotAny
	'^' n expr-quant doCaret
	'$' n expr-quant doDollar
	'\' n backslash
	'|' n term doOrOperator
	')' n pop doCloseParen
	eof term doPatFinish
	default errorDeath doRuleError



	#
	# expr-quant We've just finished scanning a term, now look for the optional
	# trailing quantifier - *, +, ?, *?, etc.
	#
	# A '(' is routed through open-paren-quant (rather than open-paren) so that a
	# (?# comment) sitting between a term and its quantifier can be skipped.
	# Anything that does not start a quantifier falls through to expr-cont
	# without consuming input.
	#
	expr-quant:
	'*' n quant-star
	'+' n quant-plus
	'?' n quant-opt
	'{' n interval-open doIntervalInit
	'(' n open-paren-quant
	default expr-cont


	#
	# expr-cont Expression, continuation. At a point where additional terms are
	# allowed, but not required. No Quantifiers
	#
	# '|' (a single-character input, written without any escaping) and ')' are
	# handled exactly as in "term"; every other character is left unconsumed and
	# re-examined by "term".
	#
	expr-cont:
	'|' n term doOrOperator
	')' n pop doCloseParen
	default term


	#
	# open-paren-quant Special case handling for comments appearing before a quantifier,
	# e.g. x(?#comment )*
	# Open parens from expr-quant come here; anything but a (?# comment
	# branches into the normal parenthesis sequence as quickly as possible.
	#
	# The '(' has already been consumed by expr-quant. A following '?' is consumed
	# here; any other character is passed, unconsumed, to open-paren.
	#
	open-paren-quant:
	'?' n open-paren-quant2 doSuppressComments
	default open-paren

	#
	# open-paren-quant2 Have scanned "(?" after a term. For "(?#" the comment is eaten
	# by paren-comment with expr-quant pushed, so the quantifier search resumes once
	# the comment closes. Anything else is re-examined by open-paren-extended.
	#
	open-paren-quant2:
	'#' n paren-comment ^expr-quant
	default open-paren-extended


	#
	# open-paren We've got an open paren. We need to scan further to
	# determine what kind of group it is - plain (, (?:, (?>, or whatever.
	#
	# Without a following '?' this is a plain capturing group: the next character
	# is left unconsumed for "term", and expr-quant is pushed as the state to
	# resume in when the matching ')' pops.
	#
	open-paren:
	'?' n open-paren-extended doSuppressComments
	default term ^expr-quant doOpenCaptureParen

	#
	# open-paren-extended Have scanned "(?". The next character selects the construct.
	# Look-ahead groups push expr-cont rather than expr-quant, so no quantifier is
	# looked for after their closing paren. Flag letters and '-' are not consumed
	# here (no 'n'); paren-flag sees the same character again.
	# "(?(" and "(?{" go to errorDeath via doConditionalExpr / doPerlInline.
	#
	open-paren-extended:
	':' n term ^expr-quant doOpenNonCaptureParen # (?:
	'>' n term ^expr-quant doOpenAtomicParen # (?>
	'=' n term ^expr-cont doOpenLookAhead # (?=
	'!' n term ^expr-cont doOpenLookAheadNeg # (?!
	'<' n open-paren-lookbehind
	'#' n paren-comment ^term
	'i' paren-flag doBeginMatchMode
	'd' paren-flag doBeginMatchMode
	'm' paren-flag doBeginMatchMode
	's' paren-flag doBeginMatchMode
	'u' paren-flag doBeginMatchMode
	'w' paren-flag doBeginMatchMode
	'x' paren-flag doBeginMatchMode
	'-' paren-flag doBeginMatchMode
	'(' n errorDeath doConditionalExpr
	'{' n errorDeath doPerlInline
	default errorDeath doBadOpenParenType

	#
	# open-paren-lookbehind Have scanned "(?<". Only '=' and '!' are accepted here;
	# like the look-ahead groups, both push expr-cont.
	#
	open-paren-lookbehind:
	'=' n term ^expr-cont doOpenLookBehind # (?<=
	'!' n term ^expr-cont doOpenLookBehindNeg # (?<!
	default errorDeath doBadOpenParenType


	#
	# paren-comment We've got a (?# ... ) style comment. Eat pattern text till we get to the ')'
	# The closing ')' pops to whichever state was pushed on entry (expr-quant from
	# open-paren-quant2, term from open-paren-extended).
	# Reaching eof inside the comment runs doMismatchedParenErr.
	#
	paren-comment:
	')' n pop
	eof errorDeath doMismatchedParenErr
	default n paren-comment

	#
	# paren-flag Scanned a (?ismx-ismx flag setting
	# The first flag character was left unconsumed by open-paren-extended, so every
	# flag character (and '-') is consumed here, one per transition.
	# ')' ends a stand-alone flag setting and returns to term.
	# ':' opens a group carrying the flags, pushing expr-quant as the state to
	# resume in after the group closes.
	#
	paren-flag:
	'i' n paren-flag doMatchMode
	'd' n paren-flag doMatchMode
	'm' n paren-flag doMatchMode
	's' n paren-flag doMatchMode
	'u' n paren-flag doMatchMode
	'w' n paren-flag doMatchMode
	'x' n paren-flag doMatchMode
	'-' n paren-flag doMatchMode
	')' n term doSetMatchMode
	':' n term ^expr-quant doMatchModeParen
	default errorDeath doBadModeFlag


	#
	# quant-star Scanning a '*' quantifier. Need to look ahead to decide
	# between plain '*', '*?', '*+'
	# The '*' itself was consumed by expr-quant; the default row leaves the
	# look-ahead character unconsumed.
	#
	quant-star:
	'?' n expr-cont doNGStar # *?
	'+' n expr-cont doPossessiveStar # *+
	default expr-cont doStar


	#
	# quant-plus Scanning a '+' quantifier. Need to look ahead to decide
	# between plain '+', '+?', '++'
	# The '+' itself was consumed by expr-quant; the default row leaves the
	# look-ahead character unconsumed.
	#
	quant-plus:
	'?' n expr-cont doNGPlus # +?
	'+' n expr-cont doPossessivePlus # ++
	default expr-cont doPlus


	#
	# quant-opt Scanning a '?' quantifier. Need to look ahead to decide
	# between plain '?', '??', '?+'
	# The '?' itself was consumed by expr-quant; the default row leaves the
	# look-ahead character unconsumed.
	#
	quant-opt:
	'?' n expr-cont doNGOpt # ??
	'+' n expr-cont doPossessiveOpt # ?+
	default expr-cont doOpt # ?


	#
	# Interval scanning a '{', the opening delimiter for an interval specification
	# {number} or {min, max} or {min,}
	#
	# interval-open only checks that a digit follows the '{'. The digit is not
	# consumed here (no 'n'); interval-lower consumes it.
	#
	interval-open:
	digit_char interval-lower
	default errorDeath doIntervalError

	#
	# interval-lower Consuming the digits of the lower bound, one per transition.
	# ',' moves on to the upper bound; '}' ends a single-number interval.
	# NOTE(review): the action name is spelled "doInteval..." (sic). It presumably
	# has to match the identifier used by the code that implements the actions, so
	# do not respell it here alone.
	#
	interval-lower:
	digit_char n interval-lower doIntevalLowerDigit
	',' n interval-upper
	'}' n interval-type doIntervalSame # {n}
	default errorDeath doIntervalError

	#
	# interval-upper Have scanned "{min,". Consumes the digits of the upper bound.
	# A '}' is accepted immediately, with no digits, which is the {min,} form.
	#
	interval-upper:
	digit_char n interval-upper doIntervalUpperDigit
	'}' n interval-type
	default errorDeath doIntervalError

	#
	# interval-type Have scanned the closing '}'. Look ahead for a trailing '?' or
	# '+' modifier; the default row leaves the next character unconsumed.
	#
	interval-type:
	'?' n expr-cont doNGInterval # {n,m}?
	'+' n expr-cont doPossessiveInterval # {n,m}+
	default expr-cont doInterval # {n,m}


	#
	# backslash # Backslash. Figure out which of the \thingies we have encountered.
	# The low level next-char function will have preprocessed
	# some of them already; those won't come here.
	#
	# Escapes whose row continues with "term" (\A \B \b \G \Q \Z \z) are not followed
	# by a quantifier search; those that continue with expr-quant are.
	# The \N, \p and \P rows have no 'n', so the letter is not consumed by the state
	# machine (presumably the action scans the rest itself - TODO confirm).
	# eof directly after the backslash runs doEscapeError; any character not listed
	# is taken by the default row (doEscapedLiteralChar).
	#
	backslash:
	'A' n term doBackslashA
	'B' n term doBackslashB
	'b' n term doBackslashb
	'd' n expr-quant doBackslashd
	'D' n expr-quant doBackslashD
	'G' n term doBackslashG
	'N' expr-quant doNamedChar # \N{NAME} named char
	'p' expr-quant doProperty # \p{Lu} style property
	'P' expr-quant doProperty
	'Q' n term doEnterQuoteMode
	'S' n expr-quant doBackslashS
	's' n expr-quant doBackslashs
	'W' n expr-quant doBackslashW
	'w' n expr-quant doBackslashw
	'X' n expr-quant doBackslashX
	'Z' n term doBackslashZ
	'z' n term doBackslashz
	digit_char n expr-quant doBackRef # Will scan multiple digits
	eof errorDeath doEscapeError
	default n expr-quant doEscapedLiteralChar



	#
	# [set expression] parsing,
	# All states involved in parsing set expressions have names beginning with "set-"
	#

	#
	# set-open Have just scanned the '[' that opens a set (top level or nested).
	# A leading '^' is consumed and runs doSetNegate. A leading ':' is not consumed
	# here; doSetPosixProp runs and set-posix decides what it was.
	#
	set-open:
	'^' n set-open2 doSetNegate
	':' set-posix doSetPosixProp
	default set-open2

	#
	# set-open2 After the '[' and an optional '^'. A ']' in this position is taken
	# as a literal (doSetLiteral) rather than as the end of the set.
	#
	set-open2:
	']' n set-after-lit doSetLiteral
	default set-start

	# set-posix:
	# scanned a '[:' If it really is a [:property:], doSetPosixProp will have
	# moved the scan to the closing ']'. If it wasn't a property
	# expression, the scan will still be at the opening ':', which should
	# be interpreted as a normal set expression.
	# In the second case the ':' is left unconsumed for set-start.
	set-posix:
	']' n pop doSetEnd
	':' set-start
	default errorDeath doRuleError # should not be possible.

	#
	# set-start after the [ and special case leading characters (^ and/or ]) but before
	# everything else. A '-' is literal at this point.
	#
	# A nested '[' pushes set-after-set as the state to resume in when the nested
	# set closes.
	# NOTE(review): unlike set-after-lit / set-after-set / set-after-range there is
	# no explicit eof row here, so eof is taken by the default row - confirm that
	# this is intended.
	#
	set-start:
	']' n pop doSetEnd
	'[' n set-open ^set-after-set doSetBeginUnion
	'\' n set-escape
	'-' n set-start-dash
	'&' n set-start-amp
	default n set-after-lit doSetLiteral

	# set-start-dash Turn "[--" into a syntax error.
	# "[-x" is good, - and x are literals.
	# The character after the '-' is not consumed here; on the default row
	# doSetAddDash runs and set-after-lit examines that character.
	#
	set-start-dash:
	'-' errorDeath doRuleError
	default set-after-lit doSetAddDash

	# set-start-amp Turn "[&&" into a syntax error.
	# "[&x" is good, & and x are literals.
	# The character after the '&' is not consumed here; on the default row
	# doSetAddAmp runs and set-after-lit examines that character.
	#
	set-start-amp:
	'&' errorDeath doRuleError
	default set-after-lit doSetAddAmp

	#
	# set-after-lit The last thing scanned was a literal character within a set.
	# Can be followed by anything. Single '-' or '&' are
	# literals in this context, not operators.
	# Whether a '-' or '&' is single is decided by set-lit-dash / set-lit-amp.
	# eof here means the set was never closed (doSetNoCloseError).
	set-after-lit:
	']' n pop doSetEnd
	'[' n set-open ^set-after-set doSetBeginUnion
	'-' n set-lit-dash
	'&' n set-lit-amp
	'\' n set-escape
	eof errorDeath doSetNoCloseError
	default n set-after-lit doSetLiteral

	#
	# set-after-set The last thing scanned was a complete nested [set], or a \p / \P
	# property (set-escape sends those here). Differs from set-after-lit only in
	# where '-' and '&' go: set-set-dash / set-set-amp, which also accept a
	# following '[' as an operand.
	#
	set-after-set:
	']' n pop doSetEnd
	'[' n set-open ^set-after-set doSetBeginUnion
	'-' n set-set-dash
	'&' n set-set-amp
	'\' n set-escape
	eof errorDeath doSetNoCloseError
	default n set-after-lit doSetLiteral

	#
	# set-after-range The last thing scanned was a range (a-b) or one of the escapes
	# \s \S \w \W \d \D (set-escape sends those here). Differs from set-after-lit
	# only in where '-' and '&' go: set-range-dash / set-range-amp.
	#
	set-after-range:
	']' n pop doSetEnd
	'[' n set-open ^set-after-set doSetBeginUnion
	'-' n set-range-dash
	'&' n set-range-amp
	'\' n set-escape
	eof errorDeath doSetNoCloseError
	default n set-after-lit doSetLiteral


	# set-after-op
	# After a -- or &&
	# It is an error to close a set at this point.
	# NOTE(review): there is no explicit eof row here, so eof is taken by the
	# default row - confirm that this is intended.
	#
	set-after-op:
	'[' n set-open ^set-after-set doSetBeginUnion
	']' errorDeath doSetOpError
	'\' n set-escape
	default n set-after-lit doSetLiteral

	#
	# set-set-amp
	# Have scanned [[set]&
	# Could be a '&' intersection operator, if a set follows.
	# Could be the start of a '&&' operator.
	# Otherwise is a literal.
	# In the literal case the following character is not consumed here.
	set-set-amp:
	'[' n set-open ^set-after-set doSetBeginIntersection1
	'&' n set-after-op doSetIntersection2
	default set-after-lit doSetAddAmp


	# set-lit-amp Have scanned "[literals&"
	# Could be a start of "&&" operator or a literal
	# In [abc&[def]], the '&' is a literal
	# In the literal case the following character is not consumed here.
	#
	set-lit-amp:
	'&' n set-after-op doSetIntersection2
	default set-after-lit doSetAddAmp


	#
	# set-set-dash
	# Have scanned [set]-
	# Could be a '-' difference operator, if a [set] follows.
	# Could be the start of a '--' operator.
	# Otherwise is a literal.
	# In the literal case the following character is not consumed here.
	set-set-dash:
	'[' n set-open ^set-after-set doSetBeginDifference1
	'-' n set-after-op doSetDifference2
	default set-after-lit doSetAddDash


	#
	# set-range-dash
	# scanned a-b- or \w-
	# any set or range like item where the trailing single '-' should
	# be literal, not a set difference operation.
	# A trailing "--" is still a difference operator.
	set-range-dash:
	'-' n set-after-op doSetDifference2
	default set-after-lit doSetAddDash


	#
	# set-range-amp
	# scanned a-b& or \w&
	# The '&' counterpart of set-range-dash: a single '&' is a literal,
	# "&&" is the intersection operator.
	set-range-amp:
	'&' n set-after-op doSetIntersection2
	default set-after-lit doSetAddAmp


	# set-lit-dash
	# Have scanned "[literals-" Could be a range or a -- operator or a literal
	# In [abc-[def]], the '-' is a literal (confirmed with a Java test)
	# [abc-\p{xx} the '-' is an error
	# [abc-] the '-' is a literal
	# [ab-xy] the '-' is a range
	#
	# For '[' and ']' the bracket is not consumed here; doSetAddDash runs and
	# set-after-lit handles the bracket.
	# NOTE(review): a backslash goes to set-lit-dash-escape, which has no row for
	# 'p' / 'P'. Confirm that "[abc-\p{xx}" is really rejected as stated above.
	#
	set-lit-dash:
	'-' n set-after-op doSetDifference2
	'[' set-after-lit doSetAddDash
	']' set-after-lit doSetAddDash
	'\' n set-lit-dash-escape
	default n set-after-range doSetRange

	# set-lit-dash-escape
	#
	# scanned "[literal-\"
	# Could be a range, if the \ introduces an escaped literal char or a named char.
	# Otherwise it is an error.
	#
	# The set-valued escapes \s \S \w \W \d \D cannot end a range and run doSetOpError.
	# \N is not consumed here (no 'n'); doSetNamedRange runs instead.
	# NOTE(review): 'p' and 'P' are not listed, so \p / \P are taken by the default
	# row (doSetRange) - confirm against the "[abc-\p{xx}" case documented for
	# set-lit-dash.
	#
	set-lit-dash-escape:
	's' errorDeath doSetOpError
	'S' errorDeath doSetOpError
	'w' errorDeath doSetOpError
	'W' errorDeath doSetOpError
	'd' errorDeath doSetOpError
	'D' errorDeath doSetOpError
	'N' set-after-range doSetNamedRange
	default n set-after-range doSetRange


	#
	# set-escape
	# Common back-slash escape processing within set expressions
	#
	# The follow-on state records what kind of item the escape produced:
	#   \p \P              -> set-after-set   (letter not consumed here)
	#   \N                 -> set-after-lit   (letter not consumed here)
	#   \s \S \w \W \d \D  -> set-after-range
	#   anything else      -> set-after-lit, as an escaped literal
	#
	set-escape:
	'p' set-after-set doSetProp
	'P' set-after-set doSetProp
	'N' set-after-lit doSetNamedChar
	's' n set-after-range doSetBackslash_s
	'S' n set-after-range doSetBackslash_S
	'w' n set-after-range doSetBackslash_w
	'W' n set-after-range doSetBackslash_W
	'd' n set-after-range doSetBackslash_d
	'D' n set-after-range doSetBackslash_D
	default n set-after-lit doSetLiteralEscaped

	#
	# set-finish
	# Have just encountered the final ']' that completes a [set], and
	# arrived here via a pop. From here, we exit the set parsing world, and go
	# back to generic regular expression parsing.
	# This is the state that "term" pushes when it sees the opening '['.
	# No input is consumed here; doSetFinish runs and expr-quant takes over.
	#
	set-finish:
	default expr-quant doSetFinish


	#
	# errorDeath. This state is specified as the next state whenever a syntax error
	# in the source rules is detected. Barring bugs, the state machine will never
	# actually get here, but will stop because of the action associated with the error.
	# But, just in case, this state asks the state machine to exit.
	# The single row loops back to errorDeath, consuming input and running doExit.
	errorDeath:
	default n errorDeath doExit