ICU-20618 Regex nested lookaround expressions, clean up active match region handling.
diff --git a/icu4c/source/i18n/regexcmp.cpp b/icu4c/source/i18n/regexcmp.cpp
index 5d1eb62..cacc069 100644
--- a/icu4c/source/i18n/regexcmp.cpp
+++ b/icu4c/source/i18n/regexcmp.cpp
@@ -561,7 +561,7 @@
         //               sequence; don't change without making updates there too.
         //
         // Compiles to
-        //    1    START_LA     dataLoc     Saves SP, Input Pos
+        //    1    LA_START     dataLoc     Saves SP, Input Pos, Active input region.
         //    2.   STATE_SAVE   4            on failure of lookahead, goto 4
         //    3    JMP          6           continue ...
         //
@@ -575,10 +575,14 @@
         //    8.     code for parenthesized stuff.
         //    9.   LA_END
         //
-        //  Two data slots are reserved, for saving the stack ptr and the input position.
+        //  Four data slots are reserved, for saving state on entry to the look-around
+        //    0:   stack pointer on entry.
+        //    1:   input position on entry.
+        //    2:   fActiveStart, the active bounds start on entry.
+        //    3:   fActiveLimit, the active bounds limit on entry.
         {
             fixLiterals();
-            int32_t dataLoc = allocateData(2);
+            int32_t dataLoc = allocateData(4);
             appendOp(URX_LA_START, dataLoc);
             appendOp(URX_STATE_SAVE, fRXPat->fCompiledPat->size()+ 2);
             appendOp(URX_JMP, fRXPat->fCompiledPat->size()+ 3);
@@ -599,18 +603,23 @@
     case doOpenLookAheadNeg:
         // Negated Lookahead.   (?! stuff )
         // Compiles to
-        //    1.    START_LA    dataloc
+        //    1.    LA_START    dataloc
         //    2.    SAVE_STATE  7         // Fail within look-ahead block restores to this state,
         //                                //   which continues with the match.
         //    3.    NOP                   // Std. Open Paren sequence, for possible '|'
         //    4.       code for parenthesized stuff.
-        //    5.    END_LA                // Cut back stack, remove saved state from step 2.
+        //    5.    LA_END                // Cut back stack, remove saved state from step 2.
         //    6.    BACKTRACK             // code in block succeeded, so neg. lookahead fails.
         //    7.    END_LA                // Restore match region, in case look-ahead was using
         //                                        an alternate (transparent) region.
+        //  Four data slots are reserved, for saving state on entry to the look-around
+        //    0:   stack pointer on entry.
+        //    1:   input position on entry.
+        //    2:   fActiveStart, the active bounds start on entry.
+        //    3:   fActiveLimit, the active bounds limit on entry.
         {
             fixLiterals();
-            int32_t dataLoc = allocateData(2);
+            int32_t dataLoc = allocateData(4);
             appendOp(URX_LA_START, dataLoc);
             appendOp(URX_STATE_SAVE, 0);    // dest address will be patched later.
             appendOp(URX_NOP, 0);
@@ -644,14 +653,16 @@
             //          Allocate a block of matcher data, to contain (when running a match)
             //              0:    Stack ptr on entry
             //              1:    Input Index on entry
-            //              2:    Start index of match current match attempt.
-            //              3:    Original Input String len.
+            //              2:    fActiveStart, the active bounds start on entry.
+            //              3:    fActiveLimit, the active bounds limit on entry.
+            //              4:    Start index of the current match attempt.
+            //          The first four items must match the layout of data for LA_START / LA_END
 
             // Generate match code for any pending literals.
             fixLiterals();
 
             // Allocate data space
-            int32_t dataLoc = allocateData(4);
+            int32_t dataLoc = allocateData(5);
 
             // Emit URX_LB_START
             appendOp(URX_LB_START, dataLoc);
@@ -696,14 +707,16 @@
             //          Allocate a block of matcher data, to contain (when running a match)
             //              0:    Stack ptr on entry
             //              1:    Input Index on entry
-            //              2:    Start index of match current match attempt.
-            //              3:    Original Input String len.
+            //              2:    fActiveStart, the active bounds start on entry.
+            //              3:    fActiveLimit, the active bounds limit on entry.
+            //              4:    Start index of the current match attempt.
+            //          The first four items must match the layout of data for LA_START / LA_END
 
             // Generate match code for any pending literals.
             fixLiterals();
 
             // Allocate data space
-            int32_t dataLoc = allocateData(4);
+            int32_t dataLoc = allocateData(5);
 
             // Emit URX_LB_START
             appendOp(URX_LB_START, dataLoc);
diff --git a/icu4c/source/i18n/regeximp.h b/icu4c/source/i18n/regeximp.h
index 51db882..590d216 100644
--- a/icu4c/source/i18n/regeximp.h
+++ b/icu4c/source/i18n/regeximp.h
@@ -123,7 +123,7 @@
                                //     saved input position, FAIL rather than taking
                                //     the JMP
      URX_LA_START      = 37,   // Starting a LookAround expression.
-                               //   Save InputPos and SP in static data.
+                               //   Save InputPos, SP and active region in static data.
                                //   Operand:  Static data offset for the save
      URX_LA_END        = 38,   // Ending a Lookaround expression.
                                //   Restore InputPos and Stack to saved values.
diff --git a/icu4c/source/i18n/rematch.cpp b/icu4c/source/i18n/rematch.cpp
index ef06c21..6d6ea0f 100644
--- a/icu4c/source/i18n/rematch.cpp
+++ b/icu4c/source/i18n/rematch.cpp
@@ -3805,11 +3805,13 @@
 
         case URX_LA_START:
             {
-                // Entering a lookahead block.
+                // Entering a look around block.
                 // Save Stack Ptr, Input Pos.
-                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
+                U_ASSERT(opValue>=0 && opValue+3<fPattern->fDataSize);
                 fData[opValue]   = fStack->size();
                 fData[opValue+1] = fp->fInputIdx;
+                fData[opValue+2] = fActiveStart;
+                fData[opValue+3] = fActiveLimit;
                 fActiveStart     = fLookStart;          // Set the match region change for
                 fActiveLimit     = fLookLimit;          //   transparent bounds.
             }
@@ -3819,7 +3821,7 @@
             {
                 // Leaving a look-ahead block.
                 //  restore Stack Ptr, Input Pos to positions they had on entry to block.
-                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
+                U_ASSERT(opValue>=0 && opValue+3<fPattern->fDataSize);
                 int32_t stackSize = fStack->size();
                 int32_t newStackSize =(int32_t)fData[opValue];
                 U_ASSERT(stackSize >= newStackSize);
@@ -3839,8 +3841,10 @@
 
                 // Restore the active region bounds in the input string; they may have
                 //    been changed because of transparent bounds on a Region.
-                fActiveStart = fRegionStart;
-                fActiveLimit = fRegionLimit;
+                fActiveStart = fData[opValue+2];
+                fActiveLimit = fData[opValue+3];
+                U_ASSERT(fActiveStart >= 0);
+                U_ASSERT(fActiveLimit <= fInputLength);
             }
             break;
 
@@ -3916,17 +3920,19 @@
         case URX_LB_START:
             {
                 // Entering a look-behind block.
-                // Save Stack Ptr, Input Pos.
+                // Save Stack Ptr, Input Pos and active input region.
                 //   TODO:  implement transparent bounds.  Ticket #6067
-                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
+                U_ASSERT(opValue>=0 && opValue+4<fPattern->fDataSize);
                 fData[opValue]   = fStack->size();
                 fData[opValue+1] = fp->fInputIdx;
-                // Init the variable containing the start index for attempted matches.
-                fData[opValue+2] = -1;
                 // Save input string length, then reset to pin any matches to end at
                 //   the current position.
+                fData[opValue+2] = fActiveStart;
                 fData[opValue+3] = fActiveLimit;
+                fActiveStart     = fRegionStart;
                 fActiveLimit     = fp->fInputIdx;
+                // Init the variable containing the start index for attempted matches.
+                fData[opValue+4] = -1;
             }
             break;
 
@@ -3949,8 +3955,8 @@
                 U_ASSERT(minML >= 0);
 
                 // Fetch (from data) the last input index where a match was attempted.
-                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
-                int64_t  &lbStartIdx = fData[opValue+2];
+                U_ASSERT(opValue>=0 && opValue+4<fPattern->fDataSize);
+                int64_t  &lbStartIdx = fData[opValue+4];
                 if (lbStartIdx < 0) {
                     // First time through loop.
                     lbStartIdx = fp->fInputIdx - minML;
@@ -3976,10 +3982,10 @@
                     //  getting a match.  Backtrack out, and out of the
                     //   Look Behind altogether.
                     fp = (REStackFrame *)fStack->popFrame(fFrameSize);
-                    int64_t restoreInputLen = fData[opValue+3];
-                    U_ASSERT(restoreInputLen >= fActiveLimit);
-                    U_ASSERT(restoreInputLen <= fInputLength);
-                    fActiveLimit = restoreInputLen;
+                    fActiveStart = fData[opValue+2];
+                    fActiveLimit = fData[opValue+3];
+                    U_ASSERT(fActiveStart >= 0);
+                    U_ASSERT(fActiveLimit <= fInputLength);
                     break;
                 }
 
@@ -3993,7 +3999,7 @@
         case URX_LB_END:
             // End of a look-behind block, after a successful match.
             {
-                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
+                U_ASSERT(opValue>=0 && opValue+4<fPattern->fDataSize);
                 if (fp->fInputIdx != fActiveLimit) {
                     //  The look-behind expression matched, but the match did not
                     //    extend all the way to the point that we are looking behind from.
@@ -4004,13 +4010,13 @@
                     break;
                 }
 
-                // Look-behind match is good.  Restore the orignal input string length,
+                // Look-behind match is good.  Restore the original input string region,
                 //   which had been truncated to pin the end of the lookbehind match to the
                 //   position being looked-behind.
-                int64_t originalInputLen = fData[opValue+3];
-                U_ASSERT(originalInputLen >= fActiveLimit);
-                U_ASSERT(originalInputLen <= fInputLength);
-                fActiveLimit = originalInputLen;
+                fActiveStart = fData[opValue+2];
+                fActiveLimit = fData[opValue+3];
+                U_ASSERT(fActiveStart >= 0);
+                U_ASSERT(fActiveLimit <= fInputLength);
             }
             break;
 
@@ -4035,8 +4041,8 @@
                 U_ASSERT(continueLoc > fp->fPatIdx);
 
                 // Fetch (from data) the last input index where a match was attempted.
-                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
-                int64_t  &lbStartIdx = fData[opValue+2];
+                U_ASSERT(opValue>=0 && opValue+4<fPattern->fDataSize);
+                int64_t  &lbStartIdx = fData[opValue+4];
                 if (lbStartIdx < 0) {
                     // First time through loop.
                     lbStartIdx = fp->fInputIdx - minML;
@@ -4061,10 +4067,10 @@
                     // We have tried all potential match starting points without
                     //  getting a match, which means that the negative lookbehind as
                     //  a whole has succeeded.  Jump forward to the continue location
-                    int64_t restoreInputLen = fData[opValue+3];
-                    U_ASSERT(restoreInputLen >= fActiveLimit);
-                    U_ASSERT(restoreInputLen <= fInputLength);
-                    fActiveLimit = restoreInputLen;
+                    fActiveStart = fData[opValue+2];
+                    fActiveLimit = fData[opValue+3];
+                    U_ASSERT(fActiveStart >= 0);
+                    U_ASSERT(fActiveLimit <= fInputLength);
                     fp->fPatIdx = continueLoc;
                     break;
                 }
@@ -4079,7 +4085,7 @@
         case URX_LBN_END:
             // End of a negative look-behind block, after a successful match.
             {
-                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
+                U_ASSERT(opValue>=0 && opValue+4<fPattern->fDataSize);
                 if (fp->fInputIdx != fActiveLimit) {
                     //  The look-behind expression matched, but the match did not
                     //    extend all the way to the point that we are looking behind from.
@@ -4096,10 +4102,10 @@
                 //   Restore the orignal input string length, which had been truncated
                 //   inorder to pin the end of the lookbehind match
                 //   to the position being looked-behind.
-                int64_t originalInputLen = fData[opValue+3];
-                U_ASSERT(originalInputLen >= fActiveLimit);
-                U_ASSERT(originalInputLen <= fInputLength);
-                fActiveLimit = originalInputLen;
+                fActiveStart = fData[opValue+2];
+                fActiveLimit = fData[opValue+3];
+                U_ASSERT(fActiveStart >= 0);
+                U_ASSERT(fActiveLimit <= fInputLength);
 
                 // Restore original stack position, discarding any state saved
                 //   by the successful pattern match.
@@ -5336,11 +5342,13 @@
 
         case URX_LA_START:
             {
-                // Entering a lookahead block.
+                // Entering a look around block.
                 // Save Stack Ptr, Input Pos.
-                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
+                U_ASSERT(opValue>=0 && opValue+3<fPattern->fDataSize);
                 fData[opValue]   = fStack->size();
                 fData[opValue+1] = fp->fInputIdx;
+                fData[opValue+2] = fActiveStart;
+                fData[opValue+3] = fActiveLimit;
                 fActiveStart     = fLookStart;          // Set the match region change for
                 fActiveLimit     = fLookLimit;          //   transparent bounds.
             }
@@ -5348,9 +5356,9 @@
 
         case URX_LA_END:
             {
-                // Leaving a look-ahead block.
+                // Leaving a look around block.
                 //  restore Stack Ptr, Input Pos to positions they had on entry to block.
-                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
+                U_ASSERT(opValue>=0 && opValue+3<fPattern->fDataSize);
                 int32_t stackSize = fStack->size();
                 int32_t newStackSize = (int32_t)fData[opValue];
                 U_ASSERT(stackSize >= newStackSize);
@@ -5370,8 +5378,10 @@
 
                 // Restore the active region bounds in the input string; they may have
                 //    been changed because of transparent bounds on a Region.
-                fActiveStart = fRegionStart;
-                fActiveLimit = fRegionLimit;
+                fActiveStart = fData[opValue+2];
+                fActiveLimit = fData[opValue+3];
+                U_ASSERT(fActiveStart >= 0);
+                U_ASSERT(fActiveLimit <= fInputLength);
             }
             break;
 
@@ -5434,17 +5444,19 @@
         case URX_LB_START:
             {
                 // Entering a look-behind block.
-                // Save Stack Ptr, Input Pos.
+                // Save Stack Ptr, Input Pos and active input region.
                 //   TODO:  implement transparent bounds.  Ticket #6067
-                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
+                U_ASSERT(opValue>=0 && opValue+4<fPattern->fDataSize);
                 fData[opValue]   = fStack->size();
                 fData[opValue+1] = fp->fInputIdx;
-                // Init the variable containing the start index for attempted matches.
-                fData[opValue+2] = -1;
                 // Save input string length, then reset to pin any matches to end at
                 //   the current position.
+                fData[opValue+2] = fActiveStart;
                 fData[opValue+3] = fActiveLimit;
+                fActiveStart     = fRegionStart;
                 fActiveLimit     = fp->fInputIdx;
+                // Init the variable containing the start index for attempted matches.
+                fData[opValue+4] = -1;
             }
             break;
 
@@ -5462,8 +5474,8 @@
                 U_ASSERT(minML >= 0);
 
                 // Fetch (from data) the last input index where a match was attempted.
-                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
-                int64_t  &lbStartIdx = fData[opValue+2];
+                U_ASSERT(opValue>=0 && opValue+4<fPattern->fDataSize);
+                int64_t  &lbStartIdx = fData[opValue+4];
                 if (lbStartIdx < 0) {
                     // First time through loop.
                     lbStartIdx = fp->fInputIdx - minML;
@@ -5485,10 +5497,10 @@
                     //  getting a match.  Backtrack out, and out of the
                     //   Look Behind altogether.
                     fp = (REStackFrame *)fStack->popFrame(fFrameSize);
-                    int64_t restoreInputLen = fData[opValue+3];
-                    U_ASSERT(restoreInputLen >= fActiveLimit);
-                    U_ASSERT(restoreInputLen <= fInputLength);
-                    fActiveLimit = restoreInputLen;
+                    fActiveStart = fData[opValue+2];
+                    fActiveLimit = fData[opValue+3];
+                    U_ASSERT(fActiveStart >= 0);
+                    U_ASSERT(fActiveLimit <= fInputLength);
                     break;
                 }
 
@@ -5502,7 +5514,7 @@
         case URX_LB_END:
             // End of a look-behind block, after a successful match.
             {
-                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
+                U_ASSERT(opValue>=0 && opValue+4<fPattern->fDataSize);
                 if (fp->fInputIdx != fActiveLimit) {
                     //  The look-behind expression matched, but the match did not
                     //    extend all the way to the point that we are looking behind from.
@@ -5513,13 +5525,13 @@
                     break;
                 }
 
-                // Look-behind match is good.  Restore the orignal input string length,
+                // Look-behind match is good.  Restore the original input string region,
                 //   which had been truncated to pin the end of the lookbehind match to the
                 //   position being looked-behind.
-                int64_t originalInputLen = fData[opValue+3];
-                U_ASSERT(originalInputLen >= fActiveLimit);
-                U_ASSERT(originalInputLen <= fInputLength);
-                fActiveLimit = originalInputLen;
+                fActiveStart = fData[opValue+2];
+                fActiveLimit = fData[opValue+3];
+                U_ASSERT(fActiveStart >= 0);
+                U_ASSERT(fActiveLimit <= fInputLength);
             }
             break;
 
@@ -5539,8 +5551,8 @@
                 U_ASSERT(continueLoc > fp->fPatIdx);
 
                 // Fetch (from data) the last input index where a match was attempted.
-                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
-                int64_t  &lbStartIdx = fData[opValue+2];
+                U_ASSERT(opValue>=0 && opValue+4<fPattern->fDataSize);
+                int64_t  &lbStartIdx = fData[opValue+4];
                 if (lbStartIdx < 0) {
                     // First time through loop.
                     lbStartIdx = fp->fInputIdx - minML;
@@ -5561,10 +5573,10 @@
                     // We have tried all potential match starting points without
                     //  getting a match, which means that the negative lookbehind as
                     //  a whole has succeeded.  Jump forward to the continue location
-                    int64_t restoreInputLen = fData[opValue+3];
-                    U_ASSERT(restoreInputLen >= fActiveLimit);
-                    U_ASSERT(restoreInputLen <= fInputLength);
-                    fActiveLimit = restoreInputLen;
+                    fActiveStart = fData[opValue+2];
+                    fActiveLimit = fData[opValue+3];
+                    U_ASSERT(fActiveStart >= 0);
+                    U_ASSERT(fActiveLimit <= fInputLength);
                     fp->fPatIdx = continueLoc;
                     break;
                 }
@@ -5579,7 +5591,7 @@
         case URX_LBN_END:
             // End of a negative look-behind block, after a successful match.
             {
-                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
+                U_ASSERT(opValue>=0 && opValue+4<fPattern->fDataSize);
                 if (fp->fInputIdx != fActiveLimit) {
                     //  The look-behind expression matched, but the match did not
                     //    extend all the way to the point that we are looking behind from.
@@ -5596,10 +5608,10 @@
                 //   Restore the orignal input string length, which had been truncated
                 //   inorder to pin the end of the lookbehind match
                 //   to the position being looked-behind.
-                int64_t originalInputLen = fData[opValue+3];
-                U_ASSERT(originalInputLen >= fActiveLimit);
-                U_ASSERT(originalInputLen <= fInputLength);
-                fActiveLimit = originalInputLen;
+                fActiveStart = fData[opValue+2];
+                fActiveLimit = fData[opValue+3];
+                U_ASSERT(fActiveStart >= 0);
+                U_ASSERT(fActiveLimit <= fInputLength);
 
                 // Restore original stack position, discarding any state saved
                 //   by the successful pattern match.
diff --git a/icu4c/source/test/intltest/regextst.cpp b/icu4c/source/test/intltest/regextst.cpp
index db66048..b6391d2 100644
--- a/icu4c/source/test/intltest/regextst.cpp
+++ b/icu4c/source/test/intltest/regextst.cpp
@@ -3525,11 +3525,16 @@
         }
     }
     parseMatcher->appendTail(deTaggedInput);
-    REGEX_ASSERT_L(groupStarts.size() == groupEnds.size(), line);
+
+    if (groupStarts.size() != groupEnds.size()) {
+        errln("Error at line %d: mismatched <n> group tags in expected results.", line);
+        failed = true;
+        goto cleanupAndReturn;
+    }
     if ((regionStart>=0 || regionEnd>=0) && (regionStart<0 || regionStart>regionEnd)) {
-      errln("mismatched <r> tags");
-      failed = TRUE;
-      goto cleanupAndReturn;
+        errln("mismatched <r> tags");
+        failed = TRUE;
+        goto cleanupAndReturn;
     }
 
     //
diff --git a/icu4c/source/test/testdata/regextst.txt b/icu4c/source/test/testdata/regextst.txt
index 759a196..9135612 100644
--- a/icu4c/source/test/testdata/regextst.txt
+++ b/icu4c/source/test/testdata/regextst.txt
@@ -89,6 +89,28 @@
 "abc(?!def)"              b    "<r><0>abc</0></r>xyz"
 
 #
+# Nested Lookahead / Behind
+#
+"one(?=(?:(?!<out>).)*</out>)"  "<out><0>one</0> stuff</out>"
+"one(?=(?:(?!<out>).)*</out>)"  "<out>one  <out></out>"
+
+# More nesting lookaround: pattern matches "qq" when not preceded by 'a' and followed by 'z'
+"(?<!a(?!...z))qq"               "<0>qq</0>c"
+"(?<!a(?!...z))qq"               "f<0>qq</0>c"
+"(?<!a(?!...z))qq"               "aqqz"
+
+# More nested lookaround: match any two chars preceded and followed by an upper case letter.
+# With gratuitous nesting of look-arounds and capture from the look-arounds.
+
+"(?=(?<=(\p{Lu})(?=..(\p{Lu})))).."     "<1>A</1><0>jk</0><2>B</2>"
+"(?=(?<=(\p{Lu})(?=..(\p{Lu})))).."     "ajkB"
+"(?=(?<=(\p{Lu})(?=..(\p{Lu})))).."     "Ajkb"
+
+# Nested lookaround cases from bug ICU-20564
+"(?<=(?<=((?=)){0}+))"         "<0></0>abc"
+"(?<=c(?<=c((?=c)){1}+))"      "c<0><1></1></0>cc"
+
+#
 #  Anchoring Bounds
 #
 "^def$"                        "abc<r><0>def</0></r>ghi"           # anchoring (default) bounds
@@ -1456,11 +1478,14 @@
 "abc(?=de(?=f))...g"               "<0>abcdefg</0>"
 "abc(?=de(?=f))...g"               "abcdxfg"
 
+# Bug ICU-20618 Assertion failure with nested look-around expressions.
+#
+"(?<=(?<=b?(?=a)))"               "hello, world."
+
 
 #  Random debugging, Temporary
 #
 
-
 #
 #  Regexps from http://www.regexlib.com
 #