Bug 599854: fix repeated matching of empty groups, r=cdleary
author David Mandelin <dmandelin@mozilla.com>
Thu, 13 Jan 2011 18:43:39 -0800
changeset 61066 ccd420e49864c24f889afa6ef0521f446b8a01ce
parent 61065 33313f209f4d1a161648732b25c34271d120eb64
child 61067 64274de90e2d279d923b89199d761d197b1feec8
push id unknown
push user unknown
push date unknown
reviewers cdleary
bugs 599854
milestone 2.0b10pre
Bug 599854: fix repeated matching of empty groups, r=cdleary
js/src/jit-test/tests/basic/bug599854.js
js/src/yarr/pcre/pcre_exec.cpp
new file mode 100644
--- /dev/null
+++ b/js/src/jit-test/tests/basic/bug599854.js
@@ -0,0 +1,19 @@
+function assertEqArray(actual, expected) { // Throws an Error unless both arrays have equal length and pairwise-equal elements; returns nothing on success.
+    if (actual.length != expected.length) { // Length mismatch is reported before any element is compared.
+        throw new Error(
+            "array lengths not equal: got " +
+            uneval(actual) + ", expected " + uneval(expected));
+    }
+
+    for (var i = 0; i < actual.length; ++i) { // Walk the arrays in index order; the first differing index aborts the check.
+        if (actual[i] != expected[i]) { // Loose (!=) comparison: values that coerce to equal are accepted as matching.
+        throw new Error(
+            "arrays not equal at element " + i + ": got " +
+            uneval(actual) + ", expected " + uneval(expected)); // Message carries the failing index plus both arrays passed through uneval().
+        }
+    }
+}
+
+assertEqArray(/(?:(?:(")(c)")?)*/.exec('"c"'), [ '"c"', '"', "c" ]);
+assertEqArray(/(?:(?:a*?(")(c)")?)*/.exec('"c"'), [ '"c"', '"', "c" ]);
+assertEqArray(/<script\s*(?![^>]*type=['"]?(?:dojo\/|text\/html\b))(?:[^>]*?(?:src=(['"]?)([^>]*?)\1[^>]*)?)*>([\s\S]*?)<\/script>/gi.exec('<script type="text/javascript" src="..."></script>'), ['<script type="text/javascript" src="..."></script>', '"', "...", ""]);
--- a/js/src/yarr/pcre/pcre_exec.cpp
+++ b/js/src/yarr/pcre/pcre_exec.cpp
@@ -718,16 +718,20 @@ RECURSE:
                 /* For a non-repeating ket, just continue at this level. This also
                  happens for a repeating ket if no characters were matched in the group.
                  This is the forcible breaking of infinite loops as implemented in Perl
                  5.005. If there is an options reset, it will get obeyed in the normal
                  course of events. */
                 
                 if (*stack.currentFrame->args.instructionPtr == OP_KET || stack.currentFrame->args.subjectPtr == stack.currentFrame->locals.subjectPtrAtStartOfInstruction) {
                     DPRINTF(("non-repeating ket or empty match\n"));
+                    if (stack.currentFrame->args.subjectPtr == stack.currentFrame->locals.subjectPtrAtStartOfInstruction && stack.currentFrame->args.groupMatched) {
+                        DPRINTF(("empty string while group already matched; bailing"));
+                        RRETURN_NO_MATCH;
+                    }
                     stack.currentFrame->args.instructionPtr += 1 + LINK_SIZE;
                     NEXT_OPCODE;
                 }
                 
                 /* The repeating kets try the rest of the pattern or restart from the
                  preceding bracket, in the appropriate order. */
                 
                 stack.currentFrame->extractBrackets(LOCALS(instructionPtrAtStartOfOnce));
@@ -1261,17 +1265,17 @@ RECURSE:
                     }
                     
                     if (min == stack.currentFrame->locals.max)
                         NEXT_OPCODE;
                     
                     if (minimize) {
                         stack.currentFrame->locals.repeatOthercase = othercase;
                         for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
-                            RECURSIVE_MATCH(28, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain, false);
+                            RECURSIVE_MATCH(28, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain, stack.currentFrame->args.groupMatched);
                             if (isMatch)
                                 RRETURN;
                             if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject)
                                 RRETURN;
                             if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc && *stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.repeatOthercase)
                                 RRETURN;
                             ++stack.currentFrame->args.subjectPtr;
                         }
@@ -1303,17 +1307,17 @@ RECURSE:
                         stack.currentFrame->args.subjectPtr += 2;
                     }
                     
                     if (min == stack.currentFrame->locals.max)
                         NEXT_OPCODE;
                     
                     if (minimize) {
                         for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
-                            RECURSIVE_MATCH(30, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain, false);
+                            RECURSIVE_MATCH(30, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain, stack.currentFrame->args.groupMatched);
                             if (isMatch)
                                 RRETURN;
                             if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject)
                                 RRETURN;
                             if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc)
                                 RRETURN;
                             stack.currentFrame->args.subjectPtr += 2;
                         }
@@ -1323,17 +1327,17 @@ RECURSE:
                         for (int i = min; i < stack.currentFrame->locals.max; i++) {
                             if (stack.currentFrame->args.subjectPtr > md.endSubject - 2)
                                 break;
                             if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc)
                                 break;
                             stack.currentFrame->args.subjectPtr += 2;
                         }
                         while (stack.currentFrame->args.subjectPtr >= stack.currentFrame->locals.subjectPtrAtStartOfInstruction) {
-                            RECURSIVE_MATCH(31, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain, false);
+                            RECURSIVE_MATCH(31, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain, stack.currentFrame->args.groupMatched);
                             if (isMatch)
                                 RRETURN;
                             stack.currentFrame->args.subjectPtr -= 2;
                         }
                         RRETURN_NO_MATCH;
                     }
                     /* Control never reaches here */
                 }
@@ -1419,17 +1423,17 @@ RECURSE:
                             RRETURN_NO_MATCH;
                     }
                     
                     if (min == stack.currentFrame->locals.max)
                         NEXT_OPCODE;      
                     
                     if (minimize) {
                         for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
-                            RECURSIVE_MATCH(38, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain, false);
+                            RECURSIVE_MATCH(38, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain, stack.currentFrame->args.groupMatched);
                             if (isMatch)
                                 RRETURN;
                             int d = *stack.currentFrame->args.subjectPtr++;
                             if (d < 128)
                                 d = toLowerCase(d);
                             if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject || stack.currentFrame->locals.fc == d)
                                 RRETURN;
                         }
@@ -1447,17 +1451,17 @@ RECURSE:
                             int d = *stack.currentFrame->args.subjectPtr;
                             if (d < 128)
                                 d = toLowerCase(d);
                             if (stack.currentFrame->locals.fc == d)
                                 break;
                             ++stack.currentFrame->args.subjectPtr;
                         }
                         for (;;) {
-                            RECURSIVE_MATCH(40, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain, false);
+                            RECURSIVE_MATCH(40, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain, stack.currentFrame->args.groupMatched);
                             if (isMatch)
                                 RRETURN;
                             if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction)
                                 break;        /* Stop if tried at original pos */
                         }
                         
                         RRETURN;
                     }
@@ -1473,17 +1477,17 @@ RECURSE:
                             RRETURN_NO_MATCH;
                     }
 
                     if (min == stack.currentFrame->locals.max)
                         NEXT_OPCODE;
                     
                     if (minimize) {
                         for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
-                            RECURSIVE_MATCH(42, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain, false);
+                            RECURSIVE_MATCH(42, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain, stack.currentFrame->args.groupMatched);
                             if (isMatch)
                                 RRETURN;
                             int d = *stack.currentFrame->args.subjectPtr++;
                             if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject || stack.currentFrame->locals.fc == d)
                                 RRETURN;
                         }
                         /* Control never reaches here */
                     }
@@ -1497,17 +1501,17 @@ RECURSE:
                             if (stack.currentFrame->args.subjectPtr >= md.endSubject)
                                 break;
                             int d = *stack.currentFrame->args.subjectPtr;
                             if (stack.currentFrame->locals.fc == d)
                                 break;
                             ++stack.currentFrame->args.subjectPtr;
                         }
                         for (;;) {
-                            RECURSIVE_MATCH(44, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain, false);
+                            RECURSIVE_MATCH(44, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain, stack.currentFrame->args.groupMatched);
                             if (isMatch)
                                 RRETURN;
                             if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction)
                                 break;        /* Stop if tried at original pos */
                         }
 
                         RRETURN;
                     }