Bug 465656: word-at-a-time compiled regexp matching, r=gal
author David Mandelin <dmandelin@mozilla.com>
Fri, 06 Feb 2009 13:41:21 -0800
changeset 24866 d55ca83118bebd6f97a4b21f73ac10065287d74d
parent 24865 df3f23fa5a1603ceaa4a268fe6872fb402d2d0c8
child 24867 ecb76b0982023f9497645f99343d5fe36fd8553b
push id unknown
push user unknown
push date unknown
reviewers gal
bugs 465656
milestone 1.9.2a1pre
Bug 465656: word-at-a-time compiled regexp matching, r=gal
js/src/jsregexp.cpp
--- a/js/src/jsregexp.cpp
+++ b/js/src/jsregexp.cpp
@@ -2114,16 +2114,56 @@ class RegExpNativeCompiler {
             fails.add(lir->insBranch(LIR_jf, lir->ins2(LIR_eq, comp_ch, lir->insImm(ch2)), 0));
             if (!targetCurrentPoint(to_ok))
                 return NULL;
         }
 
         return lir->ins2(LIR_piadd, pos, lir->insImm(2));
     }
 
+    /*
+     * Emit LIR that matches the chars ch1, ch2 at pos with one 32-bit load and
+     * compare; mismatch branches are added to fails. Returns the position past
+     * both chars, or the single-char fallback's result when that path is taken.
+     */
+    LIns* compileFlatDoubleChar(jschar ch1, jschar ch2, LIns* pos,
+                                LInsList& fails)
+    {
+        /* One union type gives word and mask the same char order as the loaded text. */
+        union { jschar c[2]; uint32 i; } word, mask;
+        word.c[0] = ch1;
+        word.c[1] = ch2;
+        mask.i = 0;  /* Read below even without JSREG_FOLD, so it must be defined. */
+        if (cs->flags & JSREG_FOLD) {
+            JSBool mask1 = (L'A' <= ch1 && ch1 <= L'Z') || (L'a' <= ch1 && ch1 <= L'z');
+            JSBool mask2 = (L'A' <= ch2 && ch2 <= L'Z') || (L'a' <= ch2 && ch2 <= L'z');
+            /* Non-ASCII-letter chars that JS_TOLOWER changes take the single-char path. */
+            if ((!mask1 && JS_TOLOWER(ch1) != ch1) || (!mask2 && JS_TOLOWER(ch2) != ch2)) {
+                pos = compileFlatSingleChar(ch1, pos, fails);
+                if (!pos) return NULL;
+                return compileFlatSingleChar(ch2, pos, fails);
+            }
+            /* ASCII letters: bit-or-ing in 32 lower-cases both the pattern and the text. */
+            mask.c[0] = mask1 ? 0x0020 : 0;
+            mask.c[1] = mask2 ? 0x0020 : 0;
+            word.i |= mask.i;
+        }
+
+        /* The load covers two chars (4 bytes), so bounds-check the second one. */
+        LIns* last = lir->ins2(LIR_piadd, pos, lir->insImm(2));
+        fails.add(lir->insBranch(LIR_jf, lir->ins2(LIR_lt, last, cpend), 0));
+        LIns* text_word = lir->insLoad(LIR_ld, pos, lir->insImm(0));
+        LIns* comp_word = mask.i ?
+            lir->ins2(LIR_or, text_word, lir->insImm(mask.i)) :
+            text_word;
+        fails.add(lir->insBranch(LIR_jf, lir->ins2(LIR_eq, comp_word, lir->insImm(word.i)), 0));
+
+        return lir->ins2(LIR_piadd, pos, lir->insImm(4));
+    }
+
     LIns* compileClass(RENode* node, LIns* pos, LInsList& fails) 
     {
         if (!node->u.ucclass.sense)
             return JS_FALSE;
         /* 
          * If we share generated native code, we need to make a copy
          * of the bitmap because the original regexp's copy is destroyed
          * when that regexp is. 
@@ -2189,24 +2229,37 @@ class RegExpNativeCompiler {
                 return JS_FALSE;
 
             switch (node->op) {
             case REOP_EMPTY:
                 pos = compileEmpty(node, pos, fails);
                 break;
             case REOP_FLAT:
                 if (node->u.flat.length == 1) {
-                    pos = compileFlatSingleChar(node->u.flat.chr, pos, fails);
+                    if (node->next && node->next->op == REOP_FLAT && 
+                        node->next->u.flat.length == 1) {
+                        pos = compileFlatDoubleChar(node->u.flat.chr,
+                                                    node->next->u.flat.chr,
+                                                    pos, fails);
+                        node = node->next;
+                    } else {
+                        pos = compileFlatSingleChar(node->u.flat.chr, pos, fails);
+                    }
                 } else {
-                    for (size_t i = 0; i < node->u.flat.length; ++i) {
-                        if (fragment->lirbuf->outOMem()) 
-                            return JS_FALSE;
-                        pos = compileFlatSingleChar(((jschar*) node->kid)[i], pos, fails);
-                        if (!pos) break;
-                    }
+                   size_t i;
+                   for (i = 0; i < node->u.flat.length - 1; i += 2) {
+                       if (fragment->lirbuf->outOMem()) 
+                           return JS_FALSE;
+                       pos = compileFlatDoubleChar(((jschar*) node->kid)[i], 
+                                                   ((jschar*) node->kid)[i+1], 
+                                                   pos, fails);
+                       if (!pos) break;
+                   }
+                   if (pos && i == node->u.flat.length - 1)
+                       pos = compileFlatSingleChar(((jschar*) node->kid)[i], pos, fails);
                 }
                 break;
             case REOP_ALT:
             case REOP_ALTPREREQ:
                 pos = compileAlt(node, pos, fails);
                 break;
             case REOP_CLASS:
                 pos = compileClass(node, pos, fails);