Bug 276757 - Find in page should match curly quotes to straight quotes [r=jst]
author Matt Brubeck <mbrubeck@mozilla.com>
Tue, 17 Sep 2013 10:24:23 -0700
changeset 147429 7a710c502b49d2330cac3fa582aff0ddb98eba44
parent 147428 b78facfd753cc005e46902ad99f6cdf6975f3c5c
child 147430 83c1b9a4fa8b79ad5c628127d3815697ab295165
push id 2706
push user mbrubeck@mozilla.com
push date Tue, 17 Sep 2013 17:24:45 +0000
treeherder fx-team@7a710c502b49 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers jst
bugs 276757
milestone 27.0a1
Bug 276757 - Find in page should match curly quotes to straight quotes [r=jst]
embedding/components/find/src/nsFind.cpp
embedding/test/test_nsFind.html
--- a/embedding/components/find/src/nsFind.cpp
+++ b/embedding/components/find/src/nsFind.cpp
@@ -31,16 +31,23 @@
 using namespace mozilla;
 
 // Yikes!  Casting a char to unichar can fill with ones!
 #define CHAR_TO_UNICHAR(c) ((PRUnichar)(const unsigned char)c)
 
 static NS_DEFINE_CID(kCContentIteratorCID, NS_CONTENTITERATOR_CID);
 static NS_DEFINE_CID(kCPreContentIteratorCID, NS_PRECONTENTITERATOR_CID);
 
+#define CH_QUOTE ((PRUnichar) 0x22) // U+0022 QUOTATION MARK (straight double quote)
+#define CH_APOSTROPHE ((PRUnichar) 0x27) // U+0027 APOSTROPHE (straight single quote)
+#define CH_LEFT_SINGLE_QUOTE ((PRUnichar) 0x2018) // U+2018 LEFT SINGLE QUOTATION MARK
+#define CH_RIGHT_SINGLE_QUOTE ((PRUnichar) 0x2019) // U+2019 RIGHT SINGLE QUOTATION MARK
+#define CH_LEFT_DOUBLE_QUOTE ((PRUnichar) 0x201C) // U+201C LEFT DOUBLE QUOTATION MARK
+#define CH_RIGHT_DOUBLE_QUOTE ((PRUnichar) 0x201D) // U+201D RIGHT DOUBLE QUOTATION MARK
+
 #define CH_SHY ((PRUnichar) 0xAD)
 
 // nsFind::Find casts CH_SHY to char before calling StripChars
 // This works correctly if and only if CH_SHY <= 255
 PR_STATIC_ASSERT(CH_SHY <= 255);
 
 // -----------------------------------------------------------------------
 // nsFindContentIterator is a special iterator that also goes through
@@ -1096,19 +1103,42 @@ nsFind::Find(const PRUnichar *aPatText, 
     }
     if (!inWhitespace && IsSpace(patc))
       inWhitespace = true;
 
     // convert to lower case if necessary
     else if (!inWhitespace && !mCaseSensitive && IsUpperCase(c))
       c = ToLowerCase(c);
 
-    // ignore soft hyphens in the document
-    if (c == CH_SHY)
-      continue;
+    switch (c) {
+      // ignore soft hyphens in the document
+      case CH_SHY:
+        continue;
+      // treat curly and straight quotes as identical
+      case CH_LEFT_SINGLE_QUOTE:
+      case CH_RIGHT_SINGLE_QUOTE:
+        c = CH_APOSTROPHE;
+        break;
+      case CH_LEFT_DOUBLE_QUOTE:
+      case CH_RIGHT_DOUBLE_QUOTE:
+        c = CH_QUOTE;
+        break;
+    }
+
+    switch (patc) {
+      // treat curly and straight quotes as identical
+      case CH_LEFT_SINGLE_QUOTE:
+      case CH_RIGHT_SINGLE_QUOTE:
+        patc = CH_APOSTROPHE;
+        break;
+      case CH_LEFT_DOUBLE_QUOTE:
+      case CH_RIGHT_DOUBLE_QUOTE:
+        patc = CH_QUOTE;
+        break;
+    }
 
     // a '\n' between CJ characters is ignored
     if (pindex != (mFindBackward ? patLen : 0) && c != patc && !inWhitespace) {
       if (c == '\n' && t2b && IS_CJ_CHAR(prevChar)) {
         int32_t nindex = findex + incr;
         if (mFindBackward ? (nindex >= 0) : (nindex < fragLen)) {
           if (IS_CJ_CHAR(t2b[nindex]))
             continue;
--- a/embedding/test/test_nsFind.html
+++ b/embedding/test/test_nsFind.html
@@ -1,21 +1,23 @@
 <!DOCTYPE HTML>
 <html>
 <!--
 https://bugzilla.mozilla.org/show_bug.cgi?id=450048
 -->
 <head>
+  <meta charset="UTF-8">
   <title>Test for nsFind::Find()</title>
   <script type="application/javascript" src="/tests/SimpleTest/SimpleTest.js"></script>
   <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css"/>
 </head>
 <body>
 <a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=450048">Mozilla Bug 450048</a>
 <p id="display">This is the text to search i<b>n&shy;t</b>o</p>
+<p id="quotes">"straight" and &ldquo;curly&rdquo; and &lsquo;didn't&rsquo; and 'doesn&rsquo;t'</p>
 <div id="content" style="display: none">
   
 </div>
 <pre id="test">
 <script type="application/javascript">
 
 /** Test for Bug 450048 **/
 
@@ -130,12 +132,94 @@ https://bugzilla.mozilla.org/show_bug.cg
   retRange = rf.Find(searchValue, searchRange, startPt, endPt);
   ok(!retRange, "\"" + searchValue + "\" found (forward)");
 
   rf.findBackwards = true;
 
   // searchValue = "the";
   retRange = rf.Find(searchValue, searchRange, startPt, endPt);
   ok(retRange, "\"" + searchValue + "\" not found (backward)");
+
+
+  // Curly quotes and straight quotes should match.
+
+  rf.caseSensitive = false;
+  rf.findBackwards = false;
+
+  function find(node, searchValue) { // Runs rf.Find for searchValue over all children of node; returns rf.Find's result.
+    var range = document.createRange();
+    range.setStart(node, 0);
+    range.setEnd(node, node.childNodes.length); // end offset is just past node's last child
+    return rf.Find(searchValue, range, range, range); // the same range serves as search range, start point and end point
+  }
+
+  function assertFound(node, searchValue) { // Test passes when find() returns a truthy result for searchValue within node.
+    ok(find(node, searchValue), "\"" + searchValue + "\" not found"); // the message text describes the failing case
+  }
+
+  function assertNotFound(node, searchValue) { // Test passes when find() returns a falsy result for searchValue within node.
+    ok(!find(node, searchValue), "\"" + searchValue + "\" found"); // the message text describes the failing case
+  }
+
+  var quotes = document.getElementById("quotes");
+
+  assertFound(quotes, "\"straight\"");
+  assertFound(quotes, "\u201Cstraight\u201D");
+
+  assertNotFound(quotes, "'straight'");
+  assertNotFound(quotes, "\u2018straight\u2019");
+  assertNotFound(quotes, "\u2019straight\u2018");
+  assertNotFound(quotes, ".straight.");
+
+  assertFound(quotes, "\"curly\"");
+  assertFound(quotes, "\u201Ccurly\u201D");
+
+  assertNotFound(quotes, "'curly'");
+  assertNotFound(quotes, "\u2018curly\u2019");
+  assertNotFound(quotes, ".curly.");
+
+  assertFound(quotes, "didn't");
+  assertFound(quotes, "didn\u2018t");
+  assertFound(quotes, "didn\u2019t");
+
+  assertNotFound(quotes, "didnt");
+  assertNotFound(quotes, "didn t");
+  assertNotFound(quotes, "didn.t");
+
+  assertFound(quotes, "'didn't'");
+  assertFound(quotes, "'didn\u2018t'");
+  assertFound(quotes, "'didn\u2019t'");
+  assertFound(quotes, "\u2018didn't\u2019");
+  assertFound(quotes, "\u2019didn't\u2018");
+  assertFound(quotes, "\u2018didn't\u2018");
+  assertFound(quotes, "\u2019didn't\u2019");
+  assertFound(quotes, "\u2018didn\u2019t\u2019");
+  assertFound(quotes, "\u2019didn\u2018t\u2019");
+  assertFound(quotes, "\u2018didn\u2019t\u2018");
+
+  assertNotFound(quotes, "\"didn't\"");
+  assertNotFound(quotes, "\u201Cdidn't\u201D");
+
+  assertFound(quotes, "doesn't");
+  assertFound(quotes, "doesn\u2018t");
+  assertFound(quotes, "doesn\u2019t");
+
+  assertNotFound(quotes, "doesnt");
+  assertNotFound(quotes, "doesn t");
+  assertNotFound(quotes, "doesn.t");
+
+  assertFound(quotes, "'doesn't'");
+  assertFound(quotes, "'doesn\u2018t'");
+  assertFound(quotes, "'doesn\u2019t'");
+  assertFound(quotes, "\u2018doesn't\u2019");
+  assertFound(quotes, "\u2019doesn't\u2018");
+  assertFound(quotes, "\u2018doesn't\u2018");
+  assertFound(quotes, "\u2019doesn't\u2019");
+  assertFound(quotes, "\u2018doesn\u2019t\u2019");
+  assertFound(quotes, "\u2019doesn\u2018t\u2019");
+  assertFound(quotes, "\u2018doesn\u2019t\u2018");
+
+  assertNotFound(quotes, "\"doesn't\"");
+  assertNotFound(quotes, "\u201Cdoesn't\u201D");
 </script>
 </pre>
 </body>
 </html>