Bug 1505911 - Use IsAlpha/IsDigit instead of IsAsciiAlpha/IsAsciiDigit for full Unicode support. r=valentin
author Simon Montagu <smontagu@smontagu.org>
Sun, 01 Sep 2013 12:23:43 +0300
changeset 508301 0c40ca38958fc08fb4289dd1eb45b7fd7258ff99
parent 508300 7fc73687c1ef6d72850b1e8241104708fd1bca35
child 508302 f64d8eeab965ad962b09952d5e3978db0d4b8199
push id 1905
push user ffxbld-merge
push date Mon, 21 Jan 2019 12:33:13 +0000
treeherder mozilla-release@c2fca1944d8c [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers valentin
bugs 1505911
milestone 65.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1505911 - Use IsAlpha/IsDigit instead of IsAsciiAlpha/IsAsciiDigit for full Unicode support. r=valentin
netwerk/streamconv/converters/mozTXTToHTMLConv.cpp
--- a/netwerk/streamconv/converters/mozTXTToHTMLConv.cpp
+++ b/netwerk/streamconv/converters/mozTXTToHTMLConv.cpp
@@ -2,31 +2,29 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "mozilla/TextUtils.h"
 #include "mozTXTToHTMLConv.h"
 #include "nsNetUtil.h"
 #include "nsUnicharUtils.h"
+#include "nsUnicodeProperties.h"
 #include "nsCRT.h"
 #include "nsIExternalProtocolHandler.h"
 #include "nsIIOService.h"
 #include "nsIURI.h"
 
 #include <algorithm>
 
 #ifdef DEBUG_BenB_Perf
 #include "prtime.h"
 #include "prinrval.h"
 #endif
 
-using mozilla::IsAsciiAlpha;
-using mozilla::IsAsciiDigit;
-
 const double growthRate = 1.2;
 
 // Bug 183111, editor now replaces multiple spaces with leading
 // 0xA0's and a single ending space, so need to treat 0xA0's as spaces.
 // 0xA0 is the Latin1/Unicode character for "non-breaking space (nbsp)"
 // Also recognize the Japanese ideographic space 0x3000 as a space.
 static inline bool IsSpace(const char16_t aChar) {
   return (nsCRT::IsAsciiSpace(aChar) || aChar == 0xA0 || aChar == 0x3000);
@@ -502,16 +500,24 @@ bool mozTXTToHTMLConv::FindURL(const cha
         replaceAfter = resultReplaceAfter;
         state[check] = success;
       }
     }  // if
   }    // for
   return state[check] == success;
 }
 
+static inline bool IsAlpha(const uint32_t aChar) {
+  return mozilla::unicode::GetGenCategory(aChar) == nsUGenCategory::kLetter;
+}
+
+static inline bool IsDigit(const uint32_t aChar) {
+  return mozilla::unicode::GetGenCategory(aChar) == nsUGenCategory::kNumber;
+}
+
 bool mozTXTToHTMLConv::ItMatchesDelimited(const char16_t* aInString,
                                           int32_t aInLength,
                                           const char16_t* rep, int32_t aRepLen,
                                           LIMTYPE before, LIMTYPE after) {
   // this little method gets called a LOT. I found we were spending a
   // lot of time just calculating the length of the variable "rep"
   // over and over again every time we called it. So we're now passing
   // an integer in here.
@@ -520,27 +526,27 @@ bool mozTXTToHTMLConv::ItMatchesDelimite
   if (((before == LT_IGNORE && (after == LT_IGNORE || after == LT_DELIMITER)) &&
        textLen < aRepLen) ||
       ((before != LT_IGNORE || (after != LT_IGNORE && after != LT_DELIMITER)) &&
        textLen < aRepLen + 1) ||
       (before != LT_IGNORE && after != LT_IGNORE && after != LT_DELIMITER &&
        textLen < aRepLen + 2))
     return false;
 
-  char16_t text0 = aInString[0];
-  char16_t textAfterPos = aInString[aRepLen + (before == LT_IGNORE ? 0 : 1)];
+  uint32_t text0 = aInString[0];
+  uint32_t textAfterPos = aInString[aRepLen + (before == LT_IGNORE ? 0 : 1)];
 
-  if ((before == LT_ALPHA && !IsAsciiAlpha(text0)) ||
-      (before == LT_DIGIT && !IsAsciiDigit(text0)) ||
+  if ((before == LT_ALPHA && !IsAlpha(text0)) ||
+      (before == LT_DIGIT && !IsDigit(text0)) ||
       (before == LT_DELIMITER &&
-       (IsAsciiAlpha(text0) || IsAsciiDigit(text0) || text0 == *rep)) ||
-      (after == LT_ALPHA && !IsAsciiAlpha(textAfterPos)) ||
-      (after == LT_DIGIT && !IsAsciiDigit(textAfterPos)) ||
+       (IsAlpha(text0) || IsDigit(text0) || text0 == *rep)) ||
+      (after == LT_ALPHA && !IsAlpha(textAfterPos)) ||
+      (after == LT_DIGIT && !IsDigit(textAfterPos)) ||
       (after == LT_DELIMITER &&
-       (IsAsciiAlpha(textAfterPos) || IsAsciiDigit(textAfterPos) ||
+       (IsAlpha(textAfterPos) || IsDigit(textAfterPos) ||
         textAfterPos == *rep)) ||
       !Substring(Substring(aInString, aInString + aInLength),
                  (before == LT_IGNORE ? 0 : 1), aRepLen)
            .Equals(Substring(rep, rep + aRepLen),
                    nsCaseInsensitiveStringComparator()))
     return false;
 
   return true;