Bug 714613 - Simplify nsShiftJISToUnicode::Convert; r=smontagu
author Ms2ger <ms2ger@gmail.com>
Sun, 15 Jan 2012 09:13:12 +0100
changeset 85707 e3cd4ae3e9e1678324322356668fffd8a39350bf
parent 85706 1d487061009b1f7e93e1e27dd2b5a9978f7e0de0
child 85708 2b995693ad47281738e0df71443025ea50f67afc
push id 805
push user akeybl@mozilla.com
push date Wed, 01 Feb 2012 18:17:35 +0000
treeherder mozilla-aurora@6fb3bf232436 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers smontagu
bugs 714613
milestone 12.0a1
Bug 714613 - Simplify nsShiftJISToUnicode::Convert; r=smontagu
intl/uconv/ucvja/japanese.map
intl/uconv/ucvja/nsJapaneseToUnicode.cpp
--- a/intl/uconv/ucvja/japanese.map
+++ b/intl/uconv/ucvja/japanese.map
@@ -72,33 +72,39 @@ static const PRUint16 gIndexJis0208[] = 
     6486,   6580,   6674,   6768,   6862,   6956,   7050,   7144,
     7238,   7332,   7426,   7520,   7614,      0,      0,      0,
        0,   7708,   7802,   7896,   7990,      0,      0, 0xFFFD,
 };
 static const PRUint16 * const gIndex[] = {
   gIndexShiftJis, gIndexJis0208
 };
 
+// IE-compatible handling of undefined codepoints:
+// 0x80 --> U+0080
+// 0xa0 --> U+F8F0
+// 0xfd --> U+F8F1
+// 0xfe --> U+F8F2
+// 0xff --> U+F8F3
 static const PRUint16 gCP932IndexShiftJis[] = {
   0xFFFD,   8648,    376,    564,    752,      0,      0,    940,
     1128,   1316,   1504,   1692,   1880,   2068,   2256,   2444,
     2632,   2820,   3008,   3196,   3384,   3572,   3760,   3948,
     4136,   4324,   4512,   4700,   4888,   5076,   5264,   5452,
-  0xFFFD, 0xFF61, 0xFF62, 0xFF63, 0xFF64, 0xFF65, 0xFF66, 0xFF67,
+  0xF8F0, 0xFF61, 0xFF62, 0xFF63, 0xFF64, 0xFF65, 0xFF66, 0xFF67,
   0xFF68, 0xFF69, 0xFF6A, 0xFF6B, 0xFF6C, 0xFF6D, 0xFF6E, 0xFF6F,
   0xFF70, 0xFF71, 0xFF72, 0xFF73, 0xFF74, 0xFF75, 0xFF76, 0xFF77,
   0xFF78, 0xFF79, 0xFF7A, 0xFF7B, 0xFF7C, 0xFF7D, 0xFF7E, 0xFF7F,
   0xFF80, 0xFF81, 0xFF82, 0xFF83, 0xFF84, 0xFF85, 0xFF86, 0xFF87,
   0xFF88, 0xFF89, 0xFF8A, 0xFF8B, 0xFF8C, 0xFF8D, 0xFF8E, 0xFF8F,
   0xFF90, 0xFF91, 0xFF92, 0xFF93, 0xFF94, 0xFF95, 0xFF96, 0xFF97,
   0xFF98, 0xFF99, 0xFF9A, 0xFF9B, 0xFF9C, 0xFF9D, 0xFF9E, 0xFF9F,
     5640,   5828,   6016,   6204,   6392,   6580,   6768,   6956,
     7144,   7332,   7520,      0,      0,   7708,   7896,      0,
   0xE000, 0xE0BC, 0xE178, 0xE234, 0xE2F0, 0xE3AC, 0xE468, 0xE524,
-  0xE5E0, 0xE69C,   8084,   8272,   8460, 0xFFFD, 0xFFFD, 0xFFFD,
+  0xE5E0, 0xE69C,   8084,   8272,   8460, 0xF8F1, 0xF8F2, 0xF8F3,
 };
 static const PRUint16 gCP932IndexJis0208[] = {
   0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
   0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
   0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
   0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
   0xFFFD,   8648,   8742,    376,    470,    564,    658,    752,
      846,      0,      0,      0,      0,    940,      0,      0,
@@ -117,28 +123,28 @@ static const PRUint16 * const gCP932Inde
   gCP932IndexShiftJis, gCP932IndexJis0208
 };
 
 static const PRUint16 gIBM943IndexShiftJis[] = {
   0xFFFD,   8836,    376,    564,    752,      0,      0,    940,
     1128,   1316,   1504,   1692,   1880,   2068,   2256,   2444,
     2632,   2820,   3008,   3196,   3384,   3572,   3760,   3948,
     4136,   4324,   4512,   4700,   4888,   5076,   5264,   5452,
-  0xFFFD, 0xFF61, 0xFF62, 0xFF63, 0xFF64, 0xFF65, 0xFF66, 0xFF67,
+  0xF8F0, 0xFF61, 0xFF62, 0xFF63, 0xFF64, 0xFF65, 0xFF66, 0xFF67,
   0xFF68, 0xFF69, 0xFF6A, 0xFF6B, 0xFF6C, 0xFF6D, 0xFF6E, 0xFF6F,
   0xFF70, 0xFF71, 0xFF72, 0xFF73, 0xFF74, 0xFF75, 0xFF76, 0xFF77,
   0xFF78, 0xFF79, 0xFF7A, 0xFF7B, 0xFF7C, 0xFF7D, 0xFF7E, 0xFF7F,
   0xFF80, 0xFF81, 0xFF82, 0xFF83, 0xFF84, 0xFF85, 0xFF86, 0xFF87,
   0xFF88, 0xFF89, 0xFF8A, 0xFF8B, 0xFF8C, 0xFF8D, 0xFF8E, 0xFF8F,
   0xFF90, 0xFF91, 0xFF92, 0xFF93, 0xFF94, 0xFF95, 0xFF96, 0xFF97,
   0xFF98, 0xFF99, 0xFF9A, 0xFF9B, 0xFF9C, 0xFF9D, 0xFF9E, 0xFF9F,
     5640,   5828,   6016,   6204,   6392,   6580,   6768,   6956,
     7144,   7332,   7520,      0,      0,   7708,   9024,      0,
   0xE000, 0xE0BC, 0xE178, 0xE234, 0xE2F0, 0xE3AC, 0xE468, 0xE524,
-  0xE5E0, 0xE69C,   9212,   8272,   8460, 0xFFFD, 0xFFFD, 0xFFFD,
+  0xE5E0, 0xE69C,   9212,   8272,   8460, 0xF8F1, 0xF8F2, 0xF8F3,
 };
 static const PRUint16 gIBM943IndexJis0208[] = {
   0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
   0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
   0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
   0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
   0xFFFD,   8836,   8742,    376,    470,    564,    658,    752,
      846,      0,      0,      0,      0,    940,      0,      0,
--- a/intl/uconv/ucvja/nsJapaneseToUnicode.cpp
+++ b/intl/uconv/ucvja/nsJapaneseToUnicode.cpp
@@ -37,16 +37,19 @@
 #include "nsJapaneseToUnicode.h"
 
 #include "nsUCSupport.h"
 
 #include "japanese.map"
 
 #include "nsICharsetConverterManager.h"
 #include "nsIServiceManager.h"
+
+#include "mozilla/Assertions.h"
+
 static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID);
 
 #ifdef XP_OS2
   // HTML5-incompliant behavior for OS/2, see bug 108136
   // This is bogus. The right fix would be working around the font problems
   // in OS/2 gfx, since this "fix" introduces script-visible DOM differences
   // between the platforms.
   #define SJIS_INDEX gIBM943Index[0]
@@ -100,80 +103,43 @@ NS_IMETHODIMP nsShiftJISToUnicode::Conve
       175,  176,  177,  178,  179,  180,  181,  182,  /* 0xf0 */
       183,  184,  185,  186,  187, 0xFF, 0xFF, 0xFF,  /* 0xf8 */
    };
 
    const unsigned char* srcEnd = (unsigned char*)aSrc + *aSrcLen;
    const unsigned char* src =(unsigned char*) aSrc;
    PRUnichar* destEnd = aDest + *aDestLen;
    PRUnichar* dest = aDest;
-   while((src < srcEnd))
-   {
-       switch(mState)
-       {
-
+   while (src < srcEnd) {
+       switch (mState) {
           case 0:
-          if(*src & 0x80)
-          {
-            mData = SJIS_INDEX[*src & 0x7F];
-            if(mData < 0xE000 )
-            {
-               mState = 1; // two bytes 
-            } else {
-               if( mData > 0xFF00)
-               {
-                 if(0xFFFD == mData) {
-                   // IE-compatible handling of undefined codepoints:
-                   // 0x80 --> U+0080
-                   // 0xa0 --> U+F8F0
-                   // 0xfd --> U+F8F1
-                   // 0xfe --> U+F8F2
-                   // 0xff --> U+F8F3
-                   switch (*src) {
-                     case 0x80:
-                       *dest++ = (PRUnichar) *src;
-                       break;
-
-                     case 0xa0:
-                       *dest++ = (PRUnichar) 0xf8f0;
-                       break;
-
-                     case 0xfd:
-                     case 0xfe:
-                     case 0xff:
-                       *dest++ = (PRUnichar) 0xf8f1 + 
-                                   (*src - (unsigned char)(0xfd));
-                       break;
-
-                     default:
-                       if (mErrBehavior == kOnError_Signal)
-                         goto error_invalidchar;
-                       *dest++ = SJIS_UNMAPPED;
-                   }
-                   if(dest >= destEnd)
-                     goto error1;
-                 } else {
-                   *dest++ = mData; // JIS 0201
-                   if(dest >= destEnd)
-                     goto error1;
-                 }
-               } else {
-                 mState = 2; // EUDC
-               }
+          if (*src <= 0x80) {
+            // ASCII
+            *dest++ = (PRUnichar) *src;
+            if (dest >= destEnd) {
+              goto error1;
             }
           } else {
-            // ASCII
-            *dest++ = (PRUnichar) *src;
-            if(dest >= destEnd)
-              goto error1;
+            mData = SJIS_INDEX[*src & 0x7F];
+            if (mData < 0xE000) {
+              mState = 1; // two bytes
+            } else if (mData < 0xF000) {
+              mState = 2; // EUDC
+            } else {
+              *dest++ = mData; // JIS 0201
+              if (dest >= destEnd) {
+                goto error1;
+              }
+            }
           }
           break;
 
           case 1: // Index to table
           {
+            MOZ_ASSERT(mData < 0xE000);
             PRUint8 off = sbIdx[*src];
 
             // Error handling: in the case where the second octet is not in the
             // valid ranges 0x40-0x7E 0x80-0xFC, unconsume the invalid octet and
             // interpret it as the ASCII value. In the case where the second
             // octet is in the valid range but there is no mapping for the
             // 2-octet sequence, do not unconsume.
             if(0xFF == off) {
@@ -193,16 +159,17 @@ NS_IMETHODIMP nsShiftJISToUnicode::Conve
             mState = 0;
             if(dest >= destEnd)
               goto error1;
           }
           break;
 
           case 2: // EUDC
           {
+            MOZ_ASSERT(0xE000 <= mData && mData < 0xF000);
             PRUint8 off = sbIdx[*src];
 
             // Error handling as in case 1
             if(0xFF == off) {
                src--;
                if (mErrBehavior == kOnError_Signal)
                  goto error_invalidchar;