Resynchronize the Shift_JIS converter after unrecognized sequences. Bug 690225, r=emk, a1.9.2.24=dveditz CAMINO_2_1_MINIBRANCH
author: Simon Montagu <smontagu@smontagu.org>
date: Wed, 19 Oct 2011 07:58:41 +0200
branch: CAMINO_2_1_MINIBRANCH
changeset: 35203 6cfca3193a6b7dae95d3e55508bb0d31bd75ee5c
parent: 35202 51e2a51c4387502a232bf090fe7e1114f8e6f735
child: 35204 26582fb8c4195fad2e05a440a04da309353a9cba
push id: 1985
push user: alqahira@ardisson.org
push date: Thu, 27 Oct 2011 03:42:45 +0000
reviewers: emk
bugs: 690225
milestone: 1.9.2.23
Resynchronize the Shift_JIS converter after unrecognized sequences. Bug 690225, r=emk, a1.9.2.24=dveditz
intl/uconv/tests/unit/test_bug116882.js
intl/uconv/ucvja/nsJapaneseToUnicode.cpp
layout/reftests/bugs/116882-1-ref.html
--- a/intl/uconv/tests/unit/test_bug116882.js
+++ b/intl/uconv/tests/unit/test_bug116882.js
@@ -1,14 +1,14 @@
 /* Tests conversion of undefined and illegal sequences from Shift-JIS
  *  to Unicode (bug 116882)
  */
 
 const inText = "\xfd\xfe\xff\x81\x20\x81\x3f\x86\x3c";
-const expectedText = "\uf8f1\uf8f2\uf8f3\u30fb\u30fb\u30fb";
+const expectedText = "\uf8f1\uf8f2\uf8f3\ufffd \ufffd?\ufffd<";
 const charset = "Shift_JIS";
     
 function run_test() {
     var ScriptableUnicodeConverter =
 	Components.Constructor("@mozilla.org/intl/scriptableunicodeconverter",
 			       "nsIScriptableUnicodeConverter");
 
     var converter = new ScriptableUnicodeConverter();
--- a/intl/uconv/ucvja/nsJapaneseToUnicode.cpp
+++ b/intl/uconv/ucvja/nsJapaneseToUnicode.cpp
@@ -46,16 +46,17 @@
 #include "nsICharsetConverterManager.h"
 #include "nsIServiceManager.h"
 static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID);
 
 #define SJIS_INDEX mMapIndex[0]
 #define JIS0208_INDEX mMapIndex[1]
 #define JIS0212_INDEX gJIS0212Index
 #define SJIS_UNMAPPED	0x30fb
+#define UNICODE_REPLACEMENT_CHARACTER 0xfffd
 
 void nsJapaneseToUnicode::setMapMode()
 {
   nsresult res;
 
   mMapIndex = gIndex;
 
   nsCOMPtr<nsIPrefBranch> prefBranch = do_GetService(NS_PREFSERVICE_CONTRACTID);
@@ -175,20 +176,25 @@ NS_IMETHODIMP nsShiftJISToUnicode::Conve
             if(dest >= destEnd)
               goto error1;
           }
           break;
 
           case 1: // Index to table
           {
             PRUint8 off = sbIdx[*src];
+
+            // Error handling: in the case where the second octet is not in the
+            // valid ranges 0x40-0x7E 0x80-0xFC, unconsume the invalid octet and
+            // interpret it as the ASCII value. In the case where the second
+            // octet is in the valid range but there is no mapping for the
+            // 2-octet sequence, do not unconsume.
             if(0xFF == off) {
-               if (mErrBehavior == kOnError_Signal)
-                 goto error_invalidchar;
-               *dest++ = SJIS_UNMAPPED;
+               src--;
+               *dest++ = UNICODE_REPLACEMENT_CHARACTER;
             } else {
                PRUnichar ch = gJapaneseMap[mData+off];
                if(ch == 0xfffd) {
                  if (mErrBehavior == kOnError_Signal)
                    goto error_invalidchar;
                  ch = SJIS_UNMAPPED;
                }
                *dest++ = ch;
@@ -197,21 +203,21 @@ NS_IMETHODIMP nsShiftJISToUnicode::Conve
             if(dest >= destEnd)
               goto error1;
           }
           break;
 
           case 2: // EUDC
           {
             PRUint8 off = sbIdx[*src];
+
+            // Error handling as in case 1
             if(0xFF == off) {
-               if (mErrBehavior == kOnError_Signal)
-                 goto error_invalidchar;
-
-               *dest++ = SJIS_UNMAPPED;
+               src--;
+               *dest++ = UNICODE_REPLACEMENT_CHARACTER;
             } else {
                *dest++ = mData + off;
             }
             mState = 0;
             if(dest >= destEnd)
               goto error1;
           }
           break;
@@ -335,45 +341,45 @@ NS_IMETHODIMP nsEUCJPToUnicodeV2::Conver
               goto error1;
           }
           break;
 
           case 1: // Index to table
           {
             PRUint8 off = sbIdx[*src];
             if(0xFF == off) {
-              if (mErrBehavior == kOnError_Signal)
-                goto error_invalidchar;
-              *dest++ = 0xFFFD;
                // if the first byte is valid for EUC-JP but the second 
                // is not while being a valid US-ASCII, save it
                // instead of eating it up !
               if ( (PRUint8)*src < (PRUint8)0x7f )
                 --src;
+              if (mErrBehavior == kOnError_Signal)
+                goto error_invalidchar;
+              *dest++ = 0xFFFD;
             } else {
                *dest++ = gJapaneseMap[mData+off];
             }
             mState = 0;
             if(dest >= destEnd)
               goto error1;
           }
           break;
 
           case 2: // JIS 0201
           {
             if((0xA1 <= *src) && (*src <= 0xDF)) {
               *dest++ = (0xFF61-0x00A1) + *src;
             } else {
-              if (mErrBehavior == kOnError_Signal)
-                goto error_invalidchar;
-              *dest++ = 0xFFFD;             
               // if 0x8e is not followed by a valid JIS X 0201 byte
               // but by a valid US-ASCII, save it instead of eating it up.
               if ( (PRUint8)*src < (PRUint8)0x7f )
                 --src;
+              if (mErrBehavior == kOnError_Signal)
+                goto error_invalidchar;
+              *dest++ = 0xFFFD;             
             }
             mState = 0;
             if(dest >= destEnd)
               goto error1;
           }
           break;
 
           case 3: // JIS 0212
--- a/layout/reftests/bugs/116882-1-ref.html
+++ b/layout/reftests/bugs/116882-1-ref.html
@@ -1,11 +1,11 @@
 <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"
  "http://www.w3.org/TR/REC-html401-19991224/strict.dtd">
 <html>
  <head>
   <title>Shift_JIS illegal sequences</title>
   <meta HTTP-equiv="content-type" content="text/html; charset=shift_jis">
  </head>
  <body>
-  <p>&#xf8f1;&#xf8f2;&#xf8f3;&#x30fb;&#x30fb;&#x30fb;&#x30fb;&#x30fb;&#x30fb;&#x30fb;&#x30fb;&#x30fb;&#x30fb;&#x30fb;&#x30fb;&#x30fb;&#x30fb;&#x30fb;&#x30fb;&#x30fb;&#x30fb;&#x30fb;&#x30fb;&#x30fb;&#x30fb;&#x30fb;&#x30fb;&#x30fb;&#x30fb;&#x30fb;&#x30fb;&#x30fb;&#x30fb;&#x30fb;&#x30fb;&#x30fb;</p>
+  <p>&#xf8f1;&#xf8f2;&#xf8f3;&#xfffd; &#xfffd;!&#xfffd;"&#xfffd;#&#xfffd;$&#xfffd;%&#xfffd;&amp;&#xfffd;'&#xfffd;(&#xfffd;)&#xfffd;*&#xfffd;+&#xfffd;,&#xfffd;-&#xfffd;.&#xfffd;/&#xfffd;0&#xfffd;1&#xfffd;2&#xfffd;3&#xfffd;4&#xfffd;5&#xfffd;6&#xfffd;7&#xfffd;8&#xfffd;9&#xfffd;:&#xfffd;;&#xfffd;&lt;&#xfffd;=&#xfffd;&gt;&#xfffd;?&#x30fb;</p>
  </body>
 </html>