Bug 174351: Encoding errors aren't treated as fatal XML errors. r=smontagu, sr=peterv
author Makoto Kato <m_kato@ga2.so-net.ne.jp>
Mon, 16 Feb 2009 04:22:47 -0800
changeset 25035 5953efc48779883d396022e960cb60f4ef0f7fc6
parent 25034 1e74329357e3b0da9247e790d12c42d97622f110
child 25036 566e513f5bcd1391d227611dc19c2871818020a2
push id unknown
push user unknown
push date unknown
reviewers smontagu, peterv
bugs 174351
milestone 1.9.2a1pre
Bug 174351: Encoding errors aren't treated as fatal XML errors. r=smontagu, sr=peterv
intl/uconv/native/nsNativeUConvService.cpp
intl/uconv/native/nsWinCEUConvService.cpp
intl/uconv/public/nsIUnicodeDecoder.h
intl/uconv/ucvja/nsJapaneseToUnicode.cpp
intl/uconv/ucvja/nsJapaneseToUnicode.h
intl/uconv/util/nsUCSupport.cpp
intl/uconv/util/nsUCSupport.h
intl/uconv/util/nsUnicodeDecodeHelper.cpp
intl/uconv/util/nsUnicodeDecodeHelper.h
parser/htmlparser/src/nsExpatDriver.cpp
parser/htmlparser/src/nsScanner.cpp
parser/htmlparser/src/nsScanner.h
parser/htmlparser/tests/mochitest/Makefile.in
parser/htmlparser/tests/mochitest/invalidchar.xml
parser/htmlparser/tests/mochitest/test_bug174351.html
--- a/intl/uconv/native/nsNativeUConvService.cpp
+++ b/intl/uconv/native/nsNativeUConvService.cpp
@@ -73,16 +73,20 @@ public:
                        PRInt32 * aSrcLength, 
                        PRUnichar * aDest, 
                        PRInt32 * aDestLength);
     
     NS_IMETHOD GetMaxLength(const char * aSrc, 
                             PRInt32 aSrcLength, 
                             PRInt32 * aDestLength);
     NS_IMETHOD Reset();
+
+    virtual void SetInputErrorBehavior(PRInt32 aBehavior);
+
+    virtual PRUnichar GetCharacterForUnMapped();
     
     // Encoder methods:
     
     NS_IMETHOD Convert(const PRUnichar * aSrc, 
                        PRInt32 * aSrcLength, 
                        char * aDest, 
                        PRInt32 * aDestLength);
     
@@ -204,16 +208,28 @@ IConvAdaptor::Reset()
 
 #ifdef DEBUG
     printf(" * IConvAdaptor - - Reset\n");
 #endif
     return NS_OK;
 }
 
 
+void
+IConvAdaptor::SetInputErrorBehavior(PRInt32 aBehavior)
+{
+}
+
+
+PRUnichar
+IConvAdaptor::GetCharacterForUnMapped()
+{
+    return PRUnichar(0xfffd); // Unicode REPLACEMENT CHARACTER
+}
+
 // convert unicode data into some charset.
 nsresult 
 IConvAdaptor::Convert(const PRUnichar * aSrc, 
                      PRInt32 * aSrcLength, 
                      char * aDest, 
                      PRInt32 * aDestLength)
 {
     return ConvertInternal( (void*) aSrc, 
--- a/intl/uconv/native/nsWinCEUConvService.cpp
+++ b/intl/uconv/native/nsWinCEUConvService.cpp
@@ -93,16 +93,19 @@ public:
                      PRUnichar * aDest, 
                      PRInt32 * aDestLength);
   
   NS_IMETHOD GetMaxLength(const char * aSrc, 
                           PRInt32 aSrcLength, 
                           PRInt32 * aDestLength);
   NS_IMETHOD Reset();
   
+  virtual void SetInputErrorBehavior(PRInt32 aBehavior);
+  virtual PRUnichar GetCharacterForUnMapped();
+
   // Encoder methods:
   
   NS_IMETHOD Convert(const PRUnichar * aSrc, 
                      PRInt32 * aSrcLength, 
                      char * aDest, 
                      PRInt32 * aDestLength);
   
   
@@ -307,16 +310,27 @@ WinCEUConvAdapter::GetMaxLength(const ch
 }
 
 NS_IMETHODIMP
 WinCEUConvAdapter::Reset()
 {
   return NS_OK;
 }
 
+void
+WinCEUConvAdapter::SetInputErrorBehavior(PRInt32 aBehavior)
+{
+}
+
+PRUnichar
+WinCEUConvAdapter::GetCharacterForUnMapped()
+{
+  return PRUnichar(0xfffd); // Unicode REPLACEMENT CHARACTER
+}
+
 // Encoder methods:
 
 NS_IMETHODIMP
 WinCEUConvAdapter::Convert(const PRUnichar * aSrc, 
                            PRInt32 * aSrcLength, 
                            char * aDest, 
                            PRInt32 * aDestLength)
 {
--- a/intl/uconv/public/nsIUnicodeDecoder.h
+++ b/intl/uconv/public/nsIUnicodeDecoder.h
@@ -37,23 +37,23 @@
 
 #ifndef nsIUnicodeDecoder_h___
 #define nsIUnicodeDecoder_h___
 
 #include "nscore.h"
 #include "nsISupports.h"
 
 // Interface ID for our Unicode Decoder interface
-// {B2F178E1-832A-11d2-8A8E-00600811A836}
+// {25359602-FC70-4d13-A9AB-8086D3827C0D}
 //NS_DECLARE_ID(kIUnicodeDecoderIID,
-//  0xb2f178e1, 0x832a, 0x11d2, 0x8a, 0x8e, 0x0, 0x60, 0x8, 0x11, 0xa8, 0x36);
+//  0x25359602, 0xfc70, 0x4d13, 0xa9, 0xab, 0x80, 0x86, 0xd3, 0x82, 0x7c, 0xd);
 
 #define NS_IUNICODEDECODER_IID	\
-	{ 0xb2f178e1, 0x832a, 0x11d2,	\
-		{ 0x8a, 0x8e, 0x0, 0x60, 0x8, 0x11, 0xa8, 0x36 }}
+	{ 0x25359602, 0xfc70, 0x4d13,	\
+		{ 0xa9, 0xab, 0x80, 0x86, 0xd3, 0x82, 0x7c, 0xd }}
 
 // XXX deprecated
 /*---------- BEGIN DEPRECATED */ 
 #define NS_EXACT_LENGTH \
   NS_ERROR_GENERATE_SUCCESS(NS_ERROR_MODULE_UCONV, 11)
 
 #define NS_PARTIAL_MORE_INPUT \
   NS_ERROR_GENERATE_SUCCESS(NS_ERROR_MODULE_UCONV, 12)
@@ -163,13 +163,27 @@ public:
   NS_IMETHOD GetMaxLength(const char * aSrc, PRInt32 aSrcLength, 
       PRInt32 * aDestLength) = 0;
 
   /**
    * Resets the charset converter so it may be recycled for a completely 
    * different and urelated buffer of data.
    */
   NS_IMETHOD Reset() = 0;
+
+  /**
+   * Specify what to do when a character cannot be mapped into unicode
+   *
+   * @param aBehavior [IN] the desired behavior
+   * @see kOnError_Recover
+   * @see kOnError_Signal
+   */
+  virtual void SetInputErrorBehavior(PRInt32 aBehavior) = 0;
+
+  /**
+   * Returns the Unicode character substituted for input that cannot be mapped.
+   */
+  virtual PRUnichar GetCharacterForUnMapped() = 0;
 };
 
 NS_DEFINE_STATIC_IID_ACCESSOR(nsIUnicodeDecoder, NS_IUNICODEDECODER_IID)
 
 #endif /* nsIUnicodeDecoder_h___ */
--- a/intl/uconv/ucvja/nsJapaneseToUnicode.cpp
+++ b/intl/uconv/ucvja/nsJapaneseToUnicode.cpp
@@ -45,16 +45,17 @@
 
 #include "nsICharsetConverterManager.h"
 #include "nsIServiceManager.h"
 static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID);
 
 #define SJIS_INDEX mMapIndex[0]
 #define JIS0208_INDEX mMapIndex[1]
 #define JIS0212_INDEX gJIS0212Index
+#define SJIS_UNMAPPED	0x30fb
 
 void nsJapaneseToUnicode::setMapMode()
 {
   nsresult res;
 
   mMapIndex = gIndex;
 
   nsCOMPtr<nsIPrefBranch> prefBranch = do_GetService(NS_PREFSERVICE_CONTRACTID);
@@ -148,17 +149,19 @@ NS_IMETHODIMP nsShiftJISToUnicode::Conve
                      case 0xfd:
                      case 0xfe:
                      case 0xff:
                        *dest++ = (PRUnichar) 0xf8f1 + 
                                    (*src - (unsigned char)(0xfd));
                        break;
 
                      default:
-                       *dest++ = 0x30FB;
+                       if (mErrBehavior == kOnError_Signal)
+                         goto error_invalidchar;
+                       *dest++ = SJIS_UNMAPPED;
                    }
                    if(dest >= destEnd)
                      goto error1;
                  } else {
                    *dest++ = mData; // JIS 0201
                    if(dest >= destEnd)
                      goto error1;
                  }
@@ -173,60 +176,75 @@ NS_IMETHODIMP nsShiftJISToUnicode::Conve
               goto error1;
           }
           break;
 
           case 1: // Index to table
           {
             PRUint8 off = sbIdx[*src];
             if(0xFF == off) {
-               *dest++ = 0x30FB;
+               if (mErrBehavior == kOnError_Signal)
+                 goto error_invalidchar;
+               *dest++ = SJIS_UNMAPPED;
             } else {
                PRUnichar ch = gJapaneseMap[mData+off];
-               if(ch == 0xfffd) 
-                 ch = 0x30fb;
+               if(ch == 0xfffd) {
+                 if (mErrBehavior == kOnError_Signal)
+                   goto error_invalidchar;
+                 ch = SJIS_UNMAPPED;
+               }
                *dest++ = ch;
             }
             mState = 0;
             if(dest >= destEnd)
               goto error1;
           }
           break;
 
           case 2: // EUDC
           {
             PRUint8 off = sbIdx[*src];
             if(0xFF == off) {
-               *dest++ = 0x30fb;
+               if (mErrBehavior == kOnError_Signal)
+                 goto error_invalidchar;
+
+               *dest++ = SJIS_UNMAPPED;
             } else {
                *dest++ = mData + off;
             }
             mState = 0;
             if(dest >= destEnd)
               goto error1;
           }
           break;
 
        }
        src++;
    }
    *aDestLen = dest - aDest;
    return NS_OK;
+error_invalidchar:
+   *aDestLen = dest - aDest;
+   *aSrcLen = src - (const unsigned char*)aSrc;
+   return NS_ERROR_ILLEGAL_INPUT;
 error1:
-   *aDestLen = dest-aDest;
+   *aDestLen = dest - aDest;
    src++;
    if ((mState == 0) && (src == srcEnd)) {
      return NS_OK;
    }
    *aSrcLen = src - (const unsigned char*)aSrc;
    return NS_OK_UDEC_MOREOUTPUT;
 }
 
-
-
+PRUnichar
+nsShiftJISToUnicode::GetCharacterForUnMapped()
+{
+  return PRUnichar(SJIS_UNMAPPED);
+}
 
 NS_IMETHODIMP nsEUCJPToUnicodeV2::Convert(
    const char * aSrc, PRInt32 * aSrcLen,
      PRUnichar * aDest, PRInt32 * aDestLen)
 {
    static const PRUint8 sbIdx[256] =
    {
 /* 0x0X */
@@ -298,16 +316,18 @@ NS_IMETHODIMP nsEUCJPToUnicodeV2::Conver
                if( 0x8e == *src) {
                  // JIS 0201
                  mState = 2; // JIS0201
                } else if(0x8f == *src) {
                  // JIS 0212
                  mState = 3; // JIS0212
                } else {
                  // others 
+                 if (mErrBehavior == kOnError_Signal)
+                   goto error_invalidchar;
                  *dest++ = 0xFFFD;
                  if(dest >= destEnd)
                    goto error1;
                }
             }
           } else {
             // ASCII
             *dest++ = (PRUnichar) *src;
@@ -315,16 +335,18 @@ NS_IMETHODIMP nsEUCJPToUnicodeV2::Conver
               goto error1;
           }
           break;
 
           case 1: // Index to table
           {
             PRUint8 off = sbIdx[*src];
             if(0xFF == off) {
+              if (mErrBehavior == kOnError_Signal)
+                goto error_invalidchar;
               *dest++ = 0xFFFD;
                // if the first byte is valid for EUC-JP but the second 
                // is not while being a valid US-ASCII(i.e. < 0xc0), save it
                // instead of eating it up !
                if ( ! (*src & 0xc0)  )
                  *dest++ = (PRUnichar) *src;;
             } else {
                *dest++ = gJapaneseMap[mData+off];
@@ -335,16 +357,18 @@ NS_IMETHODIMP nsEUCJPToUnicodeV2::Conver
           }
           break;
 
           case 2: // JIS 0201
           {
             if((0xA1 <= *src) && (*src <= 0xDF)) {
               *dest++ = (0xFF61-0x00A1) + *src;
             } else {
+              if (mErrBehavior == kOnError_Signal)
+                goto error_invalidchar;
               *dest++ = 0xFFFD;             
               // if 0x8e is not followed by a valid JIS X 0201 byte
               // but by a valid US-ASCII, save it instead of eating it up.
               if ( (PRUint8)*src < (PRUint8)0x7f )
                  *dest++ = (PRUnichar) *src;
             }
             mState = 0;
             if(dest >= destEnd)
@@ -367,40 +391,48 @@ NS_IMETHODIMP nsEUCJPToUnicodeV2::Conver
               mState = 5; // error
             }
           }
           break;
           case 4:
           {
             PRUint8 off = sbIdx[*src];
             if(0xFF == off) {
+              if (mErrBehavior == kOnError_Signal)
+                goto error_invalidchar;
                *dest++ = 0xFFFD;
             } else {
                *dest++ = gJapaneseMap[mData+off];
             }
             mState = 0;
             if(dest >= destEnd)
               goto error1;
           }
           break;
           case 5: // two bytes undefined
           {
+            if (mErrBehavior == kOnError_Signal)
+              goto error_invalidchar;
             *dest++ = 0xFFFD;
             mState = 0;
             if(dest >= destEnd)
               goto error1;
           }
           break;
        }
        src++;
    }
    *aDestLen = dest - aDest;
    return NS_OK;
+error_invalidchar:
+   *aDestLen = dest - aDest;
+   *aSrcLen = src - (const unsigned char*)aSrc;
+   return NS_ERROR_ILLEGAL_INPUT;
 error1:
-   *aDestLen = dest-aDest;
+   *aDestLen = dest - aDest;
    src++;
    if ((mState == 0) && (src == srcEnd)) {
      return NS_OK;
    } 
    *aSrcLen = src - (const unsigned char*)aSrc;
    return NS_OK_UDEC_MOREOUTPUT;
 }
 
@@ -541,16 +573,18 @@ NS_IMETHODIMP nsISO2022JPToUnicodeV2::Co
                 *dest++ = 0xFFFD;
               }
               mRunLength = 0;
             } else if ('J' == *src)  {
               mState = mState_JISX0201_1976Roman;
               if (mRunLength == 0 && mLastLegalState != mState_ASCII) {
                 if((dest+1) >= destEnd)
                   goto error1;
+                if (mErrBehavior == kOnError_Signal)
+                  goto error2;
                 *dest++ = 0xFFFD;
               }
               mRunLength = 0;
             } else if ('I' == *src)  {
               mState = mState_JISX0201_1976Kana;
               mRunLength = 0;
             } else  {
               if((dest+3) >= destEnd)
@@ -914,20 +948,20 @@ NS_IMETHODIMP nsISO2022JPToUnicodeV2::Co
           break;
 
        } // switch
        src++;
    }
    *aDestLen = dest - aDest;
    return NS_OK;
 error1:
-   *aDestLen = dest-aDest;
+   *aDestLen = dest - aDest;
    src++;
    if ((mState == 0) && (src == srcEnd)) {
      return NS_OK;
    }
    *aSrcLen = src - (const unsigned char*)aSrc;
    return NS_OK_UDEC_MOREOUTPUT;
 error2:
    *aSrcLen = src - (const unsigned char*)aSrc;
-   *aDestLen = dest-aDest;
+   *aDestLen = dest - aDest;
    return NS_ERROR_UNEXPECTED;
 }
--- a/intl/uconv/ucvja/nsJapaneseToUnicode.h
+++ b/intl/uconv/ucvja/nsJapaneseToUnicode.h
@@ -71,16 +71,18 @@ public:
      }
  NS_IMETHOD Reset()
      {
         mState = 0;
         setMapMode();
         return NS_OK;
      }
 
+  virtual PRUnichar GetCharacterForUnMapped();
+
 private:
 
 private:
  PRInt32  mState;
  PRInt32 mData;
 };
 
 class nsEUCJPToUnicodeV2 : public nsJapaneseToUnicode
--- a/intl/uconv/util/nsUCSupport.cpp
+++ b/intl/uconv/util/nsUCSupport.cpp
@@ -46,16 +46,17 @@
 #define DEFAULT_BUFFER_CAPACITY 16
 
 // XXX review the buffer growth limitation code
 
 //----------------------------------------------------------------------
 // Class nsBasicDecoderSupport [implementation]
 
 nsBasicDecoderSupport::nsBasicDecoderSupport() 
+  : mErrBehavior(kOnError_Recover)
 {
 }
 
 nsBasicDecoderSupport::~nsBasicDecoderSupport() 
 {
 }
 
 //----------------------------------------------------------------------
@@ -67,16 +68,30 @@ NS_IMPL_RELEASE(nsBasicDecoderSupport)
 NS_IMPL_QUERY_INTERFACE2(nsBasicDecoderSupport, nsIUnicodeDecoder, nsIBasicDecoder)
 #else
 NS_IMPL_QUERY_INTERFACE1(nsBasicDecoderSupport, nsIUnicodeDecoder)
 #endif
 
 //----------------------------------------------------------------------
 // Interface nsIUnicodeDecoder [implementation]
 
+void
+nsBasicDecoderSupport::SetInputErrorBehavior(PRInt32 aBehavior)
+{
+  NS_ABORT_IF_FALSE(aBehavior == kOnError_Recover || aBehavior == kOnError_Signal,
+                    "Unknown behavior for SetInputErrorBehavior");
+  mErrBehavior = aBehavior;
+}
+
+PRUnichar
+nsBasicDecoderSupport::GetCharacterForUnMapped()
+{
+  return PRUnichar(0xfffd); // Unicode REPLACEMENT CHARACTER
+}
+
 //----------------------------------------------------------------------
 // Class nsBufferDecoderSupport [implementation]
 
 nsBufferDecoderSupport::nsBufferDecoderSupport(PRUint32 aMaxLengthFactor) 
   : nsBasicDecoderSupport(),
     mMaxLengthFactor(aMaxLengthFactor)
 {
   mBufferCapacity = DEFAULT_BUFFER_CAPACITY;
@@ -139,16 +154,21 @@ NS_IMETHODIMP nsBufferDecoderSupport::Co
     FillBuffer(&src, srcEnd - src);
 
     // convert that buffer
     bcr = mBufferLength;
     bcw = destEnd - dest;
     res = ConvertNoBuff(mBuffer, &bcr, dest, &bcw);
     dest += bcw;
 
+    // Detect invalid input character
+    if (res == NS_ERROR_ILLEGAL_INPUT && mErrBehavior == kOnError_Signal) {
+      break;
+    }
+
     if ((res == NS_OK_UDEC_MOREINPUT) && (bcw == 0)) {
         res = NS_ERROR_UNEXPECTED;
 #if defined(DEBUG_yokoyama) || defined(DEBUG_ftang)
         NS_ASSERTION(0, "This should not happen. Internal buffer may be corrupted.");
 #endif
         break;
     } else {
       if (bcr < buffLen) {
@@ -232,17 +252,18 @@ nsTableDecoderSupport::~nsTableDecoderSu
 NS_IMETHODIMP nsTableDecoderSupport::ConvertNoBuff(const char * aSrc, 
                                                    PRInt32 * aSrcLength, 
                                                    PRUnichar * aDest, 
                                                    PRInt32 * aDestLength)
 {
   return nsUnicodeDecodeHelper::ConvertByTable(aSrc, aSrcLength,
                                                aDest, aDestLength,
                                                mScanClass, 
-                                               mShiftInTable, mMappingTable);
+                                               mShiftInTable, mMappingTable,
+                                               mErrBehavior == kOnError_Signal);
 }
 
 //----------------------------------------------------------------------
 // Class nsMultiTableDecoderSupport [implementation]
 
 nsMultiTableDecoderSupport::nsMultiTableDecoderSupport(
                             PRInt32 aTableCount,
                             const uRange * aRangeArray, 
@@ -268,17 +289,18 @@ NS_IMETHODIMP nsMultiTableDecoderSupport
                                                         PRInt32 * aSrcLength, 
                                                         PRUnichar * aDest, 
                                                         PRInt32 * aDestLength)
 {
   return nsUnicodeDecodeHelper::ConvertByMultiTable(aSrc, aSrcLength, 
                                                     aDest, aDestLength, 
                                                     mTableCount, mRangeArray,
                                                     mScanClassArray,
-                                                    mMappingTable);
+                                                    mMappingTable,
+                                                    mErrBehavior == kOnError_Signal);
 }
 
 //----------------------------------------------------------------------
 // Class nsOneByteDecoderSupport [implementation]
 
 nsOneByteDecoderSupport::nsOneByteDecoderSupport(
                          uMappingTable  * aMappingTable) 
 : nsBasicDecoderSupport()
@@ -304,17 +326,18 @@ NS_IMETHODIMP nsOneByteDecoderSupport::C
                        mMappingTable, mFastTable, ONE_BYTE_TABLE_SIZE);
     if (NS_FAILED(res)) return res;
     mFastTableCreated = PR_TRUE;
   }
 
   return nsUnicodeDecodeHelper::ConvertByFastTable(aSrc, aSrcLength, 
                                                    aDest, aDestLength, 
                                                    mFastTable,
-                                                   ONE_BYTE_TABLE_SIZE);
+                                                   ONE_BYTE_TABLE_SIZE,
+                                                   mErrBehavior == kOnError_Signal);
 }
 
 NS_IMETHODIMP nsOneByteDecoderSupport::GetMaxLength(const char * aSrc, 
                                                     PRInt32 aSrcLength, 
                                                     PRInt32 * aDestLength)
 {
   // single byte to Unicode converter
   *aDestLength = aSrcLength;
--- a/intl/uconv/util/nsUCSupport.h
+++ b/intl/uconv/util/nsUCSupport.h
@@ -105,16 +105,22 @@ public:
 
   /**
    * Class destructor.
    */
   virtual ~nsBasicDecoderSupport();
 
   //--------------------------------------------------------------------
   // Interface nsIUnicodeDecoder [declaration]
+
+  virtual void SetInputErrorBehavior(PRInt32 aBehavior);
+  virtual PRUnichar GetCharacterForUnMapped();
+
+protected:
+  PRInt32   mErrBehavior;
 };
 
 //----------------------------------------------------------------------
 // Class nsBufferDecoderSupport [declaration]
 
 /**
  * Support class for the Unicode decoders. 
  *
--- a/intl/uconv/util/nsUnicodeDecodeHelper.cpp
+++ b/intl/uconv/util/nsUnicodeDecodeHelper.cpp
@@ -44,17 +44,18 @@
 // Class nsUnicodeDecodeHelper [implementation]
 nsresult nsUnicodeDecodeHelper::ConvertByTable(
                                      const char * aSrc, 
                                      PRInt32 * aSrcLength, 
                                      PRUnichar * aDest, 
                                      PRInt32 * aDestLength, 
                                      uScanClassID aScanClass,
                                      uShiftInTable * aShiftInTable, 
-                                     uMappingTable  * aMappingTable)
+                                     uMappingTable  * aMappingTable,
+                                     PRBool aErrorSignal)
 {
   const char * src = aSrc;
   PRInt32 srcLen = *aSrcLength;
   PRUnichar * dest = aDest;
   PRUnichar * destEnd = aDest + *aDestLength;
 
   PRUnichar med;
   PRInt32 bcr; // byte count for read
@@ -77,16 +78,20 @@ nsresult nsUnicodeDecodeHelper::ConvertB
       break;
     }
 
     if (!uMapCode((uTable*) aMappingTable, static_cast<PRUint16>(med), reinterpret_cast<PRUint16*>(dest))) {
       if (med < 0x20) {
         // somehow some table miss the 0x00 - 0x20 part
         *dest = med;
       } else {
+        if (aErrorSignal) {
+          res = NS_ERROR_ILLEGAL_INPUT;
+          break;
+        }
         // Unicode replacement value for unmappable chars
         *dest = 0xfffd;
       }
     }
 
     src += bcr;
     srcLen -= bcr;
     dest++;
@@ -102,17 +107,18 @@ nsresult nsUnicodeDecodeHelper::ConvertB
 nsresult nsUnicodeDecodeHelper::ConvertByMultiTable(
                                      const char * aSrc, 
                                      PRInt32 * aSrcLength, 
                                      PRUnichar * aDest, 
                                      PRInt32 * aDestLength, 
                                      PRInt32 aTableCount, 
                                      const uRange * aRangeArray, 
                                      uScanClassID * aScanClassArray,
-                                     uMappingTable ** aMappingTable)
+                                     uMappingTable ** aMappingTable,
+                                     PRBool aErrorSignal)
 {
   PRUint8 * src = (PRUint8 *)aSrc;
   PRInt32 srcLen = *aSrcLength;
   PRUnichar * dest = aDest;
   PRUnichar * destEnd = aDest + *aDestLength;
 
   PRUnichar med;
   PRInt32 bcr; // byte count for read
@@ -138,17 +144,18 @@ nsresult nsUnicodeDecodeHelper::ConvertB
                           static_cast<PRUint16>(med), 
                           reinterpret_cast<PRUint16*>(dest)); 
         } // if (uScan ... )
       } // if Range
     } // for loop
 
     if(passRangeCheck && (! passScan))
     {
-      res = NS_OK_UDEC_MOREINPUT;
+      if (res != NS_ERROR_ILLEGAL_INPUT)
+        res = NS_OK_UDEC_MOREINPUT;
       break;
     }
     if(! done)
     {
       bcr = 1;
       if ((PRUint8)*src < 0x20) {
         // somehow some table miss the 0x00 - 0x20 part
         *dest = *src;
@@ -177,17 +184,25 @@ nsresult nsUnicodeDecodeHelper::ConvertB
                    break; 
                  }
                }
                break;
             }
           }
         }
         // treat it as NSBR if bcr == 1 and it is 0xa0
-        *dest = ((1==bcr)&&(*src == (PRUint8)0xa0 )) ? 0x00a0 : 0xfffd;
+        if ((1==bcr)&&(*src == (PRUint8)0xa0 )) {
+          *dest = 0x00a0;
+        } else {
+          if (aErrorSignal) {
+            res = NS_ERROR_ILLEGAL_INPUT;
+            break;
+          }
+          *dest = 0xfffd;
+        }
       }
     }
 
     src += bcr;
     srcLen -= bcr;
     dest++;
   } // while
 
@@ -199,32 +214,40 @@ nsresult nsUnicodeDecodeHelper::ConvertB
 }
 
 nsresult nsUnicodeDecodeHelper::ConvertByFastTable(
                                      const char * aSrc, 
                                      PRInt32 * aSrcLength, 
                                      PRUnichar * aDest, 
                                      PRInt32 * aDestLength, 
                                      const PRUnichar * aFastTable, 
-                                     PRInt32 aTableSize)
+                                     PRInt32 aTableSize,
+                                     PRBool aErrorSignal)
 {
   PRUint8 * src = (PRUint8 *)aSrc;
   PRUint8 * srcEnd = src;
   PRUnichar * dest = aDest;
 
   nsresult res;
   if (*aSrcLength > *aDestLength) {
     srcEnd += (*aDestLength);
     res = NS_PARTIAL_MORE_OUTPUT;
   } else {
     srcEnd += (*aSrcLength);
     res = NS_OK;
   }
 
-  for (; src<srcEnd;) *dest++ = aFastTable[*src++];
+  // On error, stop without consuming the bad byte (like ConvertByTable).
+  for (; src<srcEnd; src++, dest++) {
+    *dest = aFastTable[*src];
+    if (*dest == 0xfffd && aErrorSignal) {
+      res = NS_ERROR_ILLEGAL_INPUT;
+      break;
+    }
+  }
 
   *aSrcLength = src - (PRUint8 *)aSrc;
   *aDestLength  = dest - aDest;
   return res;
 }
 
 nsresult nsUnicodeDecodeHelper::CreateFastTable(
                                      uMappingTable  * aMappingTable,
--- a/intl/uconv/util/nsUnicodeDecodeHelper.h
+++ b/intl/uconv/util/nsUnicodeDecodeHelper.h
@@ -52,32 +52,33 @@ class nsUnicodeDecodeHelper
 public:
   /**
    * Converts data using a lookup table and optional shift table
    */
   static nsresult ConvertByTable(const char * aSrc, PRInt32 * aSrcLength, 
                                  PRUnichar * aDest, PRInt32 * aDestLength,
                                  uScanClassID aScanClass,
                                  uShiftInTable * aShiftInTable,
-                                 uMappingTable  * aMappingTable);
+                                 uMappingTable  * aMappingTable,
+                                 PRBool aErrorSignal = PR_FALSE);
 
   /**
    * Converts data using a set of lookup tables.
    */
   static nsresult ConvertByMultiTable(const char * aSrc, PRInt32 * aSrcLength,
       PRUnichar * aDest, PRInt32 * aDestLength, PRInt32 aTableCount, 
       const uRange * aRangeArray, uScanClassID * aScanClassArray,
-      uMappingTable ** aMappingTable);
+      uMappingTable ** aMappingTable, PRBool aErrorSignal = PR_FALSE);
 
   /**
    * Converts data using a fast lookup table.
    */
   static nsresult ConvertByFastTable(const char * aSrc, PRInt32 * aSrcLength, 
       PRUnichar * aDest, PRInt32 * aDestLength, const PRUnichar * aFastTable, 
-      PRInt32 aTableSize);
+      PRInt32 aTableSize, PRBool aErrorSignal);
 
   /**
    * Create a cache-like fast lookup table from a normal one.
    */
   static nsresult CreateFastTable(uMappingTable * aMappingTable,
       PRUnichar * aFastTable,  PRInt32 aTableSize);
 };
 
--- a/parser/htmlparser/src/nsExpatDriver.cpp
+++ b/parser/htmlparser/src/nsExpatDriver.cpp
@@ -1281,16 +1281,19 @@ nsExpatDriver::WillBuildModel(const CPar
                                      Driver_HandleUnparsedEntityDecl);
     XML_SetNotationDeclHandler(mExpatParser,
                                Driver_HandleNotationDecl);
   }
 
   // Set up the user data.
   XML_SetUserData(mExpatParser, this);
 
+  // XML must detect invalid character conversion
+  aParserContext.mScanner->OverrideReplacementCharacter(0xffff);
+
   return aSink->WillBuildModel();
 }
 
 NS_IMETHODIMP
 nsExpatDriver::BuildModel(nsIParser* aParser,
                           nsITokenizer* aTokenizer,
                           nsITokenObserver* anObserver,
                           nsIContentSink* aSink)
--- a/parser/htmlparser/src/nsScanner.cpp
+++ b/parser/htmlparser/src/nsScanner.cpp
@@ -103,16 +103,18 @@ nsScanner::nsScanner(const nsAString& an
     /* XXX see hack below, re: bug 182067 */
     memset(&mCurrentPosition, 0, sizeof(mCurrentPosition));
     mEndPosition = mCurrentPosition;
   }
   mMarkPosition = mCurrentPosition;
   mIncremental = PR_FALSE;
   mUnicodeDecoder = 0;
   mCharsetSource = kCharsetUninitialized;
+  mHasInvalidCharacter = PR_FALSE;
+  mReplacementCharacter = PRUnichar(0x0);
 }
 
 /**
  *  Use this constructor if you want i/o to be based on strings 
  *  the scanner receives. If you pass a null filename, you
  *  can still provide data to the scanner via append.
  *
  *  @update  gess 5/12/98
@@ -138,16 +140,18 @@ nsScanner::nsScanner(nsString& aFilename
   mEndPosition = mCurrentPosition;
 
   mIncremental = PR_TRUE;
   mFirstNonWhitespacePosition = -1;
   mCountRemaining = 0;
 
   mUnicodeDecoder = 0;
   mCharsetSource = kCharsetUninitialized;
+  mHasInvalidCharacter = PR_FALSE;
+  mReplacementCharacter = PRUnichar(0x0);
   SetDocumentCharset(aCharset, aSource);
 }
 
 nsresult nsScanner::SetDocumentCharset(const nsACString& aCharset , PRInt32 aSource)
 {
   if (aSource < mCharsetSource) // priority is lower the the current one , just
     return NS_OK;
 
@@ -179,18 +183,26 @@ nsresult nsScanner::SetDocumentCharset(c
     mCharset.Assign(charsetName);
   }
 
   mCharsetSource = aSource;
 
   NS_ASSERTION(nsParser::GetCharsetConverterManager(),
                "Must have the charset converter manager!");
 
-  return nsParser::GetCharsetConverterManager()->
+  res = nsParser::GetCharsetConverterManager()->
     GetUnicodeDecoderRaw(mCharset.get(), getter_AddRefs(mUnicodeDecoder));
+  if (NS_SUCCEEDED(res) && mUnicodeDecoder)
+  {
+     // Have the decoder signal conversion errors (rather than recover
+     // silently) so that XML encoding errors can be detected.
+     mUnicodeDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Signal);
+  }
+
+  return res;
 }
 
 
 /**
  *  default destructor
  *  
  *  @update  gess 3/25/98
  *  @param   
@@ -298,35 +310,43 @@ nsresult nsScanner::Append(const char* a
     PRInt32 unicharBufLen = 0;
     mUnicodeDecoder->GetMaxLength(aBuffer, aLen, &unicharBufLen);
     nsScannerString::Buffer* buffer = nsScannerString::AllocBuffer(unicharBufLen + 1);
     NS_ENSURE_TRUE(buffer,NS_ERROR_OUT_OF_MEMORY);
     start = unichars = buffer->DataStart();
 
     PRInt32 totalChars = 0;
     PRInt32 unicharLength = unicharBufLen;
+    PRInt32 errorPos = -1;
+
     do {
       PRInt32 srcLength = aLen;
       res = mUnicodeDecoder->Convert(aBuffer, &srcLength, unichars, &unicharLength);
 
       totalChars += unicharLength;
       // Continuation of failure case
       if(NS_FAILED(res)) {
-        // if we failed, we consume one byte, replace it with U+FFFD
-        // and try the conversion again.
+        // if we failed, we consume one byte, replace it with the replacement
+        // character and try the conversion again.
 
         // This is only needed because some decoders don't follow the
         // nsIUnicodeDecoder contract: they return a failure when *aDestLength
         // is 0 rather than the correct NS_OK_UDEC_MOREOUTPUT.  See bug 244177
         if ((unichars + unicharLength) >= buffer->DataEnd()) {
           NS_ERROR("Unexpected end of destination buffer");
           break;
         }
 
-        unichars[unicharLength++] = (PRUnichar)0xFFFD;
+        if (mReplacementCharacter == 0x0 && errorPos == -1) {
+          errorPos = totalChars;
+        }
+        unichars[unicharLength++] = mReplacementCharacter == 0x0 ?
+                                    mUnicodeDecoder->GetCharacterForUnMapped() :
+                                    mReplacementCharacter;
+
         unichars = unichars + unicharLength;
         unicharLength = unicharBufLen - (++totalChars);
 
         mUnicodeDecoder->Reset();
 
         if(((PRUint32) (srcLength + 1)) > aLen) {
           srcLength = aLen;
         }
@@ -339,17 +359,17 @@ nsresult nsScanner::Append(const char* a
       }
     } while (NS_FAILED(res) && (aLen > 0));
 
     buffer->SetDataLength(totalChars);
     // Don't propagate return code of unicode decoder
     // since it doesn't reflect on our success or failure
     // - Ref. bug 87110
     res = NS_OK; 
-    if (!AppendToBuffer(buffer, aRequest))
+    if (!AppendToBuffer(buffer, aRequest, errorPos))
       res = NS_ERROR_OUT_OF_MEMORY;
   }
   else {
     NS_WARNING("No decoder found.");
     res = NS_ERROR_FAILURE;
   }
 
   return res;
@@ -1138,17 +1158,18 @@ void nsScanner::ReplaceCharacter(nsScann
                                  PRUnichar aChar)
 {
   if (mSlidingBuffer) {
     mSlidingBuffer->ReplaceCharacter(aPosition, aChar);
   }
 }
 
 PRBool nsScanner::AppendToBuffer(nsScannerString::Buffer* aBuf,
-                                 nsIRequest *aRequest)
+                                 nsIRequest *aRequest,
+                                 PRInt32 aErrorPos)
 {
   if (nsParser::sParserDataListeners && mParser &&
       NS_FAILED(mParser->DataAdded(Substring(aBuf->DataStart(),
                                              aBuf->DataEnd()), aRequest))) {
     // Don't actually append on failure.
 
     return mSlidingBuffer != nsnull;
   }
@@ -1166,16 +1187,22 @@ PRBool nsScanner::AppendToBuffer(nsScann
     mSlidingBuffer->AppendBuffer(aBuf);
     if (mCurrentPosition == mEndPosition) {
       mSlidingBuffer->BeginReading(mCurrentPosition);
     }
     mSlidingBuffer->EndReading(mEndPosition);
     mCountRemaining += aBuf->DataLength();
   }
 
+  if (aErrorPos != -1 && !mHasInvalidCharacter) {
+    mHasInvalidCharacter = PR_TRUE;
+    mFirstInvalidPosition = mCurrentPosition;
+    mFirstInvalidPosition.advance(aErrorPos);
+  }
+
   if (mFirstNonWhitespacePosition == -1) {
     nsScannerIterator iter(mCurrentPosition);
     nsScannerIterator end(mEndPosition);
 
     while (iter != end) {
       if (!nsCRT::IsAsciiSpace(*iter)) {
         mFirstNonWhitespacePosition = Distance(mCurrentPosition, iter);
 
@@ -1230,10 +1257,17 @@ nsString& nsScanner::GetFilename(void) {
  *  @return  
  */
 
 void nsScanner::SelfTest(void) {
 #ifdef _DEBUG
 #endif
 }
 
+void nsScanner::OverrideReplacementCharacter(PRUnichar aReplacementCharacter)
+{ // Stores the caller's replacement character and applies it to the first invalid character, if one was recorded.
+  mReplacementCharacter = aReplacementCharacter; // a non-zero value is what Append() writes for undecodable input
 
+  if (mHasInvalidCharacter) { // an invalid character was already buffered before this override
+    ReplaceCharacter(mFirstInvalidPosition, mReplacementCharacter);
+  }
+}
 
--- a/parser/htmlparser/src/nsScanner.h
+++ b/parser/htmlparser/src/nsScanner.h
@@ -310,36 +310,49 @@ class nsScanner {
         return mFirstNonWhitespacePosition;
       }
 
       void SetParser(nsParser *aParser)
       {
         mParser = aParser;
       }
 
+
+      /**
+       * Override the character written in place of undecodable input.
+       * By default the nsIUnicodeDecoder's own character for unmapped input is used.
+       *
+       * @param aReplacementCharacter the replacement character to use;
+       *        the XML (expat) parser uses 0xffff
+       */
+      void OverrideReplacementCharacter(PRUnichar aReplacementCharacter);
+
   protected:
 
-      PRBool AppendToBuffer(nsScannerString::Buffer *, nsIRequest *aRequest);
+      PRBool AppendToBuffer(nsScannerString::Buffer *, nsIRequest *aRequest, PRInt32 aErrorPos = -1);
       PRBool AppendToBuffer(const nsAString& aStr)
       {
         nsScannerString::Buffer* buf = nsScannerString::AllocBufferFromString(aStr);
         if (!buf)
           return PR_FALSE;
         AppendToBuffer(buf, nsnull);
         return PR_TRUE;
       }
 
       nsScannerString*             mSlidingBuffer;
       nsScannerIterator            mCurrentPosition; // The position we will next read from in the scanner buffer
       nsScannerIterator            mMarkPosition;    // The position last marked (we may rewind to here)
       nsScannerIterator            mEndPosition;     // The current end of the scanner buffer
+      nsScannerIterator            mFirstInvalidPosition; // The position of the first invalid character that was detected
       nsString        mFilename;
       PRUint32        mCountRemaining; // The number of bytes still to be read
                                        // from the scanner buffer
       PRPackedBool    mIncremental;
+      PRPackedBool    mHasInvalidCharacter;
+      PRUnichar       mReplacementCharacter;
       PRInt32         mFirstNonWhitespacePosition;
       PRInt32         mCharsetSource;
       nsCString       mCharset;
       nsCOMPtr<nsIUnicodeDecoder> mUnicodeDecoder;
       nsParser        *mParser;
 
   private:
       nsScanner &operator =(const nsScanner &); // Not implemented.
--- a/parser/htmlparser/tests/mochitest/Makefile.in
+++ b/parser/htmlparser/tests/mochitest/Makefile.in
@@ -46,19 +46,21 @@ include $(topsrcdir)/config/rules.mk
 
 _TEST_FILES =	parser_datreader.js \
 		parser_web_testrunner.js \
 		html5lib_tree_dat1.txt \
 		html5lib_tree_dat2.txt \
 		html5lib_tree_dat3.txt \
 		html5_tree_construction_exceptions.js \
 		test_html5_tree_construction.html \
+		test_bug174351.html \
 	 	test_bug339350.xhtml \
 		test_bug358797.html \
 		test_bug396568.html \
 		test_bug418464.html \
 		test_bug460437.xhtml \
 		test_compatmode.html \
 		regressions.txt \
+		invalidchar.xml \
 		$(NULL)
 
 libs:: $(_TEST_FILES)
 	$(INSTALL) $(foreach f,$^,"$f") $(DEPTH)/_tests/testing/mochitest/tests/$(relativesrcdir)
new file mode 100644
--- /dev/null
+++ b/parser/htmlparser/tests/mochitest/invalidchar.xml
@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<root>
+ <fail> This is an invalid byte in UTF-8:  </fail>
+</root>
new file mode 100644
--- /dev/null
+++ b/parser/htmlparser/tests/mochitest/test_bug174351.html
@@ -0,0 +1,32 @@
+<!DOCTYPE HTML>
+<html>
+<!--
+https://bugzilla.mozilla.org/show_bug.cgi?id=174351
+-->
+<head>
+  <title>Test for Bug 174351</title>
+  <script type="text/javascript" src="/MochiKit/MochiKit.js"></script>
+  <script type="text/javascript" src="/tests/SimpleTest/SimpleTest.js"></script>
+  <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css" />
+</head>
+<body>
+<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=174351">Mozilla Bug 174351</a>
+<p id="display"></p>
+<div id="content" style="display: none">
+
+</div>
+<pre id="test">
+<script class="testbody" type="text/javascript">
+  var iframe = document.createElement('iframe');
+  iframe.src = "invalidchar.xml";
+  iframe.onload = function () {
+    var doc = document.getElementById('test').childNodes[1].contentDocument;
+    ok(doc.documentElement.tagName != "root", "Since XML has invalid enconding, must throw error");
+  };
+
+  document.getElementById('test').appendChild(iframe);
+</script>
+</pre>
+</body>
+</html>
+