Bug 539887 - Make document.written meta charset affect the charset of document.opened documents in the HTML5 parser. r=bnewman.
author: Henri Sivonen <hsivonen@iki.fi>
Tue, 09 Mar 2010 14:39:32 +0200
changeset: 39511 28513d79140d64e4f6a4c14ed2e492990524397a
parent: 39510 1024d83e76614cf7247c68e4e4d1ec9686f8c706
child: 39512 ee56d65affa35502fe19f3d97061878aa7753ef3
push id: 12240
push user: hsivonen@iki.fi
push date: Wed, 17 Mar 2010 08:29:32 +0000
treeherder: mozilla-central@8606d3764288 [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: bnewman
bugs: 539887
milestone: 1.9.3a4pre
Bug 539887 - Make document.written meta charset affect the charset of document.opened documents in the HTML5 parser. r=bnewman.
parser/html/nsAHtml5EncodingDeclarationHandler.h
parser/html/nsHtml5AttributeName.cpp
parser/html/nsHtml5AttributeName.h
parser/html/nsHtml5ElementName.cpp
parser/html/nsHtml5ElementName.h
parser/html/nsHtml5HtmlAttributes.cpp
parser/html/nsHtml5HtmlAttributes.h
parser/html/nsHtml5MetaScanner.cpp
parser/html/nsHtml5MetaScanner.h
parser/html/nsHtml5Parser.cpp
parser/html/nsHtml5Parser.h
parser/html/nsHtml5Portability.h
parser/html/nsHtml5StackNode.cpp
parser/html/nsHtml5StackNode.h
parser/html/nsHtml5StateSnapshot.cpp
parser/html/nsHtml5StateSnapshot.h
parser/html/nsHtml5StreamParser.cpp
parser/html/nsHtml5StreamParser.h
parser/html/nsHtml5Tokenizer.cpp
parser/html/nsHtml5Tokenizer.h
parser/html/nsHtml5TreeBuilderCppSupplement.h
parser/html/nsHtml5TreeBuilderHSupplement.h
parser/html/nsHtml5TreeOpExecutor.cpp
parser/html/nsHtml5TreeOpExecutor.h
parser/html/nsHtml5TreeOperation.cpp
parser/html/nsHtml5TreeOperation.h
parser/html/nsHtml5UTF16Buffer.cpp
parser/html/nsHtml5UTF16Buffer.h
new file mode 100644
--- /dev/null
+++ b/parser/html/nsAHtml5EncodingDeclarationHandler.h
@@ -0,0 +1,50 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is HTML Parser C++ Translator code.
+ *
+ * The Initial Developer of the Original Code is
+ * Mozilla Foundation.
+ * Portions created by the Initial Developer are Copyright (C) 2010
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ *   Henri Sivonen <hsivonen@iki.fi>
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+#ifndef nsAHtml5EncodingDeclarationHandler_h_
+#define nsAHtml5EncodingDeclarationHandler_h_
+
+class nsAHtml5EncodingDeclarationHandler { // callback interface handed to the tokenizer via setEncodingDeclarationHandler()
+  public:
+    // Reports the encoding label of a late <meta charset>; aEncoding is a UTF-16 string.
+    virtual void internalEncodingDeclaration(nsString* aEncoding) = 0;
+    // Virtual so an implementation can be destroyed through a pointer to this interface.
+    virtual ~nsAHtml5EncodingDeclarationHandler() {
+    }
+};
+
+#endif /* nsAHtml5EncodingDeclarationHandler_h_ */
--- a/parser/html/nsHtml5AttributeName.cpp
+++ b/parser/html/nsHtml5AttributeName.cpp
@@ -38,16 +38,17 @@
 #include "jArray.h"
 #include "nsHtml5DocumentMode.h"
 #include "nsHtml5ArrayCopy.h"
 #include "nsHtml5NamedCharacters.h"
 #include "nsHtml5Atoms.h"
 #include "nsHtml5ByteReadable.h"
 #include "nsIUnicodeDecoder.h"
 #include "nsAHtml5TreeBuilderState.h"
+#include "nsAHtml5EncodingDeclarationHandler.h"
 
 #include "nsHtml5Tokenizer.h"
 #include "nsHtml5TreeBuilder.h"
 #include "nsHtml5MetaScanner.h"
 #include "nsHtml5ElementName.h"
 #include "nsHtml5HtmlAttributes.h"
 #include "nsHtml5StackNode.h"
 #include "nsHtml5UTF16Buffer.h"
--- a/parser/html/nsHtml5AttributeName.h
+++ b/parser/html/nsHtml5AttributeName.h
@@ -39,16 +39,17 @@
 #include "jArray.h"
 #include "nsHtml5DocumentMode.h"
 #include "nsHtml5ArrayCopy.h"
 #include "nsHtml5NamedCharacters.h"
 #include "nsHtml5Atoms.h"
 #include "nsHtml5ByteReadable.h"
 #include "nsIUnicodeDecoder.h"
 #include "nsAHtml5TreeBuilderState.h"
+#include "nsAHtml5EncodingDeclarationHandler.h"
 
 class nsHtml5StreamParser;
 class nsHtml5SpeculativeLoader;
 
 class nsHtml5Tokenizer;
 class nsHtml5TreeBuilder;
 class nsHtml5MetaScanner;
 class nsHtml5ElementName;
--- a/parser/html/nsHtml5ElementName.cpp
+++ b/parser/html/nsHtml5ElementName.cpp
@@ -38,16 +38,17 @@
 #include "jArray.h"
 #include "nsHtml5DocumentMode.h"
 #include "nsHtml5ArrayCopy.h"
 #include "nsHtml5NamedCharacters.h"
 #include "nsHtml5Atoms.h"
 #include "nsHtml5ByteReadable.h"
 #include "nsIUnicodeDecoder.h"
 #include "nsAHtml5TreeBuilderState.h"
+#include "nsAHtml5EncodingDeclarationHandler.h"
 
 #include "nsHtml5Tokenizer.h"
 #include "nsHtml5TreeBuilder.h"
 #include "nsHtml5MetaScanner.h"
 #include "nsHtml5AttributeName.h"
 #include "nsHtml5HtmlAttributes.h"
 #include "nsHtml5StackNode.h"
 #include "nsHtml5UTF16Buffer.h"
--- a/parser/html/nsHtml5ElementName.h
+++ b/parser/html/nsHtml5ElementName.h
@@ -39,16 +39,17 @@
 #include "jArray.h"
 #include "nsHtml5DocumentMode.h"
 #include "nsHtml5ArrayCopy.h"
 #include "nsHtml5NamedCharacters.h"
 #include "nsHtml5Atoms.h"
 #include "nsHtml5ByteReadable.h"
 #include "nsIUnicodeDecoder.h"
 #include "nsAHtml5TreeBuilderState.h"
+#include "nsAHtml5EncodingDeclarationHandler.h"
 
 class nsHtml5StreamParser;
 class nsHtml5SpeculativeLoader;
 
 class nsHtml5Tokenizer;
 class nsHtml5TreeBuilder;
 class nsHtml5MetaScanner;
 class nsHtml5AttributeName;
--- a/parser/html/nsHtml5HtmlAttributes.cpp
+++ b/parser/html/nsHtml5HtmlAttributes.cpp
@@ -39,16 +39,17 @@
 #include "jArray.h"
 #include "nsHtml5DocumentMode.h"
 #include "nsHtml5ArrayCopy.h"
 #include "nsHtml5NamedCharacters.h"
 #include "nsHtml5Atoms.h"
 #include "nsHtml5ByteReadable.h"
 #include "nsIUnicodeDecoder.h"
 #include "nsAHtml5TreeBuilderState.h"
+#include "nsAHtml5EncodingDeclarationHandler.h"
 
 #include "nsHtml5Tokenizer.h"
 #include "nsHtml5TreeBuilder.h"
 #include "nsHtml5MetaScanner.h"
 #include "nsHtml5AttributeName.h"
 #include "nsHtml5ElementName.h"
 #include "nsHtml5StackNode.h"
 #include "nsHtml5UTF16Buffer.h"
--- a/parser/html/nsHtml5HtmlAttributes.h
+++ b/parser/html/nsHtml5HtmlAttributes.h
@@ -40,16 +40,17 @@
 #include "jArray.h"
 #include "nsHtml5DocumentMode.h"
 #include "nsHtml5ArrayCopy.h"
 #include "nsHtml5NamedCharacters.h"
 #include "nsHtml5Atoms.h"
 #include "nsHtml5ByteReadable.h"
 #include "nsIUnicodeDecoder.h"
 #include "nsAHtml5TreeBuilderState.h"
+#include "nsAHtml5EncodingDeclarationHandler.h"
 
 class nsHtml5StreamParser;
 class nsHtml5SpeculativeLoader;
 
 class nsHtml5Tokenizer;
 class nsHtml5TreeBuilder;
 class nsHtml5MetaScanner;
 class nsHtml5AttributeName;
--- a/parser/html/nsHtml5MetaScanner.cpp
+++ b/parser/html/nsHtml5MetaScanner.cpp
@@ -39,16 +39,17 @@
 #include "jArray.h"
 #include "nsHtml5DocumentMode.h"
 #include "nsHtml5ArrayCopy.h"
 #include "nsHtml5NamedCharacters.h"
 #include "nsHtml5Atoms.h"
 #include "nsHtml5ByteReadable.h"
 #include "nsIUnicodeDecoder.h"
 #include "nsAHtml5TreeBuilderState.h"
+#include "nsAHtml5EncodingDeclarationHandler.h"
 
 #include "nsHtml5Tokenizer.h"
 #include "nsHtml5TreeBuilder.h"
 #include "nsHtml5AttributeName.h"
 #include "nsHtml5ElementName.h"
 #include "nsHtml5HtmlAttributes.h"
 #include "nsHtml5StackNode.h"
 #include "nsHtml5UTF16Buffer.h"
--- a/parser/html/nsHtml5MetaScanner.h
+++ b/parser/html/nsHtml5MetaScanner.h
@@ -40,16 +40,17 @@
 #include "jArray.h"
 #include "nsHtml5DocumentMode.h"
 #include "nsHtml5ArrayCopy.h"
 #include "nsHtml5NamedCharacters.h"
 #include "nsHtml5Atoms.h"
 #include "nsHtml5ByteReadable.h"
 #include "nsIUnicodeDecoder.h"
 #include "nsAHtml5TreeBuilderState.h"
+#include "nsAHtml5EncodingDeclarationHandler.h"
 
 class nsHtml5StreamParser;
 class nsHtml5SpeculativeLoader;
 
 class nsHtml5Tokenizer;
 class nsHtml5TreeBuilder;
 class nsHtml5AttributeName;
 class nsHtml5ElementName;
--- a/parser/html/nsHtml5Parser.cpp
+++ b/parser/html/nsHtml5Parser.cpp
@@ -55,16 +55,19 @@
 #include "nsIScriptGlobalObjectOwner.h"
 #include "nsIScriptSecurityManager.h"
 #include "nsHtml5DocumentMode.h"
 #include "nsHtml5Tokenizer.h"
 #include "nsHtml5UTF16Buffer.h"
 #include "nsHtml5TreeBuilder.h"
 #include "nsHtml5Parser.h"
 #include "nsHtml5AtomTable.h"
+#include "nsICharsetAlias.h"
+
+static NS_DEFINE_CID(kCharsetAliasCID, NS_CHARSETALIAS_CID);
 
 NS_INTERFACE_TABLE_HEAD(nsHtml5Parser)
   NS_INTERFACE_TABLE2(nsHtml5Parser, nsIParser, nsISupportsWeakReference)
   NS_INTERFACE_TABLE_TO_MAP_SEGUE_CYCLE_COLLECTION(nsHtml5Parser)
 NS_INTERFACE_MAP_END
 
 NS_IMPL_CYCLE_COLLECTING_ADDREF(nsHtml5Parser)
 NS_IMPL_CYCLE_COLLECTING_RELEASE(nsHtml5Parser)
@@ -134,17 +137,18 @@ nsHtml5Parser::SetCommand(eParserCommand
 
 NS_IMETHODIMP_(void)
 nsHtml5Parser::SetDocumentCharset(const nsACString& aCharset, PRInt32 aCharsetSource)
 {
   NS_PRECONDITION(!mExecutor->HasStarted(),
                   "Document charset set too late.");
   NS_PRECONDITION(mStreamParser, "Tried to set charset on a script-only parser.");
   mStreamParser->SetDocumentCharset(aCharset, aCharsetSource);
-  mExecutor->SetDocumentCharset((nsACString&)aCharset);
+  mExecutor->SetDocumentCharsetAndSource((nsACString&)aCharset, aCharsetSource);
+  mCharsetSource = aCharsetSource; // used for the document.open() case only
 }
 
 NS_IMETHODIMP_(void)
 nsHtml5Parser::SetParserFilter(nsIParserFilter* aFilter)
 {
   NS_ERROR("Attempt to set a parser filter on HTML5 parser.");
 }
 
@@ -261,17 +265,19 @@ nsHtml5Parser::Parse(const nsAString& aS
   // required grips to these, too.
   nsRefPtr<nsHtml5StreamParser> streamKungFuDeathGrip(mStreamParser);
   nsRefPtr<nsHtml5TreeOpExecutor> treeOpKungFuDeathGrip(mExecutor);
 
   // Return early if the parser has processed EOF
   if (!mExecutor->HasStarted()) {
     NS_ASSERTION(!mStreamParser,
                  "Had stream parser but document.write started life cycle.");
+    // This is the first document.write() on a document.open()ed document
     mExecutor->SetParser(this);
+    mTokenizer->setEncodingDeclarationHandler(this);
     mTreeBuilder->setScriptingEnabled(mExecutor->IsScriptEnabled());
     mTokenizer->start();
     mExecutor->Start();
     /*
      * If you move the following line, be very careful not to cause 
      * WillBuildModel to be called before the document has had its 
      * script global object set.
      */
@@ -683,8 +689,40 @@ nsHtml5Parser::InitializeDocWriteParserS
 
 void
 nsHtml5Parser::ContinueAfterFailedCharsetSwitch()
 {
   NS_PRECONDITION(mStreamParser, 
     "Tried to continue after failed charset switch without a stream parser");
   mStreamParser->ContinueAfterFailedCharsetSwitch();
 }
+
+void
+nsHtml5Parser::internalEncodingDeclaration(nsString* aEncoding)
+{
+  // Receives the encoding label of a late <meta charset>. This handler is only
+  // installed when parsing a document.open()ed doc; see bugs 539887 and 255820.
+  if (mCharsetSource >= kCharsetFromMetaTag) { // this threshold corresponds to "confident" in the HTML5 spec
+    return; // the charset already comes from a meta tag or a stronger source
+  }
+
+  nsresult rv = NS_OK;
+  nsCOMPtr<nsICharsetAlias> calias(do_GetService(kCharsetAliasCID, &rv));
+  if (NS_FAILED(rv)) {
+    NS_NOTREACHED("Charset alias service not available.");
+    return;
+  }
+  nsCAutoString newEncoding;
+  CopyUTF16toUTF8(*aEncoding, newEncoding);
+  
+  // XXX check HTML5 non-IANA aliases here
+  
+  nsCAutoString preferred;
+  // Map the declared label to the alias service's preferred charset name.
+  rv = calias->GetPreferred(newEncoding, preferred);
+  if (NS_FAILED(rv)) {
+    // the encoding name is bogus
+    return;
+  }
+  // Record the meta tag as the source, then queue the charset change on the tree builder.
+  mCharsetSource = kCharsetFromMetaTag;
+  mTreeBuilder->SetDocumentCharset(preferred, mCharsetSource);
+}
--- a/parser/html/nsHtml5Parser.h
+++ b/parser/html/nsHtml5Parser.h
@@ -57,19 +57,21 @@
 #include "nsCycleCollectionParticipant.h"
 #include "nsIInputStream.h"
 #include "nsDetectionConfident.h"
 #include "nsHtml5UTF16Buffer.h"
 #include "nsHtml5TreeOpExecutor.h"
 #include "nsHtml5StreamParser.h"
 #include "nsHtml5AtomTable.h"
 #include "nsWeakReference.h"
+#include "nsAHtml5EncodingDeclarationHandler.h"
 
 class nsHtml5Parser : public nsIParser,
-                      public nsSupportsWeakReference
+                      public nsSupportsWeakReference,
+                      public nsAHtml5EncodingDeclarationHandler
 {
   public:
     NS_DECL_AND_IMPL_ZEROING_OPERATOR_NEW
     NS_DECL_CYCLE_COLLECTING_ISUPPORTS
 
     NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(nsHtml5Parser, nsIParser)
 
     nsHtml5Parser();
@@ -272,16 +274,22 @@ class nsHtml5Parser : public nsIParser,
 
     /**
      * True if this is a script-created HTML5 parser.
      */
     virtual PRBool IsScriptCreated();
 
     /* End nsIParser  */
 
+    // nsAHtml5EncodingDeclarationHandler
+    /**
+     * Tree builder uses this to report a late <meta charset>
+     */
+    virtual void internalEncodingDeclaration(nsString* aEncoding);
+
     // Not from an external interface
     // Non-inherited methods
 
   public:
 
     /**
      * Initializes the parser to load from a channel.
      */
@@ -315,16 +323,23 @@ class nsHtml5Parser : public nsIParser,
     /**
      * Parse until pending data is exhausted or a script blocks the parser
      */
     void ParseUntilBlocked();
 
     // State variables
 
     /**
+     * The charset source. This variable is used for script-created parsers
+     * only. When parsing from the stream, this variable can have a bogus 
+     * value.
+     */
+    PRInt32                       mCharsetSource;
+
+    /**
      * Whether the last character tokenized was a carriage return (for CRLF)
      */
     PRBool                        mLastWasCR;
 
     /**
      * The parser is in the fragment mode
      */
     PRBool                        mFragmentMode;
--- a/parser/html/nsHtml5Portability.h
+++ b/parser/html/nsHtml5Portability.h
@@ -39,16 +39,17 @@
 #include "jArray.h"
 #include "nsHtml5DocumentMode.h"
 #include "nsHtml5ArrayCopy.h"
 #include "nsHtml5NamedCharacters.h"
 #include "nsHtml5Atoms.h"
 #include "nsHtml5ByteReadable.h"
 #include "nsIUnicodeDecoder.h"
 #include "nsAHtml5TreeBuilderState.h"
+#include "nsAHtml5EncodingDeclarationHandler.h"
 
 class nsHtml5StreamParser;
 class nsHtml5SpeculativeLoader;
 
 class nsHtml5Tokenizer;
 class nsHtml5TreeBuilder;
 class nsHtml5MetaScanner;
 class nsHtml5AttributeName;
--- a/parser/html/nsHtml5StackNode.cpp
+++ b/parser/html/nsHtml5StackNode.cpp
@@ -39,16 +39,17 @@
 #include "jArray.h"
 #include "nsHtml5DocumentMode.h"
 #include "nsHtml5ArrayCopy.h"
 #include "nsHtml5NamedCharacters.h"
 #include "nsHtml5Atoms.h"
 #include "nsHtml5ByteReadable.h"
 #include "nsIUnicodeDecoder.h"
 #include "nsAHtml5TreeBuilderState.h"
+#include "nsAHtml5EncodingDeclarationHandler.h"
 
 #include "nsHtml5Tokenizer.h"
 #include "nsHtml5TreeBuilder.h"
 #include "nsHtml5MetaScanner.h"
 #include "nsHtml5AttributeName.h"
 #include "nsHtml5ElementName.h"
 #include "nsHtml5HtmlAttributes.h"
 #include "nsHtml5UTF16Buffer.h"
--- a/parser/html/nsHtml5StackNode.h
+++ b/parser/html/nsHtml5StackNode.h
@@ -40,16 +40,17 @@
 #include "jArray.h"
 #include "nsHtml5DocumentMode.h"
 #include "nsHtml5ArrayCopy.h"
 #include "nsHtml5NamedCharacters.h"
 #include "nsHtml5Atoms.h"
 #include "nsHtml5ByteReadable.h"
 #include "nsIUnicodeDecoder.h"
 #include "nsAHtml5TreeBuilderState.h"
+#include "nsAHtml5EncodingDeclarationHandler.h"
 
 class nsHtml5StreamParser;
 class nsHtml5SpeculativeLoader;
 
 class nsHtml5Tokenizer;
 class nsHtml5TreeBuilder;
 class nsHtml5MetaScanner;
 class nsHtml5AttributeName;
--- a/parser/html/nsHtml5StateSnapshot.cpp
+++ b/parser/html/nsHtml5StateSnapshot.cpp
@@ -38,16 +38,17 @@
 #include "jArray.h"
 #include "nsHtml5DocumentMode.h"
 #include "nsHtml5ArrayCopy.h"
 #include "nsHtml5NamedCharacters.h"
 #include "nsHtml5Atoms.h"
 #include "nsHtml5ByteReadable.h"
 #include "nsIUnicodeDecoder.h"
 #include "nsAHtml5TreeBuilderState.h"
+#include "nsAHtml5EncodingDeclarationHandler.h"
 
 #include "nsHtml5Tokenizer.h"
 #include "nsHtml5TreeBuilder.h"
 #include "nsHtml5MetaScanner.h"
 #include "nsHtml5AttributeName.h"
 #include "nsHtml5ElementName.h"
 #include "nsHtml5HtmlAttributes.h"
 #include "nsHtml5StackNode.h"
--- a/parser/html/nsHtml5StateSnapshot.h
+++ b/parser/html/nsHtml5StateSnapshot.h
@@ -39,16 +39,17 @@
 #include "jArray.h"
 #include "nsHtml5DocumentMode.h"
 #include "nsHtml5ArrayCopy.h"
 #include "nsHtml5NamedCharacters.h"
 #include "nsHtml5Atoms.h"
 #include "nsHtml5ByteReadable.h"
 #include "nsIUnicodeDecoder.h"
 #include "nsAHtml5TreeBuilderState.h"
+#include "nsAHtml5EncodingDeclarationHandler.h"
 
 class nsHtml5StreamParser;
 class nsHtml5SpeculativeLoader;
 
 class nsHtml5Tokenizer;
 class nsHtml5TreeBuilder;
 class nsHtml5MetaScanner;
 class nsHtml5AttributeName;
--- a/parser/html/nsHtml5StreamParser.cpp
+++ b/parser/html/nsHtml5StreamParser.cpp
@@ -35,16 +35,17 @@
  * and other provisions required by the GPL or the LGPL. If you do not delete
  * the provisions above, a recipient may use your version of this file under
  * the terms of any one of the MPL, the GPL or the LGPL.
  *
  * ***** END LICENSE BLOCK ***** */
 
 #include "nsHtml5StreamParser.h"
 #include "nsICharsetConverterManager.h"
+#include "nsICharsetAlias.h"
 #include "nsServiceManagerUtils.h"
 #include "nsEncoderDecoderUtils.h"
 #include "nsContentUtils.h"
 #include "nsHtml5Tokenizer.h"
 #include "nsIHttpChannel.h"
 #include "nsHtml5Parser.h"
 #include "nsHtml5TreeBuilder.h"
 #include "nsHtml5AtomTable.h"
@@ -209,17 +210,17 @@ nsHtml5StreamParser::GetChannel(nsIChann
 
 NS_IMETHODIMP
 nsHtml5StreamParser::Notify(const char* aCharset, nsDetectionConfident aConf)
 {
   NS_ASSERTION(IsParserThread(), "Wrong thread!");
   if (aConf == eBestAnswer || aConf == eSureAnswer) {
     mCharset.Assign(aCharset);
     mCharsetSource = kCharsetFromAutoDetection;
-    mTreeBuilder->SetDocumentCharset(mCharset);
+    mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
   }
   return NS_OK;
 }
 
 nsresult
 nsHtml5StreamParser::SetupDecodingAndWriteSniffingBufferAndCurrentSegment(const PRUint8* aFromSegment, // can be null
                                                                           PRUint32 aCount,
                                                                           PRUint32* aWriteCount)
@@ -228,17 +229,17 @@ nsHtml5StreamParser::SetupDecodingAndWri
   nsresult rv = NS_OK;
   nsCOMPtr<nsICharsetConverterManager> convManager = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv);
   NS_ENSURE_SUCCESS(rv, rv);
   rv = convManager->GetUnicodeDecoder(mCharset.get(), getter_AddRefs(mUnicodeDecoder));
   if (rv == NS_ERROR_UCONV_NOCONV) {
     mCharset.Assign("windows-1252"); // lower case is the raw form
     mCharsetSource = kCharsetFromWeakDocTypeDefault;
     rv = convManager->GetUnicodeDecoderRaw(mCharset.get(), getter_AddRefs(mUnicodeDecoder));
-    mTreeBuilder->SetDocumentCharset(mCharset);
+    mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
   }
   NS_ENSURE_SUCCESS(rv, rv);
   mUnicodeDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Recover);
   return WriteSniffingBufferAndCurrentSegment(aFromSegment, aCount, aWriteCount);
 }
 
 nsresult
 nsHtml5StreamParser::WriteSniffingBufferAndCurrentSegment(const PRUint8* aFromSegment, // can be null
@@ -266,17 +267,17 @@ nsHtml5StreamParser::SetupDecodingFromBo
   NS_ASSERTION(IsParserThread(), "Wrong thread!");
   nsresult rv = NS_OK;
   nsCOMPtr<nsICharsetConverterManager> convManager = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv);
   NS_ENSURE_SUCCESS(rv, rv);
   rv = convManager->GetUnicodeDecoderRaw(aDecoderCharsetName, getter_AddRefs(mUnicodeDecoder));
   NS_ENSURE_SUCCESS(rv, rv);
   mCharset.Assign(aCharsetName);
   mCharsetSource = kCharsetFromByteOrderMark;
-  mTreeBuilder->SetDocumentCharset(mCharset);
+  mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
   mSniffingBuffer = nsnull;
   mMetaScanner = nsnull;
   mBomState = BOM_SNIFFING_OVER;
   return rv;
 }
 
 nsresult
 nsHtml5StreamParser::FinalizeSniffing(const PRUint8* aFromSegment, // can be null
@@ -304,17 +305,17 @@ nsHtml5StreamParser::FinalizeSniffing(co
     rv = mChardet->Done();
     NS_ENSURE_SUCCESS(rv, rv);
     // fall thru; callback may have changed charset  
   }
   if (mCharsetSource == kCharsetUninitialized) {
     // Hopefully this case is never needed, but dealing with it anyway
     mCharset.Assign("windows-1252");
     mCharsetSource = kCharsetFromWeakDocTypeDefault;
-    mTreeBuilder->SetDocumentCharset(mCharset);
+    mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
   }
   return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment, aCount, aWriteCount);
 }
 
 nsresult
 nsHtml5StreamParser::SniffStreamBytes(const PRUint8* aFromSegment,
                                       PRUint32 aCount,
                                       PRUint32* aWriteCount)
@@ -399,30 +400,30 @@ nsHtml5StreamParser::SniffStreamBytes(co
     // this is the last buffer
     PRUint32 countToSniffingLimit = NS_HTML5_STREAM_PARSER_SNIFFING_BUFFER_SIZE - mSniffingLength;
     nsHtml5ByteReadable readable(aFromSegment, aFromSegment + countToSniffingLimit);
     mMetaScanner->sniff(&readable, getter_AddRefs(mUnicodeDecoder), mCharset);
     if (mUnicodeDecoder) {
       mUnicodeDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Recover);
       // meta scan successful
       mCharsetSource = kCharsetFromMetaPrescan;
-      mTreeBuilder->SetDocumentCharset(mCharset);
+      mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
       mMetaScanner = nsnull;
       return WriteSniffingBufferAndCurrentSegment(aFromSegment, aCount, aWriteCount);
     }
     return FinalizeSniffing(aFromSegment, aCount, aWriteCount, countToSniffingLimit);
   }
 
   // not the last buffer
   nsHtml5ByteReadable readable(aFromSegment, aFromSegment + aCount);
   mMetaScanner->sniff(&readable, getter_AddRefs(mUnicodeDecoder), mCharset);
   if (mUnicodeDecoder) {
     // meta scan successful
     mCharsetSource = kCharsetFromMetaPrescan;
-    mTreeBuilder->SetDocumentCharset(mCharset);
+    mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
     mMetaScanner = nsnull;
     return WriteSniffingBufferAndCurrentSegment(aFromSegment, aCount, aWriteCount);
   }
   if (!mSniffingBuffer) {
     mSniffingBuffer = new PRUint8[NS_HTML5_STREAM_PARSER_SNIFFING_BUFFER_SIZE];
   }
   memcpy(mSniffingBuffer + mSniffingLength, aFromSegment, aCount);
   mSniffingLength += aCount;
--- a/parser/html/nsHtml5StreamParser.h
+++ b/parser/html/nsHtml5StreamParser.h
@@ -43,22 +43,22 @@
 #include "nsCOMPtr.h"
 #include "nsIStreamListener.h"
 #include "nsICharsetDetectionObserver.h"
 #include "nsHtml5MetaScanner.h"
 #include "nsIUnicodeDecoder.h"
 #include "nsHtml5TreeOpExecutor.h"
 #include "nsHtml5UTF16Buffer.h"
 #include "nsIInputStream.h"
-#include "nsICharsetAlias.h"
 #include "mozilla/Mutex.h"
 #include "nsHtml5AtomTable.h"
 #include "nsHtml5Speculation.h"
 #include "nsITimer.h"
 #include "nsICharsetDetector.h"
+#include "nsAHtml5EncodingDeclarationHandler.h"
 
 class nsHtml5Parser;
 
 #define NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE 1024
 #define NS_HTML5_STREAM_PARSER_SNIFFING_BUFFER_SIZE 1024
 
 enum eBomState {
   /**
@@ -98,17 +98,19 @@ enum eBomState {
 
 enum eHtml5StreamState {
   STREAM_NOT_STARTED = 0,
   STREAM_BEING_READ = 1,
   STREAM_ENDED = 2
 };
 
 class nsHtml5StreamParser : public nsIStreamListener,
-                            public nsICharsetDetectionObserver {
+                            public nsICharsetDetectionObserver,
+                            public nsAHtml5EncodingDeclarationHandler
+{
 
   friend class nsHtml5RequestStopper;
   friend class nsHtml5DataAvailable;
   friend class nsHtml5StreamParserContinuation;
   friend class nsHtml5StreamParserTimerFlusher;
 
   public:
     NS_DECL_AND_IMPL_ZEROING_OPERATOR_NEW
@@ -128,21 +130,21 @@ class nsHtml5StreamParser : public nsISt
     NS_DECL_NSISTREAMLISTENER
     
     // nsICharsetDetectionObserver
     /**
      * Chardet calls this to report the detection result
      */
     NS_IMETHOD Notify(const char* aCharset, nsDetectionConfident aConf);
 
-    // EncodingDeclarationHandler
+    // nsAHtml5EncodingDeclarationHandler
     /**
      * Tree builder uses this to report a late <meta charset>
      */
-    void internalEncodingDeclaration(nsString* aEncoding);
+    virtual void internalEncodingDeclaration(nsString* aEncoding);
 
     // Not from an external interface
 
     /**
      *  Call this method once you've created a parser, and want to instruct it
      *  about what charset to load
      *
      *  @param   aCharset the charset of a document
--- a/parser/html/nsHtml5Tokenizer.cpp
+++ b/parser/html/nsHtml5Tokenizer.cpp
@@ -41,16 +41,17 @@
 #include "jArray.h"
 #include "nsHtml5DocumentMode.h"
 #include "nsHtml5ArrayCopy.h"
 #include "nsHtml5NamedCharacters.h"
 #include "nsHtml5Atoms.h"
 #include "nsHtml5ByteReadable.h"
 #include "nsIUnicodeDecoder.h"
 #include "nsAHtml5TreeBuilderState.h"
+#include "nsAHtml5EncodingDeclarationHandler.h"
 
 #include "nsHtml5TreeBuilder.h"
 #include "nsHtml5MetaScanner.h"
 #include "nsHtml5AttributeName.h"
 #include "nsHtml5ElementName.h"
 #include "nsHtml5HtmlAttributes.h"
 #include "nsHtml5StackNode.h"
 #include "nsHtml5UTF16Buffer.h"
@@ -3971,17 +3972,17 @@ nsHtml5Tokenizer::initializeWithoutStart
   confident = PR_FALSE;
   strBuf = jArray<PRUnichar,PRInt32>(64);
   longStrBuf = jArray<PRUnichar,PRInt32>(1024);
   line = 1;
   resetToDataState();
 }
 
 void 
-nsHtml5Tokenizer::setEncodingDeclarationHandler(nsHtml5StreamParser* encodingDeclarationHandler)
+nsHtml5Tokenizer::setEncodingDeclarationHandler(nsAHtml5EncodingDeclarationHandler* encodingDeclarationHandler)
 {
   this->encodingDeclarationHandler = encodingDeclarationHandler;
 }
 
 void
 nsHtml5Tokenizer::initializeStatics()
 {
   TITLE_ARR = jArray<PRUnichar,PRInt32>((PRUnichar*)TITLE_ARR_DATA, 5);
--- a/parser/html/nsHtml5Tokenizer.h
+++ b/parser/html/nsHtml5Tokenizer.h
@@ -42,16 +42,17 @@
 #include "jArray.h"
 #include "nsHtml5DocumentMode.h"
 #include "nsHtml5ArrayCopy.h"
 #include "nsHtml5NamedCharacters.h"
 #include "nsHtml5Atoms.h"
 #include "nsHtml5ByteReadable.h"
 #include "nsIUnicodeDecoder.h"
 #include "nsAHtml5TreeBuilderState.h"
+#include "nsAHtml5EncodingDeclarationHandler.h"
 
 class nsHtml5StreamParser;
 class nsHtml5SpeculativeLoader;
 
 class nsHtml5TreeBuilder;
 class nsHtml5MetaScanner;
 class nsHtml5AttributeName;
 class nsHtml5ElementName;
@@ -80,17 +81,17 @@ class nsHtml5Tokenizer
     static jArray<PRUnichar,PRInt32> XMP_ARR;
     static jArray<PRUnichar,PRInt32> TEXTAREA_ARR;
     static jArray<PRUnichar,PRInt32> IFRAME_ARR;
     static jArray<PRUnichar,PRInt32> NOEMBED_ARR;
     static jArray<PRUnichar,PRInt32> NOSCRIPT_ARR;
     static jArray<PRUnichar,PRInt32> NOFRAMES_ARR;
   protected:
     nsHtml5TreeBuilder* tokenHandler;
-    nsHtml5StreamParser* encodingDeclarationHandler;
+    nsAHtml5EncodingDeclarationHandler* encodingDeclarationHandler;
     PRBool lastCR;
     PRInt32 stateSave;
   private:
     PRInt32 returnStateSave;
   protected:
     PRInt32 index;
   private:
     PRBool forceQuirks;
@@ -260,17 +261,17 @@ class nsHtml5Tokenizer
     PRBool isNextCharOnNewLine();
     PRBool isPrevCR();
     PRInt32 getLine();
     PRInt32 getCol();
     PRBool isInDataState();
     void resetToDataState();
     void loadState(nsHtml5Tokenizer* other);
     void initializeWithoutStarting();
-    void setEncodingDeclarationHandler(nsHtml5StreamParser* encodingDeclarationHandler);
+    void setEncodingDeclarationHandler(nsAHtml5EncodingDeclarationHandler* encodingDeclarationHandler);
     static void initializeStatics();
     static void releaseStatics();
 };
 
 #ifdef nsHtml5Tokenizer_cpp__
 PRUnichar nsHtml5Tokenizer::LT_GT[] = { '<', '>' };
 PRUnichar nsHtml5Tokenizer::LT_SOLIDUS[] = { '<', '/' };
 PRUnichar nsHtml5Tokenizer::RSQB_RSQB[] = { ']', ']' };
--- a/parser/html/nsHtml5TreeBuilderCppSupplement.h
+++ b/parser/html/nsHtml5TreeBuilderCppSupplement.h
@@ -641,21 +641,22 @@ nsHtml5TreeBuilder::Flush()
   PRBool hasOps = !mOpQueue.IsEmpty();
   if (hasOps) {
     mOpSink->MoveOpsFrom(mOpQueue);
   }
   return hasOps;
 }
 
 void
-nsHtml5TreeBuilder::SetDocumentCharset(nsACString& aCharset)
+nsHtml5TreeBuilder::SetDocumentCharset(nsACString& aCharset, 
+                                       PRInt32 aCharsetSource)
 {
   nsHtml5TreeOperation* treeOp = mOpQueue.AppendElement();
   NS_ASSERTION(treeOp, "Tree op allocation failed.");
-  treeOp->Init(eTreeOpSetDocumentCharset, aCharset);  
+  treeOp->Init(eTreeOpSetDocumentCharset, aCharset, aCharsetSource);  
 }
 
 void
 nsHtml5TreeBuilder::StreamEnded()
 {
   // The fragment mode calls DidBuildModel from nsHtml5Parser. 
   // Letting DidBuildModel be called from the executor in the fragment case
   // confuses the EndLoad logic of nsHTMLDocument, since nsHTMLDocument
--- a/parser/html/nsHtml5TreeBuilderHSupplement.h
+++ b/parser/html/nsHtml5TreeBuilderHSupplement.h
@@ -80,17 +80,17 @@
     PRBool HasSpeculativeLoader() {
       return !!mSpeculativeLoader;
     }
     
     void DropSpeculativeLoader();
 
     PRBool Flush();
     
-    void SetDocumentCharset(nsACString& aCharset);
+    void SetDocumentCharset(nsACString& aCharset, PRInt32 aCharsetSource);
 
     void StreamEnded();
 
     void NeedsCharsetSwitchTo(const nsACString& aEncoding);
 
     void AddSnapshotToScript(nsAHtml5TreeBuilderState* aSnapshot, PRInt32 aLine);
 
     inline void Dispatch(nsIRunnable* aEvent) {
--- a/parser/html/nsHtml5TreeOpExecutor.cpp
+++ b/parser/html/nsHtml5TreeOpExecutor.cpp
@@ -196,56 +196,58 @@ nsHtml5TreeOpExecutor::SetParser(nsIPars
   return NS_OK;
 }
 
 void
 nsHtml5TreeOpExecutor::FlushPendingNotifications(mozFlushType aType)
 {
 }
 
-NS_IMETHODIMP
-nsHtml5TreeOpExecutor::SetDocumentCharset(nsACString& aCharset)
+void
+nsHtml5TreeOpExecutor::SetDocumentCharsetAndSource(nsACString& aCharset, PRInt32 aCharsetSource)
 {
+  if (mDocument) {
+    mDocument->SetDocumentCharacterSetSource(aCharsetSource);
+    mDocument->SetDocumentCharacterSet(aCharset);
+  }
   if (mDocShell) {
     // the following logic to get muCV is copied from
     // nsHTMLDocument::StartDocumentLoad
     // We need to call muCV->SetPrevDocCharacterSet here in case
     // the charset is detected by parser DetectMetaTag
     nsCOMPtr<nsIMarkupDocumentViewer> muCV;
     nsCOMPtr<nsIContentViewer> cv;
     mDocShell->GetContentViewer(getter_AddRefs(cv));
     if (cv) {
       muCV = do_QueryInterface(cv);
     } else {
       // in this block of code, if we get an error result, we return
       // it but if we get a null pointer, that's perfectly legal for
       // parent and parentContentViewer
       nsCOMPtr<nsIDocShellTreeItem> docShellAsItem =
         do_QueryInterface(mDocShell);
-      NS_ENSURE_TRUE(docShellAsItem, NS_ERROR_FAILURE);
+      if (!docShellAsItem) {
+    	  return;
+      }
       nsCOMPtr<nsIDocShellTreeItem> parentAsItem;
       docShellAsItem->GetSameTypeParent(getter_AddRefs(parentAsItem));
       nsCOMPtr<nsIDocShell> parent(do_QueryInterface(parentAsItem));
       if (parent) {
         nsCOMPtr<nsIContentViewer> parentContentViewer;
         nsresult rv =
           parent->GetContentViewer(getter_AddRefs(parentContentViewer));
         if (NS_SUCCEEDED(rv) && parentContentViewer) {
           muCV = do_QueryInterface(parentContentViewer);
         }
       }
     }
     if (muCV) {
       muCV->SetPrevDocCharacterSet(aCharset);
     }
   }
-  if (mDocument) {
-    mDocument->SetDocumentCharacterSet(aCharset);
-  }
-  return NS_OK;
 }
 
 nsISupports*
 nsHtml5TreeOpExecutor::GetTarget()
 {
   return mDocument;
 }
 
--- a/parser/html/nsHtml5TreeOpExecutor.h
+++ b/parser/html/nsHtml5TreeOpExecutor.h
@@ -161,19 +161,22 @@ class nsHtml5TreeOpExecutor : public nsC
     NS_IMETHOD SetParser(nsIParser* aParser);
 
     /**
      * No-op for backwards compat.
      */
     virtual void FlushPendingNotifications(mozFlushType aType);
 
     /**
-     * Sets mCharset
+     * Don't call. For interface compat only; use SetDocumentCharsetAndSource().
      */
-    NS_IMETHOD SetDocumentCharset(nsACString& aCharset);
+    NS_IMETHOD SetDocumentCharset(nsACString& aCharset) {
+      NS_NOTREACHED("No one should call this.");
+      return NS_ERROR_NOT_IMPLEMENTED;
+    }
 
     /**
      * Returns the document.
      */
     virtual nsISupports *GetTarget();
   
     // nsContentSink methods
     virtual nsresult ProcessBASETag(nsIContent* aContent);
@@ -204,16 +207,20 @@ class nsHtml5TreeOpExecutor : public nsC
       mDocumentBaseURI = mDocument->GetBaseURI();
       mHasProcessedBase = PR_TRUE;
     }
     
     void SetNodeInfoManager(nsNodeInfoManager* aManager) {
       mNodeInfoManager = aManager;
     }
     
+    // Not from interface
+
+    void SetDocumentCharsetAndSource(nsACString& aCharset, PRInt32 aCharsetSource);
+
     void SetStreamParser(nsHtml5StreamParser* aStreamParser) {
       mStreamParser = aStreamParser;
     }
     
     void InitializeDocWriteParserState(nsAHtml5TreeBuilderState* aState, PRInt32 aLine);
 
     PRBool IsScriptEnabled();
 
--- a/parser/html/nsHtml5TreeOperation.cpp
+++ b/parser/html/nsHtml5TreeOperation.cpp
@@ -574,18 +574,19 @@ nsHtml5TreeOperation::Perform(nsHtml5Tre
       return rv;
     }
     case eTreeOpFlushPendingAppendNotifications: {
       aBuilder->FlushPendingAppendNotifications();
       return rv;
     }
     case eTreeOpSetDocumentCharset: {
       char* str = mOne.charPtr;
+      PRInt32 charsetSource = mInt;
       nsDependentCString dependentString(str);
-      aBuilder->SetDocumentCharset(dependentString);
+      aBuilder->SetDocumentCharsetAndSource(dependentString, charsetSource);
       return rv;
     }
     case eTreeOpNeedsCharsetSwitchTo: {
       char* str = mOne.charPtr;
       aBuilder->NeedsCharsetSwitchTo(str);
       return rv;    
     }
     case eTreeOpUpdateStyleSheet: {
--- a/parser/html/nsHtml5TreeOperation.h
+++ b/parser/html/nsHtml5TreeOperation.h
@@ -133,16 +133,23 @@ class nsHtml5TreeOperation {
       NS_PRECONDITION(mOpCode == eTreeOpUninitialized,
         "Op code must be uninitialized when initializing.");
       NS_PRECONDITION(aNode, "Initialized tree op with null node.");
       NS_PRECONDITION(aParent, "Initialized tree op with null parent.");
       mOpCode = aOpCode;
       mOne.node = aNode;
       mTwo.node = aParent;
     }
+
+    inline void Init(eHtml5TreeOperation aOpCode,
+                     const nsACString& aString,
+                     PRInt32 aInt32) {
+      Init(aOpCode, aString);
+      mInt = aInt32;
+    }
 
     inline void Init(eHtml5TreeOperation aOpCode,
                      nsIContent** aNode,
                      nsIContent** aParent, 
                      nsIContent** aTable) {
       NS_PRECONDITION(mOpCode == eTreeOpUninitialized,
         "Op code must be uninitialized when initializing.");
       NS_PRECONDITION(aNode, "Initialized tree op with null node.");
--- a/parser/html/nsHtml5UTF16Buffer.cpp
+++ b/parser/html/nsHtml5UTF16Buffer.cpp
@@ -38,16 +38,17 @@
 #include "jArray.h"
 #include "nsHtml5DocumentMode.h"
 #include "nsHtml5ArrayCopy.h"
 #include "nsHtml5NamedCharacters.h"
 #include "nsHtml5Atoms.h"
 #include "nsHtml5ByteReadable.h"
 #include "nsIUnicodeDecoder.h"
 #include "nsAHtml5TreeBuilderState.h"
+#include "nsAHtml5EncodingDeclarationHandler.h"
 
 #include "nsHtml5Tokenizer.h"
 #include "nsHtml5TreeBuilder.h"
 #include "nsHtml5MetaScanner.h"
 #include "nsHtml5AttributeName.h"
 #include "nsHtml5ElementName.h"
 #include "nsHtml5HtmlAttributes.h"
 #include "nsHtml5StackNode.h"
--- a/parser/html/nsHtml5UTF16Buffer.h
+++ b/parser/html/nsHtml5UTF16Buffer.h
@@ -39,16 +39,17 @@
 #include "jArray.h"
 #include "nsHtml5DocumentMode.h"
 #include "nsHtml5ArrayCopy.h"
 #include "nsHtml5NamedCharacters.h"
 #include "nsHtml5Atoms.h"
 #include "nsHtml5ByteReadable.h"
 #include "nsIUnicodeDecoder.h"
 #include "nsAHtml5TreeBuilderState.h"
+#include "nsAHtml5EncodingDeclarationHandler.h"
 
 class nsHtml5StreamParser;
 class nsHtml5SpeculativeLoader;
 
 class nsHtml5Tokenizer;
 class nsHtml5TreeBuilder;
 class nsHtml5MetaScanner;
 class nsHtml5AttributeName;