new file mode 100644
--- /dev/null
+++ b/parser/html/nsAHtml5EncodingDeclarationHandler.h
@@ -0,0 +1,50 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is HTML Parser C++ Translator code.
+ *
+ * The Initial Developer of the Original Code is
+ * Mozilla Foundation.
+ * Portions created by the Initial Developer are Copyright (C) 2010
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ * Henri Sivonen <hsivonen@iki.fi>
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+#ifndef nsAHtml5EncodingDeclarationHandler_h_
+#define nsAHtml5EncodingDeclarationHandler_h_
+// Callback interface through which an in-document encoding declaration is reported.
+class nsAHtml5EncodingDeclarationHandler {
+ public:
+ // Reports a late <meta charset>; aEncoding holds the declared encoding name.
+ virtual void internalEncodingDeclaration(nsString* aEncoding) = 0;
+ // Virtual because this class serves as a polymorphic base.
+ virtual ~nsAHtml5EncodingDeclarationHandler() {
+ }
+};
+// NOTE(review): nsString is neither declared nor included here; this header seems to rely on its includers -- confirm.
+#endif /* nsAHtml5EncodingDeclarationHandler_h_ */
--- a/parser/html/nsHtml5AttributeName.cpp
+++ b/parser/html/nsHtml5AttributeName.cpp
@@ -38,16 +38,17 @@
#include "jArray.h"
#include "nsHtml5DocumentMode.h"
#include "nsHtml5ArrayCopy.h"
#include "nsHtml5NamedCharacters.h"
#include "nsHtml5Atoms.h"
#include "nsHtml5ByteReadable.h"
#include "nsIUnicodeDecoder.h"
#include "nsAHtml5TreeBuilderState.h"
+#include "nsAHtml5EncodingDeclarationHandler.h"
#include "nsHtml5Tokenizer.h"
#include "nsHtml5TreeBuilder.h"
#include "nsHtml5MetaScanner.h"
#include "nsHtml5ElementName.h"
#include "nsHtml5HtmlAttributes.h"
#include "nsHtml5StackNode.h"
#include "nsHtml5UTF16Buffer.h"
--- a/parser/html/nsHtml5AttributeName.h
+++ b/parser/html/nsHtml5AttributeName.h
@@ -39,16 +39,17 @@
#include "jArray.h"
#include "nsHtml5DocumentMode.h"
#include "nsHtml5ArrayCopy.h"
#include "nsHtml5NamedCharacters.h"
#include "nsHtml5Atoms.h"
#include "nsHtml5ByteReadable.h"
#include "nsIUnicodeDecoder.h"
#include "nsAHtml5TreeBuilderState.h"
+#include "nsAHtml5EncodingDeclarationHandler.h"
class nsHtml5StreamParser;
class nsHtml5SpeculativeLoader;
class nsHtml5Tokenizer;
class nsHtml5TreeBuilder;
class nsHtml5MetaScanner;
class nsHtml5ElementName;
--- a/parser/html/nsHtml5ElementName.cpp
+++ b/parser/html/nsHtml5ElementName.cpp
@@ -38,16 +38,17 @@
#include "jArray.h"
#include "nsHtml5DocumentMode.h"
#include "nsHtml5ArrayCopy.h"
#include "nsHtml5NamedCharacters.h"
#include "nsHtml5Atoms.h"
#include "nsHtml5ByteReadable.h"
#include "nsIUnicodeDecoder.h"
#include "nsAHtml5TreeBuilderState.h"
+#include "nsAHtml5EncodingDeclarationHandler.h"
#include "nsHtml5Tokenizer.h"
#include "nsHtml5TreeBuilder.h"
#include "nsHtml5MetaScanner.h"
#include "nsHtml5AttributeName.h"
#include "nsHtml5HtmlAttributes.h"
#include "nsHtml5StackNode.h"
#include "nsHtml5UTF16Buffer.h"
--- a/parser/html/nsHtml5ElementName.h
+++ b/parser/html/nsHtml5ElementName.h
@@ -39,16 +39,17 @@
#include "jArray.h"
#include "nsHtml5DocumentMode.h"
#include "nsHtml5ArrayCopy.h"
#include "nsHtml5NamedCharacters.h"
#include "nsHtml5Atoms.h"
#include "nsHtml5ByteReadable.h"
#include "nsIUnicodeDecoder.h"
#include "nsAHtml5TreeBuilderState.h"
+#include "nsAHtml5EncodingDeclarationHandler.h"
class nsHtml5StreamParser;
class nsHtml5SpeculativeLoader;
class nsHtml5Tokenizer;
class nsHtml5TreeBuilder;
class nsHtml5MetaScanner;
class nsHtml5AttributeName;
--- a/parser/html/nsHtml5HtmlAttributes.cpp
+++ b/parser/html/nsHtml5HtmlAttributes.cpp
@@ -39,16 +39,17 @@
#include "jArray.h"
#include "nsHtml5DocumentMode.h"
#include "nsHtml5ArrayCopy.h"
#include "nsHtml5NamedCharacters.h"
#include "nsHtml5Atoms.h"
#include "nsHtml5ByteReadable.h"
#include "nsIUnicodeDecoder.h"
#include "nsAHtml5TreeBuilderState.h"
+#include "nsAHtml5EncodingDeclarationHandler.h"
#include "nsHtml5Tokenizer.h"
#include "nsHtml5TreeBuilder.h"
#include "nsHtml5MetaScanner.h"
#include "nsHtml5AttributeName.h"
#include "nsHtml5ElementName.h"
#include "nsHtml5StackNode.h"
#include "nsHtml5UTF16Buffer.h"
--- a/parser/html/nsHtml5HtmlAttributes.h
+++ b/parser/html/nsHtml5HtmlAttributes.h
@@ -40,16 +40,17 @@
#include "jArray.h"
#include "nsHtml5DocumentMode.h"
#include "nsHtml5ArrayCopy.h"
#include "nsHtml5NamedCharacters.h"
#include "nsHtml5Atoms.h"
#include "nsHtml5ByteReadable.h"
#include "nsIUnicodeDecoder.h"
#include "nsAHtml5TreeBuilderState.h"
+#include "nsAHtml5EncodingDeclarationHandler.h"
class nsHtml5StreamParser;
class nsHtml5SpeculativeLoader;
class nsHtml5Tokenizer;
class nsHtml5TreeBuilder;
class nsHtml5MetaScanner;
class nsHtml5AttributeName;
--- a/parser/html/nsHtml5MetaScanner.cpp
+++ b/parser/html/nsHtml5MetaScanner.cpp
@@ -39,16 +39,17 @@
#include "jArray.h"
#include "nsHtml5DocumentMode.h"
#include "nsHtml5ArrayCopy.h"
#include "nsHtml5NamedCharacters.h"
#include "nsHtml5Atoms.h"
#include "nsHtml5ByteReadable.h"
#include "nsIUnicodeDecoder.h"
#include "nsAHtml5TreeBuilderState.h"
+#include "nsAHtml5EncodingDeclarationHandler.h"
#include "nsHtml5Tokenizer.h"
#include "nsHtml5TreeBuilder.h"
#include "nsHtml5AttributeName.h"
#include "nsHtml5ElementName.h"
#include "nsHtml5HtmlAttributes.h"
#include "nsHtml5StackNode.h"
#include "nsHtml5UTF16Buffer.h"
--- a/parser/html/nsHtml5MetaScanner.h
+++ b/parser/html/nsHtml5MetaScanner.h
@@ -40,16 +40,17 @@
#include "jArray.h"
#include "nsHtml5DocumentMode.h"
#include "nsHtml5ArrayCopy.h"
#include "nsHtml5NamedCharacters.h"
#include "nsHtml5Atoms.h"
#include "nsHtml5ByteReadable.h"
#include "nsIUnicodeDecoder.h"
#include "nsAHtml5TreeBuilderState.h"
+#include "nsAHtml5EncodingDeclarationHandler.h"
class nsHtml5StreamParser;
class nsHtml5SpeculativeLoader;
class nsHtml5Tokenizer;
class nsHtml5TreeBuilder;
class nsHtml5AttributeName;
class nsHtml5ElementName;
--- a/parser/html/nsHtml5Parser.cpp
+++ b/parser/html/nsHtml5Parser.cpp
@@ -55,16 +55,19 @@
#include "nsIScriptGlobalObjectOwner.h"
#include "nsIScriptSecurityManager.h"
#include "nsHtml5DocumentMode.h"
#include "nsHtml5Tokenizer.h"
#include "nsHtml5UTF16Buffer.h"
#include "nsHtml5TreeBuilder.h"
#include "nsHtml5Parser.h"
#include "nsHtml5AtomTable.h"
+#include "nsICharsetAlias.h"
+
+static NS_DEFINE_CID(kCharsetAliasCID, NS_CHARSETALIAS_CID);
NS_INTERFACE_TABLE_HEAD(nsHtml5Parser)
NS_INTERFACE_TABLE2(nsHtml5Parser, nsIParser, nsISupportsWeakReference)
NS_INTERFACE_TABLE_TO_MAP_SEGUE_CYCLE_COLLECTION(nsHtml5Parser)
NS_INTERFACE_MAP_END
NS_IMPL_CYCLE_COLLECTING_ADDREF(nsHtml5Parser)
NS_IMPL_CYCLE_COLLECTING_RELEASE(nsHtml5Parser)
@@ -134,17 +137,18 @@ nsHtml5Parser::SetCommand(eParserCommand
NS_IMETHODIMP_(void)
nsHtml5Parser::SetDocumentCharset(const nsACString& aCharset, PRInt32 aCharsetSource)
{
NS_PRECONDITION(!mExecutor->HasStarted(),
"Document charset set too late.");
NS_PRECONDITION(mStreamParser, "Tried to set charset on a script-only parser.");
mStreamParser->SetDocumentCharset(aCharset, aCharsetSource);
- mExecutor->SetDocumentCharset((nsACString&)aCharset);
+ mExecutor->SetDocumentCharsetAndSource((nsACString&)aCharset, aCharsetSource); // NOTE(review): the C-style cast strips const from aCharset
+ mCharsetSource = aCharsetSource; // used for the document.open() case only
}
NS_IMETHODIMP_(void)
nsHtml5Parser::SetParserFilter(nsIParserFilter* aFilter)
{
NS_ERROR("Attempt to set a parser filter on HTML5 parser.");
}
@@ -261,17 +265,19 @@ nsHtml5Parser::Parse(const nsAString& aS
// required grips to these, too.
nsRefPtr<nsHtml5StreamParser> streamKungFuDeathGrip(mStreamParser);
nsRefPtr<nsHtml5TreeOpExecutor> treeOpKungFuDeathGrip(mExecutor);
// Return early if the parser has processed EOF
if (!mExecutor->HasStarted()) {
NS_ASSERTION(!mStreamParser,
"Had stream parser but document.write started life cycle.");
+ // This is the first document.write() on a document.open()ed document
mExecutor->SetParser(this);
+ mTokenizer->setEncodingDeclarationHandler(this);
mTreeBuilder->setScriptingEnabled(mExecutor->IsScriptEnabled());
mTokenizer->start();
mExecutor->Start();
/*
* If you move the following line, be very careful not to cause
* WillBuildModel to be called before the document has had its
* script global object set.
*/
@@ -683,8 +689,40 @@ nsHtml5Parser::InitializeDocWriteParserS
void
nsHtml5Parser::ContinueAfterFailedCharsetSwitch()
{
NS_PRECONDITION(mStreamParser,
"Tried to continue after failed charset switch without a stream parser");
mStreamParser->ContinueAfterFailedCharsetSwitch();
}
+
+void
+nsHtml5Parser::internalEncodingDeclaration(nsString* aEncoding)
+{
+ // Adopts a declared encoding for a document.open()ed doc; this handler is
+ // only installed in that case. See bug 539887 and bug 255820.
+ if (mCharsetSource >= kCharsetFromMetaTag) { // this threshold corresponds to "confident" in the HTML5 spec
+ return; // the current source already ranks at or above a meta tag
+ }
+ // Fetch the charset alias service used below to resolve the declared name.
+ nsresult rv = NS_OK;
+ nsCOMPtr<nsICharsetAlias> calias(do_GetService(kCharsetAliasCID, &rv));
+ if (NS_FAILED(rv)) {
+ NS_NOTREACHED("Charset alias service not available.");
+ return;
+ }
+ nsCAutoString newEncoding;
+ CopyUTF16toUTF8(*aEncoding, newEncoding); // aEncoding is dereferenced without a null check
+
+ // XXX check HTML5 non-IANA aliases here
+
+ nsCAutoString preferred;
+
+ rv = calias->GetPreferred(newEncoding, preferred);
+ if (NS_FAILED(rv)) {
+ // the encoding name is bogus
+ return;
+ }
+ // Accept the declaration: record its source and hand it to the tree builder.
+ mCharsetSource = kCharsetFromMetaTag;
+ mTreeBuilder->SetDocumentCharset(preferred, mCharsetSource);
+}
--- a/parser/html/nsHtml5Parser.h
+++ b/parser/html/nsHtml5Parser.h
@@ -57,19 +57,21 @@
#include "nsCycleCollectionParticipant.h"
#include "nsIInputStream.h"
#include "nsDetectionConfident.h"
#include "nsHtml5UTF16Buffer.h"
#include "nsHtml5TreeOpExecutor.h"
#include "nsHtml5StreamParser.h"
#include "nsHtml5AtomTable.h"
#include "nsWeakReference.h"
+#include "nsAHtml5EncodingDeclarationHandler.h"
class nsHtml5Parser : public nsIParser,
- public nsSupportsWeakReference
+ public nsSupportsWeakReference,
+ public nsAHtml5EncodingDeclarationHandler
{
public:
NS_DECL_AND_IMPL_ZEROING_OPERATOR_NEW
NS_DECL_CYCLE_COLLECTING_ISUPPORTS
NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(nsHtml5Parser, nsIParser)
nsHtml5Parser();
@@ -272,16 +274,22 @@ class nsHtml5Parser : public nsIParser,
/**
* True if this is a script-created HTML5 parser.
*/
virtual PRBool IsScriptCreated();
/* End nsIParser */
+ // nsAHtml5EncodingDeclarationHandler
+ /**
+ * Tree builder uses this to report a late <meta charset>
+ */
+ virtual void internalEncodingDeclaration(nsString* aEncoding);
+
// Not from an external interface
// Non-inherited methods
public:
/**
* Initializes the parser to load from a channel.
*/
@@ -315,16 +323,23 @@ class nsHtml5Parser : public nsIParser,
/**
* Parse until pending data is exhausted or a script blocks the parser
*/
void ParseUntilBlocked();
// State variables
/**
+ * The charset source. This variable is used for script-created parsers
+ * only. When parsing from the stream, this variable can have a bogus
+ * value.
+ */
+ PRInt32 mCharsetSource;
+
+ /**
* Whether the last character tokenized was a carriage return (for CRLF)
*/
PRBool mLastWasCR;
/**
* The parser is in the fragment mode
*/
PRBool mFragmentMode;
--- a/parser/html/nsHtml5Portability.h
+++ b/parser/html/nsHtml5Portability.h
@@ -39,16 +39,17 @@
#include "jArray.h"
#include "nsHtml5DocumentMode.h"
#include "nsHtml5ArrayCopy.h"
#include "nsHtml5NamedCharacters.h"
#include "nsHtml5Atoms.h"
#include "nsHtml5ByteReadable.h"
#include "nsIUnicodeDecoder.h"
#include "nsAHtml5TreeBuilderState.h"
+#include "nsAHtml5EncodingDeclarationHandler.h"
class nsHtml5StreamParser;
class nsHtml5SpeculativeLoader;
class nsHtml5Tokenizer;
class nsHtml5TreeBuilder;
class nsHtml5MetaScanner;
class nsHtml5AttributeName;
--- a/parser/html/nsHtml5StackNode.cpp
+++ b/parser/html/nsHtml5StackNode.cpp
@@ -39,16 +39,17 @@
#include "jArray.h"
#include "nsHtml5DocumentMode.h"
#include "nsHtml5ArrayCopy.h"
#include "nsHtml5NamedCharacters.h"
#include "nsHtml5Atoms.h"
#include "nsHtml5ByteReadable.h"
#include "nsIUnicodeDecoder.h"
#include "nsAHtml5TreeBuilderState.h"
+#include "nsAHtml5EncodingDeclarationHandler.h"
#include "nsHtml5Tokenizer.h"
#include "nsHtml5TreeBuilder.h"
#include "nsHtml5MetaScanner.h"
#include "nsHtml5AttributeName.h"
#include "nsHtml5ElementName.h"
#include "nsHtml5HtmlAttributes.h"
#include "nsHtml5UTF16Buffer.h"
--- a/parser/html/nsHtml5StackNode.h
+++ b/parser/html/nsHtml5StackNode.h
@@ -40,16 +40,17 @@
#include "jArray.h"
#include "nsHtml5DocumentMode.h"
#include "nsHtml5ArrayCopy.h"
#include "nsHtml5NamedCharacters.h"
#include "nsHtml5Atoms.h"
#include "nsHtml5ByteReadable.h"
#include "nsIUnicodeDecoder.h"
#include "nsAHtml5TreeBuilderState.h"
+#include "nsAHtml5EncodingDeclarationHandler.h"
class nsHtml5StreamParser;
class nsHtml5SpeculativeLoader;
class nsHtml5Tokenizer;
class nsHtml5TreeBuilder;
class nsHtml5MetaScanner;
class nsHtml5AttributeName;
--- a/parser/html/nsHtml5StateSnapshot.cpp
+++ b/parser/html/nsHtml5StateSnapshot.cpp
@@ -38,16 +38,17 @@
#include "jArray.h"
#include "nsHtml5DocumentMode.h"
#include "nsHtml5ArrayCopy.h"
#include "nsHtml5NamedCharacters.h"
#include "nsHtml5Atoms.h"
#include "nsHtml5ByteReadable.h"
#include "nsIUnicodeDecoder.h"
#include "nsAHtml5TreeBuilderState.h"
+#include "nsAHtml5EncodingDeclarationHandler.h"
#include "nsHtml5Tokenizer.h"
#include "nsHtml5TreeBuilder.h"
#include "nsHtml5MetaScanner.h"
#include "nsHtml5AttributeName.h"
#include "nsHtml5ElementName.h"
#include "nsHtml5HtmlAttributes.h"
#include "nsHtml5StackNode.h"
--- a/parser/html/nsHtml5StateSnapshot.h
+++ b/parser/html/nsHtml5StateSnapshot.h
@@ -39,16 +39,17 @@
#include "jArray.h"
#include "nsHtml5DocumentMode.h"
#include "nsHtml5ArrayCopy.h"
#include "nsHtml5NamedCharacters.h"
#include "nsHtml5Atoms.h"
#include "nsHtml5ByteReadable.h"
#include "nsIUnicodeDecoder.h"
#include "nsAHtml5TreeBuilderState.h"
+#include "nsAHtml5EncodingDeclarationHandler.h"
class nsHtml5StreamParser;
class nsHtml5SpeculativeLoader;
class nsHtml5Tokenizer;
class nsHtml5TreeBuilder;
class nsHtml5MetaScanner;
class nsHtml5AttributeName;
--- a/parser/html/nsHtml5StreamParser.cpp
+++ b/parser/html/nsHtml5StreamParser.cpp
@@ -35,16 +35,17 @@
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#include "nsHtml5StreamParser.h"
#include "nsICharsetConverterManager.h"
+#include "nsICharsetAlias.h"
#include "nsServiceManagerUtils.h"
#include "nsEncoderDecoderUtils.h"
#include "nsContentUtils.h"
#include "nsHtml5Tokenizer.h"
#include "nsIHttpChannel.h"
#include "nsHtml5Parser.h"
#include "nsHtml5TreeBuilder.h"
#include "nsHtml5AtomTable.h"
@@ -209,17 +210,17 @@ nsHtml5StreamParser::GetChannel(nsIChann
NS_IMETHODIMP
nsHtml5StreamParser::Notify(const char* aCharset, nsDetectionConfident aConf)
{
NS_ASSERTION(IsParserThread(), "Wrong thread!");
if (aConf == eBestAnswer || aConf == eSureAnswer) {
mCharset.Assign(aCharset);
mCharsetSource = kCharsetFromAutoDetection;
- mTreeBuilder->SetDocumentCharset(mCharset);
+ mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
}
return NS_OK;
}
nsresult
nsHtml5StreamParser::SetupDecodingAndWriteSniffingBufferAndCurrentSegment(const PRUint8* aFromSegment, // can be null
PRUint32 aCount,
PRUint32* aWriteCount)
@@ -228,17 +229,17 @@ nsHtml5StreamParser::SetupDecodingAndWri
nsresult rv = NS_OK;
nsCOMPtr<nsICharsetConverterManager> convManager = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv);
NS_ENSURE_SUCCESS(rv, rv);
rv = convManager->GetUnicodeDecoder(mCharset.get(), getter_AddRefs(mUnicodeDecoder));
if (rv == NS_ERROR_UCONV_NOCONV) {
mCharset.Assign("windows-1252"); // lower case is the raw form
mCharsetSource = kCharsetFromWeakDocTypeDefault;
rv = convManager->GetUnicodeDecoderRaw(mCharset.get(), getter_AddRefs(mUnicodeDecoder));
- mTreeBuilder->SetDocumentCharset(mCharset);
+ mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
}
NS_ENSURE_SUCCESS(rv, rv);
mUnicodeDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Recover);
return WriteSniffingBufferAndCurrentSegment(aFromSegment, aCount, aWriteCount);
}
nsresult
nsHtml5StreamParser::WriteSniffingBufferAndCurrentSegment(const PRUint8* aFromSegment, // can be null
@@ -266,17 +267,17 @@ nsHtml5StreamParser::SetupDecodingFromBo
NS_ASSERTION(IsParserThread(), "Wrong thread!");
nsresult rv = NS_OK;
nsCOMPtr<nsICharsetConverterManager> convManager = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv);
NS_ENSURE_SUCCESS(rv, rv);
rv = convManager->GetUnicodeDecoderRaw(aDecoderCharsetName, getter_AddRefs(mUnicodeDecoder));
NS_ENSURE_SUCCESS(rv, rv);
mCharset.Assign(aCharsetName);
mCharsetSource = kCharsetFromByteOrderMark;
- mTreeBuilder->SetDocumentCharset(mCharset);
+ mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
mSniffingBuffer = nsnull;
mMetaScanner = nsnull;
mBomState = BOM_SNIFFING_OVER;
return rv;
}
nsresult
nsHtml5StreamParser::FinalizeSniffing(const PRUint8* aFromSegment, // can be null
@@ -304,17 +305,17 @@ nsHtml5StreamParser::FinalizeSniffing(co
rv = mChardet->Done();
NS_ENSURE_SUCCESS(rv, rv);
// fall thru; callback may have changed charset
}
if (mCharsetSource == kCharsetUninitialized) {
// Hopefully this case is never needed, but dealing with it anyway
mCharset.Assign("windows-1252");
mCharsetSource = kCharsetFromWeakDocTypeDefault;
- mTreeBuilder->SetDocumentCharset(mCharset);
+ mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
}
return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment, aCount, aWriteCount);
}
nsresult
nsHtml5StreamParser::SniffStreamBytes(const PRUint8* aFromSegment,
PRUint32 aCount,
PRUint32* aWriteCount)
@@ -399,30 +400,30 @@ nsHtml5StreamParser::SniffStreamBytes(co
// this is the last buffer
PRUint32 countToSniffingLimit = NS_HTML5_STREAM_PARSER_SNIFFING_BUFFER_SIZE - mSniffingLength;
nsHtml5ByteReadable readable(aFromSegment, aFromSegment + countToSniffingLimit);
mMetaScanner->sniff(&readable, getter_AddRefs(mUnicodeDecoder), mCharset);
if (mUnicodeDecoder) {
mUnicodeDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Recover);
// meta scan successful
mCharsetSource = kCharsetFromMetaPrescan;
- mTreeBuilder->SetDocumentCharset(mCharset);
+ mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
mMetaScanner = nsnull;
return WriteSniffingBufferAndCurrentSegment(aFromSegment, aCount, aWriteCount);
}
return FinalizeSniffing(aFromSegment, aCount, aWriteCount, countToSniffingLimit);
}
// not the last buffer
nsHtml5ByteReadable readable(aFromSegment, aFromSegment + aCount);
mMetaScanner->sniff(&readable, getter_AddRefs(mUnicodeDecoder), mCharset);
if (mUnicodeDecoder) {
// meta scan successful
mCharsetSource = kCharsetFromMetaPrescan;
- mTreeBuilder->SetDocumentCharset(mCharset);
+ mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
mMetaScanner = nsnull;
return WriteSniffingBufferAndCurrentSegment(aFromSegment, aCount, aWriteCount);
}
if (!mSniffingBuffer) {
mSniffingBuffer = new PRUint8[NS_HTML5_STREAM_PARSER_SNIFFING_BUFFER_SIZE];
}
memcpy(mSniffingBuffer + mSniffingLength, aFromSegment, aCount);
mSniffingLength += aCount;
--- a/parser/html/nsHtml5StreamParser.h
+++ b/parser/html/nsHtml5StreamParser.h
@@ -43,22 +43,22 @@
#include "nsCOMPtr.h"
#include "nsIStreamListener.h"
#include "nsICharsetDetectionObserver.h"
#include "nsHtml5MetaScanner.h"
#include "nsIUnicodeDecoder.h"
#include "nsHtml5TreeOpExecutor.h"
#include "nsHtml5UTF16Buffer.h"
#include "nsIInputStream.h"
-#include "nsICharsetAlias.h"
#include "mozilla/Mutex.h"
#include "nsHtml5AtomTable.h"
#include "nsHtml5Speculation.h"
#include "nsITimer.h"
#include "nsICharsetDetector.h"
+#include "nsAHtml5EncodingDeclarationHandler.h"
class nsHtml5Parser;
#define NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE 1024
#define NS_HTML5_STREAM_PARSER_SNIFFING_BUFFER_SIZE 1024
enum eBomState {
/**
@@ -98,17 +98,19 @@ enum eBomState {
enum eHtml5StreamState {
STREAM_NOT_STARTED = 0,
STREAM_BEING_READ = 1,
STREAM_ENDED = 2
};
class nsHtml5StreamParser : public nsIStreamListener,
- public nsICharsetDetectionObserver {
+ public nsICharsetDetectionObserver,
+ public nsAHtml5EncodingDeclarationHandler
+{
friend class nsHtml5RequestStopper;
friend class nsHtml5DataAvailable;
friend class nsHtml5StreamParserContinuation;
friend class nsHtml5StreamParserTimerFlusher;
public:
NS_DECL_AND_IMPL_ZEROING_OPERATOR_NEW
@@ -128,21 +130,21 @@ class nsHtml5StreamParser : public nsISt
NS_DECL_NSISTREAMLISTENER
// nsICharsetDetectionObserver
/**
* Chardet calls this to report the detection result
*/
NS_IMETHOD Notify(const char* aCharset, nsDetectionConfident aConf);
- // EncodingDeclarationHandler
+ // nsAHtml5EncodingDeclarationHandler
/**
* Tree builder uses this to report a late <meta charset>
*/
- void internalEncodingDeclaration(nsString* aEncoding);
+ virtual void internalEncodingDeclaration(nsString* aEncoding);
// Not from an external interface
/**
* Call this method once you've created a parser, and want to instruct it
* about what charset to load
*
* @param aCharset the charset of a document
--- a/parser/html/nsHtml5Tokenizer.cpp
+++ b/parser/html/nsHtml5Tokenizer.cpp
@@ -41,16 +41,17 @@
#include "jArray.h"
#include "nsHtml5DocumentMode.h"
#include "nsHtml5ArrayCopy.h"
#include "nsHtml5NamedCharacters.h"
#include "nsHtml5Atoms.h"
#include "nsHtml5ByteReadable.h"
#include "nsIUnicodeDecoder.h"
#include "nsAHtml5TreeBuilderState.h"
+#include "nsAHtml5EncodingDeclarationHandler.h"
#include "nsHtml5TreeBuilder.h"
#include "nsHtml5MetaScanner.h"
#include "nsHtml5AttributeName.h"
#include "nsHtml5ElementName.h"
#include "nsHtml5HtmlAttributes.h"
#include "nsHtml5StackNode.h"
#include "nsHtml5UTF16Buffer.h"
@@ -3971,17 +3972,17 @@ nsHtml5Tokenizer::initializeWithoutStart
confident = PR_FALSE;
strBuf = jArray<PRUnichar,PRInt32>(64);
longStrBuf = jArray<PRUnichar,PRInt32>(1024);
line = 1;
resetToDataState();
}
void
-nsHtml5Tokenizer::setEncodingDeclarationHandler(nsHtml5StreamParser* encodingDeclarationHandler)
+nsHtml5Tokenizer::setEncodingDeclarationHandler(nsAHtml5EncodingDeclarationHandler* encodingDeclarationHandler)
{
this->encodingDeclarationHandler = encodingDeclarationHandler;
}
void
nsHtml5Tokenizer::initializeStatics()
{
TITLE_ARR = jArray<PRUnichar,PRInt32>((PRUnichar*)TITLE_ARR_DATA, 5);
--- a/parser/html/nsHtml5Tokenizer.h
+++ b/parser/html/nsHtml5Tokenizer.h
@@ -42,16 +42,17 @@
#include "jArray.h"
#include "nsHtml5DocumentMode.h"
#include "nsHtml5ArrayCopy.h"
#include "nsHtml5NamedCharacters.h"
#include "nsHtml5Atoms.h"
#include "nsHtml5ByteReadable.h"
#include "nsIUnicodeDecoder.h"
#include "nsAHtml5TreeBuilderState.h"
+#include "nsAHtml5EncodingDeclarationHandler.h"
class nsHtml5StreamParser;
class nsHtml5SpeculativeLoader;
class nsHtml5TreeBuilder;
class nsHtml5MetaScanner;
class nsHtml5AttributeName;
class nsHtml5ElementName;
@@ -80,17 +81,17 @@ class nsHtml5Tokenizer
static jArray<PRUnichar,PRInt32> XMP_ARR;
static jArray<PRUnichar,PRInt32> TEXTAREA_ARR;
static jArray<PRUnichar,PRInt32> IFRAME_ARR;
static jArray<PRUnichar,PRInt32> NOEMBED_ARR;
static jArray<PRUnichar,PRInt32> NOSCRIPT_ARR;
static jArray<PRUnichar,PRInt32> NOFRAMES_ARR;
protected:
nsHtml5TreeBuilder* tokenHandler;
- nsHtml5StreamParser* encodingDeclarationHandler;
+ nsAHtml5EncodingDeclarationHandler* encodingDeclarationHandler;
PRBool lastCR;
PRInt32 stateSave;
private:
PRInt32 returnStateSave;
protected:
PRInt32 index;
private:
PRBool forceQuirks;
@@ -260,17 +261,17 @@ class nsHtml5Tokenizer
PRBool isNextCharOnNewLine();
PRBool isPrevCR();
PRInt32 getLine();
PRInt32 getCol();
PRBool isInDataState();
void resetToDataState();
void loadState(nsHtml5Tokenizer* other);
void initializeWithoutStarting();
- void setEncodingDeclarationHandler(nsHtml5StreamParser* encodingDeclarationHandler);
+ void setEncodingDeclarationHandler(nsAHtml5EncodingDeclarationHandler* encodingDeclarationHandler);
static void initializeStatics();
static void releaseStatics();
};
#ifdef nsHtml5Tokenizer_cpp__
PRUnichar nsHtml5Tokenizer::LT_GT[] = { '<', '>' };
PRUnichar nsHtml5Tokenizer::LT_SOLIDUS[] = { '<', '/' };
PRUnichar nsHtml5Tokenizer::RSQB_RSQB[] = { ']', ']' };
--- a/parser/html/nsHtml5TreeBuilderCppSupplement.h
+++ b/parser/html/nsHtml5TreeBuilderCppSupplement.h
@@ -641,21 +641,22 @@ nsHtml5TreeBuilder::Flush()
PRBool hasOps = !mOpQueue.IsEmpty();
if (hasOps) {
mOpSink->MoveOpsFrom(mOpQueue);
}
return hasOps;
}
void
-nsHtml5TreeBuilder::SetDocumentCharset(nsACString& aCharset)
+nsHtml5TreeBuilder::SetDocumentCharset(nsACString& aCharset,
+ PRInt32 aCharsetSource)
{
nsHtml5TreeOperation* treeOp = mOpQueue.AppendElement();
NS_ASSERTION(treeOp, "Tree op allocation failed.");
- treeOp->Init(eTreeOpSetDocumentCharset, aCharset);
+ treeOp->Init(eTreeOpSetDocumentCharset, aCharset, aCharsetSource); // the queued op carries the source along with the charset
}
void
nsHtml5TreeBuilder::StreamEnded()
{
// The fragment mode calls DidBuildModel from nsHtml5Parser.
// Letting DidBuildModel be called from the executor in the fragment case
// confuses the EndLoad logic of nsHTMLDocument, since nsHTMLDocument
--- a/parser/html/nsHtml5TreeBuilderHSupplement.h
+++ b/parser/html/nsHtml5TreeBuilderHSupplement.h
@@ -80,17 +80,17 @@
PRBool HasSpeculativeLoader() {
return !!mSpeculativeLoader;
}
void DropSpeculativeLoader();
PRBool Flush();
- void SetDocumentCharset(nsACString& aCharset);
+ void SetDocumentCharset(nsACString& aCharset, PRInt32 aCharsetSource);
void StreamEnded();
void NeedsCharsetSwitchTo(const nsACString& aEncoding);
void AddSnapshotToScript(nsAHtml5TreeBuilderState* aSnapshot, PRInt32 aLine);
inline void Dispatch(nsIRunnable* aEvent) {
--- a/parser/html/nsHtml5TreeOpExecutor.cpp
+++ b/parser/html/nsHtml5TreeOpExecutor.cpp
@@ -196,56 +196,58 @@ nsHtml5TreeOpExecutor::SetParser(nsIPars
return NS_OK;
}
void
nsHtml5TreeOpExecutor::FlushPendingNotifications(mozFlushType aType)
{
}
-NS_IMETHODIMP
-nsHtml5TreeOpExecutor::SetDocumentCharset(nsACString& aCharset)
+void
+nsHtml5TreeOpExecutor::SetDocumentCharsetAndSource(nsACString& aCharset, PRInt32 aCharsetSource)
{
+ if (mDocument) { // record both the source and the charset on the document
+ mDocument->SetDocumentCharacterSetSource(aCharsetSource);
+ mDocument->SetDocumentCharacterSet(aCharset);
+ }
if (mDocShell) {
// the following logic to get muCV is copied from
// nsHTMLDocument::StartDocumentLoad
// We need to call muCV->SetPrevDocCharacterSet here in case
// the charset is detected by parser DetectMetaTag
nsCOMPtr<nsIMarkupDocumentViewer> muCV;
nsCOMPtr<nsIContentViewer> cv;
mDocShell->GetContentViewer(getter_AddRefs(cv));
if (cv) {
muCV = do_QueryInterface(cv);
} else {
// in this block of code, if we get an error result, we return
// it but if we get a null pointer, that's perfectly legal for
// parent and parentContentViewer
nsCOMPtr<nsIDocShellTreeItem> docShellAsItem =
do_QueryInterface(mDocShell);
- NS_ENSURE_TRUE(docShellAsItem, NS_ERROR_FAILURE);
+ if (!docShellAsItem) {
+ return; // document already updated above; only the viewer update is skipped
+ }
nsCOMPtr<nsIDocShellTreeItem> parentAsItem;
docShellAsItem->GetSameTypeParent(getter_AddRefs(parentAsItem));
nsCOMPtr<nsIDocShell> parent(do_QueryInterface(parentAsItem));
if (parent) {
nsCOMPtr<nsIContentViewer> parentContentViewer;
nsresult rv =
parent->GetContentViewer(getter_AddRefs(parentContentViewer));
if (NS_SUCCEEDED(rv) && parentContentViewer) {
muCV = do_QueryInterface(parentContentViewer);
}
}
}
if (muCV) {
muCV->SetPrevDocCharacterSet(aCharset);
}
}
- if (mDocument) {
- mDocument->SetDocumentCharacterSet(aCharset);
- }
- return NS_OK;
}
nsISupports*
nsHtml5TreeOpExecutor::GetTarget()
{
return mDocument;
}
--- a/parser/html/nsHtml5TreeOpExecutor.h
+++ b/parser/html/nsHtml5TreeOpExecutor.h
@@ -161,19 +161,22 @@ class nsHtml5TreeOpExecutor : public nsC
NS_IMETHOD SetParser(nsIParser* aParser);
/**
* No-op for backwards compat.
*/
virtual void FlushPendingNotifications(mozFlushType aType);
/**
- * Sets mCharset
+ * Don't call; kept for interface compat only. Use SetDocumentCharsetAndSource().
*/
- NS_IMETHOD SetDocumentCharset(nsACString& aCharset);
+ NS_IMETHOD SetDocumentCharset(nsACString& aCharset) {
+ NS_NOTREACHED("No one should call this.");
+ return NS_ERROR_NOT_IMPLEMENTED;
+ }
/**
* Returns the document.
*/
virtual nsISupports *GetTarget();
// nsContentSink methods
virtual nsresult ProcessBASETag(nsIContent* aContent);
@@ -204,16 +207,20 @@ class nsHtml5TreeOpExecutor : public nsC
mDocumentBaseURI = mDocument->GetBaseURI();
mHasProcessedBase = PR_TRUE;
}
void SetNodeInfoManager(nsNodeInfoManager* aManager) {
mNodeInfoManager = aManager;
}
+ // Not from interface
+
+ void SetDocumentCharsetAndSource(nsACString& aCharset, PRInt32 aCharsetSource);
+
void SetStreamParser(nsHtml5StreamParser* aStreamParser) {
mStreamParser = aStreamParser;
}
void InitializeDocWriteParserState(nsAHtml5TreeBuilderState* aState, PRInt32 aLine);
PRBool IsScriptEnabled();
--- a/parser/html/nsHtml5TreeOperation.cpp
+++ b/parser/html/nsHtml5TreeOperation.cpp
@@ -574,18 +574,19 @@ nsHtml5TreeOperation::Perform(nsHtml5Tre
return rv;
}
case eTreeOpFlushPendingAppendNotifications: {
aBuilder->FlushPendingAppendNotifications();
return rv;
}
case eTreeOpSetDocumentCharset: {
char* str = mOne.charPtr;
+ PRInt32 charsetSource = mInt;
nsDependentCString dependentString(str);
- aBuilder->SetDocumentCharset(dependentString);
+ aBuilder->SetDocumentCharsetAndSource(dependentString, charsetSource);
return rv;
}
case eTreeOpNeedsCharsetSwitchTo: {
char* str = mOne.charPtr;
aBuilder->NeedsCharsetSwitchTo(str);
return rv;
}
case eTreeOpUpdateStyleSheet: {
--- a/parser/html/nsHtml5TreeOperation.h
+++ b/parser/html/nsHtml5TreeOperation.h
@@ -133,16 +133,23 @@ class nsHtml5TreeOperation {
NS_PRECONDITION(mOpCode == eTreeOpUninitialized,
"Op code must be uninitialized when initializing.");
NS_PRECONDITION(aNode, "Initialized tree op with null node.");
NS_PRECONDITION(aParent, "Initialized tree op with null parent.");
mOpCode = aOpCode;
mOne.node = aNode;
mTwo.node = aParent;
}
+
+ inline void Init(eHtml5TreeOperation aOpCode,
+ const nsACString& aString,
+ PRInt32 aInt32) {
+ Init(aOpCode, aString); // delegate to the (op code, string) overload
+ mInt = aInt32; // extra integer payload stored alongside the string
+ }
inline void Init(eHtml5TreeOperation aOpCode,
nsIContent** aNode,
nsIContent** aParent,
nsIContent** aTable) {
NS_PRECONDITION(mOpCode == eTreeOpUninitialized,
"Op code must be uninitialized when initializing.");
NS_PRECONDITION(aNode, "Initialized tree op with null node.");
--- a/parser/html/nsHtml5UTF16Buffer.cpp
+++ b/parser/html/nsHtml5UTF16Buffer.cpp
@@ -38,16 +38,17 @@
#include "jArray.h"
#include "nsHtml5DocumentMode.h"
#include "nsHtml5ArrayCopy.h"
#include "nsHtml5NamedCharacters.h"
#include "nsHtml5Atoms.h"
#include "nsHtml5ByteReadable.h"
#include "nsIUnicodeDecoder.h"
#include "nsAHtml5TreeBuilderState.h"
+#include "nsAHtml5EncodingDeclarationHandler.h"
#include "nsHtml5Tokenizer.h"
#include "nsHtml5TreeBuilder.h"
#include "nsHtml5MetaScanner.h"
#include "nsHtml5AttributeName.h"
#include "nsHtml5ElementName.h"
#include "nsHtml5HtmlAttributes.h"
#include "nsHtml5StackNode.h"
--- a/parser/html/nsHtml5UTF16Buffer.h
+++ b/parser/html/nsHtml5UTF16Buffer.h
@@ -39,16 +39,17 @@
#include "jArray.h"
#include "nsHtml5DocumentMode.h"
#include "nsHtml5ArrayCopy.h"
#include "nsHtml5NamedCharacters.h"
#include "nsHtml5Atoms.h"
#include "nsHtml5ByteReadable.h"
#include "nsIUnicodeDecoder.h"
#include "nsAHtml5TreeBuilderState.h"
+#include "nsAHtml5EncodingDeclarationHandler.h"
class nsHtml5StreamParser;
class nsHtml5SpeculativeLoader;
class nsHtml5Tokenizer;
class nsHtml5TreeBuilder;
class nsHtml5MetaScanner;
class nsHtml5AttributeName;