author | Masatoshi Kimura <VYV03354@nifty.ne.jp> |
Wed, 07 Nov 2012 18:04:22 -0500 | |
changeset 112628 | eb7d1fd8a86863057863c4bd2c971a11e90fcdf0 |
parent 112627 | c0af6d983c0ea12ba4d78a6c408cb75fe38282bb |
child 112629 | 91879bfc7042f9f9728349b4e7e8c28f1f94b2bf |
push id | 23833 |
push user | emorley@mozilla.com |
push date | Thu, 08 Nov 2012 10:20:57 +0000 |
treeherder | mozilla-central@e0d7b394462b [default view] [failures only] |
perfherder | [talos] [build metrics] [platform microbench] (compared to previous push) |
reviewers | hsivonen |
bugs | 801402 |
milestone | 19.0a1 |
first release with | nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
|
last release without | nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
|
--- a/dom/locales/en-US/chrome/layout/htmlparser.properties +++ b/dom/locales/en-US/chrome/layout/htmlparser.properties @@ -9,17 +9,16 @@ EncNoDeclaration=The character encoding EncLateMetaFrame=The character encoding declaration of the framed HTML document was not found when prescanning the first 1024 bytes of the file. When viewed without the document framing it, the page will reload automatically. The encoding declaration needs to be moved to be within the first 1024 bytes of the file. EncLateMeta=The character encoding declaration of the HTML document was not found when prescanning the first 1024 bytes of the file. When viewed in a differently-configured browser, this page will reload automatically. The encoding declaration needs to be moved to be within the first 1024 bytes of the file. EncLateMetaReload=The page was reloaded, because the character encoding declaration of the HTML document was not found when prescanning the first 1024 bytes of the file. The encoding declaration needs to be moved to be within the first 1024 bytes of the file. EncLateMetaTooLate=The character encoding declaration of document was found too late for it to take effect. The encoding declaration needs to be moved to be within the first 1024 bytes of the file. EncMetaUnsupported=An unsupported character encoding was declared for the HTML document using a meta tag. The declaration was ignored. EncProtocolUnsupported=An unsupported character encoding was declared on the transfer protocol level. The declaration was ignored. EncBomlessUtf16=Detected UTF-16-encoded Basic Latin-only text without a byte order mark and without a transfer protocol-level declaration. Encoding this content in UTF-16 is inefficient and the character encoding should have been declared in any case. EncMetaUtf16=A meta tag was used to declare the character encoding as UTF-16. This was interpreted as an UTF-8 declaration instead. 
-EncMetaNonRoughSuperset=A meta tag was used to declare a character encoding the does not encode the Basic Latin range roughly like US-ASCII. The declaration was ignored. # The bulk of the messages below are derived from # http://hg.mozilla.org/projects/htmlparser/file/1f633cef7de7/src/nu/validator/htmlparser/impl/ErrorReportingTokenizer.java # which is available under the MIT license. # Tokenizer errors errGarbageAfterLtSlash=Garbage after “</”. errLtSlashGt=Saw “</>”. Probable causes: Unescaped “<” (escape as “&lt;”) or mistyped end tag.
--- a/parser/html/nsHtml5MetaScannerCppSupplement.h +++ b/parser/html/nsHtml5MetaScannerCppSupplement.h @@ -1,18 +1,20 @@ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #include "nsICharsetConverterManager.h" #include "nsServiceManagerUtils.h" -#include "nsCharsetAlias.h" #include "nsEncoderDecoderUtils.h" #include "nsTraceRefcnt.h" +#include "mozilla/dom/EncodingUtils.h" + +using mozilla::dom::EncodingUtils; void nsHtml5MetaScanner::sniff(nsHtml5ByteReadable* bytes, nsIUnicodeDecoder** decoder, nsACString& charset) { readable = bytes; stateLoop(stateSave); readable = nullptr; if (mUnicodeDecoder) { @@ -43,18 +45,17 @@ nsHtml5MetaScanner::tryCharset(nsString* res = convManager->GetUnicodeDecoderRaw(mCharset.get(), getter_AddRefs(mUnicodeDecoder)); if (NS_FAILED(res)) { NS_ERROR("Could not get decoder for UTF-8."); return false; } return true; } nsAutoCString preferred; - res = nsCharsetAlias::GetPreferred(encoding, preferred); - if (NS_FAILED(res)) { + if (!EncodingUtils::FindEncodingForLabel(encoding, preferred)) { return false; } if (preferred.LowerCaseEqualsLiteral("utf-16") || preferred.LowerCaseEqualsLiteral("utf-16be") || preferred.LowerCaseEqualsLiteral("utf-16le") || preferred.LowerCaseEqualsLiteral("utf-7") || preferred.LowerCaseEqualsLiteral("x-imap4-modified-utf7")) { return false;
--- a/parser/html/nsHtml5StreamParser.cpp +++ b/parser/html/nsHtml5StreamParser.cpp @@ -1,17 +1,16 @@ /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* vim: set sw=2 ts=2 et tw=79: */ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #include "nsHtml5StreamParser.h" #include "nsICharsetConverterManager.h" -#include "nsCharsetAlias.h" #include "nsServiceManagerUtils.h" #include "nsEncoderDecoderUtils.h" #include "nsContentUtils.h" #include "nsHtml5Tokenizer.h" #include "nsIHttpChannel.h" #include "nsHtml5Parser.h" #include "nsHtml5TreeBuilder.h" #include "nsHtml5AtomTable.h" @@ -21,18 +20,20 @@ #include "mozilla/Preferences.h" #include "nsHtml5Highlighter.h" #include "expat_config.h" #include "expat.h" #include "nsINestedURI.h" #include "nsCharsetSource.h" #include "nsIWyciwygChannel.h" +#include "mozilla/dom/EncodingUtils.h" + using namespace mozilla; - +using mozilla::dom::EncodingUtils; int32_t nsHtml5StreamParser::sTimerInitialDelay = 120; int32_t nsHtml5StreamParser::sTimerSubsequentDelay = 120; // static void nsHtml5StreamParser::InitializeStatics() { @@ -1188,84 +1189,52 @@ nsHtml5StreamParser::OnDataAvailable(nsI NS_WARNING("Dispatching DataAvailable event failed."); } return rv; } bool nsHtml5StreamParser::PreferredForInternalEncodingDecl(nsACString& aEncoding) { - nsAutoCString newEncoding(aEncoding); - newEncoding.Trim(" \t\r\n\f"); - if (newEncoding.LowerCaseEqualsLiteral("utf-16") || - newEncoding.LowerCaseEqualsLiteral("utf-16be") || - newEncoding.LowerCaseEqualsLiteral("utf-16le")) { + nsAutoCString newEncoding; + if (!EncodingUtils::FindEncodingForLabel(aEncoding, newEncoding)) { + // the encoding name is bogus + mTreeBuilder->MaybeComplainAboutCharset("EncMetaUnsupported", + true, + mTokenizer->getLineNumber()); + return false; + } + + if 
(newEncoding.EqualsLiteral("UTF-16") || + newEncoding.EqualsLiteral("UTF-16BE") || + newEncoding.EqualsLiteral("UTF-16LE")) { mTreeBuilder->MaybeComplainAboutCharset("EncMetaUtf16", true, mTokenizer->getLineNumber()); newEncoding.Assign("UTF-8"); } - nsresult rv = NS_OK; - bool eq; - rv = nsCharsetAlias::Equals(newEncoding, mCharset, &eq); - if (NS_FAILED(rv)) { - // the encoding name is bogus - mTreeBuilder->MaybeComplainAboutCharset("EncMetaUnsupported", - true, - mTokenizer->getLineNumber()); - return false; - } - if (eq) { + if (newEncoding.Equals(mCharset)) { if (mCharsetSource < kCharsetFromMetaPrescan) { if (mInitialEncodingWasFromParentFrame) { mTreeBuilder->MaybeComplainAboutCharset("EncLateMetaFrame", false, mTokenizer->getLineNumber()); } else { mTreeBuilder->MaybeComplainAboutCharset("EncLateMeta", false, mTokenizer->getLineNumber()); } } mCharsetSource = kCharsetFromMetaTag; // become confident mFeedChardet = false; // don't feed chardet when confident return false; } - // XXX check HTML5 non-IANA aliases here - - nsAutoCString preferred; - rv = nsCharsetAlias::GetPreferred(newEncoding, preferred); - if (NS_FAILED(rv)) { - // This charset has been blacklisted for permitting XSS smuggling. - // EncMetaNonRoughSuperset is a reasonable approximation to the - // right error message. - mTreeBuilder->MaybeComplainAboutCharset("EncMetaNonRoughSuperset", - true, - mTokenizer->getLineNumber()); - return false; - } - - // ??? Explicit further blacklist of character sets that are not - // "rough supersets" of ASCII. Some of these are handled above (utf-16), - // some by the XSS smuggling blacklist in charsetData.properties, - // maybe all of the remainder should also be blacklisted there. 
- if (preferred.LowerCaseEqualsLiteral("utf-16") || - preferred.LowerCaseEqualsLiteral("utf-16be") || - preferred.LowerCaseEqualsLiteral("utf-16le") || - preferred.LowerCaseEqualsLiteral("utf-7") || - preferred.LowerCaseEqualsLiteral("x-imap4-modified-utf7")) { - // Not a rough ASCII superset - mTreeBuilder->MaybeComplainAboutCharset("EncMetaNonRoughSuperset", - true, - mTokenizer->getLineNumber()); - return false; - } - aEncoding.Assign(preferred); + aEncoding.Assign(newEncoding); return true; } bool nsHtml5StreamParser::internalEncodingDeclaration(nsString* aEncoding) { // This code needs to stay in sync with // nsHtml5MetaScanner::tryCharset. Unfortunately, the
--- a/parser/htmlparser/src/nsParser.cpp +++ b/parser/htmlparser/src/nsParser.cpp @@ -9,17 +9,16 @@ #include "nsString.h" #include "nsCRT.h" #include "nsScanner.h" #include "plstr.h" #include "nsIStringStream.h" #include "nsIChannel.h" #include "nsICachingChannel.h" #include "nsICacheEntryDescriptor.h" -#include "nsCharsetAlias.h" #include "nsICharsetConverterManager.h" #include "nsIInputStream.h" #include "CNavDTD.h" #include "prenv.h" #include "prlock.h" #include "prcvar.h" #include "nsParserCIID.h" #include "nsReadableUtils.h" @@ -38,17 +37,20 @@ #include "nsXPCOMCIDInternal.h" #include "nsMimeTypes.h" #include "mozilla/CondVar.h" #include "mozilla/Mutex.h" #include "nsParserConstants.h" #include "nsCharsetSource.h" #include "nsContentUtils.h" +#include "mozilla/dom/EncodingUtils.h" + using namespace mozilla; +using mozilla::dom::EncodingUtils; #define NS_PARSER_FLAG_PARSER_ENABLED 0x00000002 #define NS_PARSER_FLAG_OBSERVERS_ENABLED 0x00000004 #define NS_PARSER_FLAG_PENDING_CONTINUE_EVENT 0x00000008 #define NS_PARSER_FLAG_FLUSH_TOKENS 0x00000020 #define NS_PARSER_FLAG_CAN_TOKENIZE 0x00000040 static NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID); @@ -1835,18 +1837,17 @@ ParserWriteFunc(nsIInputStream* in, // The decoder will swallow the BOM. The UTF-16 will re-sniff for // endianness. The value of preferred is now either "UTF-8" or "UTF-16". preferred.Assign(maybePrefer); source = kCharsetFromByteOrderMark; } else if (source < kCharsetFromChannel) { nsAutoCString declCharset; if (ExtractCharsetFromXmlDeclaration(buf, count, declCharset)) { - nsresult rv = nsCharsetAlias::GetPreferred(declCharset, maybePrefer); - if (NS_SUCCEEDED(rv)) { + if (EncodingUtils::FindEncodingForLabel(declCharset, maybePrefer)) { preferred.Assign(maybePrefer); source = kCharsetFromMetaTag; } } } pws->mParser->SetDocumentCharset(preferred, source); pws->mParser->SetSinkCharset(preferred);
--- a/parser/htmlparser/src/nsScanner.cpp +++ b/parser/htmlparser/src/nsScanner.cpp @@ -5,26 +5,29 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ //#define __INCREMENTAL 1 #include "nsScanner.h" #include "nsDebug.h" #include "nsIServiceManager.h" #include "nsICharsetConverterManager.h" -#include "nsCharsetAlias.h" #include "nsReadableUtils.h" #include "nsIInputStream.h" #include "nsIFile.h" #include "nsNetUtil.h" #include "nsUTF8Utils.h" // for LossyConvertEncoding #include "nsCRT.h" #include "nsParser.h" #include "nsCharsetSource.h" +#include "mozilla/dom/EncodingUtils.h" + +using mozilla::dom::EncodingUtils; + // We replace NUL characters with this character. static PRUnichar sInvalid = UCS2_REPLACEMENT_CHAR; nsReadEndCondition::nsReadEndCondition(const PRUnichar* aTerminateChars) : mChars(aTerminateChars), mFilter(PRUnichar(~0)) // All bits set { // Build filter that will be used to filter out characters with // bits that none of the terminal chars have. This works very well @@ -113,41 +116,38 @@ nsScanner::nsScanner(nsString& aFilename SetDocumentCharset(NS_LITERAL_CSTRING("UTF-8"), kCharsetFromDocTypeDefault); } nsresult nsScanner::SetDocumentCharset(const nsACString& aCharset , int32_t aSource) { if (aSource < mCharsetSource) // priority is lower the the current one , just return NS_OK; - nsresult res = NS_OK; + nsCString charsetName; + bool valid = EncodingUtils::FindEncodingForLabel(aCharset, charsetName); + MOZ_ASSERT(valid, "Should never call with a bogus aCharset."); if (!mCharset.IsEmpty()) { - bool same; - res = nsCharsetAlias::Equals(aCharset, mCharset, &same); - if(NS_SUCCEEDED(res) && same) + if (charsetName.Equals(mCharset)) { mCharsetSource = aSource; return NS_OK; // no difference, don't change it } } // different, need to change it - nsCString charsetName; - res = nsCharsetAlias::GetPreferred(aCharset, charsetName); - MOZ_ASSERT(NS_SUCCEEDED(res), "Should never call with a bogus aCharset."); mCharset.Assign(charsetName); 
mCharsetSource = aSource; NS_ASSERTION(nsParser::GetCharsetConverterManager(), "Must have the charset converter manager!"); - res = nsParser::GetCharsetConverterManager()-> + nsresult res = nsParser::GetCharsetConverterManager()-> GetUnicodeDecoderRaw(mCharset.get(), getter_AddRefs(mUnicodeDecoder)); if (NS_SUCCEEDED(res) && mUnicodeDecoder) { // We need to detect conversion error of character to support XML // encoding error. mUnicodeDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Signal); }