author | Masatoshi Kimura <VYV03354@nifty.ne.jp> |
Sun, 18 Jun 2017 20:37:50 +0900 | |
changeset 417199 | 64412d8b6f4b4b64ace0c914fe897b4bc02cefbd |
parent 417198 | 5c870a786e945197a770a308aeeececdff8bac18 |
child 417200 | ce85883942f72709b6760fe842869049741fbecf |
push id | 1517 |
push user | jlorenzo@mozilla.com |
push date | Thu, 14 Sep 2017 16:50:54 +0000 |
treeherder | mozilla-release@3b41fd564418 [default view] [failures only] |
perfherder | [talos] [build metrics] [platform microbench] (compared to previous push) |
reviewers | hsivonen |
bugs | 1373984 |
milestone | 56.0a1 |
first release with | nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
|
last release without | nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
|
--- a/accessible/xul/XULElementAccessibles.cpp +++ b/accessible/xul/XULElementAccessibles.cpp @@ -276,13 +276,13 @@ XULLinkAccessible::AnchorURIAt(uint32_t nsAutoString href; mContent->GetAttr(kNameSpaceID_None, nsGkAtoms::href, href); nsCOMPtr<nsIURI> baseURI = mContent->GetBaseURI(); nsIDocument* document = mContent->OwnerDoc(); nsCOMPtr<nsIURI> anchorURI; NS_NewURI(getter_AddRefs(anchorURI), href, - document->GetDocumentCharacterSet().get(), + document->GetDocumentCharacterSet(), baseURI); return anchorURI.forget(); }
--- a/docshell/base/nsDocShell.cpp +++ b/docshell/base/nsDocShell.cpp @@ -369,24 +369,25 @@ ForEachPing(nsIContent* aContent, ForEac } nsCOMPtr<nsIIOService> ios = do_GetIOService(); if (!ios) { return; } nsIDocument* doc = aContent->OwnerDoc(); + nsAutoCString charset; + doc->GetDocumentCharacterSet()->Name(charset); nsWhitespaceTokenizer tokenizer(value); while (tokenizer.hasMoreTokens()) { nsCOMPtr<nsIURI> uri, baseURI = aContent->GetBaseURI(); ios->NewURI(NS_ConvertUTF16toUTF8(tokenizer.nextToken()), - doc->GetDocumentCharacterSet().get(), - baseURI, getter_AddRefs(uri)); + charset.get(), baseURI, getter_AddRefs(uri)); // if we can't generate a valid URI, then there is nothing to do if (!uri) { continue; } // Explicitly not allow loading data: URIs bool isDataScheme = (NS_SUCCEEDED(uri->SchemeIs("data", &isDataScheme)) && isDataScheme); @@ -827,16 +828,18 @@ nsDocShell::nsDocShell() , mBlankTiming(false) , mCreatingDocument(false) #ifdef DEBUG , mInEnsureScriptEnv(false) #endif , mDefaultLoadFlags(nsIRequest::LOAD_NORMAL) , mFrameType(FRAME_TYPE_REGULAR) , mPrivateBrowsingId(0) + , mForcedCharset(nullptr) + , mParentCharset(nullptr) , mParentCharsetSource(0) , mJSRunToCompletionDepth(0) , mTouchEventsOverride(nsIDocShell::TOUCHEVENTS_OVERRIDE_NONE) { AssertOriginAttributesMatchPrivateBrowsing(); nsContentUtils::GenerateUUIDInPlace(mHistoryID); @@ -2025,17 +2028,17 @@ NS_IMETHODIMP nsDocShell::GetCharset(nsACString& aCharset) { aCharset.Truncate(); nsIPresShell* presShell = GetPresShell(); NS_ENSURE_TRUE(presShell, NS_ERROR_FAILURE); nsIDocument* doc = presShell->GetDocument(); NS_ENSURE_TRUE(doc, NS_ERROR_FAILURE); - aCharset = doc->GetDocumentCharacterSet(); + doc->GetDocumentCharacterSet()->Name(aCharset); return NS_OK; } NS_IMETHODIMP nsDocShell::GatherCharsetMenuTelemetry() { nsCOMPtr<nsIContentViewer> viewer; GetContentViewer(getter_AddRefs(viewer)); @@ -2111,51 +2114,51 @@ nsDocShell::SetCharset(const nsACString& // set the charset override return SetForcedCharset(aCharset); } NS_IMETHODIMP nsDocShell::SetForcedCharset(const nsACString& aCharset) { if (aCharset.IsEmpty()) { - mForcedCharset.Truncate(); + mForcedCharset = nullptr; return NS_OK; } const Encoding* encoding = Encoding::ForLabel(aCharset); if (!encoding) { // Reject unknown labels return NS_ERROR_INVALID_ARG; } if (!encoding->IsAsciiCompatible() && encoding != ISO_2022_JP_ENCODING) { // Reject XSS hazards return NS_ERROR_INVALID_ARG; } - encoding->Name(mForcedCharset); + mForcedCharset = encoding; return NS_OK; } NS_IMETHODIMP nsDocShell::GetForcedCharset(nsACString& aResult) { - aResult = mForcedCharset; + mForcedCharset->Name(aResult); return NS_OK; } void -nsDocShell::SetParentCharset(const nsACString& aCharset, +nsDocShell::SetParentCharset(const Encoding*& aCharset, int32_t aCharsetSource, nsIPrincipal* aPrincipal) { mParentCharset = aCharset; mParentCharsetSource = aCharsetSource; mParentCharsetPrincipal = aPrincipal; } void -nsDocShell::GetParentCharset(nsACString& aCharset, +nsDocShell::GetParentCharset(const Encoding*& aCharset, int32_t* aCharsetSource, nsIPrincipal** aPrincipal) { aCharset = mParentCharset; *aCharsetSource = mParentCharsetSource; NS_IF_ADDREF(*aPrincipal = mParentCharsetPrincipal); } @@ -4187,17 +4190,17 @@ nsDocShell::AddChild(nsIDocShellTreeItem } if (!isWyciwyg) { // If this docshell is loaded from a wyciwyg: URI, don't // advertise our charset since it does not in any way reflect // the actual source charset, which is what we're trying to // expose here. - const nsACString& parentCS = doc->GetDocumentCharacterSet(); + const Encoding* parentCS = doc->GetDocumentCharacterSet(); int32_t charsetSource = doc->GetDocumentCharacterSetSource(); // set the child's parentCharset childAsDocShell->SetParentCharset(parentCS, charsetSource, doc->NodePrincipal()); } // printf("### 1 >>> Adding child. Parent CS = %s. ItemType = %d.\n", @@ -11616,17 +11619,18 @@ nsDocShell::ScrollToAnchor(bool aCurHasR // Above will fail if the anchor name is not UTF-8. Need to // convert from document charset to unicode. if (NS_FAILED(rv)) { // Get a document charset NS_ENSURE_TRUE(mContentViewer, NS_ERROR_FAILURE); nsIDocument* doc = mContentViewer->GetDocument(); NS_ENSURE_TRUE(doc, NS_ERROR_FAILURE); - const nsACString& charset = doc->GetDocumentCharacterSet(); + nsAutoCString charset; + doc->GetDocumentCharacterSet()->Name(charset); nsCOMPtr<nsITextToSubURI> textToSubURI = do_GetService(NS_ITEXTTOSUBURI_CONTRACTID, &rv); NS_ENSURE_SUCCESS(rv, rv); // Unescape and convert to unicode nsAutoString uStr;
--- a/docshell/base/nsDocShell.h +++ b/docshell/base/nsDocShell.h @@ -57,16 +57,17 @@ #include "nsITabParent.h" #include "nsCRT.h" #include "prtime.h" #include "nsRect.h" #include "Units.h" #include "nsIDeprecationWarner.h" namespace mozilla { +class Encoding; enum class TaskCategory; namespace dom { class EventTarget; class PendingGlobalHistoryEntry; typedef uint32_t ScreenOrientationInternal; } // namespace dom } // namespace mozilla @@ -150,16 +151,17 @@ class nsDocShell final , public nsIClipboardCommands , public nsIDOMStorageManager , public nsINetworkInterceptController , public nsIDeprecationWarner , public mozilla::SupportsWeakPtr<nsDocShell> { friend class nsDSURIContentListener; friend class FramingChecker; + using Encoding = mozilla::Encoding; public: MOZ_DECLARE_WEAKREFERENCE_TYPENAME(nsDocShell) nsDocShell(); virtual nsresult Init() override; @@ -269,16 +271,18 @@ public: void NotifyAsyncPanZoomStopped(); void SetInFrameSwap(bool aInSwap) { mInFrameSwap = aInSwap; } bool InFrameSwap(); + const Encoding* GetForcedCharset() { return mForcedCharset; } + private: bool CanSetOriginAttributes(); public: const mozilla::OriginAttributes& GetOriginAttributes() { return mOriginAttributes; @@ -1031,18 +1035,18 @@ protected: // On content docshells mPrivateBrowsingId == mOriginAttributes.mPrivateBrowsingId // On chrome docshells this value will be set, but not have the corresponding // origin attribute set. uint32_t mPrivateBrowsingId; nsString mInterceptedDocumentId; private: - nsCString mForcedCharset; - nsCString mParentCharset; + const Encoding* mForcedCharset; + const Encoding* mParentCharset; int32_t mParentCharsetSource; nsCOMPtr<nsIPrincipal> mParentCharsetPrincipal; nsTObserverArray<nsWeakPtr> mPrivacyObservers; nsTObserverArray<nsWeakPtr> mReflowObservers; nsTObserverArray<nsWeakPtr> mScrollObservers; nsCString mOriginalUriString; nsWeakPtr mOpener; mozilla::OriginAttributes mOriginAttributes;
--- a/docshell/base/nsIDocShell.idl +++ b/docshell/base/nsIDocShell.idl @@ -6,27 +6,32 @@ #include "domstubs.idl" #include "nsIDocShellTreeItem.idl" #include "nsIRequest.idl" %{ C++ #include "js/TypeDecls.h" #include "mozilla/Maybe.h" +#include "mozilla/NotNull.h" class nsPresContext; class nsIPresShell; +namespace mozilla { +class Encoding; +} %} /** * The nsIDocShell interface. */ [ptr] native nsPresContext(nsPresContext); [ptr] native nsIPresShell(nsIPresShell); [ref] native MaybeURI(mozilla::Maybe<nsCOMPtr<nsIURI>>); +[ref] native Encoding(const mozilla::Encoding*); interface nsIURI; interface nsIChannel; interface nsIContentViewer; interface nsIDOMEventTarget; interface nsIDocShellLoadInfo; interface nsIEditor; interface nsIEditingSession; @@ -708,21 +713,21 @@ interface nsIDocShell : nsIDocShellTreeI * The charset forced by the user. */ attribute ACString forcedCharset; /** * In a child docshell, this is the charset of the parent docshell */ [noscript, notxpcom, nostdcall] void setParentCharset( - in ACString parentCharset, + in Encoding parentCharset, in int32_t parentCharsetSource, in nsIPrincipal parentCharsetPrincipal); [noscript, notxpcom, nostdcall] void getParentCharset( - out ACString parentCharset, + out Encoding parentCharset, out int32_t parentCharsetSource, out nsIPrincipal parentCharsetPrincipal); /** * Whether the docShell records profile timeline markers at the moment */ [infallible] attribute boolean recordProfileTimelineMarkers;
--- a/dom/base/FragmentOrElement.cpp +++ b/dom/base/FragmentOrElement.cpp @@ -424,17 +424,17 @@ nsIContent::GetBaseURI(bool aTryUseXHRDo } while(elem); if (!baseAttrs.IsEmpty()) { doc->WarnOnceAbout(nsIDocument::eXMLBaseAttribute); // Now resolve against all xml:base attrs for (uint32_t i = baseAttrs.Length() - 1; i != uint32_t(-1); --i) { nsCOMPtr<nsIURI> newBase; nsresult rv = NS_NewURI(getter_AddRefs(newBase), baseAttrs[i], - doc->GetDocumentCharacterSet().get(), base); + doc->GetDocumentCharacterSet(), base); // Do a security check, almost the same as nsDocument::SetBaseURL() // Only need to do this on the final uri if (NS_SUCCEEDED(rv) && i == 0) { rv = nsContentUtils::GetSecurityManager()-> CheckLoadURIWithPrincipal(NodePrincipal(), newBase, nsIScriptSecurityManager::STANDARD); } if (NS_SUCCEEDED(rv)) {
--- a/dom/base/Location.cpp +++ b/dom/base/Location.cpp @@ -34,28 +34,16 @@ #include "NullPrincipal.h" #include "mozilla/Unused.h" #include "mozilla/dom/LocationBinding.h" #include "mozilla/dom/ScriptSettings.h" namespace mozilla { namespace dom { -static nsresult -GetDocumentCharacterSetForURI(const nsAString& aHref, nsACString& aCharset) -{ - aCharset.Truncate(); - - if (nsIDocument* doc = GetEntryDocument()) { - aCharset = doc->GetDocumentCharacterSet(); - } - - return NS_OK; -} - Location::Location(nsPIDOMWindowInner* aWindow, nsIDocShell *aDocShell) : mInnerWindow(aWindow) { MOZ_ASSERT(aDocShell); MOZ_ASSERT(mInnerWindow->IsInnerWindow()); mDocShell = do_GetWeakReference(aDocShell); } @@ -501,21 +489,22 @@ nsresult Location::SetHrefWithBase(const nsAString& aHref, nsIURI* aBase, bool aReplace) { nsresult result; nsCOMPtr<nsIURI> newUri; nsCOMPtr<nsIDocShell> docShell(do_QueryReferent(mDocShell)); - nsAutoCString docCharset; - if (NS_SUCCEEDED(GetDocumentCharacterSetForURI(aHref, docCharset))) - result = NS_NewURI(getter_AddRefs(newUri), aHref, docCharset.get(), aBase); - else + if (nsIDocument* doc = GetEntryDocument()) { + result = NS_NewURI(getter_AddRefs(newUri), aHref, + doc->GetDocumentCharacterSet(), aBase); + } else { result = NS_NewURI(getter_AddRefs(newUri), aHref, nullptr, aBase); + } if (newUri) { /* Check with the scriptContext if it is currently processing a script tag. * If so, this must be a <script> tag with a location.href in it. * we want to do a replace load, in such a situation. * In other cases, for example if a event handler or a JS timer * had a location.href in it, we want to do a normal load, * so that the new url will be appended to Session History.
--- a/dom/base/nsContentSink.cpp +++ b/dom/base/nsContentSink.cpp @@ -840,20 +840,19 @@ nsContentSink::ProcessMETATag(nsIContent void nsContentSink::PrefetchHref(const nsAString &aHref, nsINode *aSource, bool aExplicit) { nsCOMPtr<nsIPrefetchService> prefetchService(do_GetService(NS_PREFETCHSERVICE_CONTRACTID)); if (prefetchService) { // construct URI using document charset - const nsACString &charset = mDocument->GetDocumentCharacterSet(); + auto encoding = mDocument->GetDocumentCharacterSet(); nsCOMPtr<nsIURI> uri; - NS_NewURI(getter_AddRefs(uri), aHref, - charset.IsEmpty() ? nullptr : PromiseFlatCString(charset).get(), + NS_NewURI(getter_AddRefs(uri), aHref, encoding, mDocument->GetDocBaseURI()); if (uri) { nsCOMPtr<nsIDOMNode> domNode = do_QueryInterface(aSource); prefetchService->PrefetchURI(uri, mDocumentURI, domNode, aExplicit); } } } @@ -887,20 +886,19 @@ nsContentSink::PrefetchDNS(const nsAStri mDocument->NodePrincipal()->OriginAttributesRef()); } } void nsContentSink::Preconnect(const nsAString& aHref, const nsAString& aCrossOrigin) { // construct URI using document charset - const nsACString& charset = mDocument->GetDocumentCharacterSet(); + auto encoding = mDocument->GetDocumentCharacterSet(); nsCOMPtr<nsIURI> uri; - NS_NewURI(getter_AddRefs(uri), aHref, - charset.IsEmpty() ? nullptr : PromiseFlatCString(charset).get(), + NS_NewURI(getter_AddRefs(uri), aHref, encoding, mDocument->GetDocBaseURI()); if (uri && mDocument) { mDocument->MaybePreconnect(uri, dom::Element::StringToCORSMode(aCrossOrigin)); } } nsresult
--- a/dom/base/nsContentUtils.cpp +++ b/dom/base/nsContentUtils.cpp @@ -3153,19 +3153,22 @@ nsContentUtils::ObjectPrincipal(JSObject // static nsresult nsContentUtils::NewURIWithDocumentCharset(nsIURI** aResult, const nsAString& aSpec, nsIDocument* aDocument, nsIURI* aBaseURI) { - return NS_NewURI(aResult, aSpec, - aDocument ? aDocument->GetDocumentCharacterSet().get() : nullptr, - aBaseURI, sIOService); + if (aDocument) { + return NS_NewURI(aResult, aSpec, + aDocument->GetDocumentCharacterSet(), + aBaseURI, sIOService); + } + return NS_NewURI(aResult, aSpec, nullptr, aBaseURI, sIOService); } // static bool nsContentUtils::IsCustomElementName(nsIAtom* aName) { // A valid custom element name is a sequence of characters name which // must match the PotentialCustomElementName production:
--- a/dom/base/nsDOMSerializer.cpp +++ b/dom/base/nsDOMSerializer.cpp @@ -1,16 +1,17 @@ /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* vim: set ts=8 sts=2 et sw=2 tw=80: */ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #include "nsDOMSerializer.h" +#include "mozilla/Encoding.h" #include "nsIDocument.h" #include "nsIDocumentEncoder.h" #include "nsIDOMDocument.h" #include "nsComponentManagerUtils.h" #include "nsContentCID.h" #include "nsContentUtils.h" #include "nsError.h" #include "nsINode.h" @@ -66,17 +67,17 @@ SetUpEncoder(nsIDOMNode *aRoot, const ns if (NS_FAILED(rv)) return rv; nsAutoCString charset(aCharset); if (charset.IsEmpty()) { nsCOMPtr<nsIDocument> doc = do_QueryInterface(domDoc); NS_ASSERTION(doc, "Need a document"); - charset = doc->GetDocumentCharacterSet(); + doc->GetDocumentCharacterSet()->Name(charset); } rv = encoder->SetCharset(charset); if (NS_FAILED(rv)) return rv; // If we are working on the entire document we do not need to // specify which part to serialize if (!entireDocument) {
--- a/dom/base/nsDocument.cpp +++ b/dom/base/nsDocument.cpp @@ -1289,17 +1289,17 @@ static already_AddRefed<mozilla::dom::No nsIDocument::nsIDocument() : nsINode(nullNodeInfo), mReferrerPolicySet(false), mReferrerPolicy(mozilla::net::RP_Unset), mBlockAllMixedContent(false), mBlockAllMixedContentPreloads(false), mUpgradeInsecureRequests(false), mUpgradeInsecurePreloads(false), - mCharacterSet(NS_LITERAL_CSTRING("windows-1252")), + mCharacterSet(WINDOWS_1252_ENCODING), mCharacterSetSource(0), mParentDocument(nullptr), mCachedRootElement(nullptr), mNodeInfoManager(nullptr), mBidiEnabled(false), mMathMLEnabled(false), mIsInitialDocumentInWindow(false), mIgnoreDocGroupMismatches(false), @@ -3673,31 +3673,32 @@ nsIDocument::DefaultStyleAttrURLData() void nsDocument::GetBaseTarget(nsAString &aBaseTarget) { aBaseTarget = mBaseTarget; } void -nsDocument::SetDocumentCharacterSet(const nsACString& aCharSetID) -{ - // XXX it would be a good idea to assert the sanity of the argument, - // but before we figure out what to do about non-Encoding Standard - // encodings in the charset menu and in mailnews, assertions are futile. - if (!mCharacterSet.Equals(aCharSetID)) { - mCharacterSet = aCharSetID; +nsDocument::SetDocumentCharacterSet(NotNull<const Encoding*> aEncoding) +{ + if (mCharacterSet != aEncoding) { + mCharacterSet = aEncoding; + + nsAutoCString charsetID; + aEncoding->Name(charsetID); + NS_ConvertASCIItoUTF16 charset16(charsetID); int32_t n = mCharSetObservers.Length(); for (int32_t i = 0; i < n; i++) { nsIObserver* observer = mCharSetObservers.ElementAt(i); observer->Observe(static_cast<nsIDocument *>(this), "charset", - NS_ConvertASCIItoUTF16(aCharSetID).get()); + charset16.get()); } } } nsresult nsDocument::AddCharSetObserver(nsIObserver* aObserver) { NS_ENSURE_ARG_POINTER(aObserver); @@ -3844,26 +3845,26 @@ nsDocument::SetHeaderData(nsIAtom* aHead mReferrerPolicySet = true; } } } void nsDocument::TryChannelCharset(nsIChannel *aChannel, int32_t& aCharsetSource, - nsACString& aCharset, + NotNull<const Encoding*>& aEncoding, nsHtml5TreeOpExecutor* aExecutor) { if (aChannel) { nsAutoCString charsetVal; nsresult rv = aChannel->GetContentCharset(charsetVal); if (NS_SUCCEEDED(rv)) { const Encoding* preferred = Encoding::ForLabel(charsetVal); if (preferred) { - preferred->Name(aCharset); + aEncoding = WrapNotNull(preferred); aCharsetSource = kCharsetFromChannel; return; } else if (aExecutor && !charsetVal.IsEmpty()) { aExecutor->ComplainAboutBogusProtocolCharset(this); } } } } @@ -6484,17 +6485,19 @@ nsDocument::GetCharacterSet(nsAString& a { nsIDocument::GetCharacterSet(aCharacterSet); return NS_OK; } void nsIDocument::GetCharacterSet(nsAString& aCharacterSet) const { - CopyASCIItoUTF16(GetDocumentCharacterSet(), aCharacterSet); + nsAutoCString charset; + GetDocumentCharacterSet()->Name(charset); + CopyASCIItoUTF16(charset, aCharacterSet); } NS_IMETHODIMP nsDocument::ImportNode(nsIDOMNode* aImportedNode, bool aDeep, uint8_t aArgc, nsIDOMNode** aResult) { @@ -6581,19 +6584,17 @@ nsIDocument::LoadBindingDocument(const n } void nsIDocument::LoadBindingDocument(const nsAString& aURI, const Maybe<nsIPrincipal*>& aSubjectPrincipal, ErrorResult& rv) { nsCOMPtr<nsIURI> uri; - rv = NS_NewURI(getter_AddRefs(uri), aURI, - mCharacterSet.get(), - GetDocBaseURI()); + rv = NS_NewURI(getter_AddRefs(uri), aURI, mCharacterSet, GetDocBaseURI()); if (rv.Failed()) { return; } // Note - This computation of subjectPrincipal isn't necessarily sensical. // It's just designed to preserve the old semantics during a mass-conversion // patch. nsCOMPtr<nsIPrincipal> subjectPrincipal = @@ -9896,18 +9897,17 @@ nsDocument::ScrollToRef() } else { rv = NS_ERROR_FAILURE; } // If UTF-8 URI failed then try to assume the string as a // document's charset. if (NS_FAILED(rv)) { - const nsACString &docCharset = GetDocumentCharacterSet(); - const Encoding* encoding = Encoding::ForName(docCharset); + auto encoding = GetDocumentCharacterSet(); rv = encoding->DecodeWithoutBOMHandling(unescapedRef, ref); if (NS_SUCCEEDED(rv) && !ref.IsEmpty()) { rv = shell->GoToAnchor(ref, mChangeScrollPosWhenScrollingToRef); } } if (NS_SUCCEEDED(rv)) {
--- a/dom/base/nsDocument.h +++ b/dom/base/nsDocument.h @@ -419,20 +419,21 @@ public: virtual void SetBaseURI(nsIURI* aURI) override; /** * Get/Set the base target of a link in a document. */ virtual void GetBaseTarget(nsAString &aBaseTarget) override; /** - * Return a standard name for the document's character set. This will + * Set the document's character encoding. This will * trigger a startDocumentLoad if necessary to answer the question. */ - virtual void SetDocumentCharacterSet(const nsACString& aCharSetID) override; + virtual void + SetDocumentCharacterSet(NotNull<const Encoding*> aEncoding) override; /** * Add an observer that gets notified whenever the charset changes. */ virtual nsresult AddCharSetObserver(nsIObserver* aObserver) override; /** * Remove a charset observer. @@ -1043,17 +1044,17 @@ protected: void ReportEmptyGetElementByIdArg(); void DispatchContentLoadedEvents(); void RetrieveRelevantHeaders(nsIChannel *aChannel); void TryChannelCharset(nsIChannel *aChannel, int32_t& aCharsetSource, - nsACString& aCharset, + NotNull<const Encoding*>& aEncoding, nsHtml5TreeOpExecutor* aExecutor); // Call this before the document does something that will unbind all content. // That will stop us from doing a lot of work as each element is removed. void DestroyElementMaps(); // Refreshes the hrefs of all the links in the document. void RefreshLinkHrefs();
--- a/dom/base/nsFrameLoader.cpp +++ b/dom/base/nsFrameLoader.cpp @@ -267,26 +267,25 @@ nsFrameLoader::LoadFrame() } if (doc->IsLoadedAsInteractiveData()) { // XBL bindings doc shouldn't load sub-documents. return NS_OK; } nsCOMPtr<nsIURI> base_uri = mOwnerContent->GetBaseURI(); - const nsCString& doc_charset = doc->GetDocumentCharacterSet(); - const char *charset = doc_charset.IsEmpty() ? nullptr : doc_charset.get(); + auto encoding = doc->GetDocumentCharacterSet(); nsCOMPtr<nsIURI> uri; - nsresult rv = NS_NewURI(getter_AddRefs(uri), src, charset, base_uri); + nsresult rv = NS_NewURI(getter_AddRefs(uri), src, encoding, base_uri); // If the URI was malformed, try to recover by loading about:blank. if (rv == NS_ERROR_MALFORMED_URI) { rv = NS_NewURI(getter_AddRefs(uri), NS_LITERAL_STRING("about:blank"), - charset, base_uri); + encoding, base_uri); } if (NS_SUCCEEDED(rv)) { rv = LoadURI(uri); } if (NS_FAILED(rv)) { FireErrorEvent();
--- a/dom/base/nsGlobalWindow.cpp +++ b/dom/base/nsGlobalWindow.cpp @@ -13392,24 +13392,24 @@ nsGlobalWindow::SecurityCheckURL(const c JSAutoCompartment ac(cx, sourceWin->GetGlobalJSObject()); // Resolve the baseURI, which could be relative to the calling window. // // Note the algorithm to get the base URI should match the one // used to actually kick off the load in nsWindowWatcher.cpp. nsCOMPtr<nsIDocument> doc = sourceWindow->GetDoc(); nsIURI* baseURI = nullptr; - nsAutoCString charset(NS_LITERAL_CSTRING("UTF-8")); // default to utf-8 + auto encoding = UTF_8_ENCODING; // default to utf-8 if (doc) { baseURI = doc->GetDocBaseURI(); - charset = doc->GetDocumentCharacterSet(); + encoding = doc->GetDocumentCharacterSet(); } nsCOMPtr<nsIURI> uri; nsresult rv = NS_NewURI(getter_AddRefs(uri), nsDependentCString(aURL), - charset.get(), baseURI); + encoding, baseURI); if (NS_WARN_IF(NS_FAILED(rv))) { return rv; } if (NS_FAILED(nsContentUtils::GetSecurityManager()-> CheckLoadURIFromScript(cx, uri))) { return NS_ERROR_FAILURE; }
--- a/dom/base/nsIDocument.h +++ b/dom/base/nsIDocument.h @@ -29,16 +29,17 @@ #include "mozilla/WeakPtr.h" #include "Units.h" #include "nsContentListDeclarations.h" #include "nsExpirationTracker.h" #include "nsClassHashtable.h" #include "mozilla/CORSMode.h" #include "mozilla/dom/DispatcherTrait.h" #include "mozilla/LinkedList.h" +#include "mozilla/NotNull.h" #include "mozilla/SegmentedVector.h" #include "mozilla/StyleBackendType.h" #include "mozilla/StyleSheet.h" #include "mozilla/TimeStamp.h" #include "mozilla/UniquePtr.h" #include <bitset> // for member #ifdef MOZILLA_INTERNAL_API @@ -99,16 +100,17 @@ class nsWindowSizes; class nsDOMCaretPosition; class nsViewportInfo; class nsIGlobalObject; struct nsCSSSelectorList; namespace mozilla { class AbstractThread; class CSSStyleSheet; +class Encoding; class ErrorResult; class EventStates; class PendingAnimationTracker; class StyleSetHandle; template<typename> class OwningNonNull; struct URLExtraData; namespace css { @@ -206,16 +208,20 @@ class nsContentList; // Document interface. This is implemented by all document objects in // Gecko. class nsIDocument : public nsINode, public mozilla::dom::DispatcherTrait { typedef mozilla::dom::GlobalObject GlobalObject; +protected: + using Encoding = mozilla::Encoding; + template <typename T> using NotNull = mozilla::NotNull<T>; + public: typedef mozilla::net::ReferrerPolicy ReferrerPolicyEnum; typedef mozilla::dom::Element Element; typedef mozilla::dom::FullscreenRequest FullscreenRequest; NS_DECLARE_STATIC_IID_ACCESSOR(NS_IDOCUMENT_IID) #ifdef MOZILLA_INTERNAL_API @@ -499,26 +505,25 @@ public: virtual void GetBaseTarget(nsAString &aBaseTarget) = 0; void SetBaseTarget(const nsString& aBaseTarget) { mBaseTarget = aBaseTarget; } /** * Return a standard name for the document's character set. */ - const nsCString& GetDocumentCharacterSet() const + NotNull<const Encoding*> GetDocumentCharacterSet() const { return mCharacterSet; } /** - * Set the document's character encoding. |aCharSetID| should be canonical. - * That is, callers are responsible for the charset alias resolution. + * Set the document's character encoding. */ - virtual void SetDocumentCharacterSet(const nsACString& aCharSetID) = 0; + virtual void SetDocumentCharacterSet(NotNull<const Encoding*> aEncoding) = 0; int32_t GetDocumentCharacterSetSource() const { return mCharacterSetSource; } // This method MUST be called before SetDocumentCharacterSet if // you're planning to call both. @@ -3056,17 +3061,17 @@ protected: // if nsMixedContentBlocker requires sending an HSTS priming request, // temporarily store that in the document so that it can be propogated to the // LoadInfo and eventually the HTTP Channel nsDataHashtable<nsURIHashKey, HSTSPrimingState> mHSTSPrimingURIList; mozilla::WeakPtr<nsDocShell> mDocumentContainer; - nsCString mCharacterSet; + NotNull<const Encoding*> mCharacterSet; int32_t mCharacterSetSource; // This is just a weak pointer; the parent document owns its children. nsIDocument* mParentDocument; // A reference to the element last returned from GetRootElement(). mozilla::dom::Element* mCachedRootElement;
--- a/dom/base/nsImageLoadingContent.cpp +++ b/dom/base/nsImageLoadingContent.cpp @@ -1123,22 +1123,22 @@ nsImageLoadingContent::StringToURI(const NS_PRECONDITION(aDocument, "Must have a document"); NS_PRECONDITION(aURI, "Null out param"); // (1) Get the base URI nsIContent* thisContent = AsContent(); nsCOMPtr<nsIURI> baseURL = thisContent->GetBaseURI(); // (2) Get the charset - const nsCString& charset = aDocument->GetDocumentCharacterSet(); + auto encoding = aDocument->GetDocumentCharacterSet(); // (3) Construct the silly thing return NS_NewURI(aURI, aSpec, - charset.IsEmpty() ? nullptr : charset.get(), + encoding, baseURL, nsContentUtils::GetIOService()); } nsresult nsImageLoadingContent::FireEvent(const nsAString& aEventType, bool aIsCancelable) { if (nsContentUtils::DocumentInactiveForImageLoads(GetOurOwnerDoc())) {
--- a/dom/base/nsReferencedElement.cpp +++ b/dom/base/nsReferencedElement.cpp @@ -28,18 +28,20 @@ nsReferencedElement::Reset(nsIContent* a nsAutoCString refPart; aURI->GetRef(refPart); // Unescape %-escapes in the reference. The result will be in the // origin charset of the URL, hopefully... NS_UnescapeURL(refPart); nsAutoCString charset; aURI->GetOriginCharset(charset); - const Encoding* encoding = charset.IsEmpty() ? - UTF_8_ENCODING : Encoding::ForName(charset); + auto encoding = Encoding::ForLabelNoReplacement(charset); + if (!encoding) { + encoding = UTF_8_ENCODING; + } nsAutoString ref; nsresult rv = encoding->DecodeWithoutBOMHandling(refPart, ref); if (NS_FAILED(rv) || ref.IsEmpty()) { return; } rv = NS_OK; // Get the current document
--- a/dom/encoding/FallbackEncoding.cpp +++ b/dom/encoding/FallbackEncoding.cpp @@ -31,86 +31,87 @@ static constexpr nsUConvProp nonParticip }; NS_IMPL_ISUPPORTS(FallbackEncoding, nsIObserver) FallbackEncoding* FallbackEncoding::sInstance = nullptr; bool FallbackEncoding::sGuessFallbackFromTopLevelDomain = true; FallbackEncoding::FallbackEncoding() + : mFallback(nullptr) { MOZ_ASSERT(!FallbackEncoding::sInstance, "Singleton already exists."); } -void -FallbackEncoding::Get(nsACString& aFallback) +NotNull<const Encoding*> +FallbackEncoding::Get() { - if (!mFallback.IsEmpty()) { - aFallback = mFallback; - return; + if (mFallback) { + return WrapNotNull(mFallback); } const nsAdoptingCString& override = Preferences::GetCString("intl.charset.fallback.override"); // Don't let the user break things by setting the override to unreasonable // values via about:config - const Encoding* encoding = Encoding::ForLabel(override); + auto encoding = Encoding::ForLabel(override); if (!encoding || !encoding->IsAsciiCompatible() || encoding == UTF_8_ENCODING) { - mFallback.Truncate(); + mFallback = nullptr; } else { - encoding->Name(mFallback); + mFallback = encoding; } - if (!mFallback.IsEmpty()) { - aFallback = mFallback; - return; + if (mFallback) { + return WrapNotNull(mFallback); } nsAutoCString locale; LocaleService::GetInstance()->GetAppLocaleAsLangTag(locale); // Let's lower case the string just in case unofficial language packs // don't stick to conventions. ToLowerCase(locale); // ASCII lowercasing with CString input! // Special case Traditional Chinese before throwing away stuff after the // language itself. Today we only ship zh-TW, but be defensive about // possible future values. if (locale.EqualsLiteral("zh-tw") || locale.EqualsLiteral("zh-hk") || locale.EqualsLiteral("zh-mo") || locale.EqualsLiteral("zh-hant")) { - mFallback.AssignLiteral("Big5"); - aFallback = mFallback; - return; + mFallback = BIG5_ENCODING; + return WrapNotNull(mFallback); } // Throw away regions and other variants to accommodate weird stuff seen // in telemetry--apparently unofficial language packs. int32_t index = locale.FindChar('-'); if (index >= 0) { locale.Truncate(index); } + nsAutoCString fallback; if (NS_FAILED(nsUConvPropertySearch::SearchPropertyValue( - localesFallbacks, ArrayLength(localesFallbacks), locale, mFallback))) { - mFallback.AssignLiteral("windows-1252"); + localesFallbacks, ArrayLength(localesFallbacks), locale, fallback))) { + mFallback = WINDOWS_1252_ENCODING; + } else { + mFallback = Encoding::ForName(fallback); } - aFallback = mFallback; + return WrapNotNull(mFallback); } -void -FallbackEncoding::FromLocale(nsACString& aFallback) +NotNull<const Encoding*> +FallbackEncoding::FromLocale() { MOZ_ASSERT(FallbackEncoding::sInstance, "Using uninitialized fallback cache."); - FallbackEncoding::sInstance->Get(aFallback); + return FallbackEncoding::sInstance->Get(); } // PrefChangedFunc void FallbackEncoding::PrefChanged(const char*, void*) { MOZ_ASSERT(FallbackEncoding::sInstance, "Pref callback called with null fallback cache."); @@ -164,20 +165,21 @@ FallbackEncoding::IsParticipatingTopLeve nsAutoCString dummy; return NS_FAILED(nsUConvPropertySearch::SearchPropertyValue( nonParticipatingDomains, ArrayLength(nonParticipatingDomains), aTLD, dummy)); } -void -FallbackEncoding::FromTopLevelDomain(const nsACString& aTLD, - nsACString& aFallback) +NotNull<const Encoding*> +FallbackEncoding::FromTopLevelDomain(const nsACString& aTLD) { + nsAutoCString fallback; if (NS_FAILED(nsUConvPropertySearch::SearchPropertyValue( - domainsFallbacks, ArrayLength(domainsFallbacks), aTLD, aFallback))) { - aFallback.AssignLiteral("windows-1252"); + domainsFallbacks, ArrayLength(domainsFallbacks), aTLD, fallback))) { + return WINDOWS_1252_ENCODING; } + return Encoding::ForName(fallback); } } // namespace dom } // namespace mozilla
--- a/dom/encoding/FallbackEncoding.h +++ b/dom/encoding/FallbackEncoding.h @@ -2,20 +2,22 @@ /* vim: set ts=8 sts=2 et sw=2 tw=80: */ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this file, * You can obtain one at http://mozilla.org/MPL/2.0/. */ #ifndef mozilla_dom_FallbackEncoding_h_ #define mozilla_dom_FallbackEncoding_h_ +#include "mozilla/NotNull.h" #include "nsIObserver.h" #include "nsString.h" namespace mozilla { +class Encoding; namespace dom { class FallbackEncoding : public nsIObserver { public: NS_DECL_ISUPPORTS NS_DECL_NSIOBSERVER @@ -25,34 +27,34 @@ public: static bool sGuessFallbackFromTopLevelDomain; /** * Gets the locale-dependent fallback encoding for legacy HTML and plain * text content. * * @param aFallback the outparam for the fallback encoding */ - static void FromLocale(nsACString& aFallback); + static NotNull<const Encoding*> FromLocale(); /** * Checks if it is appropriate to call FromTopLevelDomain() for a given TLD. * * @param aTLD the top-level domain (in Punycode) * @return true if OK to call FromTopLevelDomain() */ static bool IsParticipatingTopLevelDomain(const nsACString& aTLD); /** * Gets a top-level domain-depedendent fallback encoding for legacy HTML * and plain text content * * @param aTLD the top-level domain (in Punycode) * @param aFallback the outparam for the fallback encoding */ - static void FromTopLevelDomain(const nsACString& aTLD, nsACString& aFallback); + static NotNull<const Encoding*> FromTopLevelDomain(const nsACString& aTLD); // public API ends here! /** * Allocate sInstance used by FromLocale(). * To be called from nsLayoutStatics only. */ static void Initialize(); @@ -73,27 +75,27 @@ private: FallbackEncoding(); virtual ~FallbackEncoding() {}; /** * Invalidates the cache. */ void Invalidate() { - mFallback.Truncate(); + mFallback = nullptr; } static void PrefChanged(const char*, void*); /** * Gets the fallback encoding label. * @param aFallback the fallback encoding */ - void Get(nsACString& aFallback); + NotNull<const Encoding*> Get(); - nsCString mFallback; + const Encoding* mFallback; }; } // namespace dom } // namespace mozilla #endif // mozilla_dom_FallbackEncoding_h_
--- a/dom/html/HTMLFormSubmission.cpp +++ b/dom/html/HTMLFormSubmission.cpp @@ -882,17 +882,17 @@ GetSubmitEncoding(nsGenericHTMLElement* } offset = spPos + 1; } while (spPos != -1); } // if there are no accept-charset or all the charset are not supported // Get the charset from document nsIDocument* doc = aForm->GetComposedDoc(); if (doc) { - return Encoding::ForName(doc->GetDocumentCharacterSet()); + return doc->GetDocumentCharacterSet(); } return UTF_8_ENCODING; } void GetEnumAttr(nsGenericHTMLElement* aContent, nsIAtom* atom, int32_t* aValue) {
--- a/dom/html/MediaDocument.cpp +++ b/dom/html/MediaDocument.cpp @@ -166,27 +166,26 @@ MediaDocument::StartDocumentLoad(const c // in UTF-8, we don't lose anything because the default empty value is // considered synonymous with UTF-8. nsCOMPtr<nsIDocShell> docShell(do_QueryInterface(aContainer)); // not being able to set the charset is not critical. NS_ENSURE_TRUE(docShell, NS_OK); - nsAutoCString charset; + const Encoding* encoding; int32_t source; nsCOMPtr<nsIPrincipal> principal; // opening in a new tab - docShell->GetParentCharset(charset, &source, getter_AddRefs(principal)); + docShell->GetParentCharset(encoding, &source, getter_AddRefs(principal)); - if (!charset.IsEmpty() && - !charset.EqualsLiteral("UTF-8") && + if (encoding && encoding != UTF_8_ENCODING && NodePrincipal()->Equals(principal)) { SetDocumentCharacterSetSource(source); - SetDocumentCharacterSet(charset); + SetDocumentCharacterSet(WrapNotNull(encoding)); } return NS_OK; } void MediaDocument::BecomeInteractive() { @@ -294,21 +293,24 @@ MediaDocument::GetFileName(nsAString& aR nsAutoCString docCharset; // Now that the charset is set in |StartDocumentLoad| to the charset of // the document viewer instead of a bogus value ("windows-1252" set in // |nsDocument|'s ctor), the priority is given to the current charset. // This is necessary to deal with a media document being opened in a new // window or a new tab, in which case |originCharset| of |nsIURI| is not // reliable. if (mCharacterSetSource != kCharsetUninitialized) { - docCharset = mCharacterSet; + mCharacterSet->Name(docCharset); } else { // resort to |originCharset| url->GetOriginCharset(docCharset); - SetDocumentCharacterSet(docCharset); + auto encoding = Encoding::ForLabelNoReplacement(docCharset); + if (encoding) { + SetDocumentCharacterSet(WrapNotNull(encoding)); + } } nsresult rv; nsCOMPtr<nsITextToSubURI> textToSubURI = do_GetService(NS_ITEXTTOSUBURI_CONTRACTID, &rv); if (NS_SUCCEEDED(rv)) { // UnEscapeURIForUI always succeeds textToSubURI->UnEscapeURIForUI(docCharset, fileName, aResult);
--- a/dom/html/nsHTMLContentSink.cpp +++ b/dom/html/nsHTMLContentSink.cpp @@ -131,17 +131,17 @@ public: // nsIContentSink NS_IMETHOD WillParse(void) override; NS_IMETHOD WillBuildModel(nsDTDMode aDTDMode) override; NS_IMETHOD DidBuildModel(bool aTerminated) override; NS_IMETHOD WillInterrupt(void) override; NS_IMETHOD WillResume(void) override; NS_IMETHOD SetParser(nsParserBase* aParser) override; virtual void FlushPendingNotifications(FlushType aType) override; - NS_IMETHOD SetDocumentCharset(nsACString& aCharset) override; + virtual void SetDocumentCharset(NotNull<const Encoding*> aEncoding) override; virtual nsISupports *GetTarget() override; virtual bool IsScriptExecuting() override; // nsIHTMLContentSink NS_IMETHOD OpenContainer(ElementType aNodeType) override; NS_IMETHOD CloseContainer(ElementType aTag) override; protected: @@ -1082,21 +1082,20 @@ HTMLContentSink::FlushTags() if (!mNotifiedRootInsertion) { NotifyRootInsertion(); return NS_OK; } return mCurrentContext ? mCurrentContext->FlushTags() : NS_OK; } -NS_IMETHODIMP -HTMLContentSink::SetDocumentCharset(nsACString& aCharset) +void +HTMLContentSink::SetDocumentCharset(NotNull<const Encoding*> aEncoding) { MOZ_ASSERT_UNREACHABLE("<meta charset> case doesn't occur with about:blank"); - return NS_ERROR_NOT_IMPLEMENTED; } nsISupports * HTMLContentSink::GetTarget() { return mDocument; }
--- a/dom/html/nsHTMLDocument.cpp +++ b/dom/html/nsHTMLDocument.cpp @@ -159,16 +159,22 @@ IsAsciiCompatible(const nsACString& aPre aPreferredName.LowerCaseEqualsLiteral("utf-16be") || aPreferredName.LowerCaseEqualsLiteral("utf-16le") || aPreferredName.LowerCaseEqualsLiteral("replacement") || aPreferredName.LowerCaseEqualsLiteral("hz-gb-2312") || aPreferredName.LowerCaseEqualsLiteral("utf-7") || aPreferredName.LowerCaseEqualsLiteral("x-imap4-modified-utf7")); } +static bool +IsAsciiCompatible(const Encoding* aEncoding) +{ + return aEncoding->IsAsciiCompatible() || aEncoding == ISO_2022_JP_ENCODING; +} + nsresult NS_NewHTMLDocument(nsIDocument** aInstancePtrResult, bool aLoadedAsData) { RefPtr<nsHTMLDocument> doc = new nsHTMLDocument(); nsresult rv = doc->Init(); if (NS_FAILED(rv)) { @@ -281,92 +287,94 @@ nsHTMLDocument::ResetToURI(nsIURI *aURI, // Make the content type default to "text/html", we are a HTML // document, after all. Once we start getting data, this may be // changed. SetContentTypeInternal(nsDependentCString("text/html")); } void nsHTMLDocument::TryHintCharset(nsIContentViewer* aCv, - int32_t& aCharsetSource, nsACString& aCharset) + int32_t& aCharsetSource, + NotNull<const Encoding*>& aEncoding) { if (aCv) { int32_t requestCharsetSource; nsresult rv = aCv->GetHintCharacterSetSource(&requestCharsetSource); if(NS_SUCCEEDED(rv) && kCharsetUninitialized != requestCharsetSource) { nsAutoCString requestCharset; rv = aCv->GetHintCharacterSet(requestCharset); aCv->SetHintCharacterSetSource((int32_t)(kCharsetUninitialized)); - if(requestCharsetSource <= aCharsetSource) + if (requestCharsetSource <= aCharsetSource) return; - if(NS_SUCCEEDED(rv) && IsAsciiCompatible(requestCharset)) { - aCharsetSource = requestCharsetSource; - aCharset = requestCharset; - + if (NS_SUCCEEDED(rv) && !requestCharset.IsEmpty()) { + auto encoding = Encoding::ForName(requestCharset); + if (IsAsciiCompatible(encoding)) { + aCharsetSource = requestCharsetSource; + aEncoding = encoding; + } return; } } } return; } void nsHTMLDocument::TryUserForcedCharset(nsIContentViewer* aCv, nsIDocShell* aDocShell, int32_t& aCharsetSource, - nsACString& aCharset) + NotNull<const Encoding*>& aEncoding) { nsresult rv = NS_OK; if(kCharsetFromUserForced <= aCharsetSource) return; - // mCharacterSet not updated yet for channel, so check aCharset, too. - if (WillIgnoreCharsetOverride() || !IsAsciiCompatible(aCharset)) { + // mCharacterSet not updated yet for channel, so check aEncoding, too. + if (WillIgnoreCharsetOverride() || !IsAsciiCompatible(aEncoding)) { return; } nsAutoCString forceCharsetFromDocShell; if (aCv) { // XXX mailnews-only rv = aCv->GetForceCharacterSet(forceCharsetFromDocShell); } if(NS_SUCCEEDED(rv) && !forceCharsetFromDocShell.IsEmpty() && IsAsciiCompatible(forceCharsetFromDocShell)) { - aCharset = forceCharsetFromDocShell; + aEncoding = Encoding::ForName(forceCharsetFromDocShell); aCharsetSource = kCharsetFromUserForced; return; } if (aDocShell) { // This is the Character Encoding menu code path in Firefox - nsAutoCString charset; - rv = aDocShell->GetForcedCharset(charset); - - if (NS_SUCCEEDED(rv) && !charset.IsEmpty()) { - if (!IsAsciiCompatible(charset)) { + auto encoding = nsDocShell::Cast(aDocShell)->GetForcedCharset(); + + if (encoding) { + if (!IsAsciiCompatible(encoding)) { return; } - aCharset = charset; + aEncoding = WrapNotNull(encoding); aCharsetSource = kCharsetFromUserForced; aDocShell->SetForcedCharset(NS_LITERAL_CSTRING("")); } } } void nsHTMLDocument::TryCacheCharset(nsICachingChannel* aCachingChannel, int32_t& aCharsetSource, - nsACString& aCharset) + NotNull<const Encoding*>& aEncoding) { nsresult rv; if (kCharsetFromCache <= aCharsetSource) { return; } nsCString cachedCharset; @@ -384,72 +392,72 @@ nsHTMLDocument::TryCacheCharset(nsICachi return; } // Check IsAsciiCompatible() even in the cache case, because the value // might be stale and in the case of a stale charset that is not a rough // ASCII superset, the parser has no way to recover. if (!encoding->IsAsciiCompatible() && encoding != ISO_2022_JP_ENCODING) { return; } - encoding->Name(cachedCharset); - aCharset = cachedCharset; + aEncoding = WrapNotNull(encoding); aCharsetSource = kCharsetFromCache; } void nsHTMLDocument::TryParentCharset(nsIDocShell* aDocShell, int32_t& aCharsetSource, - nsACString& aCharset) + NotNull<const Encoding*>& aEncoding) { if (!aDocShell) { return; } if (aCharsetSource >= kCharsetFromParentForced) { return; } int32_t parentSource; - nsAutoCString parentCharset; + const Encoding* parentCharset; nsCOMPtr<nsIPrincipal> parentPrincipal; aDocShell->GetParentCharset(parentCharset, &parentSource, getter_AddRefs(parentPrincipal)); - if (parentCharset.IsEmpty()) { + if (!parentCharset) { return; } if (kCharsetFromParentForced == parentSource || kCharsetFromUserForced == parentSource) { if (WillIgnoreCharsetOverride() || - !IsAsciiCompatible(aCharset) || // if channel said UTF-16 + !IsAsciiCompatible(aEncoding) || // if channel said UTF-16 !IsAsciiCompatible(parentCharset)) { return; } - aCharset.Assign(parentCharset); + aEncoding = WrapNotNull(parentCharset); aCharsetSource = kCharsetFromParentForced; return; } if (aCharsetSource >= kCharsetFromParentFrame) { return; } if (kCharsetFromCache <= parentSource) { // Make sure that's OK if (!NodePrincipal()->Equals(parentPrincipal) || !IsAsciiCompatible(parentCharset)) { return; } - aCharset.Assign(parentCharset); + aEncoding = WrapNotNull(parentCharset); aCharsetSource = kCharsetFromParentFrame; } } void -nsHTMLDocument::TryTLD(int32_t& aCharsetSource, nsACString& aCharset) +nsHTMLDocument::TryTLD(int32_t& aCharsetSource, + NotNull<const Encoding*>& aEncoding) { if (aCharsetSource >= kCharsetFromTopLevelDomain) { return; } if (!FallbackEncoding::sGuessFallbackFromTopLevelDomain) { return; } if (!mDocumentURI) { @@ -495,39 +503,42 @@ nsHTMLDocument::TryTLD(int32_t& aCharset seenNonDigit = true; break; } } if (!seenNonDigit) { return; } aCharsetSource = kCharsetFromTopLevelDomain; - FallbackEncoding::FromTopLevelDomain(tld, aCharset); + aEncoding = FallbackEncoding::FromTopLevelDomain(tld); } void -nsHTMLDocument::TryFallback(int32_t& aCharsetSource, nsACString& aCharset) +nsHTMLDocument::TryFallback(int32_t& aCharsetSource, + NotNull<const Encoding*>& aEncoding) { if (kCharsetFromFallback <= aCharsetSource) return; aCharsetSource = kCharsetFromFallback; - FallbackEncoding::FromLocale(aCharset); + aEncoding = FallbackEncoding::FromLocale(); } void -nsHTMLDocument::SetDocumentCharacterSet(const nsACString& aCharSetID) +nsHTMLDocument::SetDocumentCharacterSet(NotNull<const Encoding*> aEncoding) { - nsDocument::SetDocumentCharacterSet(aCharSetID); + nsDocument::SetDocumentCharacterSet(aEncoding); // Make sure to stash this charset on our channel as needed if it's a wyciwyg // channel. nsCOMPtr<nsIWyciwygChannel> wyciwygChannel = do_QueryInterface(mChannel); if (wyciwygChannel) { + nsAutoCString charset; + aEncoding->Name(charset); wyciwygChannel->SetCharsetAndSource(GetDocumentCharacterSetSource(), - aCharSetID); + charset); } } nsresult nsHTMLDocument::StartDocumentLoad(const char* aCommand, nsIChannel* aChannel, nsILoadGroup* aLoadGroup, nsISupports* aContainer, @@ -666,48 +677,45 @@ nsHTMLDocument::StartDocumentLoad(const nsAutoCString urlSpec; uri->GetSpec(urlSpec); #ifdef DEBUG_charset printf("Determining charset for %s\n", urlSpec.get()); #endif // These are the charset source and charset for our document int32_t charsetSource; - nsAutoCString charset; + auto encoding = UTF_8_ENCODING; // These are the charset source and charset for the parser. This can differ // from that for the document if the channel is a wyciwyg channel. int32_t parserCharsetSource; - nsAutoCString parserCharset; + auto parserCharset = UTF_8_ENCODING; nsCOMPtr<nsIWyciwygChannel> wyciwygChannel; // For error reporting and referrer policy setting nsHtml5TreeOpExecutor* executor = nullptr; if (loadAsHtml5) { executor = static_cast<nsHtml5TreeOpExecutor*> (mParser->GetContentSink()); if (mReferrerPolicySet) { // CSP may have set the referrer policy, so a speculative parser should // start with the new referrer policy. executor->SetSpeculationReferrerPolicy(static_cast<ReferrerPolicy>(mReferrerPolicy)); } } if (forceUtf8) { charsetSource = kCharsetFromUtf8OnlyMime; - charset.AssignLiteral("UTF-8"); parserCharsetSource = charsetSource; - parserCharset = charset; } else if (!IsHTMLDocument() || !docShell) { // no docshell for text/html XHR charsetSource = IsHTMLDocument() ? kCharsetFromFallback : kCharsetFromDocTypeDefault; - charset.AssignLiteral("UTF-8"); - TryChannelCharset(aChannel, charsetSource, charset, executor); + TryChannelCharset(aChannel, charsetSource, encoding, executor); + parserCharset = encoding; parserCharsetSource = charsetSource; - parserCharset = charset; } else { NS_ASSERTION(docShell, "Unexpected null value"); charsetSource = kCharsetUninitialized; wyciwygChannel = do_QueryInterface(aChannel); // The following will try to get the character encoding from various // sources. Each Try* function will return early if the source is already @@ -721,76 +729,83 @@ nsHTMLDocument::StartDocumentLoad(const if (!wyciwygChannel) { // Otherwise, try the channel's charset (e.g., charset from HTTP // "Content-Type" header) first. This way, we get to reject overrides in // TryParentCharset and TryUserForcedCharset if the channel said UTF-16. // This is to avoid socially engineered XSS by adding user-supplied // content to a UTF-16 site such that the byte have a dangerous // interpretation as ASCII and the user can be lured to using the // charset menu. - TryChannelCharset(aChannel, charsetSource, charset, executor); + TryChannelCharset(aChannel, charsetSource, encoding, executor); } - TryUserForcedCharset(cv, docShell, charsetSource, charset); - - TryHintCharset(cv, charsetSource, charset); // XXX mailnews-only - TryParentCharset(docShell, charsetSource, charset); + TryUserForcedCharset(cv, docShell, charsetSource, encoding); + + TryHintCharset(cv, charsetSource, encoding); // XXX mailnews-only + TryParentCharset(docShell, charsetSource, encoding); if (cachingChan && !urlSpec.IsEmpty()) { - TryCacheCharset(cachingChan, charsetSource, charset); + TryCacheCharset(cachingChan, charsetSource, encoding); } - TryTLD(charsetSource, charset); - TryFallback(charsetSource, charset); + TryTLD(charsetSource, encoding); + TryFallback(charsetSource, encoding); if (wyciwygChannel) { // We know for sure that the parser needs to be using UTF16. - parserCharset = "UTF-16LE"; + parserCharset = UTF_16LE_ENCODING; parserCharsetSource = charsetSource < kCharsetFromChannel ? kCharsetFromChannel : charsetSource; nsAutoCString cachedCharset; int32_t cachedSource; rv = wyciwygChannel->GetCharsetAndSource(&cachedSource, cachedCharset); if (NS_SUCCEEDED(rv)) { if (cachedSource > charsetSource) { - charsetSource = cachedSource; - charset = cachedCharset; + auto cachedEncoding = Encoding::ForLabel(cachedCharset); + if (!cachedEncoding && cachedCharset.EqualsLiteral("replacement")) { + cachedEncoding = REPLACEMENT_ENCODING; + } + if (cachedEncoding) { + charsetSource = cachedSource; + encoding = WrapNotNull(cachedEncoding); + } } } else { // Don't propagate this error. rv = NS_OK; } - } else { - parserCharset = charset; + parserCharset = encoding; parserCharsetSource = charsetSource; } } SetDocumentCharacterSetSource(charsetSource); - SetDocumentCharacterSet(charset); + SetDocumentCharacterSet(encoding); if (cachingChan) { - NS_ASSERTION(charset == parserCharset, + NS_ASSERTION(encoding == parserCharset, "How did those end up different here? wyciwyg channels are " "not nsICachingChannel"); + nsAutoCString charset; + encoding->Name(charset); rv = cachingChan->SetCacheTokenCachedCharset(charset); NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "cannot SetMetaDataElement"); rv = NS_OK; // don't propagate error } // Set the parser as the stream listener for the document loader... rv = NS_OK; nsCOMPtr<nsIStreamListener> listener = mParser->GetStreamListener(); listener.forget(aDocListener); #ifdef DEBUG_charset printf(" charset = %s source %d\n", - charset.get(), charsetSource); + charset.get(), charsetSource); #endif mParser->SetDocumentCharset(parserCharset, parserCharsetSource); mParser->SetCommand(aCommand); if (!IsHTMLDocument()) { MOZ_ASSERT(!loadAsHtml5); nsCOMPtr<nsIXMLContentSink> xmlsink; NS_NewXMLContentSink(getter_AddRefs(xmlsink), this, uri, @@ -2411,18 +2426,19 @@ nsHTMLDocument::CreateAndAddWyciwygChann mWyciwygChannel = do_QueryInterface(channel); mWyciwygChannel->SetSecurityInfo(mSecurityInfo); // Note: we want to treat this like a "previous document" hint so that, // e.g. a <meta> tag in the document.write content can override it. SetDocumentCharacterSetSource(kCharsetFromHintPrevDoc); - mWyciwygChannel->SetCharsetAndSource(kCharsetFromHintPrevDoc, - GetDocumentCharacterSet()); + nsAutoCString charset; + GetDocumentCharacterSet()->Name(charset); + mWyciwygChannel->SetCharsetAndSource(kCharsetFromHintPrevDoc, charset); // Inherit load flags from the original document's channel channel->SetLoadFlags(mLoadFlags); nsCOMPtr<nsILoadGroup> loadGroup = GetDocumentLoadGroup(); // Use the Parent document's loadgroup to trigger load notifications if (loadGroup && channel) { @@ -3737,17 +3753,18 @@ nsHTMLDocument::WillIgnoreCharsetOverrid { if (mType != eHTML) { MOZ_ASSERT(mType == eXHTML); return true; } if (mCharacterSetSource >= kCharsetFromByteOrderMark) { return true; } - if (!IsAsciiCompatible(mCharacterSet)) { + if (!mCharacterSet->IsAsciiCompatible() && + mCharacterSet != ISO_2022_JP_ENCODING) { return true; } nsCOMPtr<nsIWyciwygChannel> wyciwyg = do_QueryInterface(mChannel); if (wyciwyg) { return true; } nsIURI* uri = GetOriginalURI(); if (uri) {
--- a/dom/html/nsHTMLDocument.h +++ b/dom/html/nsHTMLDocument.h @@ -316,31 +316,34 @@ protected: /** # of forms in the document, synchronously set */ int32_t mNumForms; static uint32_t gWyciwygSessionCnt; static void TryHintCharset(nsIContentViewer* aContentViewer, int32_t& aCharsetSource, - nsACString& aCharset); + NotNull<const Encoding*>& aEncoding); void TryUserForcedCharset(nsIContentViewer* aCv, nsIDocShell* aDocShell, int32_t& aCharsetSource, - nsACString& aCharset); + NotNull<const Encoding*>& aEncoding); static void TryCacheCharset(nsICachingChannel* aCachingChannel, - int32_t& aCharsetSource, - nsACString& aCharset); + int32_t& aCharsetSource, + NotNull<const Encoding*>& aEncoding); void TryParentCharset(nsIDocShell* aDocShell, - int32_t& charsetSource, nsACString& aCharset); - void TryTLD(int32_t& aCharsetSource, nsACString& aCharset); - static void TryFallback(int32_t& aCharsetSource, nsACString& aCharset); + int32_t& charsetSource, + NotNull<const Encoding*>& aEncoding); + void TryTLD(int32_t& aCharsetSource, NotNull<const Encoding*>& aCharset); + static void TryFallback(int32_t& aCharsetSource, + NotNull<const Encoding*>& aEncoding); // Override so we can munge the charset on our wyciwyg channel as needed. - virtual void SetDocumentCharacterSet(const nsACString& aCharSetID) override; + virtual void + SetDocumentCharacterSet(NotNull<const Encoding*> aEncoding) override; // Tracks if we are currently processing any document.write calls (either // implicit or explicit). Note that if a write call writes out something which // would block the parser, then mWriteLevel will be incorrect until the parser // finishes processing that script. uint32_t mWriteLevel; // Load flags of the document's channel
--- a/dom/notification/Notification.cpp +++ b/dom/notification/Notification.cpp @@ -1,16 +1,17 @@ /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* vim: set ts=8 sts=2 et sw=2 tw=80: */ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #include "mozilla/dom/Notification.h" +#include "mozilla/Encoding.h" #include "mozilla/JSONWriter.h" #include "mozilla/Move.h" #include "mozilla/OwningNonNull.h" #include "mozilla/Preferences.h" #include "mozilla/Services.h" #include "mozilla/Telemetry.h" #include "mozilla/Unused.h" @@ -1917,44 +1918,44 @@ Notification::ResolveIconAndSoundURL(nsS // XXXnsm If I understand correctly, the character encoding for resolving // URIs in new specs is dictated by the URL spec, which states that unless // the URL parser is passed an override encoding, the charset to be used is // UTF-8. The new Notification icon/sound specification just says to use the // Fetch API, where the Request constructor defers to URL parsing specifying // the API base URL and no override encoding. So we've to use UTF-8 on // workers, but for backwards compat keeping it document charset on main // thread. - const char* charset = "UTF-8"; + auto encoding = UTF_8_ENCODING; if (mWorkerPrivate) { baseUri = mWorkerPrivate->GetBaseURI(); } else { nsIDocument* doc = GetOwner() ? GetOwner()->GetExtantDoc() : nullptr; if (doc) { baseUri = doc->GetBaseURI(); - charset = doc->GetDocumentCharacterSet().get(); + encoding = doc->GetDocumentCharacterSet(); } else { NS_WARNING("No document found for main thread notification!"); return NS_ERROR_FAILURE; } } if (baseUri) { if (mIconUrl.Length() > 0) { nsCOMPtr<nsIURI> srcUri; - rv = NS_NewURI(getter_AddRefs(srcUri), mIconUrl, charset, baseUri); + rv = NS_NewURI(getter_AddRefs(srcUri), mIconUrl, encoding, baseUri); if (NS_SUCCEEDED(rv)) { nsAutoCString src; srcUri->GetSpec(src); iconUrl = NS_ConvertUTF8toUTF16(src); } } if (mBehavior.mSoundFile.Length() > 0) { nsCOMPtr<nsIURI> srcUri; - rv = NS_NewURI(getter_AddRefs(srcUri), mBehavior.mSoundFile, charset, baseUri); + rv = NS_NewURI(getter_AddRefs(srcUri), mBehavior.mSoundFile, encoding, baseUri); if (NS_SUCCEEDED(rv)) { nsAutoCString src; srcUri->GetSpec(src); soundUrl = NS_ConvertUTF8toUTF16(src); } } }
--- a/dom/presentation/PresentationRequest.cpp +++ b/dom/presentation/PresentationRequest.cpp @@ -42,21 +42,23 @@ NS_INTERFACE_MAP_END_INHERITING(DOMEvent static nsresult GetAbsoluteURL(const nsAString& aUrl, nsIURI* aBaseUri, nsIDocument* aDocument, nsAString& aAbsoluteUrl) { nsCOMPtr<nsIURI> uri; - nsresult rv = NS_NewURI(getter_AddRefs(uri), - aUrl, - aDocument ? aDocument->GetDocumentCharacterSet().get() - : nullptr, - aBaseUri); + nsresult rv; + if (aDocument) { + rv = NS_NewURI(getter_AddRefs(uri), aUrl, + aDocument->GetDocumentCharacterSet(), aBaseUri); + } else { + rv = NS_NewURI(getter_AddRefs(uri), aUrl, nullptr, aBaseUri); + } if (NS_FAILED(rv)) { return rv; } nsAutoCString spec; uri->GetSpec(spec);
--- a/dom/script/ScriptLoadHandler.cpp +++ b/dom/script/ScriptLoadHandler.cpp @@ -223,18 +223,17 @@ ScriptLoadHandler::EnsureDecoder(nsIIncr if ((encoding = Encoding::ForLabel(hintCharset))) { mDecoder = encoding->NewDecoderWithoutBOMHandling(); encoding->Name(oCharset); return true; } // Get the charset from the charset of the document. if (mScriptLoader->mDocument) { - encoding = - Encoding::ForName(mScriptLoader->mDocument->GetDocumentCharacterSet()); + encoding = mScriptLoader->mDocument->GetDocumentCharacterSet(); mDecoder = encoding->NewDecoderWithoutBOMHandling(); encoding->Name(oCharset); return true; } // Curiously, there are various callers that don't pass aDocument. The // fallback in the old code was ISO-8859-1, which behaved like // windows-1252.
--- a/dom/script/ScriptLoader.cpp +++ b/dom/script/ScriptLoader.cpp @@ -2559,17 +2559,17 @@ ScriptLoader::ConvertToUTF16(nsIChannel* } } if (!unicodeDecoder && (encoding = Encoding::ForLabel(aHintCharset))) { unicodeDecoder = encoding->NewDecoderWithoutBOMHandling(); } if (!unicodeDecoder && aDocument) { - unicodeDecoder = Encoding::ForName(aDocument->GetDocumentCharacterSet()) + unicodeDecoder = aDocument->GetDocumentCharacterSet() ->NewDecoderWithoutBOMHandling(); } if (!unicodeDecoder) { // Curiously, there are various callers that don't pass aDocument. The // fallback in the old code was ISO-8859-1, which behaved like // windows-1252. unicodeDecoder = WINDOWS_1252_ENCODING->NewDecoderWithoutBOMHandling();
--- a/dom/webbrowserpersist/WebBrowserPersistLocalDocument.cpp +++ b/dom/webbrowserpersist/WebBrowserPersistLocalDocument.cpp @@ -132,17 +132,17 @@ WebBrowserPersistLocalDocument::GetConte NS_ENSURE_SUCCESS(rv, rv); aContentType = NS_ConvertUTF16toUTF8(utf16Type); return NS_OK; } NS_IMETHODIMP WebBrowserPersistLocalDocument::GetCharacterSet(nsACString& aCharSet) { - aCharSet = GetCharacterSet(); + GetCharacterSet()->Name(aCharSet); return NS_OK; } NS_IMETHODIMP WebBrowserPersistLocalDocument::GetTitle(nsAString& aTitle) { nsAutoString titleBuffer; mDocument->GetTitle(titleBuffer); @@ -231,17 +231,17 @@ WebBrowserPersistLocalDocument::GetHisto // This can fail if, e.g., the document is a Print Preview. if (NS_FAILED(rv) || NS_WARN_IF(!curDesc)) { return nullptr; } nsCOMPtr<nsISHEntry> history = do_QueryInterface(curDesc); return history.forget(); } -const nsCString& +NotNull<const Encoding*> WebBrowserPersistLocalDocument::GetCharacterSet() const { return mDocument->GetDocumentCharacterSet(); } uint32_t WebBrowserPersistLocalDocument::GetPersistFlags() const { @@ -392,17 +392,17 @@ ResourceReader::OnWalkURI(nsIURI* aURI) nsresult ResourceReader::OnWalkURI(const nsACString& aURISpec) { nsresult rv; nsCOMPtr<nsIURI> uri; rv = NS_NewURI(getter_AddRefs(uri), aURISpec, - mParent->GetCharacterSet().get(), + mParent->GetCharacterSet(), mCurrentBaseURI); NS_ENSURE_SUCCESS(rv, rv); return OnWalkURI(uri); } static nsresult ExtractAttribute(nsIDOMNode* aNode, const char* aAttribute, @@ -547,17 +547,17 @@ ResourceReader::OnWalkDOMNode(nsIDOMNode // codebase (which is resolved relative to the base URI). nsCOMPtr<nsIURI> oldBase = mCurrentBaseURI; nsAutoString codebase; rv = nodeAsApplet->GetCodeBase(codebase); NS_ENSURE_SUCCESS(rv, rv); if (!codebase.IsEmpty()) { nsCOMPtr<nsIURI> baseURI; rv = NS_NewURI(getter_AddRefs(baseURI), codebase, - mParent->GetCharacterSet().get(), mCurrentBaseURI); + mParent->GetCharacterSet(), mCurrentBaseURI); NS_ENSURE_SUCCESS(rv, rv); if (baseURI) { mCurrentBaseURI = baseURI; // Must restore this before returning (or ENSURE'ing). } } // We only store 'code' locally if there is no 'archive', @@ -712,17 +712,17 @@ PersistNodeFixup::GetNodeToFixup(nsIDOMN } nsresult PersistNodeFixup::FixupURI(nsAString &aURI) { // get the current location of the file (absolutized) nsCOMPtr<nsIURI> uri; nsresult rv = NS_NewURI(getter_AddRefs(uri), aURI, - mParent->GetCharacterSet().get(), mCurrentBaseURI); + mParent->GetCharacterSet(), mCurrentBaseURI); NS_ENSURE_SUCCESS(rv, rv); nsAutoCString spec; rv = uri->GetSpec(spec); NS_ENSURE_SUCCESS(rv, rv); const nsCString* replacement = mMap.Get(spec); if (!replacement) { // Note that most callers ignore this "failure". @@ -799,17 +799,17 @@ PersistNodeFixup::FixupAnchor(nsIDOMNode } nsCOMPtr<nsIURI> relativeURI; relativeURI = IsFlagSet(IWBP::PERSIST_FLAGS_FIXUP_LINKS_TO_DESTINATION) ? mTargetBaseURI : mCurrentBaseURI; // Make a new URI to replace the current one nsCOMPtr<nsIURI> newURI; rv = NS_NewURI(getter_AddRefs(newURI), oldCValue, - mParent->GetCharacterSet().get(), relativeURI); + mParent->GetCharacterSet(), relativeURI); if (NS_SUCCEEDED(rv) && newURI) { newURI->SetUserPass(EmptyCString()); nsAutoCString uriSpec; rv = newURI->GetSpec(uriSpec); NS_ENSURE_SUCCESS(rv, rv); attr->SetValue(NS_ConvertUTF8toUTF16(uriSpec)); } } @@ -1125,17 +1125,17 @@ PersistNodeFixup::FixupNode(nsIDOMNode * // For an applet, relative URIs are resolved relative to the // codebase (which is resolved relative to the base URI). nsCOMPtr<nsIURI> oldBase = mCurrentBaseURI; nsAutoString codebase; nodeAsApplet->GetCodeBase(codebase); if (!codebase.IsEmpty()) { nsCOMPtr<nsIURI> baseURI; NS_NewURI(getter_AddRefs(baseURI), codebase, - mParent->GetCharacterSet().get(), mCurrentBaseURI); + mParent->GetCharacterSet(), mCurrentBaseURI); if (baseURI) { mCurrentBaseURI = baseURI; } } // Unset the codebase too, since we'll correctly relativize the // code and archive paths. IgnoredErrorResult ignored; static_cast<dom::HTMLSharedObjectElement*>(newApplet.get())->
--- a/dom/webbrowserpersist/WebBrowserPersistLocalDocument.h +++ b/dom/webbrowserpersist/WebBrowserPersistLocalDocument.h @@ -19,17 +19,17 @@ class nsISHEntry; namespace mozilla { class WebBrowserPersistLocalDocument final : public nsIWebBrowserPersistDocument { public: explicit WebBrowserPersistLocalDocument(nsIDocument* aDocument); - const nsCString& GetCharacterSet() const; + NotNull<const Encoding*> GetCharacterSet() const; uint32_t GetPersistFlags() const; already_AddRefed<nsIURI> GetBaseURI() const; NS_DECL_CYCLE_COLLECTING_ISUPPORTS NS_DECL_NSIWEBBROWSERPERSISTDOCUMENT NS_DECL_CYCLE_COLLECTION_CLASS(WebBrowserPersistLocalDocument)
--- a/dom/xbl/nsXBLPrototypeBinding.cpp +++ b/dom/xbl/nsXBLPrototypeBinding.cpp @@ -1627,17 +1627,17 @@ nsXBLPrototypeBinding::ResolveBaseBindin } } if (hasDisplay || nameSpace.IsEmpty()) { mBinding->UnsetAttr(kNameSpaceID_None, nsGkAtoms::extends, false); mBinding->UnsetAttr(kNameSpaceID_None, nsGkAtoms::display, false); return NS_NewURI(getter_AddRefs(mBaseBindingURI), value, - doc->GetDocumentCharacterSet().get(), + doc->GetDocumentCharacterSet(), doc->GetDocBaseURI()); } return NS_OK; } void nsXBLPrototypeBinding::EnsureResources()
--- a/dom/xbl/nsXBLResourceLoader.cpp +++ b/dom/xbl/nsXBLResourceLoader.cpp @@ -108,17 +108,17 @@ nsXBLResourceLoader::LoadResources(nsICo nsCOMPtr<nsIURI> url; for (nsXBLResource* curr = mResourceList; curr; curr = curr->mNext) { if (curr->mSrc.IsEmpty()) continue; if (NS_FAILED(NS_NewURI(getter_AddRefs(url), curr->mSrc, - doc->GetDocumentCharacterSet().get(), docURL))) + doc->GetDocumentCharacterSet(), docURL))) continue; if (curr->mType == nsGkAtoms::image) { // Now kick off the image load... // Passing nullptr for pretty much everything -- cause we don't care! // XXX: initialDocumentURI is nullptr! RefPtr<imgRequestProxy> req; nsContentUtils::LoadImage(url, doc, doc, docPrincipal, docURL,
--- a/dom/xhr/XMLHttpRequestMainThread.cpp +++ b/dom/xhr/XMLHttpRequestMainThread.cpp @@ -173,16 +173,17 @@ static void AddLoadFlags(nsIRequest *req // ///////////////////////////////////////////// bool XMLHttpRequestMainThread::sDontWarnAboutSyncXHR = false; XMLHttpRequestMainThread::XMLHttpRequestMainThread() : mResponseBodyDecodedPos(0), + mResponseCharset(nullptr), mResponseType(XMLHttpRequestResponseType::_empty), mRequestObserver(nullptr), mState(State::unsent), mFlagSynchronous(false), mFlagAborted(false), mFlagParseBody(false), mFlagSyncLooping(false), mFlagBackgroundRequest(false), mFlagHadUploadListenersOnSend(false), mFlagACwithCredentials(false), mFlagTimedOut(false), mFlagDeleted(false), mFlagSend(false), mUploadTransferred(0), mUploadTotal(0), mUploadComplete(true), @@ -487,17 +488,17 @@ XMLHttpRequestMainThread::GetResponseXML /* * This piece copied from XMLDocument, we try to get the charset * from HTTP headers. */ nsresult XMLHttpRequestMainThread::DetectCharset() { - mResponseCharset.Truncate(); + mResponseCharset = nullptr; mDecoder = nullptr; if (mResponseType != XMLHttpRequestResponseType::_empty && mResponseType != XMLHttpRequestResponseType::Text && mResponseType != XMLHttpRequestResponseType::Json && mResponseType != XMLHttpRequestResponseType::Moz_chunked_text) { return NS_OK; } @@ -514,17 +515,17 @@ XMLHttpRequestMainThread::DetectCharset( if (mResponseType == XMLHttpRequestResponseType::Json && encoding != UTF_8_ENCODING) { // The XHR spec says only UTF-8 is supported for responseType == "json" LogMessage("JSONCharsetWarning", GetOwner()); encoding = UTF_8_ENCODING; } - encoding->Name(mResponseCharset); + mResponseCharset = encoding; mDecoder = encoding->NewDecoderWithBOMRemoval(); return NS_OK; } nsresult XMLHttpRequestMainThread::AppendToResponseText(const char * aSrcBuffer, uint32_t aSrcBufferLen) @@ -2426,17 +2427,17 @@ XMLHttpRequestMainThread::OnBodyParseEnd void XMLHttpRequestMainThread::MatchCharsetAndDecoderToResponseDocument() { if (mResponseXML && mResponseCharset != mResponseXML->GetDocumentCharacterSet()) { mResponseCharset = mResponseXML->GetDocumentCharacterSet(); TruncateResponseText(); mResponseBodyDecodedPos = 0; - mDecoder = Encoding::ForName(mResponseCharset)->NewDecoderWithBOMRemoval(); + mDecoder = mResponseCharset->NewDecoderWithBOMRemoval(); } } void XMLHttpRequestMainThread::ChangeStateToDone() { StopProgressEventTimer();
--- a/dom/xhr/XMLHttpRequestMainThread.h +++ b/dom/xhr/XMLHttpRequestMainThread.h @@ -703,17 +703,17 @@ protected: // Decoder used for decoding into mResponseText // Only used for DEFAULT, TEXT and JSON responseTypes. // In cases where we've only received half a surrogate, the decoder itself // carries the state to remember this. Next time we receive more data we // simply feed the new data into the decoder which will handle the second // part of the surrogate. mozilla::UniquePtr<mozilla::Decoder> mDecoder; - nsCString mResponseCharset; + const Encoding* mResponseCharset; void MatchCharsetAndDecoderToResponseDocument(); XMLHttpRequestResponseType mResponseType; // It is either a cached blob-response from the last call to GetResponse, // but is also explicitly set in OnStopRequest. RefPtr<Blob> mResponseBlob;
--- a/dom/xml/XMLDocument.cpp +++ b/dom/xml/XMLDocument.cpp @@ -146,17 +146,17 @@ NS_NewDOMDocument(nsIDOMDocument** aInst if (nsCOMPtr<nsIScriptGlobalObject> sgo = do_QueryInterface(aEventObject)) { d->SetScriptHandlingObject(sgo); } else if (aEventObject){ d->SetScopeObject(aEventObject); } // XMLDocuments and documents "created in memory" get to be UTF-8 by default, // unlike the legacy HTML mess - doc->SetDocumentCharacterSet(NS_LITERAL_CSTRING("UTF-8")); + doc->SetDocumentCharacterSet(UTF_8_ENCODING); if (aDoctype) { nsCOMPtr<nsINode> doctypeAsNode = do_QueryInterface(aDoctype); ErrorResult result; d->AppendChild(*doctypeAsNode, result); if (NS_WARN_IF(result.Failed())) { return result.StealNSResult(); } @@ -317,17 +317,17 @@ XMLDocument::Load(const nsAString& aUrl, docForWarning->WarnOnceAbout(nsIDocument::eUseOfDOM3LoadMethod); } nsIURI *baseURI = mDocumentURI; nsAutoCString charset; if (callingDoc) { baseURI = callingDoc->GetDocBaseURI(); - charset = callingDoc->GetDocumentCharacterSet(); + callingDoc->GetDocumentCharacterSet()->Name(charset); } // Create a new URI nsCOMPtr<nsIURI> uri; nsresult rv = NS_NewURI(getter_AddRefs(uri), aUrl, charset.get(), baseURI); if (NS_FAILED(rv)) { aRv.Throw(rv); return false; @@ -525,18 +525,18 @@ XMLDocument::StartDocumentLoad(const cha if (nsCRT::strcmp("loadAsInteractiveData", aCommand) == 0) { mLoadedAsInteractiveData = true; aCommand = kLoadAsData; // XBL, for example, needs scripts and styles } int32_t charsetSource = kCharsetFromDocTypeDefault; - nsAutoCString charset(NS_LITERAL_CSTRING("UTF-8")); - TryChannelCharset(aChannel, charsetSource, charset, nullptr); + NotNull<const Encoding*> encoding = UTF_8_ENCODING; + TryChannelCharset(aChannel, charsetSource, encoding, nullptr); nsCOMPtr<nsIURI> aUrl; rv = aChannel->GetURI(getter_AddRefs(aUrl)); if (NS_FAILED(rv)) return rv; static NS_DEFINE_CID(kCParserCID, NS_PARSER_CID); mParser = do_CreateInstance(kCParserCID, &rv); @@ -560,18 +560,18 @@ XMLDocument::StartDocumentLoad(const cha // Set the parser as the stream listener for the document loader... rv = CallQueryInterface(mParser, aDocListener); NS_ENSURE_SUCCESS(rv, rv); NS_ASSERTION(mChannel, "How can we not have a channel here?"); mChannelIsPending = true; - SetDocumentCharacterSet(charset); - mParser->SetDocumentCharset(charset, charsetSource); + SetDocumentCharacterSet(encoding); + mParser->SetDocumentCharset(encoding, charsetSource); mParser->SetCommand(aCommand); mParser->SetContentSink(sink); mParser->Parse(aUrl, nullptr, (void *)this); return NS_OK; } void
--- a/dom/xml/XMLStylesheetProcessingInstruction.cpp +++ b/dom/xml/XMLStylesheetProcessingInstruction.cpp @@ -109,25 +109,24 @@ XMLStylesheetProcessingInstruction::GetS *aIsInline = false; nsAutoString href; if (!GetAttrValue(nsGkAtoms::href, href)) { return nullptr; } nsIURI *baseURL; - nsAutoCString charset; nsIDocument *document = OwnerDoc(); baseURL = mOverriddenBaseURI ? mOverriddenBaseURI.get() : document->GetDocBaseURI(); - charset = document->GetDocumentCharacterSet(); + auto encoding = document->GetDocumentCharacterSet(); nsCOMPtr<nsIURI> aURI; - NS_NewURI(getter_AddRefs(aURI), href, charset.get(), baseURL); + NS_NewURI(getter_AddRefs(aURI), href, encoding, baseURL); return aURI.forget(); } void XMLStylesheetProcessingInstruction::GetStyleSheetInfo(nsAString& aTitle, nsAString& aType, nsAString& aMedia, bool* aIsScoped,
--- a/dom/xml/nsXMLContentSink.cpp +++ b/dom/xml/nsXMLContentSink.cpp @@ -726,24 +726,22 @@ nsXMLContentSink::ProcessStyleLink(nsICo aTitle, aType, aMedia); // nsContentSink::ProcessStyleLink handles the bookkeeping here wrt // pending sheets. return rv; } -NS_IMETHODIMP -nsXMLContentSink::SetDocumentCharset(nsACString& aCharset) +void +nsXMLContentSink::SetDocumentCharset(NotNull<const Encoding*> aEncoding) { if (mDocument) { - mDocument->SetDocumentCharacterSet(aCharset); + mDocument->SetDocumentCharacterSet(aEncoding); } - - return NS_OK; } nsISupports * nsXMLContentSink::GetTarget() { return mDocument; }
--- a/dom/xml/nsXMLContentSink.h +++ b/dom/xml/nsXMLContentSink.h @@ -65,17 +65,17 @@ public: // nsIContentSink NS_IMETHOD WillParse(void) override; NS_IMETHOD WillBuildModel(nsDTDMode aDTDMode) override; NS_IMETHOD DidBuildModel(bool aTerminated) override; NS_IMETHOD WillInterrupt(void) override; NS_IMETHOD WillResume(void) override; NS_IMETHOD SetParser(nsParserBase* aParser) override; virtual void FlushPendingNotifications(mozilla::FlushType aType) override; - NS_IMETHOD SetDocumentCharset(nsACString& aCharset) override; + virtual void SetDocumentCharset(NotNull<const Encoding*> aEncoding) override; virtual nsISupports *GetTarget() override; virtual bool IsScriptExecuting() override; virtual void ContinueInterruptedParsingAsync() override; // nsITransformObserver NS_IMETHOD OnDocumentCreated(nsIDocument *aResultDocument) override; NS_IMETHOD OnTransformDone(nsresult aResult, nsIDocument *aResultDocument) override;
--- a/dom/xml/nsXMLFragmentContentSink.cpp +++ b/dom/xml/nsXMLFragmentContentSink.cpp @@ -57,17 +57,17 @@ public: NS_IMETHOD ReportError(const char16_t* aErrorText, const char16_t* aSourceText, nsIScriptError* aError, bool* aRetval) override; // nsIContentSink NS_IMETHOD WillBuildModel(nsDTDMode aDTDMode) override; NS_IMETHOD DidBuildModel(bool aTerminated) override; - NS_IMETHOD SetDocumentCharset(nsACString& aCharset) override; + virtual void SetDocumentCharset(NotNull<const Encoding*> aEncoding) override; virtual nsISupports* GetTarget() override; NS_IMETHOD DidProcessATokenImpl(); // nsIXMLContentSink // nsIFragmentContentSink NS_IMETHOD FinishFragmentParsing(nsIDOMDocumentFragment** aFragment) override; NS_IMETHOD SetTargetDocument(nsIDocument* aDocument) override; @@ -168,21 +168,21 @@ nsXMLFragmentContentSink::DidBuildModel( { // Drop our reference to the parser to get rid of a circular // reference. mParser = nullptr; return NS_OK; } -NS_IMETHODIMP -nsXMLFragmentContentSink::SetDocumentCharset(nsACString& aCharset) +void +nsXMLFragmentContentSink::SetDocumentCharset( + NotNull<const Encoding*> aEncoding) { NS_NOTREACHED("fragments shouldn't set charset"); - return NS_OK; } nsISupports * nsXMLFragmentContentSink::GetTarget() { return mTargetDocument; }
--- a/dom/xslt/xslt/txMozillaStylesheetCompiler.cpp +++ b/dom/xslt/xslt/txMozillaStylesheetCompiler.cpp @@ -79,17 +79,18 @@ public: // nsIContentSink NS_IMETHOD WillParse(void) override { return NS_OK; } NS_IMETHOD DidBuildModel(bool aTerminated) override; NS_IMETHOD WillInterrupt(void) override { return NS_OK; } NS_IMETHOD WillResume(void) override { return NS_OK; } NS_IMETHOD SetParser(nsParserBase* aParser) override { return NS_OK; } virtual void FlushPendingNotifications(mozilla::FlushType aType) override { } - NS_IMETHOD SetDocumentCharset(nsACString& aCharset) override { return NS_OK; } + virtual void SetDocumentCharset(NotNull<const Encoding*> aEncoding) + override { } virtual nsISupports *GetTarget() override { return nullptr; } private: RefPtr<txStylesheetCompiler> mCompiler; nsCOMPtr<nsIStreamListener> mListener; nsCOMPtr<nsIParser> mParser; bool mCheckedForXML; @@ -264,19 +265,17 @@ txStylesheetSink::OnStartRequest(nsIRequ charsetSource = kCharsetFromChannel; } } if (!encoding) { encoding = UTF_8_ENCODING; } - nsAutoCString charset; - encoding->Name(charset); - mParser->SetDocumentCharset(charset, charsetSource); + mParser->SetDocumentCharset(WrapNotNull(encoding), charsetSource); nsAutoCString contentType; channel->GetContentType(contentType); // Time to sniff! Note: this should go away once file channels do // sniffing themselves. nsCOMPtr<nsIURI> uri; channel->GetURI(getter_AddRefs(uri));
--- a/dom/xslt/xslt/txMozillaTextOutput.cpp +++ b/dom/xslt/xslt/txMozillaTextOutput.cpp @@ -158,19 +158,17 @@ txMozillaTextOutput::createResultDocumen // correct principal. mDocument->SetScriptHandlingObject(sgo); // Set the charset if (!mOutputFormat.mEncoding.IsEmpty()) { const Encoding* encoding = Encoding::ForLabel(mOutputFormat.mEncoding); if (encoding) { mDocument->SetDocumentCharacterSetSource(kCharsetFromOtherComponent); - nsAutoCString canonicalCharset; - encoding->Name(canonicalCharset); - mDocument->SetDocumentCharacterSet(canonicalCharset); + mDocument->SetDocumentCharacterSet(WrapNotNull(encoding)); } } // Notify the contentsink that the document is created nsCOMPtr<nsITransformObserver> observer = do_QueryReferent(mObserver); if (observer) { rv = observer->OnDocumentCreated(mDocument); NS_ENSURE_SUCCESS(rv, rv);
--- a/dom/xslt/xslt/txMozillaXMLOutput.cpp +++ b/dom/xslt/xslt/txMozillaXMLOutput.cpp @@ -818,19 +818,17 @@ txMozillaXMLOutput::createResultDocument // source, so that we have the right principal. mDocument->SetScriptHandlingObject(sgo); // Set the charset if (!mOutputFormat.mEncoding.IsEmpty()) { const Encoding* encoding = Encoding::ForLabel(mOutputFormat.mEncoding); if (encoding) { mDocument->SetDocumentCharacterSetSource(kCharsetFromOtherComponent); - nsAutoCString canonicalCharset; - encoding->Name(canonicalCharset); - mDocument->SetDocumentCharacterSet(canonicalCharset); + mDocument->SetDocumentCharacterSet(WrapNotNull(encoding)); } } // Set the mime-type if (!mOutputFormat.mMediaType.IsEmpty()) { mDocument->SetContentType(mOutputFormat.mMediaType); } else if (mOutputFormat.mMethod == eHTMLOutput) {
--- a/dom/xul/XULDocument.cpp +++ b/dom/xul/XULDocument.cpp @@ -208,17 +208,17 @@ XULDocument::XULDocument(void) mOffThreadCompileStringLength(0), mResolutionPhase(nsForwardReference::eStart), mBroadcasterMap(nullptr), mInitialLayoutComplete(false), mHandlingDelayedAttrChange(false), mHandlingDelayedBroadcasters(false) { // Override the default in nsDocument - mCharacterSet.AssignLiteral("UTF-8"); + mCharacterSet = UTF_8_ENCODING; mDefaultElementType = kNameSpaceID_XUL; mType = eXUL; mDelayFrameLoaderInitialization = true; mAllowXULXBL = eTriTrue; } @@ -2016,17 +2016,17 @@ XULDocument::PrepareToLoadPrototype(nsIU nsCOMPtr<nsIParser> parser = do_CreateInstance(kParserCID, &rv); NS_ASSERTION(NS_SUCCEEDED(rv), "unable to create parser"); if (NS_FAILED(rv)) return rv; parser->SetCommand(nsCRT::strcmp(aCommand, "view-source") ? eViewNormal : eViewSource); - parser->SetDocumentCharset(NS_LITERAL_CSTRING("UTF-8"), + parser->SetDocumentCharset(UTF_8_ENCODING, kCharsetFromDocTypeDefault); parser->SetContentSink(sink); // grabs a reference to the parser parser.forget(aResult); return NS_OK; }
--- a/dom/xul/nsXULContentSink.cpp +++ b/dom/xul/nsXULContentSink.cpp @@ -251,25 +251,23 @@ XULContentSinkImpl::WillResume(void) NS_IMETHODIMP XULContentSinkImpl::SetParser(nsParserBase* aParser) { mParser = aParser; return NS_OK; } -NS_IMETHODIMP -XULContentSinkImpl::SetDocumentCharset(nsACString& aCharset) +void +XULContentSinkImpl::SetDocumentCharset(NotNull<const Encoding*> aEncoding) { nsCOMPtr<nsIDocument> doc = do_QueryReferent(mDocument); if (doc) { - doc->SetDocumentCharacterSet(aCharset); + doc->SetDocumentCharacterSet(aEncoding); } - - return NS_OK; } nsISupports * XULContentSinkImpl::GetTarget() { nsCOMPtr<nsIDocument> doc = do_QueryReferent(mDocument); return doc; }
--- a/dom/xul/nsXULContentSink.h +++ b/dom/xul/nsXULContentSink.h @@ -36,17 +36,17 @@ public: // nsIContentSink NS_IMETHOD WillParse(void) override { return NS_OK; } NS_IMETHOD WillBuildModel(nsDTDMode aDTDMode) override; NS_IMETHOD DidBuildModel(bool aTerminated) override; NS_IMETHOD WillInterrupt(void) override; NS_IMETHOD WillResume(void) override; NS_IMETHOD SetParser(nsParserBase* aParser) override; virtual void FlushPendingNotifications(mozilla::FlushType aType) override { } - NS_IMETHOD SetDocumentCharset(nsACString& aCharset) override; + virtual void SetDocumentCharset(NotNull<const Encoding*> aEncoding) override; virtual nsISupports *GetTarget() override; /** * Initialize the content sink, giving it an nsIDocument object * with which to communicate with the outside world, and an * nsXULPrototypeDocument to build. */ nsresult Init(nsIDocument* aDocument, nsXULPrototypeDocument* aPrototype);
--- a/editor/libeditor/EditorBase.cpp +++ b/editor/libeditor/EditorBase.cpp @@ -1,16 +1,17 @@ /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #include "mozilla/EditorBase.h" #include "mozilla/DebugOnly.h" // for DebugOnly +#include "mozilla/Encoding.h" // for Encoding #include <stdio.h> // for nullptr, stdout #include <string.h> // for strcmp #include "ChangeAttributeTransaction.h" // for ChangeAttributeTransaction #include "CompositionTransaction.h" // for CompositionTransaction #include "CreateElementTransaction.h" // for CreateElementTransaction #include "DeleteNodeTransaction.h" // for DeleteNodeTransaction @@ -1175,28 +1176,34 @@ EditorBase::GetDocumentModified(bool* ou NS_IMETHODIMP EditorBase::GetDocumentCharacterSet(nsACString& characterSet) { nsCOMPtr<nsIDocument> document = GetDocument(); if (NS_WARN_IF(!document)) { return NS_ERROR_UNEXPECTED; } - characterSet = document->GetDocumentCharacterSet(); + document->GetDocumentCharacterSet()->Name(characterSet); return NS_OK; } NS_IMETHODIMP EditorBase::SetDocumentCharacterSet(const nsACString& characterSet) { nsCOMPtr<nsIDocument> document = GetDocument(); if (NS_WARN_IF(!document)) { return NS_ERROR_UNEXPECTED; } - document->SetDocumentCharacterSet(characterSet); + // This method is scriptable, so add-ons could pass in something other + // than a canonical name. + auto encoding = Encoding::ForLabelNoReplacement(characterSet); + if (!encoding) { + return NS_ERROR_INVALID_ARG; + } + document->SetDocumentCharacterSet(WrapNotNull(encoding)); return NS_OK; } NS_IMETHODIMP EditorBase::Cut() { return NS_ERROR_NOT_IMPLEMENTED; }
--- a/intl/locale/nsLanguageAtomService.cpp +++ b/intl/locale/nsLanguageAtomService.cpp @@ -4,16 +4,17 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #include "nsLanguageAtomService.h" #include "nsUConvPropertySearch.h" #include "nsUnicharUtils.h" #include "nsIAtom.h" #include "mozilla/ArrayUtils.h" #include "mozilla/ClearOnShutdown.h" +#include "mozilla/Encoding.h" #include "mozilla/intl/OSPreferences.h" #include "mozilla/ServoBindings.h" using namespace mozilla; using mozilla::intl::OSPreferences; static constexpr nsUConvProp encodingsGroups[] = { #include "encodingsgroups.properties.h" @@ -41,21 +42,23 @@ nsLanguageAtomService::LookupLanguage(co nsAutoCString lowered(aLanguage); ToLowerCase(lowered); nsCOMPtr<nsIAtom> lang = NS_Atomize(lowered); return GetLanguageGroup(lang); } already_AddRefed<nsIAtom> -nsLanguageAtomService::LookupCharSet(const nsACString& aCharSet) +nsLanguageAtomService::LookupCharSet(NotNull<const Encoding*> aEncoding) { + nsAutoCString charset; + aEncoding->Name(charset); nsAutoCString group; if (NS_FAILED(nsUConvPropertySearch::SearchPropertyValue( - encodingsGroups, ArrayLength(encodingsGroups), aCharSet, group))) { + encodingsGroups, ArrayLength(encodingsGroups), charset, group))) { return RefPtr<nsIAtom>(nsGkAtoms::Unicode).forget(); } return NS_Atomize(group); } nsIAtom* nsLanguageAtomService::GetLocaleLanguage() {
--- a/intl/locale/nsLanguageAtomService.h +++ b/intl/locale/nsLanguageAtomService.h @@ -6,27 +6,34 @@ /* * The nsILanguageAtomService provides a mapping from languages or charsets * to language groups, and access to the system locale language. */ #ifndef nsLanguageAtomService_h_ #define nsLanguageAtomService_h_ +#include "mozilla/NotNull.h" #include "nsCOMPtr.h" #include "nsIAtom.h" #include "nsInterfaceHashtable.h" -class nsLanguageAtomService +namespace mozilla { +class Encoding; +} + +class nsLanguageAtomService final { + using Encoding = mozilla::Encoding; + template <typename T> using NotNull = mozilla::NotNull<T>; public: static nsLanguageAtomService* GetService(); nsIAtom* LookupLanguage(const nsACString &aLanguage); - already_AddRefed<nsIAtom> LookupCharSet(const nsACString& aCharSet); + already_AddRefed<nsIAtom> LookupCharSet(NotNull<const Encoding*> aCharSet); nsIAtom* GetLocaleLanguage(); // Returns the language group that the specified language is a part of. // // aNeedsToCache is used for two things. If null, it indicates that // the nsLanguageAtomService is safe to cache the result of the // language group lookup, either because we're on the main thread, // or because we're on a style worker thread but the font lock has
--- a/layout/base/nsPresContext.cpp +++ b/layout/base/nsPresContext.cpp @@ -2,16 +2,17 @@ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ /* a presentation of a document, part 1 */ #include "mozilla/ArrayUtils.h" #include "mozilla/DebugOnly.h" +#include "mozilla/Encoding.h" #include "mozilla/EventDispatcher.h" #include "mozilla/EventStateManager.h" #include "base/basictypes.h" #include "nsCOMPtr.h" #include "nsPresContext.h" #include "nsIPresShell.h" @@ -115,32 +116,32 @@ public: }; namespace { class CharSetChangingRunnable : public Runnable { public: CharSetChangingRunnable(nsPresContext* aPresContext, - const nsCString& aCharSet) + NotNull<const Encoding*> aCharSet) : Runnable("CharSetChangingRunnable"), mPresContext(aPresContext), mCharSet(aCharSet) { } NS_IMETHOD Run() override { mPresContext->DoChangeCharSet(mCharSet); return NS_OK; } private: RefPtr<nsPresContext> mPresContext; - nsCString mCharSet; + NotNull<const Encoding*> mCharSet; }; } // namespace nscolor nsPresContext::MakeColorPref(const nsString& aColor) { nsCSSParser parser; @@ -191,25 +192,19 @@ nsPresContext::PrefChangedUpdateTimerCal { nsPresContext* presContext = (nsPresContext*)aClosure; NS_ASSERTION(presContext != nullptr, "bad instance data"); if (presContext) presContext->UpdateAfterPreferencesChanged(); } static bool -IsVisualCharset(const nsCString& aCharset) +IsVisualCharset(NotNull<const Encoding*> aCharset) { - if (aCharset.LowerCaseEqualsLiteral("ibm862") // Hebrew - || aCharset.LowerCaseEqualsLiteral("iso-8859-8") ) { // Hebrew - return true; // visual text type - } - else { - return false; // logical text type - } + return aCharset == ISO_8859_8_ENCODING; } nsPresContext::nsPresContext(nsIDocument* aDocument, nsPresContextType aType) : mType(aType), mShell(nullptr), mDocument(aDocument), mMedium(aType == eContext_Galley ? nsGkAtoms::screen : nsGkAtoms::print), mMediaEmulated(mMedium), @@ -1065,25 +1060,25 @@ nsPresContext::DetachShell() thisRoot->CancelApplyPluginGeometryTimer(); // The did-paint timer also depends on a non-null pres shell. thisRoot->CancelAllDidPaintTimers(); } } void -nsPresContext::DoChangeCharSet(const nsCString& aCharSet) +nsPresContext::DoChangeCharSet(NotNull<const Encoding*> aCharSet) { UpdateCharSet(aCharSet); mDeviceContext->FlushFontCache(); RebuildAllStyleData(NS_STYLE_HINT_REFLOW, nsRestyleHint(0)); } void -nsPresContext::UpdateCharSet(const nsCString& aCharSet) +nsPresContext::UpdateCharSet(NotNull<const Encoding*> aCharSet) { mLanguage = mLangService->LookupCharSet(aCharSet); // this will be a language group (or script) code rather than a true language code // bug 39570: moved from nsLanguageAtomService::LookupCharSet() if (mLanguage == nsGkAtoms::Unicode) { mLanguage = mLangService->GetLocaleLanguage(); } @@ -1107,18 +1102,19 @@ nsPresContext::UpdateCharSet(const nsCSt } NS_IMETHODIMP nsPresContext::Observe(nsISupports* aSubject, const char* aTopic, const char16_t* aData) { if (!nsCRT::strcmp(aTopic, "charset")) { + auto encoding = Encoding::ForName(NS_LossyConvertUTF16toASCII(aData)); RefPtr<CharSetChangingRunnable> runnable = - new CharSetChangingRunnable(this, NS_LossyConvertUTF16toASCII(aData)); + new CharSetChangingRunnable(this, encoding); return Document()->Dispatch("CharSetChangingRunnable", TaskCategory::Other, runnable.forget()); } NS_WARNING("unrecognized topic in nsPresContext::Observe"); return NS_ERROR_FAILURE; }
--- a/layout/base/nsPresContext.h +++ b/layout/base/nsPresContext.h @@ -4,16 +4,17 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ /* a presentation of a document, part 1 */ #ifndef nsPresContext_h___ #define nsPresContext_h___ #include "mozilla/Attributes.h" +#include "mozilla/NotNull.h" #include "mozilla/UniquePtr.h" #include "mozilla/WeakPtr.h" #include "nsColor.h" #include "nsCoord.h" #include "nsCOMPtr.h" #include "nsIPresShell.h" #include "nsRect.h" #include "nsFont.h" @@ -66,16 +67,17 @@ class nsTransitionManager; class nsAnimationManager; class nsRefreshDriver; class nsIWidget; class nsDeviceContext; class gfxMissingFontRecorder; namespace mozilla { class EffectCompositor; +class Encoding; class EventStateManager; class CounterStyleManager; class RestyleManager; namespace layers { class ContainerLayer; class LayerManager; } // namespace layers namespace dom { @@ -120,16 +122,18 @@ enum nsLayoutPhase { class nsRootPresContext; // An interface for presentation contexts. Presentation contexts are // objects that provide an outer context for a presentation shell. class nsPresContext : public nsIObserver, public mozilla::SupportsWeakPtr<nsPresContext> { public: + using Encoding = mozilla::Encoding; + template <typename T> using NotNull = mozilla::NotNull<T>; typedef mozilla::LangGroupFontPrefs LangGroupFontPrefs; typedef mozilla::ScrollbarStyles ScrollbarStyles; typedef mozilla::StaticPresData StaticPresData; NS_DECL_CYCLE_COLLECTING_ISUPPORTS NS_DECL_NSIOBSERVER NS_DECL_CYCLE_COLLECTION_CLASS(nsPresContext) MOZ_DECLARE_WEAKREFERENCE_TYPENAME(nsPresContext) @@ -1223,22 +1227,22 @@ protected: * langugage group. */ const LangGroupFontPrefs* GetFontPrefsForLang(nsIAtom *aLanguage, bool* aNeedsToCache = nullptr) const { nsIAtom* lang = aLanguage ? aLanguage : mLanguage.get(); return StaticPresData::Get()->GetFontPrefsForLangHelper(lang, &mLangGroupFontPrefs, aNeedsToCache); } - void UpdateCharSet(const nsCString& aCharSet); + void UpdateCharSet(NotNull<const Encoding*> aCharSet); static bool NotifyDidPaintSubdocumentCallback(nsIDocument* aDocument, void* aData); public: - void DoChangeCharSet(const nsCString& aCharSet); + void DoChangeCharSet(NotNull<const Encoding*> aCharSet); /** * Checks for MozAfterPaint listeners on the document */ bool MayHavePaintEventListener(); /** * Checks for MozAfterPaint listeners on the document and
--- a/layout/build/nsContentDLF.cpp +++ b/layout/build/nsContentDLF.cpp @@ -1,15 +1,18 @@ /* -*- Mode: c++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* vim: set ts=2 sw=2 et tw=78: */ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#include "nsContentDLF.h" + +#include "mozilla/Encoding.h" + #include "nsCOMPtr.h" -#include "nsContentDLF.h" #include "nsDocShell.h" #include "nsGenericHTMLElement.h" #include "nsGkAtoms.h" #include "nsIComponentManager.h" #include "nsIComponentRegistrar.h" #include "nsIContentViewer.h" #include "nsICategoryManager.h" #include "nsIDocumentLoaderFactory.h" @@ -322,17 +325,17 @@ nsContentDLF::CreateBlankDocument(nsILoa } } } } // add a nice bow if (NS_SUCCEEDED(rv)) { blankDoc->SetDocumentCharacterSetSource(kCharsetFromDocTypeDefault); - blankDoc->SetDocumentCharacterSet(NS_LITERAL_CSTRING("UTF-8")); + blankDoc->SetDocumentCharacterSet(UTF_8_ENCODING); blankDoc.forget(aDocument); } return rv; } nsresult
--- a/layout/generic/nsImageFrame.cpp +++ b/layout/generic/nsImageFrame.cpp @@ -6,16 +6,17 @@ /* rendering object for replaced elements with image data */ #include "nsImageFrame.h" #include "gfx2DGlue.h" #include "gfxContext.h" #include "gfxUtils.h" #include "mozilla/DebugOnly.h" +#include "mozilla/Encoding.h" #include "mozilla/EventStates.h" #include "mozilla/gfx/2D.h" #include "mozilla/gfx/Helpers.h" #include "mozilla/gfx/PathHelpers.h" #include "mozilla/MouseEvents.h" #include "mozilla/Unused.h" #include "nsCOMPtr.h" @@ -2243,17 +2244,17 @@ nsImageFrame::LoadIcon(const nsAString& } void nsImageFrame::GetDocumentCharacterSet(nsACString& aCharset) const { if (mContent) { NS_ASSERTION(mContent->GetComposedDoc(), "Frame still alive after content removed from document!"); - aCharset = mContent->GetComposedDoc()->GetDocumentCharacterSet(); + mContent->GetComposedDoc()->GetDocumentCharacterSet()->Name(aCharset); } } void nsImageFrame::SpecToURI(const nsAString& aSpec, nsIIOService *aIOService, nsIURI **aURI) { nsCOMPtr<nsIURI> baseURI;
--- a/layout/style/Loader.cpp +++ b/layout/style/Loader.cpp @@ -756,18 +756,18 @@ SheetLoadData::OnDetermineCharset(nsIUni LOG((" Setting from parent sheet to: %s", PromiseFlatCString(aCharset).get())); return NS_OK; } } if (mLoader->mDocument) { // no useful data on charset. Try the document charset. - aCharset = mLoader->mDocument->GetDocumentCharacterSet(); - MOZ_ASSERT(!aCharset.IsEmpty()); + auto encoding = mLoader->mDocument->GetDocumentCharacterSet(); + encoding->Name(aCharset); mCharset.Assign(aCharset); LOG((" Setting from document to: %s", PromiseFlatCString(aCharset).get())); return NS_OK; } aCharset.AssignLiteral("UTF-8"); mCharset = aCharset; LOG((" Setting from default to: %s", PromiseFlatCString(aCharset).get()));
--- a/netwerk/base/nsNetUtil.cpp +++ b/netwerk/base/nsNetUtil.cpp @@ -2,21 +2,23 @@ /* vim:set ts=4 sw=4 sts=4 et cin: */ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ // HttpLog.h should generally be included first #include "HttpLog.h" +#include "nsNetUtil.h" + +#include "mozilla/Encoding.h" #include "mozilla/LoadContext.h" #include "mozilla/LoadInfo.h" #include "mozilla/BasePrincipal.h" #include "mozilla/Telemetry.h" -#include "nsNetUtil.h" #include "nsCategoryCache.h" #include "nsContentUtils.h" #include "nsHashKeys.h" #include "nsHttp.h" #include "nsIAsyncStreamCopier.h" #include "nsIAuthPrompt.h" #include "nsIAuthPrompt2.h" #include "nsIAuthPromptAdapterFactory.h" @@ -1532,26 +1534,48 @@ NS_NewURI(nsIURI **result, rv = net_EnsureIOService(&ioService, grip); if (ioService) rv = ioService->NewURI(spec, charset, baseURI, result); return rv; } nsresult NS_NewURI(nsIURI **result, + const nsACString &spec, + NotNull<const Encoding*> encoding, + nsIURI *baseURI /* = nullptr */, + nsIIOService *ioService /* = nullptr */) // pass in nsIIOService to optimize callers +{ + nsAutoCString charset; + encoding->Name(charset); + return NS_NewURI(result, spec, charset.get(), baseURI, ioService); +} + +nsresult +NS_NewURI(nsIURI **result, const nsAString &spec, const char *charset /* = nullptr */, nsIURI *baseURI /* = nullptr */, nsIIOService *ioService /* = nullptr */) // pass in nsIIOService to optimize callers { return NS_NewURI(result, NS_ConvertUTF16toUTF8(spec), charset, baseURI, ioService); } nsresult NS_NewURI(nsIURI **result, + const nsAString &spec, + NotNull<const Encoding*> encoding, + nsIURI *baseURI /* = nullptr */, + nsIIOService *ioService /* = nullptr */) // pass in nsIIOService to optimize callers +{ + return NS_NewURI(result, NS_ConvertUTF16toUTF8(spec), encoding, baseURI, ioService); +} + +nsresult +NS_NewURI(nsIURI **result, const char *spec, nsIURI *baseURI /* = nullptr */, nsIIOService *ioService /* = nullptr */) // pass in nsIIOService to optimize callers { return NS_NewURI(result, nsDependentCString(spec), nullptr, baseURI, ioService); } nsresult
--- a/netwerk/base/nsNetUtil.h +++ b/netwerk/base/nsNetUtil.h @@ -10,16 +10,17 @@ #include "nsCOMPtr.h" #include "nsIInterfaceRequestor.h" #include "nsIInterfaceRequestorUtils.h" #include "nsILoadGroup.h" #include "nsINetUtil.h" #include "nsIRequest.h" #include "nsILoadInfo.h" #include "nsIIOService.h" +#include "mozilla/NotNull.h" #include "mozilla/Services.h" #include "mozilla/Unused.h" #include "nsNetCID.h" #include "nsReadableUtils.h" #include "nsServiceManagerUtils.h" #include "nsString.h" class nsIURI; @@ -46,17 +47,20 @@ class nsIRequestObserver; class nsIStreamListener; class nsIStreamLoader; class nsIStreamLoaderObserver; class nsIIncrementalStreamLoader; class nsIIncrementalStreamLoaderObserver; class nsIUnicharStreamLoader; class nsIUnicharStreamLoaderObserver; -namespace mozilla { class OriginAttributes; } +namespace mozilla { +class Encoding; +class OriginAttributes; +} template <class> class nsCOMPtr; template <typename> struct already_AddRefed; already_AddRefed<nsIIOService> do_GetIOService(nsresult *error = 0); already_AddRefed<nsINetUtil> do_GetNetUtil(nsresult *error = 0); @@ -65,22 +69,34 @@ nsresult net_EnsureIOService(nsIIOServic nsresult NS_NewURI(nsIURI **result, const nsACString &spec, const char *charset = nullptr, nsIURI *baseURI = nullptr, nsIIOService *ioService = nullptr); // pass in nsIIOService to optimize callers nsresult NS_NewURI(nsIURI **result, + const nsACString &spec, + mozilla::NotNull<const mozilla::Encoding*> encoding, + nsIURI *baseURI = nullptr, + nsIIOService *ioService = nullptr); // pass in nsIIOService to optimize callers + +nsresult NS_NewURI(nsIURI **result, const nsAString &spec, const char *charset = nullptr, nsIURI *baseURI = nullptr, nsIIOService *ioService = nullptr); // pass in nsIIOService to optimize callers nsresult NS_NewURI(nsIURI **result, + const nsAString &spec, + mozilla::NotNull<const mozilla::Encoding*> encoding, + nsIURI *baseURI = nullptr, + nsIIOService *ioService = nullptr); // pass in nsIIOService to optimize callers + +nsresult NS_NewURI(nsIURI **result, const char *spec, nsIURI *baseURI = nullptr, nsIIOService *ioService = nullptr); // pass in nsIIOService to optimize callers nsresult NS_NewFileURI(nsIURI **result, nsIFile *spec, nsIIOService *ioService = nullptr); // pass in nsIIOService to optimize callers
--- a/netwerk/streamconv/converters/nsDirIndexParser.cpp +++ b/netwerk/streamconv/converters/nsDirIndexParser.cpp @@ -1,26 +1,26 @@ /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ /* This parsing code originally lived in xpfe/components/directory/ - bbaetz */ +#include "nsDirIndexParser.h" + #include "mozilla/ArrayUtils.h" - +#include "mozilla/dom/FallbackEncoding.h" +#include "mozilla/Encoding.h" #include "prprf.h" - -#include "nsDirIndexParser.h" +#include "nsCRT.h" #include "nsEscape.h" +#include "nsIDirIndex.h" #include "nsIInputStream.h" -#include "nsCRT.h" -#include "mozilla/dom/FallbackEncoding.h" #include "nsITextToSubURI.h" -#include "nsIDirIndex.h" #include "nsServiceManagerUtils.h" using namespace mozilla; NS_IMPL_ISUPPORTS(nsDirIndexParser, nsIRequestObserver, nsIStreamListener, nsIDirIndexParser) @@ -28,17 +28,18 @@ NS_IMPL_ISUPPORTS(nsDirIndexParser, nsDirIndexParser::nsDirIndexParser() { } nsresult nsDirIndexParser::Init() { mLineStart = 0; mHasDescription = false; mFormat[0] = -1; - mozilla::dom::FallbackEncoding::FromLocale(mEncoding); + auto encoding = mozilla::dom::FallbackEncoding::FromLocale(); + encoding->Name(mEncoding); nsresult rv; // XXX not threadsafe if (gRefCntParser++ == 0) rv = CallGetService(NS_ITEXTTOSUBURI_CONTRACTID, &gTextToSubURI); else rv = NS_OK;
--- a/parser/html/nsHtml5DocumentBuilder.cpp +++ b/parser/html/nsHtml5DocumentBuilder.cpp @@ -44,21 +44,22 @@ nsHtml5DocumentBuilder::~nsHtml5Document nsresult nsHtml5DocumentBuilder::MarkAsBroken(nsresult aReason) { mBroken = aReason; return aReason; } void -nsHtml5DocumentBuilder::SetDocumentCharsetAndSource(nsACString& aCharset, int32_t aCharsetSource) +nsHtml5DocumentBuilder::SetDocumentCharsetAndSource(NotNull<const Encoding*> aEncoding, + int32_t aCharsetSource) { if (mDocument) { mDocument->SetDocumentCharacterSetSource(aCharsetSource); - mDocument->SetDocumentCharacterSet(aCharset); + mDocument->SetDocumentCharacterSet(aEncoding); } } void nsHtml5DocumentBuilder::UpdateStyleSheet(nsIContent* aElement) { nsCOMPtr<nsIStyleSheetLinkingElement> ssle(do_QueryInterface(aElement)); if (!ssle) {
--- a/parser/html/nsHtml5DocumentBuilder.h +++ b/parser/html/nsHtml5DocumentBuilder.h @@ -18,16 +18,18 @@ enum eHtml5FlushState { eNotFlushing = 0, // not flushing eInFlush = 1, // the Flush() method is on the call stack eInDocUpdate = 2, // inside an update batch on the document eNotifying = 3 // flushing pending append notifications }; class nsHtml5DocumentBuilder : public nsContentSink { + using Encoding = mozilla::Encoding; + template <typename T> using NotNull = mozilla::NotNull<T>; public: NS_DECL_CYCLE_COLLECTION_CLASS_INHERITED(nsHtml5DocumentBuilder, nsContentSink) NS_DECL_ISUPPORTS_INHERITED inline void HoldElement(already_AddRefed<nsIContent> aContent) { @@ -82,17 +84,18 @@ public: } } bool IsInDocUpdate() { return mFlushState == eInDocUpdate; } - void SetDocumentCharsetAndSource(nsACString& aCharset, int32_t aCharsetSource); + void SetDocumentCharsetAndSource(NotNull<const Encoding*> aEncoding, + int32_t aCharsetSource); /** * Sets up style sheet load / parse */ void UpdateStyleSheet(nsIContent* aElement); void SetDocumentMode(nsHtml5DocumentMode m);
--- a/parser/html/nsHtml5MetaScanner.cpp +++ b/parser/html/nsHtml5MetaScanner.cpp @@ -72,16 +72,17 @@ nsHtml5MetaScanner::nsHtml5MetaScanner(n , contentTypeIndex(INT32_MAX) , stateSave(DATA) , strBufLen(0) , strBuf(jArray<char16_t, int32_t>::newJArray(36)) , content(nullptr) , charset(nullptr) , httpEquivState(HTTP_EQUIV_NOT_SEEN) , treeBuilder(tb) + , mEncoding(nullptr) { MOZ_COUNT_CTOR(nsHtml5MetaScanner); } nsHtml5MetaScanner::~nsHtml5MetaScanner() { MOZ_COUNT_DTOR(nsHtml5MetaScanner);
--- a/parser/html/nsHtml5MetaScannerCppSupplement.h +++ b/parser/html/nsHtml5MetaScannerCppSupplement.h @@ -1,44 +1,44 @@ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #include "nsISupportsImpl.h" #include "mozilla/Encoding.h" -void -nsHtml5MetaScanner::sniff(nsHtml5ByteReadable* bytes, nsACString& charset) +const Encoding* +nsHtml5MetaScanner::sniff(nsHtml5ByteReadable* bytes) { readable = bytes; stateLoop(stateSave); readable = nullptr; - charset.Assign(mCharset); + return mEncoding; } bool nsHtml5MetaScanner::tryCharset(nsHtml5String charset) { // This code needs to stay in sync with // nsHtml5StreamParser::internalEncodingDeclaration. Unfortunately, the // trickery with member fields here leads to some copy-paste reuse. :-( nsAutoCString label; nsString charset16; // Not Auto, because using it to hold nsStringBuffer* charset.ToString(charset16); CopyUTF16toUTF8(charset16, label); - const mozilla::Encoding* encoding = mozilla::Encoding::ForLabel(label); + const Encoding* encoding = Encoding::ForLabel(label); if (!encoding) { return false; } if (encoding == UTF_16BE_ENCODING || encoding == UTF_16LE_ENCODING) { - mCharset.AssignLiteral("UTF-8"); + mEncoding = UTF_8_ENCODING; return true; } if (encoding == X_USER_DEFINED_ENCODING) { // WebKit/Blink hack for Indian and Armenian legacy sites - mCharset.AssignLiteral("windows-1252"); + mEncoding = WINDOWS_1252_ENCODING; return true; } - encoding->Name(mCharset); + mEncoding = encoding; return true; }
--- a/parser/html/nsHtml5MetaScannerHSupplement.h +++ b/parser/html/nsHtml5MetaScannerHSupplement.h @@ -1,12 +1,12 @@ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - + using Encoding = mozilla::Encoding; private: - nsCString mCharset; + const Encoding* mEncoding; inline int32_t read() { return readable->read(); } public: - void sniff(nsHtml5ByteReadable* bytes, nsACString& charset); + const Encoding* sniff(nsHtml5ByteReadable* bytes);
--- a/parser/html/nsHtml5Parser.cpp +++ b/parser/html/nsHtml5Parser.cpp @@ -94,29 +94,25 @@ nsHtml5Parser::SetCommand(const char* aC NS_IMETHODIMP_(void) nsHtml5Parser::SetCommand(eParserCommands aParserCommand) { NS_ASSERTION(aParserCommand == eViewNormal, "Parser command was not eViewNormal."); } -NS_IMETHODIMP_(void) -nsHtml5Parser::SetDocumentCharset(const nsACString& aCharset, +void +nsHtml5Parser::SetDocumentCharset(NotNull<const Encoding*> aEncoding, int32_t aCharsetSource) { NS_PRECONDITION(!mExecutor->HasStarted(), "Document charset set too late."); NS_PRECONDITION(GetStreamParser(), "Setting charset on a script-only parser."); - nsAutoCString trimmed; - trimmed.Assign(aCharset); - trimmed.Trim(" \t\r\n\f"); - GetStreamParser()->SetDocumentCharset(trimmed, aCharsetSource); - mExecutor->SetDocumentCharsetAndSource(trimmed, - aCharsetSource); + GetStreamParser()->SetDocumentCharset(aEncoding, aCharsetSource); + mExecutor->SetDocumentCharsetAndSource(aEncoding, aCharsetSource); } NS_IMETHODIMP nsHtml5Parser::GetChannel(nsIChannel** aChannel) { if (GetStreamParser()) { return GetStreamParser()->GetChannel(aChannel); } else {
--- a/parser/html/nsHtml5Parser.h +++ b/parser/html/nsHtml5Parser.h @@ -62,28 +62,21 @@ class nsHtml5Parser final : public nsIPa * No-op for backwards compat. */ NS_IMETHOD_(void) SetCommand(eParserCommands aParserCommand) override; /** * Call this method once you've created a parser, and want to instruct it * about what charset to load * - * @param aCharset the charset of a document + * @param aEncoding the charset of a document * @param aCharsetSource the source of the charset */ - NS_IMETHOD_(void) SetDocumentCharset(const nsACString& aCharset, int32_t aSource) override; - - /** - * Don't call. For interface compat only. - */ - NS_IMETHOD_(void) GetDocumentCharset(nsACString& aCharset, int32_t& aSource) override - { - NS_NOTREACHED("No one should call this."); - } + virtual void SetDocumentCharset(NotNull<const Encoding*> aEncoding, + int32_t aSource) override; /** * Get the channel associated with this parser * @param aChannel out param that will contain the result * @return NS_OK if successful or NS_NOT_AVAILABLE if not */ NS_IMETHOD GetChannel(nsIChannel** aChannel) override;
--- a/parser/html/nsHtml5SpeculativeLoad.cpp +++ b/parser/html/nsHtml5SpeculativeLoad.cpp @@ -61,17 +61,17 @@ nsHtml5SpeculativeLoad::Perform(nsHtml5T aExecutor->ProcessOfflineManifest(mUrl); break; case eSpeculativeLoadSetDocumentCharset: { nsAutoCString narrowName; CopyUTF16toUTF8(mCharset, narrowName); NS_ASSERTION(mTypeOrCharsetSourceOrDocumentMode.Length() == 1, "Unexpected charset source string"); int32_t intSource = (int32_t)mTypeOrCharsetSourceOrDocumentMode.First(); - aExecutor->SetDocumentCharsetAndSource(narrowName, + aExecutor->SetDocumentCharsetAndSource(Encoding::ForName(narrowName), intSource); } break; case eSpeculativeLoadSetDocumentMode: { NS_ASSERTION(mTypeOrCharsetSourceOrDocumentMode.Length() == 1, "Unexpected document mode string"); nsHtml5DocumentMode mode = (nsHtml5DocumentMode)mTypeOrCharsetSourceOrDocumentMode.First();
--- a/parser/html/nsHtml5StreamParser.cpp +++ b/parser/html/nsHtml5StreamParser.cpp @@ -147,16 +147,17 @@ class nsHtml5LoadFlusher : public Runnab }; nsHtml5StreamParser::nsHtml5StreamParser(nsHtml5TreeOpExecutor* aExecutor, nsHtml5Parser* aOwner, eParserMode aMode) : mSniffingLength(0) , mBomState(eBomState::BOM_SNIFFING_NOT_STARTED) , mCharsetSource(kCharsetUninitialized) + , mEncoding(WINDOWS_1252_ENCODING) , mReparseForbidden(false) , mLastBuffer(nullptr) // Will be filled when starting , mExecutor(aExecutor) , mTreeBuilder(new nsHtml5TreeBuilder((aMode == VIEW_SOURCE_HTML || aMode == VIEW_SOURCE_XML) ? nullptr : mExecutor->GetStage(), aMode == NORMAL ? mExecutor->GetStage() : nullptr)) @@ -248,44 +249,42 @@ nsHtml5StreamParser::GetChannel(nsIChann } NS_IMETHODIMP nsHtml5StreamParser::Notify(const char* aCharset, nsDetectionConfident aConf) { NS_ASSERTION(IsParserThread(), "Wrong thread!"); if (aConf == eBestAnswer || aConf == eSureAnswer) { mFeedChardet = false; // just in case - const Encoding* encoding = Encoding::ForLabelNoReplacement( + auto encoding = Encoding::ForLabelNoReplacement( nsDependentCString(aCharset)); if (!encoding) { return NS_OK; } - nsAutoCString charset; - encoding->Name(charset); if (HasDecoder()) { - if (mCharset.Equals(charset)) { + if (mEncoding == encoding) { NS_ASSERTION(mCharsetSource < kCharsetFromAutoDetection, "Why are we running chardet at all?"); mCharsetSource = kCharsetFromAutoDetection; - mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource); + mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource); } else { // We've already committed to a decoder. Request a reload from the // docshell. - mTreeBuilder->NeedsCharsetSwitchTo(charset, + mTreeBuilder->NeedsCharsetSwitchTo(WrapNotNull(encoding), kCharsetFromAutoDetection, 0); FlushTreeOpsAndDisarmTimer(); Interrupt(); } } else { // Got a confident answer from the sniffing buffer. That code will // take care of setting up the decoder. - mCharset.Assign(charset); + mEncoding = WrapNotNull(encoding); mCharsetSource = kCharsetFromAutoDetection; - mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource); + mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource); } } return NS_OK; } void nsHtml5StreamParser::SetViewSourceTitle(nsIURI* aURL) { @@ -316,39 +315,39 @@ nsHtml5StreamParser::SetViewSourceTitle( nsresult nsHtml5StreamParser::SetupDecodingAndWriteSniffingBufferAndCurrentSegment(const uint8_t* aFromSegment, // can be null uint32_t aCount, uint32_t* aWriteCount) { NS_ASSERTION(IsParserThread(), "Wrong thread!"); nsresult rv = NS_OK; - mUnicodeDecoder = Encoding::ForName(mCharset)->NewDecoderWithBOMRemoval(); + mUnicodeDecoder = mEncoding->NewDecoderWithBOMRemoval(); if (mSniffingBuffer) { uint32_t writeCount; rv = WriteStreamBytes(mSniffingBuffer.get(), mSniffingLength, &writeCount); NS_ENSURE_SUCCESS(rv, rv); mSniffingBuffer = nullptr; } mMetaScanner = nullptr; if (aFromSegment) { rv = WriteStreamBytes(aFromSegment, aCount, aWriteCount); } return rv; } nsresult -nsHtml5StreamParser::SetupDecodingFromBom(const char* aDecoderCharsetName) +nsHtml5StreamParser::SetupDecodingFromBom(NotNull<const Encoding*> aEncoding) { NS_ASSERTION(IsParserThread(), "Wrong thread!"); - mCharset.Assign(aDecoderCharsetName); - mUnicodeDecoder = Encoding::ForName(mCharset)->NewDecoderWithBOMRemoval(); + mEncoding = aEncoding; + mUnicodeDecoder = mEncoding->NewDecoderWithBOMRemoval(); mCharsetSource = kCharsetFromByteOrderMark; mFeedChardet = false; - mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource); + mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource); mSniffingBuffer = nullptr; mMetaScanner = nullptr; mBomState = BOM_SNIFFING_OVER; return NS_OK; } void nsHtml5StreamParser::SniffBOMlessUTF16BasicLatin(const uint8_t* aFromSegment, @@ -393,47 +392,48 @@ nsHtml5StreamParser::SniffBOMlessUTF16Ba return; } byteZero[(i + j) % 2] = true; } } } if (byteNonZero[0]) { - mCharset.AssignLiteral("UTF-16LE"); + mEncoding = UTF_16LE_ENCODING; } else { - mCharset.AssignLiteral("UTF-16BE"); + mEncoding = UTF_16BE_ENCODING; } mCharsetSource = kCharsetFromIrreversibleAutoDetection; - mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource); + mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource); mFeedChardet = false; mTreeBuilder->MaybeComplainAboutCharset("EncBomlessUtf16", true, 0); } void nsHtml5StreamParser::SetEncodingFromExpat(const char16_t* aEncoding) { if (aEncoding) { nsDependentString utf16(aEncoding); nsAutoCString utf8; CopyUTF16toUTF8(utf16, utf8); - if (PreferredForInternalEncodingDecl(utf8)) { - mCharset.Assign(utf8); + auto encoding = PreferredForInternalEncodingDecl(utf8); + if (encoding) { + mEncoding = WrapNotNull(encoding); mCharsetSource = kCharsetFromMetaTag; // closest for XML return; } // else the page declared an encoding Gecko doesn't support and we'd // end up defaulting to UTF-8 anyway. Might as well fall through here // right away and let the encoding be set to UTF-8 which we'd default to // anyway. } - mCharset.AssignLiteral("UTF-8"); // XML defaults to UTF-8 without a BOM + mEncoding = UTF_8_ENCODING; // XML defaults to UTF-8 without a BOM mCharsetSource = kCharsetFromMetaTag; // means confident } // A separate user data struct is used instead of passing the // nsHtml5StreamParser instance as user data in order to avoid including // expat.h in nsHtml5StreamParser.h. Doing that would cause naming conflicts. // Using a separate user data struct also avoids bloating nsHtml5StreamParser // by one pointer. @@ -555,17 +555,17 @@ nsHtml5StreamParser::FinalizeSniffing(co } XML_ParserFree(ud.mExpat); if (mCharsetSource < kCharsetFromMetaTag) { // Failed to get an encoding from the XML declaration. XML defaults // confidently to UTF-8 in this case. // It is also possible that the document has an XML declaration that is // longer than 1024 bytes, but that case is not worth worrying about. - mCharset.AssignLiteral("UTF-8"); + mEncoding = UTF_8_ENCODING; mCharsetSource = kCharsetFromMetaTag; // means confident } return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment, aCount, aWriteCount); } @@ -607,44 +607,44 @@ nsHtml5StreamParser::FinalizeSniffing(co mFeedChardet = false; rv = mChardet->Done(); NS_ENSURE_SUCCESS(rv, rv); } // fall thru; callback may have changed charset } if (mCharsetSource == kCharsetUninitialized) { // Hopefully this case is never needed, but dealing with it anyway - mCharset.AssignLiteral("windows-1252"); + mEncoding = WINDOWS_1252_ENCODING; mCharsetSource = kCharsetFromFallback; - mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource); + mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource); } else if (mMode == LOAD_AS_DATA && mCharsetSource == kCharsetFromFallback) { NS_ASSERTION(mReparseForbidden, "Reparse should be forbidden for XHR"); NS_ASSERTION(!mFeedChardet, "Should not feed chardet for XHR"); - NS_ASSERTION(mCharset.EqualsLiteral("UTF-8"), + NS_ASSERTION(mEncoding == UTF_8_ENCODING, "XHR should default to UTF-8"); // Now mark charset source as non-weak to signal that we have a decision mCharsetSource = kCharsetFromDocTypeDefault; - mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource); + mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource); } return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment, aCount, aWriteCount); } nsresult nsHtml5StreamParser::SniffStreamBytes(const uint8_t* aFromSegment, uint32_t aCount, uint32_t* aWriteCount) { NS_ASSERTION(IsParserThread(), "Wrong thread!"); nsresult rv = NS_OK; uint32_t writeCount; - // mCharset and mCharsetSource potentially have come from channel or higher + // mEncoding and mCharsetSource potentially have come from channel or higher // by now. If we find a BOM, SetupDecodingFromBom() will overwrite them. - // If we don't find a BOM, the previously set values of mCharset and + // If we don't find a BOM, the previously set values of mEncoding and // mCharsetSource are not modified by the BOM sniffing here. for (uint32_t i = 0; i < aCount && mBomState != BOM_SNIFFING_OVER; i++) { switch (mBomState) { case BOM_SNIFFING_NOT_STARTED: NS_ASSERTION(i == 0, "Bad BOM sniffing state."); switch (*aFromSegment) { case 0xEF: mBomState = SEEN_UTF_8_FIRST_BYTE; @@ -657,29 +657,29 @@ nsHtml5StreamParser::SniffStreamBytes(co break; default: mBomState = BOM_SNIFFING_OVER; break; } break; case SEEN_UTF_16_LE_FIRST_BYTE: if (aFromSegment[i] == 0xFE) { - rv = SetupDecodingFromBom("UTF-16LE"); // upper case is the raw form + rv = SetupDecodingFromBom(UTF_16LE_ENCODING); // upper case is the raw form NS_ENSURE_SUCCESS(rv, rv); uint32_t count = aCount - (i + 1); rv = WriteStreamBytes(aFromSegment + (i + 1), count, &writeCount); NS_ENSURE_SUCCESS(rv, rv); *aWriteCount = writeCount + (i + 1); return rv; } mBomState = BOM_SNIFFING_OVER; break; case SEEN_UTF_16_BE_FIRST_BYTE: if (aFromSegment[i] == 0xFF) { - rv = SetupDecodingFromBom("UTF-16BE"); // upper case is the raw form + rv = SetupDecodingFromBom(UTF_16BE_ENCODING); // upper case is the raw form NS_ENSURE_SUCCESS(rv, rv); uint32_t count = aCount - (i + 1); rv = WriteStreamBytes(aFromSegment + (i + 1), count, &writeCount); NS_ENSURE_SUCCESS(rv, rv); *aWriteCount = writeCount + (i + 1); return rv; } mBomState = BOM_SNIFFING_OVER; @@ -688,17 +688,17 @@ nsHtml5StreamParser::SniffStreamBytes(co if (aFromSegment[i] == 0xBB) { mBomState = SEEN_UTF_8_SECOND_BYTE; } else { mBomState = BOM_SNIFFING_OVER; } break; case SEEN_UTF_8_SECOND_BYTE: if (aFromSegment[i] == 0xBF) { - rv = SetupDecodingFromBom("UTF-8"); // upper case is the raw form + rv = SetupDecodingFromBom(UTF_8_ENCODING); // upper case is the raw form NS_ENSURE_SUCCESS(rv, rv); uint32_t count = aCount - (i + 1); rv = WriteStreamBytes(aFromSegment + (i + 1), count, &writeCount); NS_ENSURE_SUCCESS(rv, rv); *aWriteCount = writeCount + (i + 1); return rv; } mBomState = BOM_SNIFFING_OVER; @@ -713,23 +713,23 @@ nsHtml5StreamParser::SniffStreamBytes(co MOZ_ASSERT(mCharsetSource != kCharsetFromByteOrderMark, "Should not come here if BOM was found."); MOZ_ASSERT(mCharsetSource != kCharsetFromOtherComponent, "kCharsetFromOtherComponent is for XSLT."); if (mBomState == BOM_SNIFFING_OVER && mCharsetSource == kCharsetFromChannel) { - // There was no BOM and the charset came from channel. mCharset + // There was no BOM and the charset came from channel. mEncoding // still contains the charset from the channel as set by an // earlier call to SetDocumentCharset(), since we didn't find a BOM and - // overwrite mCharset. (Note that if the user has overridden the charset, + // overwrite mEncoding. (Note that if the user has overridden the charset, // we don't come here but check <meta> for XSS-dangerous charsets first.) mFeedChardet = false; - mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource); + mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource); return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment, aCount, aWriteCount); } if (!mMetaScanner && (mMode == NORMAL || mMode == VIEW_SOURCE_HTML || mMode == LOAD_AS_DATA)) { mMetaScanner = new nsHtml5MetaScanner(mTreeBuilder); @@ -738,38 +738,37 @@ nsHtml5StreamParser::SniffStreamBytes(co if (mSniffingLength + aCount >= NS_HTML5_STREAM_PARSER_SNIFFING_BUFFER_SIZE) { // this is the last buffer uint32_t countToSniffingLimit = NS_HTML5_STREAM_PARSER_SNIFFING_BUFFER_SIZE - mSniffingLength; if (mMode == NORMAL || mMode == VIEW_SOURCE_HTML || mMode == LOAD_AS_DATA) { nsHtml5ByteReadable readable(aFromSegment, aFromSegment + countToSniffingLimit); nsAutoCString charset; - mMetaScanner->sniff(&readable, charset); + auto encoding = mMetaScanner->sniff(&readable); // Due to the way nsHtml5Portability reports OOM, ask the tree buider nsresult rv; if (NS_FAILED((rv = mTreeBuilder->IsBroken()))) { MarkAsBroken(rv); return rv; } - if (!charset.IsEmpty()) { - const Encoding* encoding = Encoding::ForName(charset); + if (encoding) { // meta scan successful; honor overrides unless meta is XSS-dangerous if ((mCharsetSource == kCharsetFromParentForced || mCharsetSource == kCharsetFromUserForced) && (encoding->IsAsciiCompatible() || encoding == ISO_2022_JP_ENCODING)) { // Honor override return SetupDecodingAndWriteSniffingBufferAndCurrentSegment( aFromSegment, aCount, aWriteCount); } - mCharset.Assign(charset); + mEncoding = WrapNotNull(encoding); mCharsetSource = kCharsetFromMetaPrescan; mFeedChardet = false; - mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource); + mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource); return SetupDecodingAndWriteSniffingBufferAndCurrentSegment( aFromSegment, aCount, aWriteCount); } } if (mCharsetSource == kCharsetFromParentForced || mCharsetSource == kCharsetFromUserForced) { // meta not found, honor override return SetupDecodingAndWriteSniffingBufferAndCurrentSegment( @@ -777,39 +776,37 @@ nsHtml5StreamParser::SniffStreamBytes(co } return FinalizeSniffing(aFromSegment, aCount, aWriteCount, countToSniffingLimit); } // not the last buffer if (mMode == NORMAL || mMode == VIEW_SOURCE_HTML || mMode == LOAD_AS_DATA) { nsHtml5ByteReadable readable(aFromSegment, aFromSegment + aCount); - nsAutoCString charset; - mMetaScanner->sniff(&readable, charset); + auto encoding = mMetaScanner->sniff(&readable); // Due to the way nsHtml5Portability reports OOM, ask the tree buider nsresult rv; if (NS_FAILED((rv = mTreeBuilder->IsBroken()))) { MarkAsBroken(rv); return rv; } - if (!charset.IsEmpty()) { - const Encoding* encoding = Encoding::ForName(charset); + if (encoding) { // meta scan successful; honor overrides unless meta is XSS-dangerous if ((mCharsetSource == kCharsetFromParentForced || mCharsetSource == kCharsetFromUserForced) && (encoding->IsAsciiCompatible() || encoding == ISO_2022_JP_ENCODING)) { // Honor override return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment, aCount, aWriteCount); } - mCharset.Assign(charset); + mEncoding = WrapNotNull(encoding); mCharsetSource = kCharsetFromMetaPrescan; mFeedChardet = false; - mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource); + mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource); return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment, aCount, aWriteCount); } } if (!mSniffingBuffer) { mSniffingBuffer = MakeUniqueFallible<uint8_t[]>(NS_HTML5_STREAM_PARSER_SNIFFING_BUFFER_SIZE); @@ -1002,17 +999,17 @@ nsHtml5StreamParser::OnStartRequest(nsIR // We are reloading a document.open()ed doc or loading JSON/WebVTT/etc. into // a browsing context. In the latter case, there's no need to remove the // BOM manually here, because the UTF-8 decoder removes it. mReparseForbidden = true; mFeedChardet = false; // Instantiate the converter here to avoid BOM sniffing. - mUnicodeDecoder = Encoding::ForName(mCharset)->NewDecoderWithBOMRemoval(); + mUnicodeDecoder = mEncoding->NewDecoderWithBOMRemoval(); return NS_OK; } nsresult nsHtml5StreamParser::CheckListenerChain() { NS_ASSERTION(NS_IsMainThread(), "Should be on the main thread!"); if (!mObserver) { @@ -1226,26 +1223,26 @@ nsHtml5StreamParser::CopySegmentsToParse nsHtml5StreamParser* parser = static_cast<nsHtml5StreamParser*>(aClosure); parser->DoDataAvailable((const uint8_t*)aFromSegment, aCount); // Assume DoDataAvailable consumed all available bytes. *aWriteCount = aCount; return NS_OK; } -bool -nsHtml5StreamParser::PreferredForInternalEncodingDecl(nsACString& aEncoding) +const Encoding* +nsHtml5StreamParser::PreferredForInternalEncodingDecl(const nsACString& aEncoding) { const Encoding* newEncoding = Encoding::ForLabel(aEncoding); if (!newEncoding) { // the encoding name is bogus mTreeBuilder->MaybeComplainAboutCharset("EncMetaUnsupported", true, mTokenizer->getLineNumber()); - return false; + return nullptr; } if (newEncoding == UTF_16BE_ENCODING || newEncoding == UTF_16LE_ENCODING) { mTreeBuilder->MaybeComplainAboutCharset("EncMetaUtf16", true, mTokenizer->getLineNumber()); newEncoding = UTF_8_ENCODING; @@ -1254,35 +1251,34 @@ nsHtml5StreamParser::PreferredForInterna if (newEncoding == X_USER_DEFINED_ENCODING) { // WebKit/Blink hack for Indian and Armenian legacy sites mTreeBuilder->MaybeComplainAboutCharset("EncMetaUserDefined", true, mTokenizer->getLineNumber()); newEncoding = WINDOWS_1252_ENCODING; } - if (newEncoding == Encoding::ForName(mCharset)) { + if (newEncoding == mEncoding) { if (mCharsetSource < kCharsetFromMetaPrescan) { if (mInitialEncodingWasFromParentFrame) { mTreeBuilder->MaybeComplainAboutCharset("EncLateMetaFrame", false, mTokenizer->getLineNumber()); } else { mTreeBuilder->MaybeComplainAboutCharset("EncLateMeta", false, mTokenizer->getLineNumber()); } } mCharsetSource = kCharsetFromMetaTag; // become confident mFeedChardet = false; // don't feed chardet when confident - return false; + return nullptr; } - newEncoding->Name(aEncoding); - return true; + return newEncoding; } bool nsHtml5StreamParser::internalEncodingDeclaration(nsHtml5String aEncoding) { // This code needs to stay in sync with // nsHtml5MetaScanner::tryCharset. Unfortunately, the // trickery with member fields there leads to some copy-paste reuse. :-( @@ -1291,17 +1287,18 @@ nsHtml5StreamParser::internalEncodingDec return false; } nsString newEncoding16; // Not Auto, because using it to hold nsStringBuffer* aEncoding.ToString(newEncoding16); nsAutoCString newEncoding; CopyUTF16toUTF8(newEncoding16, newEncoding); - if (!PreferredForInternalEncodingDecl(newEncoding)) { + auto encoding = PreferredForInternalEncodingDecl(newEncoding); + if (!encoding) { return false; } if (mReparseForbidden) { // This mReparseForbidden check happens after the call to // PreferredForInternalEncodingDecl so that if that method calls // MaybeComplainAboutCharset, its charset complaint wins over the one // below. @@ -1309,17 +1306,17 @@ nsHtml5StreamParser::internalEncodingDec true, mTokenizer->getLineNumber()); return false; // not reparsing even if we wanted to } // Avoid having the chardet ask for another restart after this restart // request. mFeedChardet = false; - mTreeBuilder->NeedsCharsetSwitchTo(newEncoding, + mTreeBuilder->NeedsCharsetSwitchTo(WrapNotNull(encoding), kCharsetFromMetaTag, mTokenizer->getLineNumber()); FlushTreeOpsAndDisarmTimer(); Interrupt(); // the tree op executor will cause the stream parser to terminate // if the charset switch request is accepted or it'll uninterrupt // if the request failed. Note that if the restart request fails, // we don't bother trying to make chardet resume. Might as well
--- a/parser/html/nsHtml5StreamParser.h +++ b/parser/html/nsHtml5StreamParser.h @@ -96,17 +96,19 @@ enum eBomState { }; enum eHtml5StreamState { STREAM_NOT_STARTED = 0, STREAM_BEING_READ = 1, STREAM_ENDED = 2 }; -class nsHtml5StreamParser : public nsICharsetDetectionObserver { +class nsHtml5StreamParser final : public nsICharsetDetectionObserver { + template <typename T> using NotNull = mozilla::NotNull<T>; + using Encoding = mozilla::Encoding; friend class nsHtml5RequestStopper; friend class nsHtml5DataAvailable; friend class nsHtml5StreamParserContinuation; friend class nsHtml5TimerKungFu; public: NS_DECL_CYCLE_COLLECTING_ISUPPORTS @@ -148,24 +150,25 @@ class nsHtml5StreamParser : public nsICh bool internalEncodingDeclaration(nsHtml5String aEncoding); // Not from an external interface /** * Call this method once you've created a parser, and want to instruct it * about what charset to load * - * @param aCharset the charset of a document + * @param aEncoding the charset of a document * @param aCharsetSource the source of the charset */ - inline void SetDocumentCharset(const nsACString& aCharset, int32_t aSource) { + inline void SetDocumentCharset(NotNull<const Encoding*> aEncoding, + int32_t aSource) { NS_PRECONDITION(mStreamState == STREAM_NOT_STARTED, "SetDocumentCharset called too late."); NS_ASSERTION(NS_IsMainThread(), "Wrong thread!"); - mCharset = aCharset; + mEncoding = aEncoding; mCharsetSource = aSource; } inline void SetObserver(nsIRequestObserver* aObserver) { NS_ASSERTION(NS_IsMainThread(), "Wrong thread!"); mObserver = aObserver; } @@ -189,19 +192,19 @@ class nsHtml5StreamParser : public nsICh { mozilla::MutexAutoLock autoLock(mTerminatedMutex); mTerminated = true; } void DropTimer(); /** - * Sets mCharset and mCharsetSource appropriately for the XML View Source + * Sets mEncoding and mCharsetSource appropriately for the XML View Source * case if aEncoding names a supported rough ASCII superset and sets - * the mCharset and mCharsetSource to the UTF-8 default otherwise. + * the mEncoding and mCharsetSource to the UTF-8 default otherwise. */ void SetEncodingFromExpat(const char16_t* aEncoding); /** * Sets the URL for View Source title in case this parser ends up being * used for View Source. If aURL is a view-source: URL, takes the inner * URL. data: URLs are shown with an ellipsis instead of the actual data. */ @@ -334,27 +337,27 @@ class nsHtml5StreamParser : public nsICh /** * Initialize the Unicode decoder, mark the BOM as the source and * drop the sniffer. * * @param aDecoderCharsetName The name for the decoder's charset * (UTF-16BE, UTF-16LE or UTF-8; the BOM has * been swallowed) */ - nsresult SetupDecodingFromBom(const char* aDecoderCharsetName); + nsresult SetupDecodingFromBom(NotNull<const Encoding*> aEncoding); /** * Become confident or resolve and encoding name to its preferred form. * @param aEncoding the value of an internal encoding decl. Acts as an * out param, too, when the method returns true. * @return true if the parser needs to start using the new value of * aEncoding and false if the parser became confident or if * the encoding name did not specify a usable encoding */ - bool PreferredForInternalEncodingDecl(nsACString& aEncoding); + const Encoding* PreferredForInternalEncodingDecl(const nsACString& aEncoding); /** * Callback for mFlushTimer. */ static void TimerCallback(nsITimer* aTimer, void* aClosure); /** * Parser thread entry point for (maybe) flushing the ops and posting @@ -418,17 +421,17 @@ class nsHtml5StreamParser : public nsICh /** * The source (confidence) of the character encoding in use */ int32_t mCharsetSource; /** * The character encoding in use */ - nsCString mCharset; + NotNull<const Encoding*> mEncoding; /** * Whether reparse is forbidden */ bool mReparseForbidden; // Portable parser objects /**
--- a/parser/html/nsHtml5TreeBuilderCppSupplement.h +++ b/parser/html/nsHtml5TreeBuilderCppSupplement.h @@ -1073,53 +1073,55 @@ nsHtml5TreeBuilder::FlushLoads() return; } if (!mSpeculativeLoadQueue.IsEmpty()) { mSpeculativeLoadStage->MoveSpeculativeLoadsFrom(mSpeculativeLoadQueue); } } void -nsHtml5TreeBuilder::SetDocumentCharset(nsACString& aCharset, +nsHtml5TreeBuilder::SetDocumentCharset(NotNull<const Encoding*> aEncoding, int32_t aCharsetSource) { if (mBuilder) { - mBuilder->SetDocumentCharsetAndSource(aCharset, aCharsetSource); + mBuilder->SetDocumentCharsetAndSource(aEncoding, aCharsetSource); } else if (mSpeculativeLoadStage) { + nsAutoCString charset; + aEncoding->Name(charset); mSpeculativeLoadQueue.AppendElement()->InitSetDocumentCharset( - aCharset, aCharsetSource); + charset, aCharsetSource); } else { mOpQueue.AppendElement()->Init( - eTreeOpSetDocumentCharset, aCharset, aCharsetSource); + eTreeOpSetDocumentCharset, aEncoding, aCharsetSource); } } void nsHtml5TreeBuilder::StreamEnded() { MOZ_ASSERT(!mBuilder, "Must not call StreamEnded with builder."); MOZ_ASSERT(!fragment, "Must not parse fragments off the main thread."); nsHtml5TreeOperation* treeOp = mOpQueue.AppendElement(); NS_ASSERTION(treeOp, "Tree op allocation failed."); treeOp->Init(eTreeOpStreamEnded); } void -nsHtml5TreeBuilder::NeedsCharsetSwitchTo(const nsACString& aCharset, +nsHtml5TreeBuilder::NeedsCharsetSwitchTo(NotNull<const Encoding*> aEncoding, int32_t aCharsetSource, int32_t aLineNumber) { if (MOZ_UNLIKELY(mBuilder)) { MOZ_ASSERT_UNREACHABLE("Must never switch charset with builder."); return; } nsHtml5TreeOperation* treeOp = mOpQueue.AppendElement(); NS_ASSERTION(treeOp, "Tree op allocation failed."); treeOp->Init(eTreeOpNeedsCharsetSwitchTo, - aCharset, + aEncoding, aCharsetSource, aLineNumber); } void nsHtml5TreeBuilder::MaybeComplainAboutCharset(const char* aMsgId, bool aError, int32_t aLineNumber)
--- a/parser/html/nsHtml5TreeBuilderHSupplement.h +++ b/parser/html/nsHtml5TreeBuilderHSupplement.h @@ -1,15 +1,17 @@ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #define NS_HTML5_TREE_BUILDER_HANDLE_ARRAY_LENGTH 512 + private: + using Encoding = mozilla::Encoding; + template <typename T> using NotNull = mozilla::NotNull<T>; - private: nsHtml5OplessBuilder* mBuilder; // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! // If mBuilder is not null, the tree op machinery is not in use and // the fields below aren't in use, either. // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! nsHtml5Highlighter* mViewSource; nsTArray<nsHtml5TreeOperation> mOpQueue; nsTArray<nsHtml5SpeculativeLoad> mSpeculativeLoadQueue; @@ -99,21 +101,22 @@ { mOpQueue.Clear(); } bool Flush(bool aDiscretionary = false); void FlushLoads(); - void SetDocumentCharset(nsACString& aCharset, int32_t aCharsetSource); + void SetDocumentCharset(NotNull<const Encoding*> aEncoding, + int32_t aCharsetSource); void StreamEnded(); - void NeedsCharsetSwitchTo(const nsACString& aEncoding, + void NeedsCharsetSwitchTo(NotNull<const Encoding*> aEncoding, int32_t aSource, int32_t aLineNumber); void MaybeComplainAboutCharset(const char* aMsgId, bool aError, int32_t aLineNumber); void AddSnapshotToScript(nsAHtml5TreeBuilderState* aSnapshot, int32_t aLine);
--- a/parser/html/nsHtml5TreeOpExecutor.cpp +++ b/parser/html/nsHtml5TreeOpExecutor.cpp @@ -714,17 +714,17 @@ nsHtml5TreeOpExecutor::RunScript(nsICont void nsHtml5TreeOpExecutor::Start() { NS_PRECONDITION(!mStarted, "Tried to start when already started."); mStarted = true; } void -nsHtml5TreeOpExecutor::NeedsCharsetSwitchTo(const char* aEncoding, +nsHtml5TreeOpExecutor::NeedsCharsetSwitchTo(NotNull<const Encoding*> aEncoding, int32_t aSource, uint32_t aLineNumber) { EndDocUpdate(); if (MOZ_UNLIKELY(!mParser)) { // got terminate return; @@ -732,17 +732,19 @@ nsHtml5TreeOpExecutor::NeedsCharsetSwitc nsCOMPtr<nsIWebShellServices> wss = do_QueryInterface(mDocShell); if (!wss) { return; } // ask the webshellservice to load the URL if (NS_SUCCEEDED(wss->StopDocumentLoad())) { - wss->ReloadDocument(aEncoding, aSource); + nsAutoCString charset; + aEncoding->Name(charset); + wss->ReloadDocument(charset.get(), aSource); } // if the charset switch was accepted, wss has called Terminate() on the // parser by now if (!mParser) { // success if (aSource == kCharsetFromMetaTag) { MaybeComplainAboutCharset("EncLateMetaReload", false, aLineNumber); @@ -906,19 +908,19 @@ nsHtml5TreeOpExecutor::BaseURIForPreload already_AddRefed<nsIURI> nsHtml5TreeOpExecutor::ConvertIfNotPreloadedYet(const nsAString& aURL) { if (aURL.IsEmpty()) { return nullptr; } nsIURI* base = BaseURIForPreload(); - const nsCString& charset = mDocument->GetDocumentCharacterSet(); + auto encoding = mDocument->GetDocumentCharacterSet(); nsCOMPtr<nsIURI> uri; - nsresult rv = NS_NewURI(getter_AddRefs(uri), aURL, charset.get(), base); + nsresult rv = NS_NewURI(getter_AddRefs(uri), aURL, encoding, base); if (NS_FAILED(rv)) { NS_WARNING("Failed to create a URI"); return nullptr; } if (ShouldPreloadURI(uri)) { return uri.forget(); } @@ -1010,33 +1012,33 @@ void nsHtml5TreeOpExecutor::PreloadEndPicture() { mDocument->PreloadPictureClosed(); } void nsHtml5TreeOpExecutor::AddBase(const nsAString& aURL) { - const nsCString& charset = mDocument->GetDocumentCharacterSet(); + auto encoding = mDocument->GetDocumentCharacterSet(); nsresult rv = NS_NewURI(getter_AddRefs(mViewSourceBaseURI), aURL, - charset.get(), GetViewSourceBaseURI()); + encoding, GetViewSourceBaseURI()); if (NS_FAILED(rv)) { mViewSourceBaseURI = nullptr; } } void nsHtml5TreeOpExecutor::SetSpeculationBase(const nsAString& aURL) { if (mSpeculationBaseURI) { // the first one wins return; } - const nsCString& charset = mDocument->GetDocumentCharacterSet(); + auto encoding = mDocument->GetDocumentCharacterSet(); DebugOnly<nsresult> rv = NS_NewURI(getter_AddRefs(mSpeculationBaseURI), aURL, - charset.get(), mDocument->GetDocumentURI()); + encoding, mDocument->GetDocumentURI()); NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "Failed to create a URI"); } void nsHtml5TreeOpExecutor::SetSpeculationReferrerPolicy(const nsAString& aReferrerPolicy) { // Specs says: // - Let value be the result of stripping leading and trailing whitespace from
--- a/parser/html/nsHtml5TreeOpExecutor.h +++ b/parser/html/nsHtml5TreeOpExecutor.h @@ -31,16 +31,18 @@ class nsIDocument; class nsHtml5TreeOpExecutor final : public nsHtml5DocumentBuilder, public nsIContentSink, public nsAHtml5TreeOpSink, public mozilla::LinkedListElement<nsHtml5TreeOpExecutor> { friend class nsHtml5FlushLoopGuard; typedef mozilla::net::ReferrerPolicy ReferrerPolicy; + using Encoding = mozilla::Encoding; + template <typename T> using NotNull = mozilla::NotNull<T>; public: NS_DECL_ISUPPORTS_INHERITED private: static bool sExternalViewSource; #ifdef DEBUG_NS_HTML5_TREE_OP_EXECUTOR_FLUSH static uint32_t sAppendBatchMaxSize; @@ -135,19 +137,19 @@ class nsHtml5TreeOpExecutor final : publ /** * No-op for backwards compat. */ virtual void FlushPendingNotifications(mozilla::FlushType aType) override; /** * Don't call. For interface compat only. */ - NS_IMETHOD SetDocumentCharset(nsACString& aCharset) override { - NS_NOTREACHED("No one should call this."); - return NS_ERROR_NOT_IMPLEMENTED; + virtual void SetDocumentCharset(NotNull<const Encoding*> aEncoding) override + { + NS_NOTREACHED("No one should call this."); } /** * Returns the document. */ virtual nsISupports *GetTarget() override; virtual void ContinueInterruptedParsingAsync() override; @@ -179,17 +181,17 @@ class nsHtml5TreeOpExecutor final : publ void RunFlushLoop(); nsresult FlushDocumentWrite(); void MaybeSuspend(); void Start(); - void NeedsCharsetSwitchTo(const char* aEncoding, + void NeedsCharsetSwitchTo(NotNull<const Encoding*> aEncoding, int32_t aSource, uint32_t aLineNumber); void MaybeComplainAboutCharset(const char* aMsgId, bool aError, uint32_t aLineNumber); void ComplainAboutBogusProtocolCharset(nsIDocument* aDoc);
--- a/parser/html/nsHtml5TreeOperation.cpp +++ b/parser/html/nsHtml5TreeOperation.cpp @@ -100,17 +100,16 @@ nsHtml5TreeOperation::~nsHtml5TreeOperat case eTreeOpAppendComment: case eTreeOpAppendCommentToDocument: case eTreeOpAddViewSourceHref: case eTreeOpAddViewSourceBase: delete[] mTwo.unicharPtr; break; case eTreeOpSetDocumentCharset: case eTreeOpNeedsCharsetSwitchTo: - delete[] mOne.charPtr; break; case eTreeOpProcessOfflineManifest: free(mOne.unicharPtr); break; default: // keep the compiler happy break; } } @@ -784,27 +783,26 @@ nsHtml5TreeOperation::Perform(nsHtml5Tre return NS_OK; } case eTreeOpDoneCreatingElement: { nsIContent* node = *(mOne.node); DoneCreatingElement(node); return NS_OK; } case eTreeOpSetDocumentCharset: { - char* str = mOne.charPtr; + auto encoding = WrapNotNull(mOne.encoding); int32_t charsetSource = mFour.integer; - nsDependentCString dependentString(str); - aBuilder->SetDocumentCharsetAndSource(dependentString, charsetSource); + aBuilder->SetDocumentCharsetAndSource(encoding, charsetSource); return NS_OK; } case eTreeOpNeedsCharsetSwitchTo: { - char* str = mOne.charPtr; + auto encoding = WrapNotNull(mOne.encoding); int32_t charsetSource = mFour.integer; int32_t lineNumber = mTwo.integer; - aBuilder->NeedsCharsetSwitchTo(str, charsetSource, (uint32_t)lineNumber); + aBuilder->NeedsCharsetSwitchTo(encoding, charsetSource, (uint32_t)lineNumber); return NS_OK; } case eTreeOpUpdateStyleSheet: { nsIContent* node = *(mOne.node); aBuilder->UpdateStyleSheet(node); return NS_OK; } case eTreeOpProcessMeta: { @@ -879,21 +877,21 @@ nsHtml5TreeOperation::Perform(nsHtml5Tre nsIContent* node = *mOne.node; char16_t* buffer = mTwo.unicharPtr; int32_t length = mFour.integer; nsDependentString relative(buffer, length); nsIDocument* doc = aBuilder->GetDocument(); - const nsCString& charset = doc->GetDocumentCharacterSet(); + auto encoding = doc->GetDocumentCharacterSet(); nsCOMPtr<nsIURI> uri; nsresult rv = NS_NewURI(getter_AddRefs(uri), relative, - charset.get(), + encoding, aBuilder->GetViewSourceBaseURI()); NS_ENSURE_SUCCESS(rv, NS_OK); // Reuse the fix for bug 467852 // URLs that execute script (e.g. "javascript:" URLs) should just be // ignored. There's nothing reasonable we can do with them, and allowing // them to execute in the context of the view-source window presents a // security risk. Just return the empty string in this case.
--- a/parser/html/nsHtml5TreeOperation.h +++ b/parser/html/nsHtml5TreeOperation.h @@ -3,20 +3,24 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #ifndef nsHtml5TreeOperation_h #define nsHtml5TreeOperation_h #include "nsHtml5DocumentMode.h" #include "nsHtml5HtmlAttributes.h" #include "mozilla/dom/FromParser.h" +#include "mozilla/NotNull.h" class nsIContent; class nsHtml5TreeOpExecutor; class nsHtml5DocumentBuilder; +namespace mozilla { +class Encoding; +} enum eHtml5TreeOperation { eTreeOpUninitialized, // main HTML5 ops eTreeOpAppend, eTreeOpDetach, eTreeOpAppendChildrenToNewParent, eTreeOpFosterParent, @@ -80,17 +84,19 @@ class nsHtml5TreeOperationStringPair { inline void Get(nsAString& aPublicId, nsAString& aSystemId) { aPublicId.Assign(mPublicId); aSystemId.Assign(mSystemId); } }; -class nsHtml5TreeOperation { +class nsHtml5TreeOperation final { + template <typename T> using NotNull = mozilla::NotNull<T>; + using Encoding = mozilla::Encoding; public: /** * Atom is used inside the parser core are either static atoms that are * the same as Gecko-wide static atoms or they are dynamic atoms scoped by * both thread and parser to a particular nsHtml5AtomTable. In order to * such scoped atoms coming into contact with the rest of Gecko, atoms * that are about to exit the parser must go through this method which @@ -245,16 +251,37 @@ class nsHtml5TreeOperation { const nsACString& aString, int32_t aInt32, int32_t aLineNumber) { Init(aOpCode, aString, aInt32); mTwo.integer = aLineNumber; } + inline void Init(eHtml5TreeOperation aOpCode, + NotNull<const Encoding*> aEncoding, + int32_t aInt32) + { + NS_PRECONDITION(mOpCode == eTreeOpUninitialized, + "Op code must be uninitialized when initializing."); + + mOpCode = aOpCode; + mOne.encoding = aEncoding; + mFour.integer = aInt32; + } + + inline void Init(eHtml5TreeOperation aOpCode, + NotNull<const Encoding*> aEncoding, + int32_t aInt32, + int32_t aLineNumber) + { + Init(aOpCode, aEncoding, aInt32); + mTwo.integer = aLineNumber; + } + inline void Init(eHtml5TreeOperation aOpCode, nsIContentHandle* aNode, nsIContentHandle* aParent, nsIContentHandle* aTable) { NS_PRECONDITION(mOpCode == eTreeOpUninitialized, "Op code must be uninitialized when initializing."); NS_PRECONDITION(aNode, "Initialized tree op with null node."); @@ -502,12 +529,13 @@ class nsHtml5TreeOperation { nsHtml5HtmlAttributes* attributes; nsHtml5DocumentMode mode; char16_t* unicharPtr; char* charPtr; nsHtml5TreeOperationStringPair* stringPair; nsAHtml5TreeBuilderState* state; int32_t integer; nsresult result; + const Encoding* encoding; } mOne, mTwo, mThree, mFour, mFive; }; #endif // nsHtml5TreeOperation_h
--- a/parser/htmlparser/nsIContentSink.h +++ b/parser/htmlparser/nsIContentSink.h @@ -14,25 +14,32 @@ * * The icontentsink interface is a very lightweight wrapper that represents the * content-sink model building process. There is another one that you may care * about more, which is the IHTMLContentSink interface. (See that file for details). */ #include "nsISupports.h" #include "nsString.h" #include "mozilla/FlushType.h" +#include "mozilla/NotNull.h" #include "nsIDTD.h" class nsParserBase; +namespace mozilla { +class Encoding; +} #define NS_ICONTENT_SINK_IID \ { 0xcf9a7cbb, 0xfcbc, 0x4e13, \ { 0x8e, 0xf5, 0x18, 0xef, 0x2d, 0x3d, 0x58, 0x29 } } class nsIContentSink : public nsISupports { +protected: + using Encoding = mozilla::Encoding; + template <typename T> using NotNull = mozilla::NotNull<T>; public: NS_DECLARE_STATIC_IID_ACCESSOR(NS_ICONTENT_SINK_IID) /** * This method is called by the parser when it is entered from * the event loop. The content sink wants to know how long the * parser has been active since we last processed events on the @@ -98,17 +105,17 @@ public: * @param aType the type of flush to perform */ virtual void FlushPendingNotifications(mozilla::FlushType aType)=0; /** * Set the document character set. This should be passed on to the * document itself. */ - NS_IMETHOD SetDocumentCharset(nsACString& aCharset)=0; + virtual void SetDocumentCharset(NotNull<const Encoding*> aEncoding) = 0; /** * Returns the target object (often a document object) into which * the content built by this content sink is being added, if any * (IOW, may return null). */ virtual nsISupports *GetTarget()=0;
--- a/parser/htmlparser/nsIParser.h +++ b/parser/htmlparser/nsIParser.h @@ -20,26 +20,30 @@ #include "nsISupports.h" #include "nsIStreamListener.h" #include "nsIDTD.h" #include "nsString.h" #include "nsTArray.h" #include "nsIAtom.h" #include "nsParserBase.h" +#include "mozilla/NotNull.h" #define NS_IPARSER_IID \ { 0x2c4ad90a, 0x740e, 0x4212, \ { 0xba, 0x3f, 0xfe, 0xac, 0xda, 0x4b, 0x92, 0x9e } } class nsIContentSink; class nsIRequestObserver; class nsString; class nsIURI; class nsIChannel; +namespace mozilla { +class Encoding; +} enum eParserCommands { eViewNormal, eViewSource, eViewFragment, eViewErrors }; @@ -58,16 +62,19 @@ enum eStreamState {eNone,eOnStart,eOnDat * * Please DO NOT #include this file in comm-central code, in your XULRunner * app or binary extensions. * * Please DO NOT #include this into new files even inside Gecko. It is more * likely than not that #including this header is the wrong thing to do. */ class nsIParser : public nsParserBase { + protected: + using Encoding = mozilla::Encoding; + template <typename T> using NotNull = mozilla::NotNull<T>; public: NS_DECLARE_STATIC_IID_ACCESSOR(NS_IPARSER_IID) /** * Select given content sink into parser for parser output * @update gess5/11/98 * @param aSink is the new sink to be used by parser @@ -100,18 +107,18 @@ class nsIParser : public nsParserBase { * Call this method once you've created a parser, and want to instruct it * about what charset to load * * @update ftang 4/23/99 * @param aCharset- the charest of a document * @param aCharsetSource- the soure of the chares * @return nada */ - NS_IMETHOD_(void) SetDocumentCharset(const nsACString& aCharset, int32_t aSource)=0; - NS_IMETHOD_(void) GetDocumentCharset(nsACString& oCharset, int32_t& oSource)=0; + virtual void SetDocumentCharset(NotNull<const Encoding*> aCharset, + int32_t aSource) = 0; /** * Get the channel associated with this parser * @update harishd,gagan 07/17/01 * @param aChannel out param that will contain the result * @return NS_OK if successful */ NS_IMETHOD GetChannel(nsIChannel** aChannel) override = 0;
--- a/parser/htmlparser/nsParser.cpp +++ b/parser/htmlparser/nsParser.cpp @@ -121,16 +121,17 @@ public: }; //-------------- End ParseContinue Event Definition ------------------------ /** * default constructor */ nsParser::nsParser() + : mCharset(WINDOWS_1252_ENCODING) { Initialize(true); } nsParser::~nsParser() { Cleanup(); } @@ -145,17 +146,17 @@ nsParser::Initialize(bool aConstructor) else { // nsCOMPtrs mObserver = nullptr; mUnusedInput.Truncate(); } mContinueEvent = nullptr; mCharsetSource = kCharsetUninitialized; - mCharset.AssignLiteral("windows-1252"); + mCharset = WINDOWS_1252_ENCODING; mInternalState = NS_OK; mStreamStatus = NS_OK; mCommand = eViewNormal; mBlocked = 0; mFlags = NS_PARSER_FLAG_OBSERVERS_ENABLED | NS_PARSER_FLAG_CAN_TOKENIZE; mProcessingNetworkData = false; @@ -278,28 +279,29 @@ nsParser::SetCommand(eParserCommands aPa /** * Call this method once you've created a parser, and want to instruct it * about what charset to load * * @param aCharset- the charset of a document * @param aCharsetSource- the source of the charset */ -NS_IMETHODIMP_(void) -nsParser::SetDocumentCharset(const nsACString& aCharset, int32_t aCharsetSource) +void +nsParser::SetDocumentCharset(NotNull<const Encoding*> aCharset, + int32_t aCharsetSource) { mCharset = aCharset; mCharsetSource = aCharsetSource; if (mParserContext && mParserContext->mScanner) { mParserContext->mScanner->SetDocumentCharset(aCharset, aCharsetSource); } } void -nsParser::SetSinkCharset(nsACString& aCharset) +nsParser::SetSinkCharset(NotNull<const Encoding*> aCharset) { if (mSink) { mSink->SetDocumentCharset(aCharset); } } /** * This method gets called in order to set the content @@ -1326,38 +1328,37 @@ ParserWriteFunc(nsIInputStream* in, if (!pws) { return NS_ERROR_FAILURE; } if (pws->mNeedCharsetCheck) { pws->mNeedCharsetCheck = false; int32_t source; - nsAutoCString preferred; - pws->mParser->GetDocumentCharset(preferred, source); + auto preferred = pws->mParser->GetDocumentCharset(source); // This code was bogus when I found it. It expects the BOM or the XML // declaration to be entirely in the first network buffer. -- hsivonen const Encoding* encoding; size_t bomLength; Tie(encoding, bomLength) = Encoding::ForBOM(MakeSpan(buf, count)); Unused << bomLength; if (encoding) { // The decoder will swallow the BOM. The UTF-16 will re-sniff for // endianness. The value of preferred is now "UTF-8", "UTF-16LE" // or "UTF-16BE". - encoding->Name(preferred); + preferred = WrapNotNull(encoding); source = kCharsetFromByteOrderMark; } else if (source < kCharsetFromChannel) { nsAutoCString declCharset; if (ExtractCharsetFromXmlDeclaration(buf, count, declCharset)) { encoding = Encoding::ForLabel(declCharset); if (encoding) { - encoding->Name(preferred); + preferred = WrapNotNull(encoding); source = kCharsetFromMetaTag; } } } pws->mParser->SetDocumentCharset(preferred, source); pws->mParser->SetSinkCharset(preferred);
--- a/parser/htmlparser/nsParser.h +++ b/parser/htmlparser/nsParser.h @@ -124,22 +124,23 @@ class nsParser final : public nsIParser, * Call this method once you've created a parser, and want to instruct it * about what charset to load * * @update ftang 4/23/99 * @param aCharset- the charset of a document * @param aCharsetSource- the source of the charset * @return nada */ - NS_IMETHOD_(void) SetDocumentCharset(const nsACString& aCharset, int32_t aSource) override; + virtual void SetDocumentCharset(NotNull<const Encoding*> aCharset, + int32_t aSource) override; - NS_IMETHOD_(void) GetDocumentCharset(nsACString& aCharset, int32_t& aSource) override + NotNull<const Encoding*> GetDocumentCharset(int32_t& aSource) { - aCharset = mCharset; aSource = mCharsetSource; + return mCharset; } /** * Cause parser to parse input from given URL * @update gess5/11/98 * @param aURL is a descriptor for source document * @param aListener is a listener to forward notifications to * @return TRUE if all went well -- FALSE otherwise @@ -236,17 +237,17 @@ class nsParser final : public nsIParser, */ NS_IMETHOD GetDTD(nsIDTD** aDTD) override; /** * Get the nsIStreamListener for this parser */ virtual nsIStreamListener* GetStreamListener() override; - void SetSinkCharset(nsACString& aCharset); + void SetSinkCharset(NotNull<const Encoding*> aCharset); /** * Removes continue parsing events * @update kmcclusk 5/18/98 */ NS_IMETHOD CancelParsingEvents() override; @@ -383,17 +384,17 @@ protected: nsresult mInternalState; nsresult mStreamStatus; int32_t mCharsetSource; uint16_t mFlags; uint32_t mBlocked; nsString mUnusedInput; - nsCString mCharset; + NotNull<const Encoding*> mCharset; nsCString mCommandStr; bool mProcessingNetworkData; bool mIsAboutBlank; }; #endif
--- a/parser/htmlparser/nsScanner.cpp +++ b/parser/htmlparser/nsScanner.cpp @@ -88,45 +88,37 @@ nsScanner::nsScanner(nsString& aFilename mMarkPosition = mCurrentPosition; mEndPosition = mCurrentPosition; mIncremental = true; mUnicodeDecoder = nullptr; mCharsetSource = kCharsetUninitialized; // XML defaults to UTF-8 and about:blank is UTF-8, too. - SetDocumentCharset(NS_LITERAL_CSTRING("UTF-8"), kCharsetFromDocTypeDefault); + SetDocumentCharset(UTF_8_ENCODING, kCharsetFromDocTypeDefault); } -nsresult nsScanner::SetDocumentCharset(const nsACString& aCharset , int32_t aSource) +nsresult nsScanner::SetDocumentCharset(NotNull<const Encoding*> aEncoding, + int32_t aSource) { if (aSource < mCharsetSource) // priority is lower than the current one return NS_OK; mCharsetSource = aSource; - - const Encoding* encoding; - if (aCharset.EqualsLiteral("replacement")) { - encoding = REPLACEMENT_ENCODING; - } else { - encoding = Encoding::ForLabel(aCharset); - MOZ_ASSERT(encoding, "Should never call with a bogus aCharset."); - } - nsCString charsetName; - encoding->Name(charsetName); + aEncoding->Name(charsetName); if (!mCharset.IsEmpty() && charsetName.Equals(mCharset)) { return NS_OK; // no difference, don't change it } // different, need to change it mCharset.Assign(charsetName); - mUnicodeDecoder = encoding->NewDecoderWithBOMRemoval(); + mUnicodeDecoder = aEncoding->NewDecoderWithBOMRemoval(); return NS_OK; } /** * default destructor *
--- a/parser/htmlparser/nsScanner.h +++ b/parser/htmlparser/nsScanner.h @@ -31,17 +31,19 @@ public: const char16_t *mChars; char16_t mFilter; explicit nsReadEndCondition(const char16_t* aTerminateChars); private: nsReadEndCondition(const nsReadEndCondition& aOther); // No copying void operator=(const nsReadEndCondition& aOther); // No assigning }; -class nsScanner { +class nsScanner final { + using Encoding = mozilla::Encoding; + template <typename T> using NotNull = mozilla::NotNull<T>; public: /** * Use this constructor for the XML fragment parsing case */ explicit nsScanner(const nsAString& anHTMLString); /** @@ -137,17 +139,18 @@ class nsScanner { /** * Use this setter to change the scanner's unicode decoder * * @update ftang 3/02/99 * @param aCharset a normalized (alias resolved) charset name * @param aCharsetSource- where the charset info came from * @return */ - nsresult SetDocumentCharset(const nsACString& aCharset, int32_t aSource); + nsresult SetDocumentCharset(NotNull<const Encoding*> aEncoding, + int32_t aSource); void BindSubstring(nsScannerSubstring& aSubstring, const nsScannerIterator& aStart, const nsScannerIterator& aEnd); void CurrentPosition(nsScannerIterator& aPosition); void EndReading(nsScannerIterator& aPosition); void SetPosition(nsScannerIterator& aPosition, bool aTruncate = false); /**
--- a/parser/xml/nsSAXXMLReader.cpp +++ b/parser/xml/nsSAXXMLReader.cpp @@ -15,16 +15,17 @@ #include "nsStreamUtils.h" #include "nsStringStream.h" #include "nsIScriptError.h" #include "nsSAXAttributes.h" #include "nsSAXLocator.h" #include "nsCharsetSource.h" using mozilla::Encoding; +using mozilla::NotNull; #define XMLNS_URI "http://www.w3.org/2000/xmlns/" static NS_DEFINE_CID(kParserCID, NS_PARSER_CID); NS_IMPL_CYCLE_COLLECTION(nsSAXXMLReader, mContentHandler, mDTDHandler, @@ -623,46 +624,46 @@ nsSAXXMLReader::InitParser(nsIRequestObs // setup the parser nsCOMPtr<nsIParser> parser = do_CreateInstance(kParserCID, &rv); NS_ENSURE_SUCCESS(rv, rv); parser->SetContentSink(this); int32_t charsetSource = kCharsetFromDocTypeDefault; - nsAutoCString charset(NS_LITERAL_CSTRING("UTF-8")); - TryChannelCharset(aChannel, charsetSource, charset); - parser->SetDocumentCharset(charset, charsetSource); + auto encoding = UTF_8_ENCODING; + TryChannelCharset(aChannel, charsetSource, encoding); + parser->SetDocumentCharset(encoding, charsetSource); rv = parser->Parse(mBaseURI, aObserver); NS_ENSURE_SUCCESS(rv, rv); mListener = do_QueryInterface(parser, &rv); return rv; } // from nsDocument.cpp bool nsSAXXMLReader::TryChannelCharset(nsIChannel *aChannel, int32_t& aCharsetSource, - nsACString& aCharset) + NotNull<const Encoding*>& aEncoding) { if (aCharsetSource >= kCharsetFromChannel) return true; if (aChannel) { nsAutoCString charsetVal; nsresult rv = aChannel->GetContentCharset(charsetVal); if (NS_SUCCEEDED(rv)) { const Encoding* preferred = Encoding::ForLabel(charsetVal); if (!preferred) return false; - preferred->Name(aCharset); + aEncoding = WrapNotNull(preferred); aCharsetSource = kCharsetFromChannel; return true; } } return false; }
--- a/parser/xml/nsSAXXMLReader.h +++ b/parser/xml/nsSAXXMLReader.h @@ -14,16 +14,17 @@ #include "nsISAXXMLReader.h" #include "nsISAXContentHandler.h" #include "nsISAXDTDHandler.h" #include "nsISAXErrorHandler.h" #include "nsISAXLexicalHandler.h" #include "nsIMozSAXXMLDeclarationHandler.h" #include "nsCycleCollectionParticipant.h" #include "mozilla/Attributes.h" +#include "mozilla/NotNull.h" #define NS_SAXXMLREADER_CONTRACTID "@mozilla.org/saxparser/xmlreader;1" #define NS_SAXXMLREADER_CID \ { 0xab1da296, 0x6125, 0x40ba, \ { 0x96, 0xd0, 0x47, 0xa8, 0x28, 0x2a, 0xe3, 0xdb} } class nsSAXXMLReader final : public nsISAXXMLReader, public nsIExtendedExpatSink, @@ -59,19 +60,18 @@ public: { return NS_OK; } virtual void FlushPendingNotifications(mozilla::FlushType aType) override { } - NS_IMETHOD SetDocumentCharset(nsACString& aCharset) override + virtual void SetDocumentCharset(NotNull<const Encoding*> aEncoding) override { - return NS_OK; } virtual nsISupports *GetTarget() override { return nullptr; } private: @@ -82,18 +82,18 @@ private: nsCOMPtr<nsISAXErrorHandler> mErrorHandler; nsCOMPtr<nsISAXLexicalHandler> mLexicalHandler; nsCOMPtr<nsIMozSAXXMLDeclarationHandler> mDeclarationHandler; nsCOMPtr<nsIURI> mBaseURI; nsCOMPtr<nsIStreamListener> mListener; nsCOMPtr<nsIRequestObserver> mParserObserver; bool mIsAsyncParse; static bool TryChannelCharset(nsIChannel *aChannel, - int32_t& aCharsetSource, - nsACString& aCharset); + int32_t& aCharsetSource, + NotNull<const Encoding*>& aEncoding); nsresult EnsureBaseURI(); nsresult InitParser(nsIRequestObserver *aListener, nsIChannel *aChannel); nsresult SplitExpatName(const char16_t *aExpatName, nsString &aURI, nsString &aLocalName, nsString &aQName); nsString mPublicId; nsString mSystemId;
--- a/rdf/base/nsRDFContentSink.cpp +++ b/rdf/base/nsRDFContentSink.cpp @@ -101,17 +101,18 @@ public: // nsIContentSink NS_IMETHOD WillParse(void) override; NS_IMETHOD WillBuildModel(nsDTDMode aDTDMode) override; NS_IMETHOD DidBuildModel(bool aTerminated) override; NS_IMETHOD WillInterrupt(void) override; NS_IMETHOD WillResume(void) override; NS_IMETHOD SetParser(nsParserBase* aParser) override; virtual void FlushPendingNotifications(mozilla::FlushType aType) override { } - NS_IMETHOD SetDocumentCharset(nsACString& aCharset) override { return NS_OK; } + virtual void SetDocumentCharset(NotNull<const Encoding*> aEncoding) + override { } virtual nsISupports *GetTarget() override { return nullptr; } // nsIRDFContentSink NS_IMETHOD Init(nsIURI* aURL) override; NS_IMETHOD SetDataSource(nsIRDFDataSource* aDataSource) override; NS_IMETHOD GetDataSource(nsIRDFDataSource*& aDataSource) override; // pseudo constants
--- a/rdf/base/nsRDFXMLParser.cpp +++ b/rdf/base/nsRDFXMLParser.cpp @@ -1,16 +1,17 @@ /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #include "nsRDFXMLParser.h" +#include "mozilla/Encoding.h" #include "nsIComponentManager.h" #include "nsIParser.h" #include "nsCharsetSource.h" #include "nsIRDFContentSink.h" #include "nsParserCIID.h" #include "nsStringStream.h" #include "nsNetUtil.h" #include "NullPrincipal.h" @@ -60,17 +61,17 @@ nsRDFXMLParser::ParseAsync(nsIRDFDataSou // We set the content sink's data source directly to our in-memory // store. This allows the initial content to be generated "directly". rv = sink->SetDataSource(aSink); if (NS_FAILED(rv)) return rv; nsCOMPtr<nsIParser> parser = do_CreateInstance(kParserCID, &rv); if (NS_FAILED(rv)) return rv; - parser->SetDocumentCharset(NS_LITERAL_CSTRING("UTF-8"), + parser->SetDocumentCharset(UTF_8_ENCODING, kCharsetFromDocTypeDefault); parser->SetContentSink(sink); rv = parser->Parse(aBaseURI); if (NS_FAILED(rv)) return rv; return CallQueryInterface(parser, aResult); } @@ -91,17 +92,17 @@ nsRDFXMLParser::ParseString(nsIRDFDataSo // We set the content sink's data source directly to our in-memory // store. This allows the initial content to be generated "directly". rv = sink->SetDataSource(aSink); if (NS_FAILED(rv)) return rv; nsCOMPtr<nsIParser> parser = do_CreateInstance(kParserCID, &rv); if (NS_FAILED(rv)) return rv; - parser->SetDocumentCharset(NS_LITERAL_CSTRING("UTF-8"), + parser->SetDocumentCharset(UTF_8_ENCODING, kCharsetFromOtherComponent); parser->SetContentSink(sink); rv = parser->Parse(aBaseURI); if (NS_FAILED(rv)) return rv; nsCOMPtr<nsIStreamListener> listener = do_QueryInterface(parser);