Bug 910211 - Guess the fallback encoding from the top-level domain when feasible. r=emk.
author: Henri Sivonen <hsivonen@hsivonen.fi>
Thu, 06 Feb 2014 11:08:01 +0200
changeset: 178225 a4e9e8bead92c9d51d4e478a73e8e589263e92ae
parent: 178224 84b6f0aba30d13304476993becd8cb89a65526fd
child: 178226 246619a2799221016d400aec982cbf02d378c28e
push id: 5439
push user: ffxbld
push date: Mon, 17 Mar 2014 23:08:15 +0000
treeherder: mozilla-aurora@c0befb3c8038 [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: emk
bugs: 910211
milestone: 30.0a1
Bug 910211 - Guess the fallback encoding from the top-level domain when feasible. r=emk.
build/pgo/server-locations.txt
content/html/document/src/nsHTMLDocument.cpp
content/html/document/src/nsHTMLDocument.h
docshell/base/nsDocShell.cpp
dom/encoding/FallbackEncoding.cpp
dom/encoding/FallbackEncoding.h
dom/encoding/Makefile.in
dom/encoding/domainsfallbacks.properties
dom/encoding/moz.build
dom/encoding/nonparticipatingdomains.properties
dom/encoding/test/file_TLD.html
dom/encoding/test/mochitest.ini
dom/encoding/test/test_TLD.html
modules/libpref/src/init/all.js
parser/nsCharsetSource.h
--- a/build/pgo/server-locations.txt
+++ b/build/pgo/server-locations.txt
@@ -200,8 +200,15 @@ http://xn--lve-6lad.w3c-test.org:83
 # HTTPS versions of the above
 https://w3c-test.org:443
 https://www.w3c-test.org:443
 https://www1.w3c-test.org:443
 https://www2.w3c-test.org:443
 https://xn--n8j6ds53lwwkrqhv28a.w3c-test.org:443
 https://xn--lve-6lad.w3c-test.org:443
 http://test.w3.org:80
+
+# Hosts for testing TLD-based fallback encoding
+http://example.tw:80                privileged
+http://example.cn:80                privileged
+http://example.co.jp:80             privileged
+http://example.fi:80                privileged
+
--- a/content/html/document/src/nsHTMLDocument.cpp
+++ b/content/html/document/src/nsHTMLDocument.cpp
@@ -431,16 +431,76 @@ nsHTMLDocument::TryParentCharset(nsIDocS
     }
 
     aCharset.Assign(parentCharset);
     aCharsetSource = kCharsetFromParentFrame;
   }
 }
 
 void
+nsHTMLDocument::TryTLD(int32_t& aCharsetSource, nsACString& aCharset)
+{ // Guess a fallback encoding from the TLD of mDocumentURI's host.
+  if (aCharsetSource >= kCharsetFromTopLevelDomain) {
+    return; // An equal-or-higher-priority source has already been recorded.
+  }
+  if (!FallbackEncoding::sGuessFallbackFromTopLevelDomain) {
+    return; // TLD-based guessing is switched off.
+  }
+  if (!mDocumentURI) {
+    return; // No URI to take a host from.
+  }
+  nsAutoCString host;
+  mDocumentURI->GetAsciiHost(host); // Result ignored; an empty host bails out below.
+  if (host.IsEmpty()) {
+    return;
+  }
+  // A DNS-absolute host ends with a dot; strip that single trailing dot
+  // before looking for the TLD.
+  if (host.Last() == '.') {
+    host.SetLength(host.Length() - 1);
+    if (host.IsEmpty()) {
+      return; // The host consisted of nothing but that one dot.
+    }
+  }
+  // If the host still ends with a dot (i.e. it originally ended with ".."),
+  // it is malformed, so give up instead of guessing.
+  if (host.Last() == '.') {
+    return;
+  }
+  int32_t index = host.RFindChar('.'); // Position of the last remaining dot.
+  if (index == kNotFound) {
+    // We have an intranet host, Gecko-internal URL or an IPv6 address.
+    return;
+  }
+  // Since the string didn't end with a dot and we found a dot,
+  // there is at least one character between the dot and the end of
+  // the string, so taking the substring below is safe.
+  nsAutoCString tld;
+  ToLowerCase(Substring(host, index + 1, host.Length() - (index + 1)), tld);
+  // Reject generic TLDs and country TLDs that need more research.
+  if (!FallbackEncoding::IsParticipatingTopLevelDomain(tld)) {
+    return;
+  }
+  // An all-digit last label means the host is an IPv4 address, not a name.
+  bool seenNonDigit = false;
+  for (size_t i = 0; i < tld.Length(); ++i) {
+    char c = tld.CharAt(i);
+    if (c < '0' || c > '9') {
+      seenNonDigit = true;
+      break;
+    }
+  }
+  if (!seenNonDigit) {
+    return; // Every character was a digit: treat as an IPv4 literal.
+  }
+  aCharsetSource = kCharsetFromTopLevelDomain;
+  FallbackEncoding::FromTopLevelDomain(tld, aCharset); // Writes the guess into aCharset.
+}
+
+void
 nsHTMLDocument::TryFallback(int32_t& aCharsetSource, nsACString& aCharset)
 {
   if (kCharsetFromFallback <= aCharsetSource)
     return;
 
   aCharsetSource = kCharsetFromFallback;
   FallbackEncoding::FromLocale(aCharset);
 }
@@ -656,16 +716,17 @@ nsHTMLDocument::StartDocumentLoad(const 
 
     TryHintCharset(muCV, charsetSource, charset); // XXX mailnews-only
     TryParentCharset(docShell, charsetSource, charset);
 
     if (cachingChan && !urlSpec.IsEmpty()) {
       TryCacheCharset(cachingChan, charsetSource, charset);
     }
 
+    TryTLD(charsetSource, charset);
     TryFallback(charsetSource, charset);
 
     if (wyciwygChannel) {
       // We know for sure that the parser needs to be using UTF16.
       parserCharset = "UTF-16";
       parserCharsetSource = charsetSource < kCharsetFromChannel ?
         kCharsetFromChannel : charsetSource;
         
--- a/content/html/document/src/nsHTMLDocument.h
+++ b/content/html/document/src/nsHTMLDocument.h
@@ -308,16 +308,17 @@ protected:
                             nsIDocShell*  aDocShell,
                             int32_t& aCharsetSource,
                             nsACString& aCharset);
   static void TryCacheCharset(nsICachingChannel* aCachingChannel,
                                 int32_t& aCharsetSource,
                                 nsACString& aCharset);
   void TryParentCharset(nsIDocShell*  aDocShell,
                         int32_t& charsetSource, nsACString& aCharset);
+  void TryTLD(int32_t& aCharsetSource, nsACString& aCharset);
   static void TryFallback(int32_t& aCharsetSource, nsACString& aCharset);
 
   // Override so we can munge the charset on our wyciwyg channel as needed.
   virtual void SetDocumentCharacterSet(const nsACString& aCharSetID) MOZ_OVERRIDE;
 
   // Tracks if we are currently processing any document.write calls (either
   // implicit or explicit). Note that if a write call writes out something which
   // would block the parser, then mWriteLevel will be incorrect until the parser
--- a/docshell/base/nsDocShell.cpp
+++ b/docshell/base/nsDocShell.cpp
@@ -1989,16 +1989,20 @@ nsDocShell::GatherCharsetMenuTelemetry()
   bool isFileURL = false;
   nsIURI* url = doc->GetOriginalURI();
   if (url) {
     url->SchemeIs("file", &isFileURL);
   }
 
   int32_t charsetSource = doc->GetDocumentCharacterSetSource();
   switch (charsetSource) {
+    case kCharsetFromTopLevelDomain:
+      // Unlabeled doc on a domain that we map to a fallback encoding
+      Telemetry::Accumulate(Telemetry::CHARSET_OVERRIDE_SITUATION, 7);
+      break;
     case kCharsetFromFallback:
     case kCharsetFromDocTypeDefault:
     case kCharsetFromCache:
     case kCharsetFromParentFrame:
     case kCharsetFromHintPrevDoc:
       // Changing charset on an unlabeled doc.
       if (isFileURL) {
         Telemetry::Accumulate(Telemetry::CHARSET_OVERRIDE_SITUATION, 0);
--- a/dom/encoding/FallbackEncoding.cpp
+++ b/dom/encoding/FallbackEncoding.cpp
@@ -12,17 +12,26 @@
 
 namespace mozilla {
 namespace dom {
 
 static const char* localesFallbacks[][3] = {
 #include "localesfallbacks.properties.h"
 };
 
+static const char* domainsFallbacks[][3] = {
+#include "domainsfallbacks.properties.h"
+};
+
+static const char* nonParticipatingDomains[][3] = {
+#include "nonparticipatingdomains.properties.h"
+};
+
 FallbackEncoding* FallbackEncoding::sInstance = nullptr;
+bool FallbackEncoding::sGuessFallbackFromTopLevelDomain = true;
 
 FallbackEncoding::FallbackEncoding()
 {
   MOZ_COUNT_CTOR(FallbackEncoding);
   MOZ_ASSERT(!FallbackEncoding::sInstance,
              "Singleton already exists.");
 }
 
@@ -116,21 +125,44 @@ FallbackEncoding::Initialize()
              "Initializing pre-existing fallback cache.");
   FallbackEncoding::sInstance = new FallbackEncoding;
   Preferences::RegisterCallback(FallbackEncoding::PrefChanged,
                                 "intl.charset.fallback.override",
                                 nullptr);
   Preferences::RegisterCallback(FallbackEncoding::PrefChanged,
                                 "general.useragent.locale",
                                 nullptr);
+  Preferences::AddBoolVarCache(&sGuessFallbackFromTopLevelDomain,
+                               "intl.charset.fallback.tld");
 }
 
 void
 FallbackEncoding::Shutdown()
 {
   MOZ_ASSERT(FallbackEncoding::sInstance,
              "Releasing non-existent fallback cache.");
   delete FallbackEncoding::sInstance;
   FallbackEncoding::sInstance = nullptr;
 }
 
+bool
+FallbackEncoding::IsParticipatingTopLevelDomain(const nsACString& aTLD)
+{ // A TLD participates unless the nonParticipatingDomains lookup succeeds.
+  nsAutoCString dummy; // Receives the looked-up value, which is never read.
+  return NS_FAILED(nsUConvPropertySearch::SearchPropertyValue( // true when the lookup fails
+      nonParticipatingDomains, // (presumably: aTLD is not a key in this table)
+      ArrayLength(nonParticipatingDomains),
+      aTLD,
+      dummy));
+}
+
+void
+FallbackEncoding::FromTopLevelDomain(const nsACString& aTLD,
+                                     nsACString& aFallback)
+{ // Looks aTLD up in domainsFallbacks and writes the result to aFallback.
+  if (NS_FAILED(nsUConvPropertySearch::SearchPropertyValue(
+      domainsFallbacks, ArrayLength(domainsFallbacks), aTLD, aFallback))) {
+    aFallback.AssignLiteral("windows-1252"); // Default when the lookup fails.
+  }
+}
+
 } // namespace dom
 } // namespace mozilla
--- a/dom/encoding/FallbackEncoding.h
+++ b/dom/encoding/FallbackEncoding.h
@@ -10,23 +10,45 @@
 namespace mozilla {
 namespace dom {
 
 class FallbackEncoding
 {
 public:
 
   /**
+   * Whether FromTopLevelDomain() should be used.
+   */
+  static bool sGuessFallbackFromTopLevelDomain;
+
+  /**
    * Gets the locale-dependent fallback encoding for legacy HTML and plain
    * text content.
    *
    * @param aFallback the outparam for the fallback encoding
    */
   static void FromLocale(nsACString& aFallback);
 
+  /**
+   * Checks if it is appropriate to call FromTopLevelDomain() for a given TLD.
+   *
+   * @param aTLD the top-level domain (in Punycode)
+   * @return true if OK to call FromTopLevelDomain()
+   */
+  static bool IsParticipatingTopLevelDomain(const nsACString& aTLD);
+
+  /**
+   * Gets a top-level-domain-dependent fallback encoding for legacy HTML
+   * and plain text content
+   *
+   * @param aTLD the top-level domain (in Punycode)
+   * @param aFallback the outparam for the fallback encoding
+   */
+  static void FromTopLevelDomain(const nsACString& aTLD, nsACString& aFallback);
+
   // public API ends here!
 
   /**
    * Allocate sInstance used by FromLocale().
    * To be called from nsLayoutStatics only.
    */
   static void Initialize();
 
--- a/dom/encoding/Makefile.in
+++ b/dom/encoding/Makefile.in
@@ -4,8 +4,12 @@
 
 include $(topsrcdir)/config/rules.mk
 
 PROPS2ARRAYS = $(topsrcdir)/intl/locale/src/props2arrays.py
 labelsencodings.properties.h: $(PROPS2ARRAYS) labelsencodings.properties
 	$(PYTHON) $^ $@
 localesfallbacks.properties.h: $(PROPS2ARRAYS) localesfallbacks.properties
 	$(PYTHON) $^ $@
+domainsfallbacks.properties.h: $(PROPS2ARRAYS) domainsfallbacks.properties
+	$(PYTHON) $^ $@
+nonparticipatingdomains.properties.h: $(PROPS2ARRAYS) nonparticipatingdomains.properties
+	$(PYTHON) $^ $@
new file mode 100644
--- /dev/null
+++ b/dom/encoding/domainsfallbacks.properties
@@ -0,0 +1,167 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# This file contains educated guesses about which top-level domains are
+# likely to host legacy content that assumes a non-windows-1252 encoding.
+# Punycode TLDs are included on the theory that legacy content might appear
+# behind those relatively new TLDs if DNS just points to a legacy server.
+#
+# Domains for which a confident-enough educated guess is missing are
+# listed in nonparticipatingdomains.properties. Domains that are listed
+# neither there nor here get windows-1252 as the associated fallback.
+#
+# The list below includes Arabic-script TLDs not on IANA list but on the 
+# ICANN list:
+# http://www.icann.org/en/resources/idn/fast-track/string-evaluation-completion
+# Otherwise, the list includes non-windows-1252-affiliated country TLDs from
+# https://data.iana.org/TLD/tlds-alpha-by-domain.txt
+#
+# The guesses are assigned as follows:
+# * If the country has a dominant country-affiliated language and that language
+#   is part of the languages to fallbacks mapping, use the encoding for that
+#   language from that mapping.
+# * Use windows-1256 for countries that have a dominant Arabic-script
+#   language or whose all languages are Arabic-script languages.
+# * Use windows-1251 likewise but for Cyrillic script.
+
+ae=windows-1256
+xn--mgbaam7a8h=windows-1256
+
+af=windows-1256
+
+bg=windows-1251
+
+bh=windows-1256
+
+by=windows-1251
+
+cn=gbk
+xn--fiqs8s=gbk
+# Assume that Traditional Chinese TLD is meant to work if URL input happens to 
+# be in the traditional mode. Expect content to be simplified anyway.
+xn--fiqz9s=gbk
+
+cz=windows-1250
+
+dz=windows-1256
+xn--lgbbat1ad8j=windows-1256
+
+ee=windows-1257
+
+eg=windows-1256
+xn--wgbh1c=windows-1256
+
+gr=ISO-8859-7
+
+hk=Big5-HKSCS
+xn--j6w193g=Big5-HKSCS
+
+hr=windows-1250
+
+hu=ISO-8859-2
+
+iq=windows-1256
+
+ir=windows-1256
+xn--mgba3a4f16a=windows-1256
+
+jo=windows-1256
+xn--mgbayh7gpa=windows-1256
+
+jp=Shift_JIS
+
+kg=windows-1251
+
+kp=EUC-KR
+
+kr=EUC-KR
+xn--3e0b707e=EUC-KR
+
+kw=windows-1256
+
+kz=windows-1251
+xn--80ao21a=windows-1251
+
+lb=windows-1256
+
+lt=windows-1257
+
+lv=windows-1257
+
+ma=windows-1256
+xn--mgbc0a9azcg=windows-1256
+
+mk=windows-1251
+
+mn=windows-1251
+xn--l1acc=windows-1251
+
+mo=Big5
+
+# my
+xn--mgbx4cd0ab=windows-1256
+
+om=windows-1256
+xn--mgb9awbf=windows-1256
+
+#pk
+xn--mgbai9azgqp6j=windows-1256
+
+pl=ISO-8859-2
+
+ps=windows-1256
+xn--ygbi2ammx=windows-1256
+
+qa=windows-1256
+xn--wgbl6a=windows-1256
+
+rs=windows-1251
+xn--90a3ac=windows-1251
+
+ru=windows-1251
+xn--p1ai=windows-1251
+
+sa=windows-1256
+xn--mgberp4a5d4ar=windows-1256
+
+sd=windows-1256
+xn--mgbpl2fh=windows-1256
+
+sg=gbk
+xn--yfro4i67o=gbk
+
+si=ISO-8859-2
+
+sk=windows-1250
+
+su=windows-1251
+
+sy=windows-1256
+xn--mgbtf8fl=windows-1256
+
+th=windows-874
+xn--o3cw4h=windows-874
+
+tj=windows-1251
+
+tn=windows-1256
+xn--pgbs0dh=windows-1256
+
+tr=windows-1254
+
+tw=Big5
+# Assume that the Simplified Chinese TLD is meant to work when URL input
+# happens in the simplified mode. Assume content is traditional anyway.
+xn--kprw13d=Big5
+xn--kpry57d=Big5
+
+ua=windows-1251
+xn--j1amh=windows-1251
+
+uz=windows-1251
+
+vn=windows-1258
+
+ye=windows-1256
+xn--mgb2ddes=windows-1256
--- a/dom/encoding/moz.build
+++ b/dom/encoding/moz.build
@@ -23,11 +23,13 @@ UNIFIED_SOURCES += [
 FAIL_ON_WARNINGS = True
 
 FINAL_LIBRARY = 'gklayout'
 LOCAL_INCLUDES += [
     '/intl/locale/src',
 ]
 
 GENERATED_FILES += [
+    'domainsfallbacks.properties.h',
     'labelsencodings.properties.h',
     'localesfallbacks.properties.h',
+    'nonparticipatingdomains.properties.h',
 ]
new file mode 100644
--- /dev/null
+++ b/dom/encoding/nonparticipatingdomains.properties
@@ -0,0 +1,51 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Top-level domains listed here do not participate in TLD-based guessing.
+#
+# We should do Web crawls to see if domains listed here can migrate to
+# domainsfallbacks.properties.
+#
+# The value to the right of the = sign is ignored and serves as a placeholder.
+
+# Generic
+com=windows-1252
+net=windows-1252
+org=windows-1252
+
+# No Firefox localization for Azeri
+az=windows-1254
+
+# windows-1251 or windows-1250?
+ba=???
+
+# ISO-8859-7 or windows-1254?
+cy=???
+
+# Is there enough unlabeled windows-1256 content for a windows-1255 fallback
+# to break too much?
+il=windows-1255
+
+# Out-of-country English use
+ly=windows-1256
+
+# Out-of-country English use
+# md=windows-1250
+
+# Out-of-country English use
+# me=windows-1251
+
+# Malaysia has an Arabic-script TLD; the official script is Latin, and there may be Chinese-script publications
+my=???
+
+# No Firefox localization for Urdu; potential for minority-language sites
+# relying on windows-1252 hacks.
+pk=windows-1256
+
+# The Romanian localization says windows-1252, even though the Windows legacy
+# differs.
+ro=windows-1250
+
+tm=windows-1250
+
new file mode 100644
--- /dev/null
+++ b/dom/encoding/test/file_TLD.html
@@ -0,0 +1,7 @@
+<!DOCTYPE html>
+<script>
+function report() { // Posts this document's characterSet to the parent window.
+  window.parent.postMessage(document.characterSet, "*");
+}
+</script>
+<body onload="report();">
--- a/dom/encoding/test/mochitest.ini
+++ b/dom/encoding/test/mochitest.ini
@@ -2,18 +2,20 @@
 support-files =
   file_utf16_be_bom.css
   file_utf16_be_bom.js
   file_utf16_be_bom.xhtml
   file_utf16_le_bom.css
   file_utf16_le_bom.js
   file_utf16_le_bom.xhtml
   file_utf16_le_nobom.xhtml
+  file_TLD.html
   worker_helper.js
 
 [test_BOMEncoding.js]
 [test_TextDecoder.html]
 [test_TextDecoder.js]
 [test_TextEncoder.html]
 [test_TextEncoder.js]
 [test_stringencoding.html]
 [test_submit_euckr.html]
+[test_TLD.html]
 [test_utf16_files.html]
new file mode 100644
--- /dev/null
+++ b/dom/encoding/test/test_TLD.html
@@ -0,0 +1,57 @@
+<!DOCTYPE HTML>
+<html>
+<!--
+https://bugzilla.mozilla.org/show_bug.cgi?id=910211
+-->
+<head>
+  <meta charset="utf-8">
+  <title>Test for Bug 910211</title>
+  <script type="application/javascript" src="/tests/SimpleTest/SimpleTest.js"></script>
+  <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css"/>
+  <script type="application/javascript">
+
+  /** Test for Bug 910211 **/
+
+  SimpleTest.waitForExplicitFinish();
+
+  var tlds = [
+    {'tld': 'tw', 'encoding': 'Big5'},
+    {'tld': 'cn', 'encoding': 'gbk'},
+    {'tld': 'co.jp', 'encoding': 'Shift_JIS'},
+    {'tld': 'fi', 'encoding': 'windows-1252'},
+  ];
+
+  var iframe = null;
+
+  var current = null;
+
+  function runTest() { // Entry point, invoked from <body onload>.
+    iframe = document.getElementsByTagName("iframe")[0];
+    window.addEventListener("message", next); // Each received message drives next().
+    next(null); // Start the first load; there is no result to check yet.
+  }
+
+  function next(event) { // Checks the previous result (if any), then loads the next host.
+    if (event) { // null on the initial call from runTest()
+      is(event.data, current['encoding'], "Got bad encoding for " + current["tld"]);
+    }
+    current = tlds.shift(); // undefined once every entry has been consumed
+    if (!current) {
+      SimpleTest.finish();
+      return;
+    }
+    iframe.src = "http://example." + current["tld"] + "/tests/dom/encoding/test/file_TLD.html";
+  }
+
+  </script>
+</head>
+<body onload="runTest();">
+<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=910211">Mozilla Bug 910211</a>
+<p id="display"></p>
+<div id="content" style="display: none">
+<iframe></iframe>
+</div>
+<pre id="test">
+</pre>
+</body>
+</html>
--- a/modules/libpref/src/init/all.js
+++ b/modules/libpref/src/init/all.js
@@ -1387,16 +1387,17 @@ pref("intl.charsetmenu.browser.more4",  
 pref("intl.charsetmenu.browser.more5",      "ISO-8859-6, windows-1256, ISO-8859-8-I, windows-1255, ISO-8859-8, IBM862");
 pref("intl.charsetmenu.mailedit",           "chrome://global/locale/intl.properties");
 pref("intl.charsetmenu.browser.cache",      "");
 pref("intl.charsetmenu.mailview.cache",     "");
 pref("intl.charsetmenu.composer.cache",     "");
 pref("intl.charsetmenu.browser.cache.size", 5);
 pref("intl.charset.detector",               "chrome://global/locale/intl.properties");
 pref("intl.charset.fallback.override",      "");
+pref("intl.charset.fallback.tld",           true);
 pref("intl.ellipsis",                       "chrome://global-platform/locale/intl.properties");
 pref("intl.locale.matchOS",                 false);
 // fallback charset list for Unicode conversion (converting from Unicode)
 // currently used for mail send only to handle symbol characters (e.g Euro, trademark, smartquotes)
 // for ISO-8859-1
 pref("intl.fallbackCharsetList.ISO-8859-1", "windows-1252");
 pref("font.language.group",                 "chrome://global/locale/intl.properties");
 
--- a/parser/nsCharsetSource.h
+++ b/parser/nsCharsetSource.h
@@ -3,23 +3,24 @@
  * You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #ifndef nsCharsetSource_h_
 #define nsCharsetSource_h_
 
 // note: the value order defines the priority; higher numbers take priority
 #define kCharsetUninitialized           0
 #define kCharsetFromFallback            1
-#define kCharsetFromDocTypeDefault      2 // This and up confident for XHR
-#define kCharsetFromCache               3
-#define kCharsetFromParentFrame         4
-#define kCharsetFromAutoDetection       5
-#define kCharsetFromHintPrevDoc         6
-#define kCharsetFromMetaPrescan         7 // this one and smaller: HTML5 Tentative
-#define kCharsetFromMetaTag             8 // this one and greater: HTML5 Confident
-#define kCharsetFromIrreversibleAutoDetection 9
-#define kCharsetFromChannel            10
-#define kCharsetFromOtherComponent     11
-#define kCharsetFromParentForced       12 // propagates to child frames
-#define kCharsetFromUserForced         13 // propagates to child frames
-#define kCharsetFromByteOrderMark      14
+#define kCharsetFromTopLevelDomain      2 // guessed from the host's top-level domain
+#define kCharsetFromDocTypeDefault      3 // This and up confident for XHR
+#define kCharsetFromCache               4
+#define kCharsetFromParentFrame         5
+#define kCharsetFromAutoDetection       6
+#define kCharsetFromHintPrevDoc         7
+#define kCharsetFromMetaPrescan         8 // this one and smaller: HTML5 Tentative
+#define kCharsetFromMetaTag             9 // this one and greater: HTML5 Confident
+#define kCharsetFromIrreversibleAutoDetection 10
+#define kCharsetFromChannel            11
+#define kCharsetFromOtherComponent     12
+#define kCharsetFromParentForced       13 // propagates to child frames
+#define kCharsetFromUserForced         14 // propagates to child frames
+#define kCharsetFromByteOrderMark      15
 
 #endif /* nsCharsetSource_h_ */