Bug 1334246. r=mrbkap.
author Henri Sivonen <hsivonen@hsivonen.fi>
Thu, 16 Feb 2017 10:05:27 +0200
changeset 343276 07f8e7798ab58d7068119f372f443fb84649b016
parent 343275 1bcbd44a34a82c9c96b8e651c867fc94f1690f82
child 343277 208a7d1895d7a777aa7aae3bd5097d9e1e4e8261
push id 31375
push user kwierso@gmail.com
push date Thu, 16 Feb 2017 17:28:15 +0000
treeherder mozilla-central@bf6b9caab2c7 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers mrbkap
bugs 1334246
milestone 54.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1334246. r=mrbkap. nsScannerString changes by erahm. MozReview-Commit-ID: 6lh8SYjpGlj
parser/htmlparser/nsParser.cpp
parser/htmlparser/nsScanner.cpp
parser/htmlparser/nsScanner.h
parser/htmlparser/nsScannerString.cpp
--- a/parser/htmlparser/nsParser.cpp
+++ b/parser/htmlparser/nsParser.cpp
@@ -330,424 +330,16 @@ nsParser::SetContentSink(nsIContentSink*
  * @return  current sink
  */
 NS_IMETHODIMP_(nsIContentSink*)
 nsParser::GetContentSink()
 {
   return mSink;
 }
 
-/**
- * Determine what DTD mode (and thus what layout nsCompatibility mode)
- * to use for this document based on the first chunk of data received
- * from the network (each parsercontext can have its own mode).  (No,
- * this is not an optimal solution -- we really don't need to know until
- * after we've received the DOCTYPE, and this could easily be part of
- * the regular parsing process if the parser were designed in a way that
- * made such modifications easy.)
- */
-
-// Parse the PS production in the SGML spec (excluding the part dealing
-// with entity references) starting at theIndex into theBuffer, and
-// return the first index after the end of the production.
-static int32_t
-ParsePS(const nsString& aBuffer, int32_t aIndex)
-{
-  for (;;) {
-    char16_t ch = aBuffer.CharAt(aIndex);
-    if ((ch == char16_t(' ')) || (ch == char16_t('\t')) ||
-        (ch == char16_t('\n')) || (ch == char16_t('\r'))) {
-      ++aIndex;
-    } else if (ch == char16_t('-')) {
-      int32_t tmpIndex;
-      if (aBuffer.CharAt(aIndex+1) == char16_t('-') &&
-          kNotFound != (tmpIndex=aBuffer.Find("--",false,aIndex+2,-1))) {
-        aIndex = tmpIndex + 2;
-      } else {
-        return aIndex;
-      }
-    } else {
-      return aIndex;
-    }
-  }
-}
-
-#define PARSE_DTD_HAVE_DOCTYPE          (1<<0)
-#define PARSE_DTD_HAVE_PUBLIC_ID        (1<<1)
-#define PARSE_DTD_HAVE_SYSTEM_ID        (1<<2)
-#define PARSE_DTD_HAVE_INTERNAL_SUBSET  (1<<3)
-
-// return true on success (includes not present), false on failure
-static bool
-ParseDocTypeDecl(const nsString &aBuffer,
-                 int32_t *aResultFlags,
-                 nsString &aPublicID,
-                 nsString &aSystemID)
-{
-  bool haveDoctype = false;
-  *aResultFlags = 0;
-
-  // Skip through any comments and processing instructions
-  // The PI-skipping is a bit of a hack.
-  int32_t theIndex = 0;
-  do {
-    theIndex = aBuffer.FindChar('<', theIndex);
-    if (theIndex == kNotFound) break;
-    char16_t nextChar = aBuffer.CharAt(theIndex+1);
-    if (nextChar == char16_t('!')) {
-      int32_t tmpIndex = theIndex + 2;
-      if (kNotFound !=
-          (theIndex=aBuffer.Find("DOCTYPE", true, tmpIndex, 0))) {
-        haveDoctype = true;
-        theIndex += 7; // skip "DOCTYPE"
-        break;
-      }
-      theIndex = ParsePS(aBuffer, tmpIndex);
-      theIndex = aBuffer.FindChar('>', theIndex);
-    } else if (nextChar == char16_t('?')) {
-      theIndex = aBuffer.FindChar('>', theIndex);
-    } else {
-      break;
-    }
-  } while (theIndex != kNotFound);
-
-  if (!haveDoctype)
-    return true;
-  *aResultFlags |= PARSE_DTD_HAVE_DOCTYPE;
-
-  theIndex = ParsePS(aBuffer, theIndex);
-  theIndex = aBuffer.Find("HTML", true, theIndex, 0);
-  if (kNotFound == theIndex)
-    return false;
-  theIndex = ParsePS(aBuffer, theIndex+4);
-  int32_t tmpIndex = aBuffer.Find("PUBLIC", true, theIndex, 0);
-
-  if (kNotFound != tmpIndex) {
-    theIndex = ParsePS(aBuffer, tmpIndex+6);
-
-    // We get here only if we've read <!DOCTYPE HTML PUBLIC
-    // (not case sensitive) possibly with comments within.
-
-    // Now find the beginning and end of the public identifier
-    // and the system identifier (if present).
-
-    char16_t lit = aBuffer.CharAt(theIndex);
-    if ((lit != char16_t('\"')) && (lit != char16_t('\'')))
-      return false;
-
-    // Start is the first character, excluding the quote, and End is
-    // the final quote, so there are (end-start) characters.
-
-    int32_t PublicIDStart = theIndex + 1;
-    int32_t PublicIDEnd = aBuffer.FindChar(lit, PublicIDStart);
-    if (kNotFound == PublicIDEnd)
-      return false;
-    theIndex = ParsePS(aBuffer, PublicIDEnd + 1);
-    char16_t next = aBuffer.CharAt(theIndex);
-    if (next == char16_t('>')) {
-      // There was a public identifier, but no system
-      // identifier,
-      // so do nothing.
-      // This is needed to avoid the else at the end, and it's
-      // also the most common case.
-    } else if ((next == char16_t('\"')) ||
-               (next == char16_t('\''))) {
-      // We found a system identifier.
-      *aResultFlags |= PARSE_DTD_HAVE_SYSTEM_ID;
-      int32_t SystemIDStart = theIndex + 1;
-      int32_t SystemIDEnd = aBuffer.FindChar(next, SystemIDStart);
-      if (kNotFound == SystemIDEnd)
-        return false;
-      aSystemID =
-        Substring(aBuffer, SystemIDStart, SystemIDEnd - SystemIDStart);
-    } else if (next == char16_t('[')) {
-      // We found an internal subset.
-      *aResultFlags |= PARSE_DTD_HAVE_INTERNAL_SUBSET;
-    } else {
-      // Something's wrong.
-      return false;
-    }
-
-    // Since a public ID is a minimum literal, we must trim
-    // and collapse whitespace
-    aPublicID = Substring(aBuffer, PublicIDStart, PublicIDEnd - PublicIDStart);
-    aPublicID.CompressWhitespace(true, true);
-    *aResultFlags |= PARSE_DTD_HAVE_PUBLIC_ID;
-  } else {
-    tmpIndex=aBuffer.Find("SYSTEM", true, theIndex, 0);
-    if (kNotFound != tmpIndex) {
-      // DOCTYPES with system ID but no Public ID
-      *aResultFlags |= PARSE_DTD_HAVE_SYSTEM_ID;
-
-      theIndex = ParsePS(aBuffer, tmpIndex+6);
-      char16_t next = aBuffer.CharAt(theIndex);
-      if (next != char16_t('\"') && next != char16_t('\''))
-        return false;
-
-      int32_t SystemIDStart = theIndex + 1;
-      int32_t SystemIDEnd = aBuffer.FindChar(next, SystemIDStart);
-
-      if (kNotFound == SystemIDEnd)
-        return false;
-      aSystemID =
-        Substring(aBuffer, SystemIDStart, SystemIDEnd - SystemIDStart);
-      theIndex = ParsePS(aBuffer, SystemIDEnd + 1);
-    }
-
-    char16_t nextChar = aBuffer.CharAt(theIndex);
-    if (nextChar == char16_t('['))
-      *aResultFlags |= PARSE_DTD_HAVE_INTERNAL_SUBSET;
-    else if (nextChar != char16_t('>'))
-      return false;
-  }
-  return true;
-}
-
-struct PubIDInfo
-{
-  enum eMode {
-    eQuirks,         /* always quirks mode, unless there's an internal subset */
-    eAlmostStandards,/* eCompatibility_AlmostStandards */
-    eFullStandards   /* eCompatibility_FullStandards */
-      /*
-       * public IDs that should trigger strict mode are not listed
-       * since we want all future public IDs to trigger strict mode as
-       * well
-       */
-  };
-
-  const char* name;
-  eMode mode_if_no_sysid;
-  eMode mode_if_sysid;
-};
-
-#define ELEMENTS_OF(array_) (sizeof(array_)/sizeof(array_[0]))
-
-// These must be in nsCRT::strcmp order so binary-search can be used.
-// This is verified, |#ifdef DEBUG|, below.
-
-// Even though public identifiers should be case sensitive, we will do
-// all comparisons after converting to lower case in order to do
-// case-insensitive comparison since there are a number of existing web
-// sites that use the incorrect case.  Therefore all of the public
-// identifiers below are in lower case (with the correct case following,
-// in comments).  The case is verified, |#ifdef DEBUG|, below.
-static const PubIDInfo kPublicIDs[] = {
-  {"+//silmaril//dtd html pro v0r11 19970101//en" /* "+//Silmaril//dtd html Pro v0r11 19970101//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//advasoft ltd//dtd html 3.0 aswedit + extensions//en" /* "-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//as//dtd html 3.0 aswedit + extensions//en" /* "-//AS//DTD HTML 3.0 asWedit + extensions//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//ietf//dtd html 2.0 level 1//en" /* "-//IETF//DTD HTML 2.0 Level 1//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//ietf//dtd html 2.0 level 2//en" /* "-//IETF//DTD HTML 2.0 Level 2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//ietf//dtd html 2.0 strict level 1//en" /* "-//IETF//DTD HTML 2.0 Strict Level 1//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//ietf//dtd html 2.0 strict level 2//en" /* "-//IETF//DTD HTML 2.0 Strict Level 2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//ietf//dtd html 2.0 strict//en" /* "-//IETF//DTD HTML 2.0 Strict//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//ietf//dtd html 2.0//en" /* "-//IETF//DTD HTML 2.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//ietf//dtd html 2.1e//en" /* "-//IETF//DTD HTML 2.1E//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//ietf//dtd html 3.0//en" /* "-//IETF//DTD HTML 3.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//ietf//dtd html 3.0//en//" /* "-//IETF//DTD HTML 3.0//EN//" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//ietf//dtd html 3.2 final//en" /* "-//IETF//DTD HTML 3.2 Final//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//ietf//dtd html 3.2//en" /* "-//IETF//DTD HTML 3.2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//ietf//dtd html 3//en" /* "-//IETF//DTD HTML 3//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//ietf//dtd html level 0//en" /* "-//IETF//DTD HTML Level 0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//ietf//dtd html level 0//en//2.0" /* "-//IETF//DTD HTML Level 0//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//ietf//dtd html level 1//en" /* "-//IETF//DTD HTML Level 1//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//ietf//dtd html level 1//en//2.0" /* "-//IETF//DTD HTML Level 1//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//ietf//dtd html level 2//en" /* "-//IETF//DTD HTML Level 2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//ietf//dtd html level 2//en//2.0" /* "-//IETF//DTD HTML Level 2//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//ietf//dtd html level 3//en" /* "-//IETF//DTD HTML Level 3//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//ietf//dtd html level 3//en//3.0" /* "-//IETF//DTD HTML Level 3//EN//3.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//ietf//dtd html strict level 0//en" /* "-//IETF//DTD HTML Strict Level 0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//ietf//dtd html strict level 0//en//2.0" /* "-//IETF//DTD HTML Strict Level 0//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//ietf//dtd html strict level 1//en" /* "-//IETF//DTD HTML Strict Level 1//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//ietf//dtd html strict level 1//en//2.0" /* "-//IETF//DTD HTML Strict Level 1//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//ietf//dtd html strict level 2//en" /* "-//IETF//DTD HTML Strict Level 2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//ietf//dtd html strict level 2//en//2.0" /* "-//IETF//DTD HTML Strict Level 2//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//ietf//dtd html strict level 3//en" /* "-//IETF//DTD HTML Strict Level 3//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//ietf//dtd html strict level 3//en//3.0" /* "-//IETF//DTD HTML Strict Level 3//EN//3.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//ietf//dtd html strict//en" /* "-//IETF//DTD HTML Strict//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//ietf//dtd html strict//en//2.0" /* "-//IETF//DTD HTML Strict//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//ietf//dtd html strict//en//3.0" /* "-//IETF//DTD HTML Strict//EN//3.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//ietf//dtd html//en" /* "-//IETF//DTD HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//ietf//dtd html//en//2.0" /* "-//IETF//DTD HTML//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//ietf//dtd html//en//3.0" /* "-//IETF//DTD HTML//EN//3.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//metrius//dtd metrius presentational//en" /* "-//Metrius//DTD Metrius Presentational//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//microsoft//dtd internet explorer 2.0 html strict//en" /* "-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//microsoft//dtd internet explorer 2.0 html//en" /* "-//Microsoft//DTD Internet Explorer 2.0 HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//microsoft//dtd internet explorer 2.0 tables//en" /* "-//Microsoft//DTD Internet Explorer 2.0 Tables//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//microsoft//dtd internet explorer 3.0 html strict//en" /* "-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//microsoft//dtd internet explorer 3.0 html//en" /* "-//Microsoft//DTD Internet Explorer 3.0 HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//microsoft//dtd internet explorer 3.0 tables//en" /* "-//Microsoft//DTD Internet Explorer 3.0 Tables//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//netscape comm. corp.//dtd html//en" /* "-//Netscape Comm. Corp.//DTD HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//netscape comm. corp.//dtd strict html//en" /* "-//Netscape Comm. Corp.//DTD Strict HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//o'reilly and associates//dtd html 2.0//en" /* "-//O'Reilly and Associates//DTD HTML 2.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//o'reilly and associates//dtd html extended 1.0//en" /* "-//O'Reilly and Associates//DTD HTML Extended 1.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//o'reilly and associates//dtd html extended relaxed 1.0//en" /* "-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//en" /* "-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::extensions to HTML 4.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//en" /* "-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::extensions to HTML 4.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//spyglass//dtd html 2.0 extended//en" /* "-//Spyglass//DTD HTML 2.0 Extended//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//sq//dtd html 2.0 hotmetal + extensions//en" /* "-//SQ//DTD HTML 2.0 HoTMetaL + extensions//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//sun microsystems corp.//dtd hotjava html//en" /* "-//Sun Microsystems Corp.//DTD HotJava HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//sun microsystems corp.//dtd hotjava strict html//en" /* "-//Sun Microsystems Corp.//DTD HotJava Strict HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//w3c//dtd html 3 1995-03-24//en" /* "-//W3C//DTD HTML 3 1995-03-24//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//w3c//dtd html 3.2 draft//en" /* "-//W3C//DTD HTML 3.2 Draft//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//w3c//dtd html 3.2 final//en" /* "-//W3C//DTD HTML 3.2 Final//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//w3c//dtd html 3.2//en" /* "-//W3C//DTD HTML 3.2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//w3c//dtd html 3.2s draft//en" /* "-//W3C//DTD HTML 3.2S Draft//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//w3c//dtd html 4.0 frameset//en" /* "-//W3C//DTD HTML 4.0 Frameset//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//w3c//dtd html 4.0 transitional//en" /* "-//W3C//DTD HTML 4.0 Transitional//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//w3c//dtd html 4.01 frameset//en" /* "-//W3C//DTD HTML 4.01 Frameset//EN" */, PubIDInfo::eQuirks, PubIDInfo::eAlmostStandards},
-  {"-//w3c//dtd html 4.01 transitional//en" /* "-//W3C//DTD HTML 4.01 Transitional//EN" */, PubIDInfo::eQuirks, PubIDInfo::eAlmostStandards},
-  {"-//w3c//dtd html experimental 19960712//en" /* "-//W3C//DTD HTML Experimental 19960712//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//w3c//dtd html experimental 970421//en" /* "-//W3C//DTD HTML Experimental 970421//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//w3c//dtd w3 html//en" /* "-//W3C//DTD W3 HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//w3c//dtd xhtml 1.0 frameset//en" /* "-//W3C//DTD XHTML 1.0 Frameset//EN" */, PubIDInfo::eAlmostStandards, PubIDInfo::eAlmostStandards},
-  {"-//w3c//dtd xhtml 1.0 transitional//en" /* "-//W3C//DTD XHTML 1.0 Transitional//EN" */, PubIDInfo::eAlmostStandards, PubIDInfo::eAlmostStandards},
-  {"-//w3o//dtd w3 html 3.0//en" /* "-//W3O//DTD W3 HTML 3.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//w3o//dtd w3 html 3.0//en//" /* "-//W3O//DTD W3 HTML 3.0//EN//" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//w3o//dtd w3 html strict 3.0//en//" /* "-//W3O//DTD W3 HTML Strict 3.0//EN//" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//webtechs//dtd mozilla html 2.0//en" /* "-//WebTechs//DTD Mozilla HTML 2.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-//webtechs//dtd mozilla html//en" /* "-//WebTechs//DTD Mozilla HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"-/w3c/dtd html 4.0 transitional/en" /* "-/W3C/DTD HTML 4.0 Transitional/EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-  {"html" /* "HTML" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
-};
-
-#ifdef DEBUG
-static void
-VerifyPublicIDs()
-{
-  static bool gVerified = false;
-  if (!gVerified) {
-    gVerified = true;
-    uint32_t i;
-    for (i = 0; i < ELEMENTS_OF(kPublicIDs) - 1; ++i) {
-      if (nsCRT::strcmp(kPublicIDs[i].name, kPublicIDs[i+1].name) >= 0) {
-        NS_NOTREACHED("doctypes out of order");
-        printf("Doctypes %s and %s out of order.\n",
-               kPublicIDs[i].name, kPublicIDs[i+1].name);
-      }
-    }
-    for (i = 0; i < ELEMENTS_OF(kPublicIDs); ++i) {
-      nsAutoCString lcPubID(kPublicIDs[i].name);
-      ToLowerCase(lcPubID);
-      if (nsCRT::strcmp(kPublicIDs[i].name, lcPubID.get()) != 0) {
-        NS_NOTREACHED("doctype not lower case");
-        printf("Doctype %s not lower case.\n", kPublicIDs[i].name);
-      }
-    }
-  }
-}
-#endif
-
-namespace {
-
-struct PublicIdComparator
-{
-  const nsAutoCString& mPublicId;
-  explicit PublicIdComparator(const nsAutoCString& aPublicId)
-    : mPublicId(aPublicId) {}
-  int operator()(const PubIDInfo& aInfo) const {
-    return nsCRT::strcmp(mPublicId.get(), aInfo.name);
-  }
-};
-
-} // namespace
-
-static void
-DetermineHTMLParseMode(const nsString& aBuffer,
-                       nsDTDMode& aParseMode,
-                       eParserDocType& aDocType)
-{
-#ifdef DEBUG
-  VerifyPublicIDs();
-#endif
-  int32_t resultFlags;
-  nsAutoString publicIDUCS2, sysIDUCS2;
-  if (ParseDocTypeDecl(aBuffer, &resultFlags, publicIDUCS2, sysIDUCS2)) {
-    if (!(resultFlags & PARSE_DTD_HAVE_DOCTYPE)) {
-      // no DOCTYPE
-      aParseMode = eDTDMode_quirks;
-      aDocType = eHTML_Quirks;
-    } else if ((resultFlags & PARSE_DTD_HAVE_INTERNAL_SUBSET) ||
-               !(resultFlags & PARSE_DTD_HAVE_PUBLIC_ID)) {
-      // A doctype with an internal subset is always full_standards.
-      // A doctype without a public ID is always full_standards.
-      aDocType = eHTML_Strict;
-      aParseMode = eDTDMode_full_standards;
-
-      // Special hack for IBM's custom DOCTYPE.
-      if (!(resultFlags & PARSE_DTD_HAVE_INTERNAL_SUBSET) &&
-          sysIDUCS2.EqualsLiteral(
-               "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd")) {
-        aParseMode = eDTDMode_quirks;
-        aDocType = eHTML_Quirks;
-      }
-
-    } else {
-      // We have to check our list of public IDs to see what to do.
-      // Yes, we want UCS2 to ASCII lossy conversion.
-      nsAutoCString publicID;
-      publicID.AssignWithConversion(publicIDUCS2);
-
-      // See comment above definition of kPublicIDs about case
-      // sensitivity.
-      ToLowerCase(publicID);
-
-      // Binary search to see if we can find the correct public ID.
-      size_t index;
-      bool found = BinarySearchIf(kPublicIDs, 0, ArrayLength(kPublicIDs),
-                                  PublicIdComparator(publicID), &index);
-      if (!found) {
-        // The DOCTYPE is not in our list, so it must be full_standards.
-        aParseMode = eDTDMode_full_standards;
-        aDocType = eHTML_Strict;
-        return;
-      }
-
-      switch ((resultFlags & PARSE_DTD_HAVE_SYSTEM_ID)
-                ? kPublicIDs[index].mode_if_sysid
-                : kPublicIDs[index].mode_if_no_sysid)
-      {
-        case PubIDInfo::eQuirks:
-          aParseMode = eDTDMode_quirks;
-          aDocType = eHTML_Quirks;
-          break;
-        case PubIDInfo::eAlmostStandards:
-          aParseMode = eDTDMode_almost_standards;
-          aDocType = eHTML_Strict;
-          break;
-        case PubIDInfo::eFullStandards:
-          aParseMode = eDTDMode_full_standards;
-          aDocType = eHTML_Strict;
-          break;
-        default:
-          NS_NOTREACHED("no other cases!");
-      }
-    }
-  } else {
-    // badly formed DOCTYPE -> quirks
-    aParseMode = eDTDMode_quirks;
-    aDocType = eHTML_Quirks;
-  }
-}
-
-static void
-DetermineParseMode(const nsString& aBuffer, nsDTDMode& aParseMode,
-                   eParserDocType& aDocType, const nsACString& aMimeType)
-{
-  if (aMimeType.EqualsLiteral(TEXT_HTML)) {
-    DetermineHTMLParseMode(aBuffer, aParseMode, aDocType);
-  } else if (nsContentUtils::IsPlainTextType(aMimeType)) {
-    aDocType = ePlainText;
-    aParseMode = eDTDMode_quirks;
-  } else { // Some form of XML
-    aDocType = eXML;
-    aParseMode = eDTDMode_full_standards;
-  }
-}
-
 static nsIDTD*
 FindSuitableDTD(CParserContext& aParserContext)
 {
   // We always find a DTD.
   aParserContext.mAutoDetectStatus = ePrimaryDetect;
 
   // Quick check for view source.
   MOZ_ASSERT(aParserContext.mParserCommand != eViewSource,
@@ -844,25 +436,24 @@ nsParser::WillBuildModel(nsString& aFile
   if (!mParserContext)
     return NS_ERROR_HTMLPARSER_INVALIDPARSERCONTEXT;
 
   if (eUnknownDetect != mParserContext->mAutoDetectStatus)
     return NS_OK;
 
   if (eDTDMode_unknown == mParserContext->mDTDMode ||
       eDTDMode_autodetect == mParserContext->mDTDMode) {
-    char16_t buf[1025];
-    nsFixedString theBuffer(buf, 1024, 0);
-
-    // Grab 1024 characters, starting at the first non-whitespace
-    // character, to look for the doctype in.
-    mParserContext->mScanner->Peek(theBuffer, 1024, mParserContext->mScanner->FirstNonWhitespacePosition());
-    DetermineParseMode(theBuffer, mParserContext->mDTDMode,
-                       mParserContext->mDocType, mParserContext->mMimeType);
-  }
+    if (mIsAboutBlank) {
+      mParserContext->mDTDMode = eDTDMode_quirks;
+      mParserContext->mDocType = eHTML_Quirks;
+    } else {
+      mParserContext->mDTDMode = eDTDMode_full_standards;
+      mParserContext->mDocType = eXML;
+    }
+  } // else XML fragment with nested parser context
 
   NS_ASSERTION(!mDTD || !mParserContext->mPrevContext,
                "Clobbering DTD for non-root parser context!");
   mDTD = FindSuitableDTD(*mParserContext);
   NS_ENSURE_TRUE(mDTD, NS_ERROR_OUT_OF_MEMORY);
 
   nsITokenizer* tokenizer;
   nsresult rv = mParserContext->GetTokenizer(mDTD, mSink, tokenizer);
--- a/parser/htmlparser/nsScanner.cpp
+++ b/parser/htmlparser/nsScanner.cpp
@@ -324,80 +324,16 @@ nsresult nsScanner::GetChar(char16_t& aC
   }
 
   aChar = *mCurrentPosition++;
   --mCountRemaining;
 
   return NS_OK;
 }
 
-
-/**
- *  peek ahead to consume next char from scanner's internal
- *  input buffer
- *  
- *  @update  gess 3/25/98
- *  @param   
- *  @return  
- */
-nsresult nsScanner::Peek(char16_t& aChar, uint32_t aOffset) {
-  aChar = 0;
-
-  if (!mSlidingBuffer || mCurrentPosition == mEndPosition) {
-    return NS_ERROR_HTMLPARSER_EOF;
-  }
-
-  if (aOffset > 0) {
-    if (mCountRemaining <= aOffset)
-      return NS_ERROR_HTMLPARSER_EOF;
-
-    nsScannerIterator pos = mCurrentPosition;
-    pos.advance(aOffset);
-    aChar=*pos;
-  }
-  else {
-    aChar=*mCurrentPosition;
-  }
-
-  return NS_OK;
-}
-
-nsresult nsScanner::Peek(nsAString& aStr, int32_t aNumChars, int32_t aOffset)
-{
-  if (!mSlidingBuffer || mCurrentPosition == mEndPosition) {
-    return NS_ERROR_HTMLPARSER_EOF;
-  }
-
-  nsScannerIterator start, end;
-
-  start = mCurrentPosition;
-
-  if ((int32_t)mCountRemaining <= aOffset) {
-    return NS_ERROR_HTMLPARSER_EOF;
-  }
-
-  if (aOffset > 0) {
-    start.advance(aOffset);
-  }
-
-  if (mCountRemaining < uint32_t(aNumChars + aOffset)) {
-    end = mEndPosition;
-  }
-  else {
-    end = start;
-    end.advance(aNumChars);
-  }
-
-  if (!CopyUnicodeTo(start, end, aStr)) {
-    return NS_ERROR_OUT_OF_MEMORY;
-  }
-
-  return NS_OK;
-}
-
 void nsScanner::BindSubstring(nsScannerSubstring& aSubstring, const nsScannerIterator& aStart, const nsScannerIterator& aEnd)
 {
   aSubstring.Rebind(*mSlidingBuffer, aStart, aEnd);
 }
 
 void nsScanner::CurrentPosition(nsScannerIterator& aPosition)
 {
   aPosition = mCurrentPosition;
--- a/parser/htmlparser/nsScanner.h
+++ b/parser/htmlparser/nsScanner.h
@@ -56,28 +56,16 @@ class nsScanner {
        *  
        *  @update  gess 3/25/98
        *  @param   ch is the char to accept new value
        *  @return  error code reflecting read status
        */
       nsresult GetChar(char16_t& ch);
 
       /**
-       *  peek ahead to consume next char from scanner's internal
-       *  input buffer
-       *  
-       *  @update  gess 3/25/98
-       *  @param   ch is the char to accept new value
-       *  @return  error code reflecting read status
-       */
-      nsresult Peek(char16_t& ch, uint32_t aOffset=0);
-
-      nsresult Peek(nsAString& aStr, int32_t aNumChars, int32_t aOffset = 0);
-
-      /**
        *  Records current offset position in input stream. This allows us
        *  to back up to this point if the need should arise, such as when
        *  tokenization gets interrupted.
        *  
        *  @update  gess 5/12/98
        *  @param   
        *  @return  
        */
--- a/parser/htmlparser/nsScannerString.cpp
+++ b/parser/htmlparser/nsScannerString.cpp
@@ -1,16 +1,17 @@
 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
 /* vim:set ts=2 sw=2 sts=2 et cindent: */
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include <stdlib.h>
 #include "nsScannerString.h"
+#include "mozilla/CheckedInt.h"
 
 
   /**
    * nsScannerBufferList
    */
 
 #define MAX_CAPACITY ((UINT32_MAX / sizeof(char16_t)) - \
                       (sizeof(Buffer) + sizeof(char16_t)))
@@ -467,17 +468,23 @@ copy_multifragment_string( nsScannerIter
   }
 
 bool
 CopyUnicodeTo( const nsScannerIterator& aSrcStart,
                const nsScannerIterator& aSrcEnd,
                nsAString& aDest )
   {
     nsAString::iterator writer;
-    if (!aDest.SetLength(Distance(aSrcStart, aSrcEnd), mozilla::fallible)) {
+
+    mozilla::CheckedInt<nsAString::size_type> distance(Distance(aSrcStart, aSrcEnd));
+    if (!distance.isValid()) {
+      return false; // overflow detected
+    }
+
+    if (!aDest.SetLength(distance.value(), mozilla::fallible)) {
       aDest.Truncate();
       return false; // out of memory
     }
     aDest.BeginWriting(writer);
     nsScannerIterator fromBegin(aSrcStart);
     
     copy_multifragment_string(fromBegin, aSrcEnd, writer);
     return true;
@@ -500,18 +507,24 @@ AppendUnicodeTo( const nsScannerIterator
   }
 
 bool
 AppendUnicodeTo( const nsScannerIterator& aSrcStart,
                  const nsScannerIterator& aSrcEnd,
                  nsAString& aDest )
   {
     nsAString::iterator writer;
-    uint32_t oldLength = aDest.Length();
-    if (!aDest.SetLength(oldLength + Distance(aSrcStart, aSrcEnd), mozilla::fallible))
+    const nsAString::size_type oldLength = aDest.Length();
+    mozilla::CheckedInt<nsAString::size_type> newLen(Distance(aSrcStart, aSrcEnd));
+    newLen += oldLength;
+    if (!newLen.isValid()) {
+      return false; // overflow detected
+    }
+
+    if (!aDest.SetLength(newLen.value(), mozilla::fallible))
       return false; // out of memory
     aDest.BeginWriting(writer).advance(oldLength);
     nsScannerIterator fromBegin(aSrcStart);
     
     copy_multifragment_string(fromBegin, aSrcEnd, writer);
     return true;
   }