Bug 650776 part 1 - Introduce a new HTML source to HTML source sanitizer XPCOM API. r=smaug.
author Henri Sivonen <hsivonen@iki.fi>
Mon, 19 Mar 2012 10:16:20 +0200
changeset 89702 91750caaa3fed887af2824ce272b168f1a302d67
parent 89701 4a8e46ce88dd2226e0825d5552d9def4ee51ecc5
child 89703 b81ec953e4d68ac810d9f2533e30d1c9cfdf6d54
push id 7275
push user hsivonen@iki.fi
push date Mon, 19 Mar 2012 08:17:10 +0000
treeherder mozilla-inbound@91750caaa3fe [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers smaug
bugs 650776
milestone 14.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 650776 part 1 - Introduce a new HTML source to HTML source sanitizer XPCOM API. r=smaug.
content/base/public/nsTreeSanitizer.h
content/base/src/nsTreeSanitizer.cpp
content/base/test/chrome/Makefile.in
content/base/test/chrome/test_bug650776.html
editor/libeditor/html/nsHTMLDataTransfer.cpp
parser/html/nsIParserUtils.idl
parser/html/nsParserUtils.cpp
--- a/content/base/public/nsTreeSanitizer.h
+++ b/content/base/public/nsTreeSanitizer.h
@@ -38,53 +38,91 @@
 #ifndef nsTreeSanitizer_h_
 #define nsTreeSanitizer_h_
 
 #include "nsIContent.h"
 #include "mozilla/css/StyleRule.h"
 #include "nsIPrincipal.h"
 #include "mozilla/dom/Element.h"
 
+/**
+ * See the documentation of nsIParserUtils::sanitize for details about the
+ * default behavior and the configuration options of this sanitizer.
+ */
 class NS_STACK_CLASS nsTreeSanitizer {
 
   public:
 
     /**
      * The constructor.
      *
-     * @param aAllowStyles Whether to allow <style> and style=""
-     * @param aAllowComments Whether to allow comment nodes
+     * @param aFlags Flags from nsIParserUtils
      */
-    nsTreeSanitizer(bool aAllowStyles, bool aAllowComments);
+    nsTreeSanitizer(PRUint32 aFlags = 0);
 
     static void InitializeStatics();
     static void ReleaseStatics();
 
     /**
      * Sanitizes a disconnected DOM fragment freshly obtained from a parser.
      * The argument must be of type nsINode::eDOCUMENT_FRAGMENT and,
      * consequently, must not be in the document. Furthermore, the fragment
      * must have just come from a parser so that it can't have mutation
      * event listeners set on it.
      */
     void Sanitize(nsIContent* aFragment);
 
+    /**
+     * Sanitizes a disconnected (not in a docshell) document freshly obtained
+     * from a parser. The document must not be embedded in a docshell and must
+     * not have had a chance to get mutation event listeners attached to it.
+     * The root element must be <html>.
+     */
+    void Sanitize(nsIDocument* aDocument);
+
   private:
 
     /**
      * Whether <style> and style="" are allowed.
      */
     bool mAllowStyles;
 
     /**
      * Whether comment nodes are allowed.
      */
     bool mAllowComments;
 
     /**
+     * Whether HTML <font>, <center>, bgcolor="", etc., are dropped.
+     */
+    bool mDropNonCSSPresentation;
+
+    /**
+     * Whether to remove forms and form controls (excluding fieldset/legend).
+     */
+    bool mDropForms;
+
+    /**
+     * Whether only cid: embeds are allowed.
+     */
+    bool mCidEmbedsOnly;
+
+    /**
+     * Whether to drop <img>, <video>, <audio> and <svg>.
+     */
+    bool mDropMedia;
+
+    /**
+     * Whether we are sanitizing a full document (as opposed to a fragment).
+     */
+    bool mFullDocument;
+
+    void SanitizeChildren(nsINode* aRoot);
+
+    /**
      * Queries if an element must be replaced with its children.
      * @param aNamespace the namespace of the element the question is about
      * @param aLocal the local name of the element the question is about
      * @return true if the element must be replaced with its children and
      *         false if the element is to be kept
      */
     bool MustFlatten(PRInt32 aNamespace, nsIAtom* aLocal);
 
@@ -173,21 +211,26 @@ class NS_STACK_CLASS nsTreeSanitizer {
                               nsIURI* aBaseURI);
 
     /**
      * The whitelist of HTML elements.
      */
     static nsTHashtable<nsISupportsHashKey>* sElementsHTML;
 
     /**
-     * The whitelist of HTML attributes.
+     * The whitelist of non-presentational HTML attributes.
      */
     static nsTHashtable<nsISupportsHashKey>* sAttributesHTML;
 
     /**
+     * The whitelist of presentational HTML attributes.
+     */
+    static nsTHashtable<nsISupportsHashKey>* sPresAttributesHTML;
+
+    /**
      * The whitelist of SVG elements.
      */
     static nsTHashtable<nsISupportsHashKey>* sElementsSVG;
 
     /**
      * The whitelist of SVG attributes.
      */
     static nsTHashtable<nsISupportsHashKey>* sAttributesSVG;
--- a/content/base/src/nsTreeSanitizer.cpp
+++ b/content/base/src/nsTreeSanitizer.cpp
@@ -50,16 +50,17 @@
 #include "nsCSSStyleSheet.h"
 #include "nsIDOMCSSRule.h"
 #include "nsAttrName.h"
 #include "nsIScriptSecurityManager.h"
 #include "nsNetUtil.h"
 #include "nsComponentManagerUtils.h"
 #include "nsNullPrincipal.h"
 #include "nsContentUtils.h"
+#include "nsIParserUtils.h"
 
 using namespace mozilla;
 
 //
 // Thanks to Mark Pilgrim and Sam Ruby for the initial whitelist
 //
 nsIAtom** const kElementsHTML[] = {
   &nsGkAtoms::a,
@@ -72,16 +73,17 @@ nsIAtom** const kElementsHTML[] = {
 #ifdef MOZ_MEDIA
   &nsGkAtoms::audio,
 #endif
   &nsGkAtoms::b,
   &nsGkAtoms::bdi,
   &nsGkAtoms::bdo,
   &nsGkAtoms::big,
   &nsGkAtoms::blockquote,
+  // body checked specially
   &nsGkAtoms::br,
   &nsGkAtoms::button,
   &nsGkAtoms::canvas,
   &nsGkAtoms::caption,
   &nsGkAtoms::center,
   &nsGkAtoms::cite,
   &nsGkAtoms::code,
   &nsGkAtoms::col,
@@ -104,19 +106,21 @@ nsIAtom** const kElementsHTML[] = {
   &nsGkAtoms::footer,
   &nsGkAtoms::form,
   &nsGkAtoms::h1,
   &nsGkAtoms::h2,
   &nsGkAtoms::h3,
   &nsGkAtoms::h4,
   &nsGkAtoms::h5,
   &nsGkAtoms::h6,
+  // head checked specially
   &nsGkAtoms::header,
   &nsGkAtoms::hgroup,
   &nsGkAtoms::hr,
+  // html checked specially
   &nsGkAtoms::i,
   &nsGkAtoms::img,
   &nsGkAtoms::input,
   &nsGkAtoms::ins,
   &nsGkAtoms::kbd,
   &nsGkAtoms::label,
   &nsGkAtoms::legend,
   &nsGkAtoms::li,
@@ -150,24 +154,26 @@ nsIAtom** const kElementsHTML[] = {
   &nsGkAtoms::source,
 #endif
   &nsGkAtoms::span,
   &nsGkAtoms::strike,
   &nsGkAtoms::strong,
   &nsGkAtoms::sub,
   &nsGkAtoms::summary,
   &nsGkAtoms::sup,
+  // style checked specially
   &nsGkAtoms::table,
   &nsGkAtoms::tbody,
   &nsGkAtoms::td,
   &nsGkAtoms::textarea,
   &nsGkAtoms::tfoot,
   &nsGkAtoms::th,
   &nsGkAtoms::thead,
   &nsGkAtoms::time,
+  // title checked specially
   &nsGkAtoms::tr,
 #ifdef MOZ_MEDIA
   &nsGkAtoms::track,
 #endif
   &nsGkAtoms::tt,
   &nsGkAtoms::u,
   &nsGkAtoms::ul,
   &nsGkAtoms::var,
@@ -179,61 +185,52 @@ nsIAtom** const kElementsHTML[] = {
 };
 
 nsIAtom** const kAttributesHTML[] = {
   &nsGkAtoms::abbr,
   &nsGkAtoms::accept,
   &nsGkAtoms::acceptcharset,
   &nsGkAtoms::accesskey,
   &nsGkAtoms::action,
-  &nsGkAtoms::align,
   &nsGkAtoms::alt,
   &nsGkAtoms::autocomplete,
   &nsGkAtoms::autofocus,
 #ifdef MOZ_MEDIA
   &nsGkAtoms::autoplay,
 #endif
   &nsGkAtoms::axis,
-  &nsGkAtoms::background,
-  &nsGkAtoms::bgcolor,
-  &nsGkAtoms::border,
-  &nsGkAtoms::cellpadding,
-  &nsGkAtoms::cellspacing,
   &nsGkAtoms::_char,
   &nsGkAtoms::charoff,
   &nsGkAtoms::charset,
   &nsGkAtoms::checked,
   &nsGkAtoms::cite,
   &nsGkAtoms::_class,
-  &nsGkAtoms::clear,
   &nsGkAtoms::cols,
   &nsGkAtoms::colspan,
-  &nsGkAtoms::color,
+  &nsGkAtoms::content,
   &nsGkAtoms::contenteditable,
   &nsGkAtoms::contextmenu,
 #ifdef MOZ_MEDIA
   &nsGkAtoms::controls,
 #endif
-  &nsGkAtoms::compact,
   &nsGkAtoms::coords,
   &nsGkAtoms::datetime,
   &nsGkAtoms::dir,
   &nsGkAtoms::disabled,
   &nsGkAtoms::draggable,
   &nsGkAtoms::enctype,
   &nsGkAtoms::face,
   &nsGkAtoms::_for,
   &nsGkAtoms::frame,
   &nsGkAtoms::headers,
   &nsGkAtoms::height,
   &nsGkAtoms::hidden,
   &nsGkAtoms::high,
   &nsGkAtoms::href,
   &nsGkAtoms::hreflang,
-  &nsGkAtoms::hspace,
   &nsGkAtoms::icon,
   &nsGkAtoms::id,
   &nsGkAtoms::ismap,
   &nsGkAtoms::itemid,
   &nsGkAtoms::itemprop,
   &nsGkAtoms::itemref,
   &nsGkAtoms::itemscope,
   &nsGkAtoms::itemtype,
@@ -253,27 +250,25 @@ nsIAtom** const kAttributesHTML[] = {
   &nsGkAtoms::min,
   &nsGkAtoms::mozdonotsend,
   &nsGkAtoms::multiple,
 #ifdef MOZ_MEDIA
   &nsGkAtoms::muted,
 #endif
   &nsGkAtoms::name,
   &nsGkAtoms::nohref,
-  &nsGkAtoms::noshade,
   &nsGkAtoms::novalidate,
   &nsGkAtoms::nowrap,
   &nsGkAtoms::open,
   &nsGkAtoms::optimum,
   &nsGkAtoms::pattern,
   &nsGkAtoms::placeholder,
 #ifdef MOZ_MEDIA
   &nsGkAtoms::playbackrate,
 #endif
-  &nsGkAtoms::pointSize,
 #ifdef MOZ_MEDIA
   &nsGkAtoms::poster,
   &nsGkAtoms::preload,
 #endif
   &nsGkAtoms::prompt,
   &nsGkAtoms::pubdate,
   &nsGkAtoms::radiogroup,
   &nsGkAtoms::readonly,
@@ -284,36 +279,52 @@ nsIAtom** const kAttributesHTML[] = {
   &nsGkAtoms::role,
   &nsGkAtoms::rows,
   &nsGkAtoms::rowspan,
   &nsGkAtoms::rules,
   &nsGkAtoms::scoped,
   &nsGkAtoms::scope,
   &nsGkAtoms::selected,
   &nsGkAtoms::shape,
-  &nsGkAtoms::size,
   &nsGkAtoms::span,
   &nsGkAtoms::spellcheck,
   &nsGkAtoms::src,
   &nsGkAtoms::srclang,
   &nsGkAtoms::start,
   &nsGkAtoms::summary,
   &nsGkAtoms::tabindex,
   &nsGkAtoms::target,
   &nsGkAtoms::title,
   &nsGkAtoms::type,
   &nsGkAtoms::usemap,
-  &nsGkAtoms::valign,
   &nsGkAtoms::value,
-  &nsGkAtoms::vspace,
   &nsGkAtoms::width,
   &nsGkAtoms::wrap,
   nsnull
 };
 
+nsIAtom** const kPresAttributesHTML[] = {
+  &nsGkAtoms::align,
+  &nsGkAtoms::background,
+  &nsGkAtoms::bgcolor,
+  &nsGkAtoms::border,
+  &nsGkAtoms::cellpadding,
+  &nsGkAtoms::cellspacing,
+  &nsGkAtoms::color,
+  &nsGkAtoms::compact,
+  &nsGkAtoms::clear,
+  &nsGkAtoms::hspace,
+  &nsGkAtoms::noshade,
+  &nsGkAtoms::pointSize,
+  &nsGkAtoms::size,
+  &nsGkAtoms::valign,
+  &nsGkAtoms::vspace,
+  nsnull
+};
+
 nsIAtom** const kURLAttributesHTML[] = {
   &nsGkAtoms::action,
   &nsGkAtoms::href,
   &nsGkAtoms::src,
   &nsGkAtoms::longdesc,
   &nsGkAtoms::cite,
   &nsGkAtoms::background,
   nsnull
@@ -974,46 +985,82 @@ nsIAtom** const kAttributesMathML[] = {
    &nsGkAtoms::width, // width
    &nsGkAtoms::xref_, // xref
   nsnull
 };
 
 nsIAtom** const kURLAttributesMathML[] = {
   &nsGkAtoms::href,
   &nsGkAtoms::src,
+  &nsGkAtoms::cdgroup_,
+  &nsGkAtoms::altimg_,
   &nsGkAtoms::definitionURL_,
   nsnull
 };
 
 nsTHashtable<nsISupportsHashKey>* nsTreeSanitizer::sElementsHTML = nsnull;
 nsTHashtable<nsISupportsHashKey>* nsTreeSanitizer::sAttributesHTML = nsnull;
+nsTHashtable<nsISupportsHashKey>* nsTreeSanitizer::sPresAttributesHTML = nsnull;
 nsTHashtable<nsISupportsHashKey>* nsTreeSanitizer::sElementsSVG = nsnull;
 nsTHashtable<nsISupportsHashKey>* nsTreeSanitizer::sAttributesSVG = nsnull;
 nsTHashtable<nsISupportsHashKey>* nsTreeSanitizer::sElementsMathML = nsnull;
 nsTHashtable<nsISupportsHashKey>* nsTreeSanitizer::sAttributesMathML = nsnull;
 nsIPrincipal* nsTreeSanitizer::sNullPrincipal = nsnull;
 
-nsTreeSanitizer::nsTreeSanitizer(bool aAllowStyles, bool aAllowComments)
- : mAllowStyles(aAllowStyles)
- , mAllowComments(aAllowComments)
+nsTreeSanitizer::nsTreeSanitizer(PRUint32 aFlags)
+ : mAllowStyles(aFlags & nsIParserUtils::SanitizerAllowStyle)
+ , mAllowComments(aFlags & nsIParserUtils::SanitizerAllowComments)
+ , mDropNonCSSPresentation(aFlags &
+     nsIParserUtils::SanitizerDropNonCSSPresentation)
+ , mDropForms(aFlags & nsIParserUtils::SanitizerDropForms)
+ , mCidEmbedsOnly(aFlags &
+     nsIParserUtils::SanitizerCidEmbedsOnly)
+ , mDropMedia(aFlags & nsIParserUtils::SanitizerDropMedia)
+ , mFullDocument(false)
 {
+  if (mCidEmbedsOnly) {
+    // Sanitizing styles for external references is not supported.
+    mAllowStyles = false;
+  }
   if (!sElementsHTML) {
     // Initialize lazily to avoid having to initialize at all if the user
     // doesn't paste HTML or load feeds.
     InitializeStatics();
   }
 }
 
 bool
 nsTreeSanitizer::MustFlatten(PRInt32 aNamespace, nsIAtom* aLocal)
 {
   if (aNamespace == kNameSpaceID_XHTML) {
+    if (mDropNonCSSPresentation && (nsGkAtoms::font == aLocal ||
+                                    nsGkAtoms::center == aLocal)) {
+      return true;
+    }
+    if (mDropForms && (nsGkAtoms::form == aLocal ||
+                       nsGkAtoms::input == aLocal ||
+                       nsGkAtoms::keygen == aLocal ||
+                       nsGkAtoms::option == aLocal ||
+                       nsGkAtoms::optgroup == aLocal)) {
+      return true;
+    }
+    if (mFullDocument && (nsGkAtoms::title == aLocal ||
+                          nsGkAtoms::html == aLocal ||
+                          nsGkAtoms::head == aLocal ||
+                          nsGkAtoms::body == aLocal)) {
+      return false;
+    }
     return !sElementsHTML->GetEntry(aLocal);
   }
   if (aNamespace == kNameSpaceID_SVG) {
+    if (mCidEmbedsOnly || mDropMedia) {
+      // Sanitizing CSS-based URL references in SVG presentational attributes
+      // is not supported, so flatten SVG for cid:-only embeds or dropped media.
+      return true;
+    }
     return !sElementsSVG->GetEntry(aLocal);
   }
   if (aNamespace == kNameSpaceID_MathML) {
     return !sElementsMathML->GetEntry(aLocal);
   }
   return true;
 }
 
@@ -1037,21 +1084,40 @@ nsTreeSanitizer::MustPrune(PRInt32 aName
 {
   // To avoid attacks where a MathML script becomes something that gets
   // serialized in a way that it parses back as an HTML script, let's just
   // drop elements with the local name 'script' regardless of namespace.
   if (nsGkAtoms::script == aLocal) {
     return true;
   }
   if (aNamespace == kNameSpaceID_XHTML) {
-    if (nsGkAtoms::title == aLocal) {
+    if (nsGkAtoms::title == aLocal && !mFullDocument) {
       // emulate the quirks of the old parser
       return true;
     }
-    if ((nsGkAtoms::meta == aLocal || nsGkAtoms::link == aLocal) &&
+    if (mDropForms && (nsGkAtoms::select == aLocal ||
+                       nsGkAtoms::button == aLocal ||
+                       nsGkAtoms::datalist == aLocal)) {
+      return true;
+    }
+    if (mDropMedia && (nsGkAtoms::img == aLocal ||
+                       nsGkAtoms::video == aLocal ||
+                       nsGkAtoms::audio == aLocal ||
+                       nsGkAtoms::source == aLocal)) {
+      return true;
+    }
+    if (nsGkAtoms::meta == aLocal &&
+        (aElement->HasAttr(kNameSpaceID_None, nsGkAtoms::charset) ||
+         aElement->HasAttr(kNameSpaceID_None, nsGkAtoms::httpEquiv))) {
+      // Throw away charset declarations even if they also have microdata
+      // which they can't validly have.
+      return true;
+    }
+    if (((!mFullDocument && nsGkAtoms::meta == aLocal) ||
+        nsGkAtoms::link == aLocal) &&
         !(aElement->HasAttr(kNameSpaceID_None, nsGkAtoms::itemprop) ||
           aElement->HasAttr(kNameSpaceID_None, nsGkAtoms::itemscope))) {
       // emulate old behavior for non-Microdata <meta> and <link> presumably
       // in <head>. <meta> and <link> are whitelisted in order to avoid
       // corrupting Microdata when they appear in <body>. Note that
       // SanitizeAttributes() will remove the rel attribute from <link> and
       // the name attribute from <meta>.
       return true;
@@ -1202,27 +1268,37 @@ nsTreeSanitizer::SanitizeAttributes(mozi
         continue;
       }
       if (IsURL(aURLs, attrLocal)) {
         if (SanitizeURL(aElement, attrNs, attrLocal)) {
           // in case the attribute removal shuffled the attribute order, start
           // the loop again.
           --ac;
           i = ac; // i will be decremented immediately thanks to the for loop
+          continue;
         }
+        // else fall through to see if there's another reason to drop this
+        // attribute (in particular if the attribute is background="" on an
+        // HTML element)
+      }
+      if (!mDropNonCSSPresentation &&
+          (aAllowed == sAttributesHTML) && // element is HTML
+          sPresAttributesHTML->GetEntry(attrLocal)) {
         continue;
       }
       if (aAllowed->GetEntry(attrLocal) &&
-          !(attrLocal == nsGkAtoms::rel &&
-            aElement->IsHTML(nsGkAtoms::link)) &&
-          !(attrLocal == nsGkAtoms::name &&
-            aElement->IsHTML(nsGkAtoms::meta))) {
+          !((attrLocal == nsGkAtoms::rel &&
+             aElement->IsHTML(nsGkAtoms::link)) ||
+            (!mFullDocument &&
+             attrLocal == nsGkAtoms::name &&
+             aElement->IsHTML(nsGkAtoms::meta)))) {
         // name="" and rel="" are whitelisted, but treat them as blacklisted
-        // for <meta name> and <link rel> to avoid document-wide metadata
-        // or styling overrides with non-conforming <meta name itemprop> or
+        // for <meta name> (fragment case) and <link rel> (all cases) to avoid
+        // document-wide metadata or styling overrides with non-conforming
+        // <meta name itemprop> or
         // <link rel itemprop>
         continue;
       }
       const PRUnichar* localStr = attrLocal->GetUTF16String();
       // Allow underscore to cater to the MCE editor library.
       // Allow data-* on SVG and MathML, too, as a forward-compat measure.
       if (*localStr == '_' || (attrLocal->GetLength() > 5 && localStr[0] == 'd'
           && localStr[1] == 'a' && localStr[2] == 't' && localStr[3] == 'a'
@@ -1297,59 +1373,101 @@ nsTreeSanitizer::SanitizeURL(mozilla::do
   PRUint32 flags = nsIScriptSecurityManager::DISALLOW_INHERIT_PRINCIPAL;
 
   nsCOMPtr<nsIURI> baseURI = aElement->GetBaseURI();
   nsCOMPtr<nsIURI> attrURI;
   nsresult rv = NS_NewURI(getter_AddRefs(attrURI), v, nsnull, baseURI);
   if (NS_SUCCEEDED(rv)) {
     rv = secMan->CheckLoadURIWithPrincipal(sNullPrincipal, attrURI, flags);
   }
+  if (mCidEmbedsOnly &&
+      NS_SUCCEEDED(rv) &&
+      kNameSpaceID_None == aNamespace) {
+    if (nsGkAtoms::src == aLocalName || nsGkAtoms::background == aLocalName) {
+      bool isCid;
+      attrURI->SchemeIs("cid", &isCid);
+      if (!isCid) {
+        rv = NS_ERROR_FAILURE;
+      }
+    } else if (nsGkAtoms::cdgroup_ == aLocalName ||
+               nsGkAtoms::altimg_ == aLocalName ||
+               nsGkAtoms::definitionURL_ == aLocalName) {
+      // Gecko doesn't fetch these now and shouldn't in the future, but
+      // in case someone goofs with these in the future, let's drop them.
+      rv = NS_ERROR_FAILURE;
+    }
+  }
   if (NS_FAILED(rv)) {
     aElement->UnsetAttr(aNamespace, aLocalName, false);
     return true;
   }
   return false;
 }
 
 void
-nsTreeSanitizer::Sanitize(nsIContent* aFragment) {
+nsTreeSanitizer::Sanitize(nsIContent* aFragment)
+{
   // If you want to relax these preconditions, be sure to check the code in
   // here that notifies / does not notify or that fires mutation events if
   // in tree.
   NS_PRECONDITION(aFragment->IsNodeOfType(nsINode::eDOCUMENT_FRAGMENT),
       "Argument was not DOM fragment.");
   NS_PRECONDITION(!aFragment->IsInDoc(), "The fragment is in doc?");
 
-  nsIContent* node = aFragment->GetFirstChild();
+  mFullDocument = false;
+  SanitizeChildren(aFragment);
+}
+
+void
+nsTreeSanitizer::Sanitize(nsIDocument* aDocument)
+{
+  // If you want to relax these preconditions, be sure to check the code in
+  // here that notifies / does not notify or that fires mutation events if
+  // in tree.
+#ifdef DEBUG
+  nsCOMPtr<nsISupports> container = aDocument->GetContainer();
+  NS_PRECONDITION(!container, "The document is in a shell.");
+  nsRefPtr<mozilla::dom::Element> root = aDocument->GetRootElement();
+  NS_PRECONDITION(root->IsHTML(nsGkAtoms::html), "Not HTML root.");
+#endif
+
+  mFullDocument = true;
+  SanitizeChildren(aDocument);
+}
+
+void
+nsTreeSanitizer::SanitizeChildren(nsINode* aRoot)
+{
+  nsIContent* node = aRoot->GetFirstChild();
   while (node) {
     if (node->IsElement()) {
       mozilla::dom::Element* elt = node->AsElement();
       nsINodeInfo* nodeInfo = node->NodeInfo();
       nsIAtom* localName = nodeInfo->NameAtom();
       PRInt32 ns = nodeInfo->NamespaceID();
 
       if (MustPrune(ns, localName, elt)) {
-        nsIContent* next = node->GetNextNonChildNode(aFragment);
+        nsIContent* next = node->GetNextNonChildNode(aRoot);
         node->GetParent()->RemoveChild(node);
         node = next;
         continue;
       }
       if (nsGkAtoms::style == localName) {
         // If styles aren't allowed, style elements got pruned above. Even
         // if styles are allowed, non-HTML, non-SVG style elements got pruned
         // above.
         NS_ASSERTION(ns == kNameSpaceID_XHTML || ns == kNameSpaceID_SVG,
             "Should have only HTML or SVG here!");
         nsAutoString styleText;
         nsContentUtils::GetNodeTextContent(node, false, styleText);
         nsAutoString sanitizedStyle;
         nsCOMPtr<nsIURI> baseURI = node->GetBaseURI();
         if (SanitizeStyleSheet(styleText,
                                sanitizedStyle,
-                               aFragment->OwnerDoc(),
+                               aRoot->OwnerDoc(),
                                baseURI)) {
           nsContentUtils::SetNodeTextContent(node, sanitizedStyle, true);
         } else {
           // If the node had non-text child nodes, this operation zaps those.
           nsContentUtils::SetNodeTextContent(node, styleText, true);
         }
         if (ns == kNameSpaceID_XHTML) {
           SanitizeAttributes(elt,
@@ -1361,21 +1479,21 @@ nsTreeSanitizer::Sanitize(nsIContent* aF
         } else {
           SanitizeAttributes(elt,
                              sAttributesSVG,
                              (nsIAtom***)kURLAttributesSVG,
                              true,
                              mAllowStyles,
                              false);
         }
-        node = node->GetNextNonChildNode(aFragment);
+        node = node->GetNextNonChildNode(aRoot);
         continue;
       }
       if (MustFlatten(ns, localName)) {
-        nsIContent* next = node->GetNextNode(aFragment);
+        nsIContent* next = node->GetNextNode(aRoot);
         nsIContent* parent = node->GetParent();
         nsCOMPtr<nsIContent> child; // Must keep the child alive during move
         nsresult rv;
         while ((child = node->GetFirstChild())) {
           parent->InsertBefore(child, node, &rv);
           if (NS_FAILED(rv)) {
             break;
           }
@@ -1388,39 +1506,40 @@ nsTreeSanitizer::Sanitize(nsIContent* aF
                    ns == kNameSpaceID_SVG ||
                    ns == kNameSpaceID_MathML,
           "Should have only HTML, MathML or SVG here!");
       if (ns == kNameSpaceID_XHTML) {
         SanitizeAttributes(elt,
                            sAttributesHTML,
                            (nsIAtom***)kURLAttributesHTML,
                            false, mAllowStyles,
-                           (nsGkAtoms::img == localName));
+                           (nsGkAtoms::img == localName) &&
+                           !mCidEmbedsOnly);
       } else if (ns == kNameSpaceID_SVG) {
         SanitizeAttributes(elt,
                            sAttributesSVG,
                            (nsIAtom***)kURLAttributesSVG,
                            true,
                            mAllowStyles,
                            false);
       } else {
         SanitizeAttributes(elt,
                            sAttributesMathML,
                            (nsIAtom***)kURLAttributesMathML,
                            true,
                            false,
                            false);
       }
-      node = node->GetNextNode(aFragment);
+      node = node->GetNextNode(aRoot);
       continue;
     }
     NS_ASSERTION(!node->GetFirstChild(), "How come non-element node had kids?");
-    nsIContent* next = node->GetNextNonChildNode(aFragment);
+    nsIContent* next = node->GetNextNonChildNode(aRoot);
     if (!mAllowComments && node->IsNodeOfType(nsINode::eCOMMENT)) {
-      node->GetParent()->RemoveChild(node);
+      node->GetNodeParent()->RemoveChild(node);
     }
     node = next;
   }
 }
 
 void
 nsTreeSanitizer::InitializeStatics()
 {
@@ -1433,16 +1552,22 @@ nsTreeSanitizer::InitializeStatics()
   }
 
   sAttributesHTML = new nsTHashtable<nsISupportsHashKey> ();
   sAttributesHTML->Init(ArrayLength(kAttributesHTML));
   for (PRUint32 i = 0; kAttributesHTML[i]; i++) {
     sAttributesHTML->PutEntry(*kAttributesHTML[i]);
   }
 
+  sPresAttributesHTML = new nsTHashtable<nsISupportsHashKey> ();
+  sPresAttributesHTML->Init(ArrayLength(kPresAttributesHTML));
+  for (PRUint32 i = 0; kPresAttributesHTML[i]; i++) {
+    sPresAttributesHTML->PutEntry(*kPresAttributesHTML[i]);
+  }
+
   sElementsSVG = new nsTHashtable<nsISupportsHashKey> ();
   sElementsSVG->Init(ArrayLength(kElementsSVG));
   for (PRUint32 i = 0; kElementsSVG[i]; i++) {
     sElementsSVG->PutEntry(*kElementsSVG[i]);
   }
 
   sAttributesSVG = new nsTHashtable<nsISupportsHashKey> ();
   sAttributesSVG->Init(ArrayLength(kAttributesSVG));
@@ -1471,16 +1596,19 @@ void
 nsTreeSanitizer::ReleaseStatics()
 {
   delete sElementsHTML;
   sElementsHTML = nsnull;
 
   delete sAttributesHTML;
   sAttributesHTML = nsnull;
 
+  delete sPresAttributesHTML;
+  sPresAttributesHTML = nsnull;
+
   delete sElementsSVG;
   sElementsSVG = nsnull;
 
   delete sAttributesSVG;
   sAttributesSVG = nsnull;
 
   delete sElementsMathML;
   sElementsMathML = nsnull;
--- a/content/base/test/chrome/Makefile.in
+++ b/content/base/test/chrome/Makefile.in
@@ -66,16 +66,17 @@ include $(topsrcdir)/config/rules.mk
     test_fileconstructor.xul \
     fileconstructor_file.png \
     test_bug339494.xul \
     test_bug357450.xul \
     test_bug571390.xul \
     test_bug574596.html \
     test_bug683852.xul \
     test_bug599295.html \
+    test_bug650776.html \
     test_bug650784.html \
     $(NULL)
 
 libs:: $(_TEST_FILES)
 	$(INSTALL) $(foreach f,$^,"$f") $(DEPTH)/_tests/testing/mochitest/tests/$(relativesrcdir)
 
 libs:: $(_CHROME_FILES)
 	$(INSTALL) $(foreach f,$^,"$f") $(DEPTH)/_tests/testing/mochitest/chrome/$(relativesrcdir)
new file mode 100644
--- /dev/null
+++ b/content/base/test/chrome/test_bug650776.html
@@ -0,0 +1,110 @@
+<!DOCTYPE HTML>
+<html>
+<!--
+https://bugzilla.mozilla.org/show_bug.cgi?id=650776
+-->
+<head>
+  <meta charset="utf-8">
+  <title>Test for Bug 650776</title>
+  <script type="application/javascript" src="chrome://mochikit/content/tests/SimpleTest/SimpleTest.js"></script>
+  <link rel="stylesheet" type="text/css" href="chrome://mochikit/content/tests/SimpleTest/test.css"/>
+</head>
+<body>
+<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=650776">Mozilla Bug 650776</a>
+<p id="display"></p>
+<div id="content" style="display: none">
+  
+</div>
+<pre id="test">
+<script type="application/javascript">
+
+/** Test for Bug 650776 **/
+
+var u = Components.interfaces.nsIParserUtils;
+var s = Components.classes["@mozilla.org/parserutils;1"]
+        .getService(u);
+
+// Basic sanity
+is(s.sanitize("foo", 0), "<html><head></head><body>foo</body></html>", "Wrong sanitizer result 1");
+// Scripts get removed
+is(s.sanitize("<script>\u003c/script>", 0), "<html><head></head><body></body></html>", "Wrong sanitizer result 2");
+// Event handlers get removed
+is(s.sanitize("<a onclick='boom()'></a>", 0), "<html><head></head><body><a></a></body></html>", "Wrong sanitizer result 3");
+// By default, styles are removed
+is(s.sanitize("<style>p { color: red; }</style><p style='background-color: blue;'></p>", 0), "<html><head></head><body><p></p></body></html>", "Wrong sanitizer result 4");
+// Can allow styles
+is(s.sanitize("<style>p { color: red; }</style><p style='background-color: blue;'></p>", u.SanitizerAllowStyle), '<html><head><style>p { color: red; }</style></head><body><p style="background-color: blue;"></p></body></html>', "Wrong sanitizer result 5");
+// -moz-binding gets dropped when styles allowed; however, reconstructing the p { ... } part seems broken!
+todo_is(s.sanitize("<style>p { color: red; -moz-binding: url(foo); }</style><p style='background-color: blue; -moz-binding: url(foo);'></p>", u.SanitizerAllowStyle), '<html><head><style>p { color: red; }</style></head><body><p style="background-color: blue;"></p></body></html>', "Wrong sanitizer result 6");
+// Various cid: embeds only cases
+is(s.sanitize("<img src='foo.html'>", u.SanitizerCidEmbedsOnly), '<html><head></head><body><img></body></html>', "Wrong sanitizer result 7");
+is(s.sanitize("<img src='cid:foo'>", u.SanitizerCidEmbedsOnly), '<html><head></head><body><img src="cid:foo"></body></html>', "Wrong sanitizer result 8");
+is(s.sanitize("<img src='data:image/png,'>", u.SanitizerCidEmbedsOnly), '<html><head></head><body><img></body></html>', "Wrong sanitizer result 9");
+is(s.sanitize("<img src='http://mochi.test/'>", u.SanitizerCidEmbedsOnly), '<html><head></head><body><img></body></html>', "Wrong sanitizer result 10");
+is(s.sanitize("<a href='http://mochi.test/'></a>", u.SanitizerCidEmbedsOnly), '<html><head></head><body><a href="http://mochi.test/"></a></body></html>', "Wrong sanitizer result 11");
+is(s.sanitize("<body background='http://mochi.test/'>", u.SanitizerCidEmbedsOnly), '<html><head></head><body></body></html>', "Wrong sanitizer result 12");
+is(s.sanitize("<body background='cid:foo'>", u.SanitizerCidEmbedsOnly), '<html><head></head><body background="cid:foo"></body></html>', "Wrong sanitizer result 13");
+is(s.sanitize("<svg></svg>", u.SanitizerCidEmbedsOnly), '<html><head></head><body></body></html>', "Wrong sanitizer result 14");
+is(s.sanitize("<math definitionURL='cid:foo' altimg='cid:foo'></math>", u.SanitizerCidEmbedsOnly), '<html><head></head><body><math></math></body></html>', "Wrong sanitizer result 14");
+is(s.sanitize("<video><source src='http://mochi.test/'></video>", u.SanitizerCidEmbedsOnly), '<html><head></head><body><video controls="controls"><source></video></body></html>', "Wrong sanitizer result 15");
+is(s.sanitize("<style></style>", u.SanitizerAllowStyle | u.SanitizerCidEmbedsOnly), '<html><head></head><body></body></html>', "Wrong sanitizer result 16");
+// Dangerous links
+is(s.sanitize("<a href='javascript:boom()'></a>", 0), "<html><head></head><body><a></a></body></html>", "Wrong sanitizer result 17");
+is(s.sanitize("<a href='JavaScript:boom()'></a>", 0), "<html><head></head><body><a></a></body></html>", "Wrong sanitizer result 18");
+is(s.sanitize("<a href=' javascript:boom()'></a>", 0), "<html><head></head><body><a></a></body></html>", "Wrong sanitizer result 19");
+is(s.sanitize("<a href='\njavascript:boom()'></a>", 0), "<html><head></head><body><a></a></body></html>", "Wrong sanitizer result 20");
+is(s.sanitize("<a href='\fjavascript:boom()'></a>", 0), "<html><head></head><body><a></a></body></html>", "Wrong sanitizer result 21");
+is(s.sanitize("<a href='\u00A0javascript:boom()'></a>", 0), "<html><head></head><body><a></a></body></html>", "Wrong sanitizer result 22");
+is(s.sanitize("<a href='foo.html'></a>", 0), "<html><head></head><body><a></a></body></html>", "Wrong sanitizer result 23");
+// Comments
+is(s.sanitize("<!-- foo -->", 0), "<html><head></head><body></body></html>", "Wrong sanitizer result 24");
+is(s.sanitize("<!-- foo -->", u.SanitizerAllowComments), "<!-- foo -->\n<html><head></head><body></body></html>", "Wrong sanitizer result 25");
+// noscript
+is(s.sanitize("<body><noscript><p class=bar>foo</p></noscript>", 0), '<html><head></head><body><noscript><p class="bar">foo</p></noscript></body></html>', "Wrong sanitizer result 26");
+// dangerous elements
+is(s.sanitize("<iframe></iframe>", 0), "<html><head></head><body></body></html>", "Wrong sanitizer result 27");
+is(s.sanitize("<object></object>", 0), "<html><head></head><body></body></html>", "Wrong sanitizer result 28");
+is(s.sanitize("<embed>", 0), "<html><head></head><body></body></html>", "Wrong sanitizer result 29");
+// presentationalism
+is(s.sanitize("<font></font>", 0), "<html><head></head><body><font></font></body></html>", "Wrong sanitizer result 30");
+is(s.sanitize("<center></center>", 0), "<html><head></head><body><center></center></body></html>", "Wrong sanitizer result 31");
+is(s.sanitize("<div align=center></div>", 0), '<html><head></head><body><div align="center"></div></body></html>', "Wrong sanitizer result 32");
+is(s.sanitize("<table><tr><td bgcolor=#FFFFFF>", 0), '<html><head></head><body><table><tbody><tr><td bgcolor="#FFFFFF"></td></tr></tbody></table></body></html>', "Wrong sanitizer result 33");
+is(s.sanitize("<font></font>", u.SanitizerDropNonCSSPresentation), "<html><head></head><body></body></html>", "Wrong sanitizer result 34");
+is(s.sanitize("<center></center>", u.SanitizerDropNonCSSPresentation), "<html><head></head><body></body></html>", "Wrong sanitizer result 35");
+is(s.sanitize("<div align=center></div>", u.SanitizerDropNonCSSPresentation), '<html><head></head><body><div></div></body></html>', "Wrong sanitizer result 36");
+is(s.sanitize("<table><tr><td bgcolor=#FFFFFF>", u.SanitizerDropNonCSSPresentation), '<html><head></head><body><table><tbody><tr><td></td></tr></tbody></table></body></html>', "Wrong sanitizer result 37");
+// metadata
+is(s.sanitize("<meta charset=utf-7>", 0), "<html><head></head><body></body></html>", "Wrong sanitizer result 38");
+is(s.sanitize("<meta http-equiv=content-type content='text/html; charset=utf-7'>", 0), "<html><head></head><body></body></html>", "Wrong sanitizer result 39");
+is(s.sanitize("<meta itemprop=foo content=bar>", 0), '<html><head><meta itemprop="foo" content="bar"></head><body></body></html>', "Wrong sanitizer result 40");
+is(s.sanitize("<link rel=whatever href=http://mochi.test/ >", 0), '<html><head></head><body></body></html>', "Wrong sanitizer result 41");
+is(s.sanitize("<link itemprop=foo href=http://mochi.test/ >", 0), '<html><head><link itemprop="foo" href="http://mochi.test/"></head><body></body></html>', "Wrong sanitizer result 42");
+is(s.sanitize("<link rel=stylesheet itemprop=foo href=http://mochi.test/ >", 0), '<html><head><link itemprop="foo" href="http://mochi.test/"></head><body></body></html>', "Wrong sanitizer result 43");
+is(s.sanitize("<meta name=foo content=bar>", 0), '<html><head><meta name="foo" content="bar"></head><body></body></html>', "Wrong sanitizer result 44");
+// forms
+is(s.sanitize("<form></form>", 0), '<html><head></head><body><form></form></body></html>', "Wrong sanitizer result 45");
+is(s.sanitize("<fieldset><legend></legend></fieldset>", 0), '<html><head></head><body><fieldset><legend></legend></fieldset></body></html>', "Wrong sanitizer result 46");
+is(s.sanitize("<input>", 0), '<html><head></head><body><input></body></html>', "Wrong sanitizer result 47");
+is(s.sanitize("<button>foo</button>", 0), '<html><head></head><body><button>foo</button></body></html>', "Wrong sanitizer result 48");
+is(s.sanitize("<select><optgroup><option>foo</option></optgroup></select></button>", 0), '<html><head></head><body><select><optgroup><option>foo</option></optgroup></select></body></html>', "Wrong sanitizer result 49");
+is(s.sanitize("<form></form>", u.SanitizerDropForms), '<html><head></head><body></body></html>', "Wrong sanitizer result 50");
+is(s.sanitize("<fieldset><legend></legend></fieldset>", u.SanitizerDropForms), '<html><head></head><body><fieldset><legend></legend></fieldset></body></html>', "Wrong sanitizer result 51");
+is(s.sanitize("<input>", u.SanitizerDropForms), '<html><head></head><body></body></html>', "Wrong sanitizer result 52");
+is(s.sanitize("<button>foo</button>", u.SanitizerDropForms), '<html><head></head><body></body></html>', "Wrong sanitizer result 53");
+is(s.sanitize("<select><optgroup><option>foo</option></optgroup></select></button>", u.SanitizerDropForms), '<html><head></head><body></body></html>', "Wrong sanitizer result 54");
+// doctype
+is(s.sanitize("<!DOCTYPE html>", 0), '<!DOCTYPE html>\n<html><head></head><body></body></html>', "Wrong sanitizer result 55");
+// title
+is(s.sanitize("<title></title>", 0), '<html><head><title></title></head><body></body></html>', "Wrong sanitizer result 56");
+// Drop media
+is(s.sanitize("<img>", u.SanitizerDropMedia), '<html><head></head><body></body></html>', "Wrong sanitizer result 57");
+is(s.sanitize("<svg>foo</svg>", u.SanitizerDropMedia), '<html><head></head><body>foo</body></html>', "Wrong sanitizer result 58");
+is(s.sanitize("<video><source></video>", u.SanitizerDropMedia), '<html><head></head><body></body></html>', "Wrong sanitizer result 59");
+is(s.sanitize("<audio><source></audio>", u.SanitizerDropMedia), '<html><head></head><body></body></html>', "Wrong sanitizer result 60");
+
+
+</script>
+</pre>
+</body>
+</html>
--- a/editor/libeditor/html/nsHTMLDataTransfer.cpp
+++ b/editor/libeditor/html/nsHTMLDataTransfer.cpp
@@ -121,16 +121,17 @@
 #include "plbase64.h"
 #include "prmem.h"
 #include "nsStreamUtils.h"
 #include "nsIPrincipal.h"
 #include "nsIDocShell.h"
 #include "nsIDocShellTreeItem.h"
 #include "nsContentUtils.h"
 #include "mozilla/Preferences.h"
+#include "nsIParserUtils.h"
 
 using namespace mozilla;
 
 const PRUnichar nbsp = 160;
 
 static NS_DEFINE_CID(kCParserCID,     NS_PARSER_CID);
 
 #define kInsertCookie  "_moz_Insert Here_moz_"
@@ -2365,17 +2366,19 @@ nsresult nsHTMLEditor::ParseFragment(con
   rv = nsContentUtils::ParseFragmentHTML(aFragStr,
                                          fragment,
                                          aContextLocalName ?
                                            aContextLocalName : nsGkAtoms::body,
                                         kNameSpaceID_XHTML,
                                         false,
                                         true);
   if (!aTrustedInput) {
-    nsTreeSanitizer sanitizer(!!aContextLocalName, !aContextLocalName);
+    nsTreeSanitizer sanitizer(aContextLocalName ?
+                              nsIParserUtils::SanitizerAllowStyle :
+                              nsIParserUtils::SanitizerAllowComments);
     sanitizer.Sanitize(fragment);
   }
   *outNode = do_QueryInterface(frag);
   return rv;
 }
 
 nsresult nsHTMLEditor::CreateListOfNodesToPaste(nsIDOMNode  *aFragmentAsNode,
                                                 nsCOMArray<nsIDOMNode>& outNodeList,
--- a/parser/html/nsIParserUtils.idl
+++ b/parser/html/nsIParserUtils.idl
@@ -1,21 +1,99 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this file,
  * You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "nsISupports.idl"
 
 /**
  * Non-Web HTML parser functionality to Firefox extensions and XULRunner apps. 
- * Don't use this from within Gecko--use nsContentUtils directly instead.
+ * Don't use this from within Gecko--use nsContentUtils, nsTreeSanitizer, etc.
+ * directly instead.
  */
 [scriptable, uuid(290f49bb-0619-4bda-8006-ab31bec7231a)]
 interface nsIParserUtils : nsISupports
 {
+
+  /**
+   * Flag for sanitizer: Allow comment nodes.
+   */
+  const unsigned long SanitizerAllowComments = (1 << 0);
+
+  /**
+   * Flag for sanitizer: Allow <style> and style="" (with contents sanitized
+   * in case of -moz-binding). Note! If -moz-binding is absent, properties
+   * that might be XSS risks in other Web engines are preserved!
+   */
+  const unsigned long SanitizerAllowStyle = (1 << 1);
+
+  /**
+   * Flag for sanitizer: Only allow cid: URLs for embedded content.
+   *
+   * At present, sanitizing CSS backgrounds, etc., is not supported, so setting 
+   * this together with SanitizerAllowStyle doesn't make sense.
+   *
+   * At present, sanitizing CSS syntax in SVG presentational attributes is not
+   * supported, so this option flattens out SVG.
+   */
+  const unsigned long SanitizerCidEmbedsOnly = (1 << 2);
+
+  /**
+   * Flag for sanitizer: Drop non-CSS presentational HTML elements and 
+   * attributes, such as <font>, <center> and bgcolor="".
+   */
+  const unsigned long SanitizerDropNonCSSPresentation = (1 << 3);
+
+  /**
+   * Flag for sanitizer: Drop forms and form controls (excluding 
+   * fieldset/legend).
+   */
+  const unsigned long SanitizerDropForms = (1 << 4);
+
+  /**
+   * Flag for sanitizer: Drop <img>, <video>, <audio> and <source> and flatten
+   * out SVG.
+   */
+  const unsigned long SanitizerDropMedia = (1 << 5);
+
+  /**
+   * Parses a string into an HTML document, sanitizes the document and 
+   * returns the result serialized to a string.
+   *
+   * The sanitizer is designed to protect against XSS when sanitized content
+   * is inserted into a different-origin context without an iframe-equivalent
+   * sandboxing mechanism.
+   *
+   * By default, the sanitizer doesn't try to avoid leaking information that 
+   * the content was viewed to third parties. That is, by default, e.g. 
+   * <img src> pointing to an HTTP server potentially controlled by a third 
+   * party is not removed. To avoid ambient information leakage upon loading
+   * the sanitized content, use the SanitizerCidEmbedsOnly flag. In that
+   * case, <a href> links (and similar) to other content are preserved, so an
+   * explicit user action (following a link) after the content has been loaded
+   * can still leak information.
+   *
+   * By default, non-dangerous non-CSS presentational HTML elements and 
+   * attributes or forms are not removed. To remove these, use 
+   * SanitizerDropNonCSSPresentation and/or SanitizerDropForms.
+   *
+   * By default, comments and CSS are removed. To preserve comments, use
+   * SanitizerAllowComments. To preserve <style> and style="", use
+   * SanitizerAllowStyle. -moz-binding is removed from <style> and style="" if
+   * present. In this case, properties that Gecko doesn't recognize can get
+   * removed as a side effect. Note! If SanitizerAllowStyle is specified and
+   * -moz-binding is not present in <style> or style="", the sanitized content
+   * may still be XSS dangerous if loaded into a non-Gecko Web engine!
+   *
+   * @param src the HTML source to parse (C++ callers are allowed but not
+   *            required to use the same string for the return value.)
+   * @param flags sanitization option flags defined above
+   */
+  AString sanitize(in AString src, in unsigned long flags);
+
   /**
    * Convert HTML to plain text.
    *
    * @param src the HTML source to parse (C++ callers are allowed but not
    *            required to use the same string for the return value.)
    * @param flags conversion option flags defined in nsIDocumentEncoder
    * @param wrapCol number of characters per line; 0 for no auto-wrapping
    */
--- a/parser/html/nsParserUtils.cpp
+++ b/parser/html/nsParserUtils.cpp
@@ -73,43 +73,85 @@ NS_IMPL_ISUPPORTS2(nsParserUtils,
                    nsIScriptableUnescapeHTML,
                    nsIParserUtils)
 
 static NS_DEFINE_CID(kCParserCID, NS_PARSER_CID);
 
 
 
 NS_IMETHODIMP
-nsParserUtils::ConvertToPlainText(const nsAString & aFromStr,
+nsParserUtils::ConvertToPlainText(const nsAString& aFromStr,
                                            PRUint32 aFlags,
                                            PRUint32 aWrapCol,
-                                           nsAString & aToStr)
+                                           nsAString& aToStr)
 {
   return nsContentUtils::ConvertToPlainText(aFromStr,
     aToStr,
     aFlags,
     aWrapCol);
 }
 
 NS_IMETHODIMP
-nsParserUtils::Unescape(const nsAString & aFromStr,
-                                 nsAString & aToStr)
+nsParserUtils::Unescape(const nsAString& aFromStr,
+                        nsAString& aToStr)
 {
   return nsContentUtils::ConvertToPlainText(aFromStr,
     aToStr,
     nsIDocumentEncoder::OutputSelectionOnly |
     nsIDocumentEncoder::OutputAbsoluteLinks,
     0);
 }
 
+// Parses aFromStr as an HTML document, sanitizes the tree according to
+// aFlags and serializes it to aToStr; every fallible step is checked.
+NS_IMETHODIMP
+nsParserUtils::Sanitize(const nsAString& aFromStr,
+                        PRUint32 aFlags,
+                        nsAString& aToStr)
+{
+  nsCOMPtr<nsIURI> uri;
+  nsresult rv = NS_NewURI(getter_AddRefs(uri), "about:blank");
+  NS_ENSURE_SUCCESS(rv, rv);
+  nsCOMPtr<nsIPrincipal> principal =
+    do_CreateInstance("@mozilla.org/nullprincipal;1", &rv);
+  NS_ENSURE_SUCCESS(rv, rv);
+  nsCOMPtr<nsIDOMDocument> domDocument;
+  rv = nsContentUtils::CreateDocument(EmptyString(), EmptyString(), nsnull,
+                                      uri, uri, principal, nsnull,
+                                      DocumentFlavorHTML,
+                                      getter_AddRefs(domDocument));
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  nsCOMPtr<nsIDocument> document = do_QueryInterface(domDocument);
+  NS_ENSURE_TRUE(document, NS_ERROR_FAILURE);
+  rv = nsContentUtils::ParseDocumentHTML(aFromStr, document, false);
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  nsTreeSanitizer sanitizer(aFlags);
+  sanitizer.Sanitize(document);
+  // Bail out if the encoder is unavailable instead of dereferencing null.
+  nsCOMPtr<nsIDocumentEncoder> encoder =
+    do_CreateInstance(NS_DOC_ENCODER_CONTRACTID_BASE "text/html", &rv);
+  NS_ENSURE_SUCCESS(rv, rv);
+  rv = encoder->NativeInit(document,
+                           NS_LITERAL_STRING("text/html"),
+                           nsIDocumentEncoder::OutputDontRewriteEncodingDeclaration |
+                           nsIDocumentEncoder::OutputNoScriptContent |
+                           nsIDocumentEncoder::OutputEncodeBasicEntities |
+                           nsIDocumentEncoder::OutputLFLineBreak |
+                           nsIDocumentEncoder::OutputRaw);
+  NS_ENSURE_SUCCESS(rv, rv);
+  return encoder->EncodeToString(aToStr);
+}
+
 // The feed version of nsContentUtils::CreateContextualFragment It
 // creates a fragment, but doesn't go to all the effort to preserve
 // context like innerHTML does, because feed DOMs shouldn't have that.
 NS_IMETHODIMP
-nsParserUtils::ParseFragment(const nsAString &aFragment,
+nsParserUtils::ParseFragment(const nsAString& aFragment,
                                       bool aIsXML,
                                       nsIURI* aBaseURI,
                                       nsIDOMElement* aContextElement,
                                       nsIDOMDocumentFragment** aReturn)
 {
   NS_ENSURE_ARG(aContextElement);
   *aReturn = nsnull;
 
@@ -192,17 +234,17 @@ nsParserUtils::ParseFragment(const nsASt
                           spec16,
                           false);
           }
           node = node->GetNextSibling();
         }
       }
     }
     if (fragment) {
-      nsTreeSanitizer sanitizer(false, false);
+      nsTreeSanitizer sanitizer;
       sanitizer.Sanitize(fragment);
     }
   }
 
   if (scripts_enabled)
       loader->SetEnabled(true);
   
   return rv;