Bug 482921 part 4 - Support XML syntax highlighting in the parser core. r=Olli.Pettay.
author Henri Sivonen <hsivonen@iki.fi>
Tue, 01 Nov 2011 13:33:11 +0200
changeset 80855 ee743a986ea130eae786f3d64933fdb343150cb1
parent 80854 8a18316c38652ab13fbb4168463c816e2642b293
child 80856 e06b72c42fc82ab88b579d107f99d0aa5dc7a587
push id 434
push user clegnitto@mozilla.com
push date Wed, 21 Dec 2011 12:10:54 +0000
treeherder mozilla-beta@bddb6ed8dd47 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers Olli
bugs 482921
milestone 10.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 482921 part 4 - Support XML syntax highlighting in the parser core. r=Olli.Pettay.
parser/html/javasrc/Tokenizer.java
parser/html/javasrc/TreeBuilder.java
parser/html/nsHtml5AtomList.h
parser/html/nsHtml5Highlighter.cpp
parser/html/nsHtml5Highlighter.h
parser/html/nsHtml5Parser.cpp
parser/html/nsHtml5StreamParser.cpp
parser/html/nsHtml5Tokenizer.cpp
parser/html/nsHtml5Tokenizer.h
parser/html/nsHtml5TreeBuilder.cpp
--- a/parser/html/javasrc/Tokenizer.java
+++ b/parser/html/javasrc/Tokenizer.java
@@ -211,16 +211,20 @@ public class Tokenizer implements Locato
     public static final int SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN = 69;
 
     public static final int SCRIPT_DATA_DOUBLE_ESCAPED_DASH = 70;
 
     public static final int SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH = 71;
 
     public static final int SCRIPT_DATA_DOUBLE_ESCAPE_END = 72;
 
+    public static final int PROCESSING_INSTRUCTION = 73;
+
+    public static final int PROCESSING_INSTRUCTION_QUESTION_MARK = 74;
+
     /**
      * Magic value for UTF-16 operations.
      */
     private static final int LEAD_OFFSET = (0xD800 - (0x10000 >> 10));
 
     /**
      * UTF-16 code unit array containing less than and greater than for emitting
      * those characters on certain parse errors.
@@ -500,16 +504,18 @@ public class Tokenizer implements Locato
     private boolean shouldSuspend;
 
     protected boolean confident;
 
     private int line;
 
     private Interner interner;
 
+    // CPPONLY: private boolean viewingXmlSource;
+
     // [NOCPP[
 
     protected LocatorImpl ampersandLocation;
 
     public Tokenizer(TokenHandler tokenHandler, boolean newAttributesEachTime) {
         this.tokenHandler = tokenHandler;
         this.encodingDeclarationHandler = null;
         this.newAttributesEachTime = newAttributesEachTime;
@@ -526,42 +532,49 @@ public class Tokenizer implements Locato
     // ]NOCPP]
 
     /**
      * The constructor.
      * 
      * @param tokenHandler
      *            the handler for receiving tokens
      */
-    public Tokenizer(TokenHandler tokenHandler) {
+    public Tokenizer(TokenHandler tokenHandler
+    // CPPONLY: , boolean viewingXmlSource        
+    ) {
         this.tokenHandler = tokenHandler;
         this.encodingDeclarationHandler = null;
         // [NOCPP[
         this.newAttributesEachTime = false;
         // ]NOCPP]
         this.bmpChar = new char[1];
         this.astralChar = new char[2];
         this.tagName = null;
         this.attributeName = null;
         this.doctypeName = null;
         this.publicIdentifier = null;
         this.systemIdentifier = null;
         this.attributes = null;
+    // CPPONLY: this.viewingXmlSource = viewingXmlSource;
     }
 
     public void setInterner(Interner interner) {
         this.interner = interner;
     }
 
     public void initLocation(String newPublicId, String newSystemId) {
         this.systemId = newSystemId;
         this.publicId = newPublicId;
 
     }
 
+    // CPPONLY: boolean isViewingXmlSource() {
+    // CPPONLY: return viewingXmlSource;
+    // CPPONLY: }
+
     // [NOCPP[
 
     /**
      * Returns the mappingLangToXmlLang.
      * 
      * @return the mappingLangToXmlLang
      */
     public boolean isMappingLangToXmlLang() {
@@ -1113,20 +1126,26 @@ public class Tokenizer implements Locato
         HtmlAttributes attrs = (attributes == null ? HtmlAttributes.EMPTY_ATTRIBUTES
                 : attributes);
         if (endTag) {
             /*
              * When an end tag token is emitted, the content model flag must be
              * switched to the PCDATA state.
              */
             maybeErrAttributesOnEndTag(attrs);
+            // CPPONLY: if (!viewingXmlSource) {
             tokenHandler.endTag(tagName);
+            // CPPONLY: }
             Portability.delete(attributes);
         } else {
+            // CPPONLY: if (viewingXmlSource) {
+            // CPPONLY: Portability.delete(attributes);
+            // CPPONLY: } else {
             tokenHandler.startTag(tagName, attrs, selfClosing);
+            // CPPONLY: }
         }
         tagName.release();
         tagName = null;
         resetAttributes();
         /*
          * The token handler may have called setStateAndEndTagExpectation
          * and changed stateSave since the start of this method.
          */
@@ -1529,16 +1548,23 @@ public class Tokenizer implements Locato
                             case '/':
                                 /*
                                  * U+002F SOLIDUS (/) Switch to the close tag
                                  * open state.
                                  */
                                 state = transition(state, Tokenizer.CLOSE_TAG_OPEN, reconsume, pos);
                                 continue stateloop;
                             case '?':
+                                // CPPONLY: if (viewingXmlSource) {
+                                // CPPONLY: state = transition(state,
+                                // CPPONLY: Tokenizer.PROCESSING_INSTRUCTION,
+                                // CPPONLY: reconsume,
+                                // CPPONLY: pos);
+                                // CPPONLY: continue stateloop;
+                                // CPPONLY: }
                                 /*
                                  * U+003F QUESTION MARK (?) Parse error.
                                  */
                                 errProcessingInstruction();
                                 /*
                                  * Switch to the bogus comment state.
                                  */
                                 clearLongStrBufAndAppend(c);
@@ -5711,16 +5737,51 @@ public class Tokenizer implements Locato
                                 appendLongStrBuf(c);
                                 /*
                                  * Stay in the DOCTYPE public identifier
                                  * (single-quoted) state.
                                  */
                                 continue;
                         }
                     }
+                    // XXX reorder point
+                case PROCESSING_INSTRUCTION:
+                    processinginstructionloop: for (;;) {
+                        if (++pos == endPos) {
+                            break stateloop;
+                        }
+                        c = checkChar(buf, pos);
+                        switch (c) {
+                            case '?':
+                                state = transition(
+                                        state,
+                                        Tokenizer.PROCESSING_INSTRUCTION_QUESTION_MARK,
+                                        reconsume, pos);
+                                break processinginstructionloop;
+                            // continue stateloop;
+                            default:
+                                continue;
+                        }
+                    }
+                case PROCESSING_INSTRUCTION_QUESTION_MARK:
+                    if (++pos == endPos) {
+                        break stateloop;
+                    }
+                    c = checkChar(buf, pos);
+                    switch (c) {
+                        case '>':
+                            state = transition(state, Tokenizer.DATA,
+                                    reconsume, pos);
+                            continue stateloop;
+                        default:
+                            state = transition(state,
+                                    Tokenizer.PROCESSING_INSTRUCTION,
+                                    reconsume, pos);
+                            continue stateloop;
+                    }
                     // END HOTSPOT WORKAROUND
             }
         }
         flushChars(buf, pos);
         /*
          * if (prevCR && pos != endPos) { // why is this needed? pos--; col--; }
          */
         // Save locals
--- a/parser/html/javasrc/TreeBuilder.java
+++ b/parser/html/javasrc/TreeBuilder.java
@@ -570,16 +570,28 @@ public abstract class TreeBuilder<T> imp
                         contextName);
             } else {
                 tokenizer.setStateAndEndTagExpectation(Tokenizer.DATA, contextName);
             }
             contextName = null;
             contextNode = null;
         } else {
             mode = INITIAL;
+            // If we are viewing XML source, put a foreign element permanently
+            // on the stack so that cdataSectionAllowed() returns true.
+            // CPPONLY: if (tokenizer.isViewingXmlSource()) {
+            // CPPONLY: T elt = createElement("http://www.w3.org/2000/svg",
+            // CPPONLY: "svg",
+            // CPPONLY: tokenizer.emptyAttributes());
+            // CPPONLY: StackNode<T> node = new StackNode<T>(ElementName.SVG,
+            // CPPONLY: "svg",
+            // CPPONLY: elt);
+            // CPPONLY: currentPtr++;
+            // CPPONLY: stack[currentPtr] = node;
+            // CPPONLY: }
         }
     }
 
     public final void doctype(@Local String name, String publicIdentifier,
             String systemIdentifier, boolean forceQuirks) throws SAXException {
         needToDropLF = false;
         if (!isInForeign()) {
             switch (mode) {
@@ -851,16 +863,19 @@ public abstract class TreeBuilder<T> imp
     }
 
     /**
      * @see nu.validator.htmlparser.common.TokenHandler#characters(char[], int,
      *      int)
      */
     public final void characters(@Const @NoLength char[] buf, int start, int length)
             throws SAXException {
+        // CPPONLY: if (tokenizer.isViewingXmlSource()) {
+        // CPPONLY: return;
+        // CPPONLY: }
         if (needToDropLF) {
             needToDropLF = false;
             if (buf[start] == '\n') {
                 start++;
                 length--;
                 if (length == 0) {
                     return;
                 }
--- a/parser/html/nsHtml5AtomList.h
+++ b/parser/html/nsHtml5AtomList.h
@@ -26,16 +26,17 @@ HTML5_ATOM(textarea, "textarea")
 HTML5_ATOM(style, "style")
 HTML5_ATOM(xmp, "xmp")
 HTML5_ATOM(iframe, "iframe")
 HTML5_ATOM(noembed, "noembed")
 HTML5_ATOM(noframes, "noframes")
 HTML5_ATOM(noscript, "noscript")
 HTML5_ATOM(plaintext, "plaintext")
 HTML5_ATOM(script, "script")
+HTML5_ATOM(svg, "svg")
 HTML5_ATOM(table, "table")
 HTML5_ATOM(caption, "caption")
 HTML5_ATOM(p, "p")
 HTML5_ATOM(address, "address")
 HTML5_ATOM(div, "div")
 HTML5_ATOM(a, "a")
 HTML5_ATOM(nobr, "nobr")
 HTML5_ATOM(input, "input")
@@ -766,17 +767,16 @@ HTML5_ATOM(map, "map")
 HTML5_ATOM(mtr, "mtr")
 HTML5_ATOM(neq, "neq")
 HTML5_ATOM(not_, "not")
 HTML5_ATOM(nav, "nav")
 HTML5_ATOM(pre, "pre")
 HTML5_ATOM(rem, "rem")
 HTML5_ATOM(sub, "sub")
 HTML5_ATOM(sec, "sec")
-HTML5_ATOM(svg, "svg")
 HTML5_ATOM(sum, "sum")
 HTML5_ATOM(sin, "sin")
 HTML5_ATOM(sep, "sep")
 HTML5_ATOM(sup, "sup")
 HTML5_ATOM(set, "set")
 HTML5_ATOM(tan, "tan")
 HTML5_ATOM(use, "use")
 HTML5_ATOM(var, "var")
--- a/parser/html/nsHtml5Highlighter.cpp
+++ b/parser/html/nsHtml5Highlighter.cpp
@@ -65,16 +65,19 @@ PRUnichar nsHtml5Highlighter::sAttribute
 
 PRUnichar nsHtml5Highlighter::sAttributeValue[] =
   { 'a', 't', 't', 'r', 'i', 'b', 'u', 't', 'e', '-',
     'v', 'a', 'l', 'u', 'e', 0 };
 
 PRUnichar nsHtml5Highlighter::sDoctype[] =
   { 'd', 'o', 'c', 't', 'y', 'p', 'e', 0 };
 
+PRUnichar nsHtml5Highlighter::sPi[] =
+  { 'p', 'i', 0 };
+
 nsHtml5Highlighter::nsHtml5Highlighter(nsAHtml5TreeOpSink* aOpSink)
  : mState(NS_HTML5TOKENIZER_DATA)
  , mCStart(PR_INT32_MAX)
  , mPos(0)
  , mInlinesOpen(0)
  , mInCharacters(false)
  , mBuffer(nsnull)
  , mSyntaxHighlight(Preferences::GetBool("view_source.syntax_highlight",
@@ -183,16 +186,19 @@ nsHtml5Highlighter::Transition(PRInt32 a
     case NS_HTML5TOKENIZER_TAG_OPEN:
       switch (aState) {
         case NS_HTML5TOKENIZER_TAG_NAME:
           StartSpan(sStartTag);
           break;
         case NS_HTML5TOKENIZER_DATA:
           FinishTag(); // DATA
           break;
+        case NS_HTML5TOKENIZER_PROCESSING_INSTRUCTION:
+          AddClass(sPi);
+          break;
       }
       break;
     case NS_HTML5TOKENIZER_TAG_NAME:
       switch (aState) {
         case NS_HTML5TOKENIZER_BEFORE_ATTRIBUTE_NAME:
           EndInline(); // NS_HTML5TOKENIZER_TAG_NAME
           break;
         case NS_HTML5TOKENIZER_SELF_CLOSING_START_TAG:
@@ -451,16 +457,21 @@ nsHtml5Highlighter::Transition(PRInt32 a
     case NS_HTML5TOKENIZER_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER:
     case NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED:
     case NS_HTML5TOKENIZER_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED:
       if (aState == NS_HTML5TOKENIZER_DATA) {
         AddClass(sDoctype);
         FinishTag();
       }
       break;
+    case NS_HTML5TOKENIZER_PROCESSING_INSTRUCTION_QUESTION_MARK:
+      if (aState == NS_HTML5TOKENIZER_DATA) {
+        FinishTag();
+      }
+      break;
     default:
       break;
   }
   mState = aState;
   return aState;
 }
 
 void
--- a/parser/html/nsHtml5Highlighter.h
+++ b/parser/html/nsHtml5Highlighter.h
@@ -428,11 +428,16 @@ class nsHtml5Highlighter
      * The string "doctype"
      */
     static PRUnichar sDoctype[];
 
     /**
      * The string "entity"
      */
     static PRUnichar sEntity[];
+
+    /**
+     * The string "pi"
+     */
+    static PRUnichar sPi[];
 };
 
 #endif // nsHtml5Highlighter_h_
--- a/parser/html/nsHtml5Parser.cpp
+++ b/parser/html/nsHtml5Parser.cpp
@@ -85,17 +85,17 @@ NS_IMPL_CYCLE_COLLECTION_UNLINK_BEGIN(ns
   tmp->DropStreamParser();
 NS_IMPL_CYCLE_COLLECTION_UNLINK_END
 
 nsHtml5Parser::nsHtml5Parser()
   : mFirstBuffer(new nsHtml5OwningUTF16Buffer((void*)nsnull))
   , mLastBuffer(mFirstBuffer)
   , mExecutor(new nsHtml5TreeOpExecutor())
   , mTreeBuilder(new nsHtml5TreeBuilder(mExecutor, nsnull))
-  , mTokenizer(new nsHtml5Tokenizer(mTreeBuilder))
+  , mTokenizer(new nsHtml5Tokenizer(mTreeBuilder, false))
   , mRootContextLineNumber(1)
 {
   mAtomTable.Init(); // we aren't checking for OOM anyway...
   mTokenizer->setInterner(&mAtomTable);
   // There's a zeroing operator new for everything else
 }
 
 nsHtml5Parser::~nsHtml5Parser()
@@ -489,17 +489,17 @@ nsHtml5Parser::Parse(const nsAString& aS
       mDocWriteSpeculatorActive = true;
       if (!mDocWriteSpeculativeTreeBuilder) {
         // Lazily initialize if uninitialized
         mDocWriteSpeculativeTreeBuilder =
             new nsHtml5TreeBuilder(nsnull, mExecutor->GetStage());
         mDocWriteSpeculativeTreeBuilder->setScriptingEnabled(
             mTreeBuilder->isScriptingEnabled());
         mDocWriteSpeculativeTokenizer =
-            new nsHtml5Tokenizer(mDocWriteSpeculativeTreeBuilder);
+            new nsHtml5Tokenizer(mDocWriteSpeculativeTreeBuilder, false);
         mDocWriteSpeculativeTokenizer->setInterner(&mAtomTable);
         mDocWriteSpeculativeTokenizer->start();
       }
       mDocWriteSpeculativeTokenizer->resetToDataState();
       mDocWriteSpeculativeTreeBuilder->loadState(mTreeBuilder, &mAtomTable);
       mDocWriteSpeculativeLastWasCR = false;
     }
 
--- a/parser/html/nsHtml5StreamParser.cpp
+++ b/parser/html/nsHtml5StreamParser.cpp
@@ -180,17 +180,17 @@ nsHtml5StreamParser::nsHtml5StreamParser
   : mFirstBuffer(nsnull) // Will be filled when starting
   , mLastBuffer(nsnull) // Will be filled when starting
   , mExecutor(aExecutor)
   , mTreeBuilder(new nsHtml5TreeBuilder((aMode == VIEW_SOURCE_HTML ||
                                          aMode == VIEW_SOURCE_XML) ?
                                              nsnull : mExecutor->GetStage(),
                                          aMode == NORMAL ?
                                              mExecutor->GetStage() : nsnull))
-  , mTokenizer(new nsHtml5Tokenizer(mTreeBuilder))
+  , mTokenizer(new nsHtml5Tokenizer(mTreeBuilder, aMode == VIEW_SOURCE_XML))
   , mTokenizerMutex("nsHtml5StreamParser mTokenizerMutex")
   , mOwner(aOwner)
   , mSpeculationMutex("nsHtml5StreamParser mSpeculationMutex")
   , mTerminatedMutex("nsHtml5StreamParser mTerminatedMutex")
   , mThread(nsHtml5Module::GetStreamParserThread())
   , mExecutorFlusher(new nsHtml5ExecutorFlusher(aExecutor))
   , mLoadFlusher(new nsHtml5LoadFlusher(aExecutor))
   , mFlushTimer(do_CreateInstance("@mozilla.org/timer;1"))
--- a/parser/html/nsHtml5Tokenizer.cpp
+++ b/parser/html/nsHtml5Tokenizer.cpp
@@ -83,27 +83,28 @@ static PRUnichar const IFRAME_ARR_DATA[]
 staticJArray<PRUnichar,PRInt32> nsHtml5Tokenizer::IFRAME_ARR = { IFRAME_ARR_DATA, NS_ARRAY_LENGTH(IFRAME_ARR_DATA) };
 static PRUnichar const NOEMBED_ARR_DATA[] = { 'n', 'o', 'e', 'm', 'b', 'e', 'd' };
 staticJArray<PRUnichar,PRInt32> nsHtml5Tokenizer::NOEMBED_ARR = { NOEMBED_ARR_DATA, NS_ARRAY_LENGTH(NOEMBED_ARR_DATA) };
 static PRUnichar const NOSCRIPT_ARR_DATA[] = { 'n', 'o', 's', 'c', 'r', 'i', 'p', 't' };
 staticJArray<PRUnichar,PRInt32> nsHtml5Tokenizer::NOSCRIPT_ARR = { NOSCRIPT_ARR_DATA, NS_ARRAY_LENGTH(NOSCRIPT_ARR_DATA) };
 static PRUnichar const NOFRAMES_ARR_DATA[] = { 'n', 'o', 'f', 'r', 'a', 'm', 'e', 's' };
 staticJArray<PRUnichar,PRInt32> nsHtml5Tokenizer::NOFRAMES_ARR = { NOFRAMES_ARR_DATA, NS_ARRAY_LENGTH(NOFRAMES_ARR_DATA) };
 
-nsHtml5Tokenizer::nsHtml5Tokenizer(nsHtml5TreeBuilder* tokenHandler)
+nsHtml5Tokenizer::nsHtml5Tokenizer(nsHtml5TreeBuilder* tokenHandler, bool viewingXmlSource)
   : tokenHandler(tokenHandler),
     encodingDeclarationHandler(nsnull),
     bmpChar(jArray<PRUnichar,PRInt32>::newJArray(1)),
     astralChar(jArray<PRUnichar,PRInt32>::newJArray(2)),
     tagName(nsnull),
     attributeName(nsnull),
     doctypeName(nsnull),
     publicIdentifier(nsnull),
     systemIdentifier(nsnull),
-    attributes(nsnull)
+    attributes(nsnull),
+    viewingXmlSource(viewingXmlSource)
 {
   MOZ_COUNT_CTOR(nsHtml5Tokenizer);
 }
 
 void 
 nsHtml5Tokenizer::setInterner(nsHtml5AtomTable* interner)
 {
   this->interner = interner;
@@ -111,16 +112,22 @@ nsHtml5Tokenizer::setInterner(nsHtml5Ato
 
 void 
 nsHtml5Tokenizer::initLocation(nsString* newPublicId, nsString* newSystemId)
 {
   this->systemId = newSystemId;
   this->publicId = newPublicId;
 }
 
+bool 
+nsHtml5Tokenizer::isViewingXmlSource()
+{
+  return viewingXmlSource;
+}
+
 void 
 nsHtml5Tokenizer::setStateAndEndTagExpectation(PRInt32 specialTokenizerState, nsIAtom* endTagExpectation)
 {
   this->stateSave = specialTokenizerState;
   if (specialTokenizerState == NS_HTML5TOKENIZER_DATA) {
     return;
   }
   autoJArray<PRUnichar,PRInt32> asArray = nsHtml5Portability::newCharArrayFromLocal(endTagExpectation);
@@ -292,20 +299,26 @@ PRInt32
 nsHtml5Tokenizer::emitCurrentTagToken(bool selfClosing, PRInt32 pos)
 {
   cstart = pos + 1;
   maybeErrSlashInEndTag(selfClosing);
   stateSave = NS_HTML5TOKENIZER_DATA;
   nsHtml5HtmlAttributes* attrs = (!attributes ? nsHtml5HtmlAttributes::EMPTY_ATTRIBUTES : attributes);
   if (endTag) {
     maybeErrAttributesOnEndTag(attrs);
-    tokenHandler->endTag(tagName);
+    if (!viewingXmlSource) {
+      tokenHandler->endTag(tagName);
+    }
     delete attributes;
   } else {
-    tokenHandler->startTag(tagName, attrs, selfClosing);
+    if (viewingXmlSource) {
+      delete attributes;
+    } else {
+      tokenHandler->startTag(tagName, attrs, selfClosing);
+    }
   }
   tagName->release();
   tagName = nsnull;
   resetAttributes();
   return stateSave;
 }
 
 void 
@@ -472,16 +485,20 @@ nsHtml5Tokenizer::stateLoop(PRInt32 stat
               state = NS_HTML5TOKENIZER_MARKUP_DECLARATION_OPEN;
               NS_HTML5_CONTINUE(stateloop);
             }
             case '/': {
               state = NS_HTML5TOKENIZER_CLOSE_TAG_OPEN;
               NS_HTML5_CONTINUE(stateloop);
             }
             case '\?': {
+              if (viewingXmlSource) {
+                state = NS_HTML5TOKENIZER_PROCESSING_INSTRUCTION;
+                NS_HTML5_CONTINUE(stateloop);
+              }
 
               clearLongStrBufAndAppend(c);
               state = NS_HTML5TOKENIZER_BOGUS_COMMENT;
               NS_HTML5_CONTINUE(stateloop);
             }
             case '>': {
 
               tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 2);
@@ -3265,16 +3282,50 @@ nsHtml5Tokenizer::stateLoop(PRInt32 stat
             }
             default: {
               appendLongStrBuf(c);
               continue;
             }
           }
         }
       }
+      case NS_HTML5TOKENIZER_PROCESSING_INSTRUCTION: {
+        for (; ; ) {
+          if (++pos == endPos) {
+            NS_HTML5_BREAK(stateloop);
+          }
+          c = checkChar(buf, pos);
+          switch(c) {
+            case '\?': {
+              state = NS_HTML5TOKENIZER_PROCESSING_INSTRUCTION_QUESTION_MARK;
+              NS_HTML5_BREAK(processinginstructionloop);
+            }
+            default: {
+              continue;
+            }
+          }
+        }
+        processinginstructionloop_end: ;
+      }
+      case NS_HTML5TOKENIZER_PROCESSING_INSTRUCTION_QUESTION_MARK: {
+        if (++pos == endPos) {
+          NS_HTML5_BREAK(stateloop);
+        }
+        c = checkChar(buf, pos);
+        switch(c) {
+          case '>': {
+            state = NS_HTML5TOKENIZER_DATA;
+            NS_HTML5_CONTINUE(stateloop);
+          }
+          default: {
+            state = NS_HTML5TOKENIZER_PROCESSING_INSTRUCTION;
+            NS_HTML5_CONTINUE(stateloop);
+          }
+        }
+      }
     }
   }
   stateloop_end: ;
   flushChars(buf, pos);
   stateSave = state;
   returnStateSave = returnState;
   return pos;
 }
@@ -3348,16 +3399,20 @@ nsHtml5Tokenizer::stateLoopReportTransit
               state = mViewSource->Transition(NS_HTML5TOKENIZER_MARKUP_DECLARATION_OPEN, reconsume, pos);
               NS_HTML5_CONTINUE(stateloop);
             }
             case '/': {
               state = mViewSource->Transition(NS_HTML5TOKENIZER_CLOSE_TAG_OPEN, reconsume, pos);
               NS_HTML5_CONTINUE(stateloop);
             }
             case '\?': {
+              if (viewingXmlSource) {
+                state = mViewSource->Transition(NS_HTML5TOKENIZER_PROCESSING_INSTRUCTION, reconsume, pos);
+                NS_HTML5_CONTINUE(stateloop);
+              }
               errProcessingInstruction();
               clearLongStrBufAndAppend(c);
               state = mViewSource->Transition(NS_HTML5TOKENIZER_BOGUS_COMMENT, reconsume, pos);
               NS_HTML5_CONTINUE(stateloop);
             }
             case '>': {
               errLtGt();
               tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 2);
@@ -6151,16 +6206,50 @@ nsHtml5Tokenizer::stateLoopReportTransit
             }
             default: {
               appendLongStrBuf(c);
               continue;
             }
           }
         }
       }
+      case NS_HTML5TOKENIZER_PROCESSING_INSTRUCTION: {
+        for (; ; ) {
+          if (++pos == endPos) {
+            NS_HTML5_BREAK(stateloop);
+          }
+          c = checkChar(buf, pos);
+          switch(c) {
+            case '\?': {
+              state = mViewSource->Transition(NS_HTML5TOKENIZER_PROCESSING_INSTRUCTION_QUESTION_MARK, reconsume, pos);
+              NS_HTML5_BREAK(processinginstructionloop);
+            }
+            default: {
+              continue;
+            }
+          }
+        }
+        processinginstructionloop_end: ;
+      }
+      case NS_HTML5TOKENIZER_PROCESSING_INSTRUCTION_QUESTION_MARK: {
+        if (++pos == endPos) {
+          NS_HTML5_BREAK(stateloop);
+        }
+        c = checkChar(buf, pos);
+        switch(c) {
+          case '>': {
+            state = mViewSource->Transition(NS_HTML5TOKENIZER_DATA, reconsume, pos);
+            NS_HTML5_CONTINUE(stateloop);
+          }
+          default: {
+            state = mViewSource->Transition(NS_HTML5TOKENIZER_PROCESSING_INSTRUCTION, reconsume, pos);
+            NS_HTML5_CONTINUE(stateloop);
+          }
+        }
+      }
     }
   }
   stateloop_end: ;
   flushChars(buf, pos);
   stateSave = state;
   returnStateSave = returnState;
   return pos;
 }
--- a/parser/html/nsHtml5Tokenizer.h
+++ b/parser/html/nsHtml5Tokenizer.h
@@ -132,20 +132,22 @@ class nsHtml5Tokenizer
     nsHtml5HtmlAttributes* attributes;
     PRInt32 mappingLangToXmlLang;
     bool shouldSuspend;
   protected:
     bool confident;
   private:
     PRInt32 line;
     nsHtml5AtomTable* interner;
+    bool viewingXmlSource;
   public:
-    nsHtml5Tokenizer(nsHtml5TreeBuilder* tokenHandler);
+    nsHtml5Tokenizer(nsHtml5TreeBuilder* tokenHandler, bool viewingXmlSource);
     void setInterner(nsHtml5AtomTable* interner);
     void initLocation(nsString* newPublicId, nsString* newSystemId);
+    bool isViewingXmlSource();
     void setStateAndEndTagExpectation(PRInt32 specialTokenizerState, nsIAtom* endTagExpectation);
     void setStateAndEndTagExpectation(PRInt32 specialTokenizerState, nsHtml5ElementName* endTagExpectation);
   private:
     void endTagExpectationToArray();
   public:
     void setLineNumber(PRInt32 line);
     inline PRInt32 getLineNumber()
     {
@@ -362,14 +364,16 @@ class nsHtml5Tokenizer
 #define NS_HTML5TOKENIZER_RAWTEXT_RCDATA_LESS_THAN_SIGN 65
 #define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN 66
 #define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPE_START 67
 #define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED 68
 #define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN 69
 #define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_DASH 70
 #define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH 71
 #define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPE_END 72
+#define NS_HTML5TOKENIZER_PROCESSING_INSTRUCTION 73
+#define NS_HTML5TOKENIZER_PROCESSING_INSTRUCTION_QUESTION_MARK 74
 #define NS_HTML5TOKENIZER_LEAD_OFFSET (0xD800 - (0x10000 >> 10))
 #define NS_HTML5TOKENIZER_BUFFER_GROW_BY 1024
 
 
 #endif
 
--- a/parser/html/nsHtml5TreeBuilder.cpp
+++ b/parser/html/nsHtml5TreeBuilder.cpp
@@ -106,16 +106,22 @@ nsHtml5TreeBuilder::startTokenization(ns
       tokenizer->setStateAndEndTagExpectation(NS_HTML5TOKENIZER_SCRIPT_DATA, contextName);
     } else {
       tokenizer->setStateAndEndTagExpectation(NS_HTML5TOKENIZER_DATA, contextName);
     }
     contextName = nsnull;
     contextNode = nsnull;
   } else {
     mode = NS_HTML5TREE_BUILDER_INITIAL;
+    if (tokenizer->isViewingXmlSource()) {
+      nsIContent** elt = createElement(kNameSpaceID_SVG, nsHtml5Atoms::svg, tokenizer->emptyAttributes());
+      nsHtml5StackNode* node = new nsHtml5StackNode(nsHtml5ElementName::ELT_SVG, nsHtml5Atoms::svg, elt);
+      currentPtr++;
+      stack[currentPtr] = node;
+    }
   }
 }
 
 void 
 nsHtml5TreeBuilder::doctype(nsIAtom* name, nsString* publicIdentifier, nsString* systemIdentifier, bool forceQuirks)
 {
   needToDropLF = false;
   if (!isInForeign()) {
@@ -171,16 +177,19 @@ nsHtml5TreeBuilder::comment(PRUnichar* b
   flushCharacters();
   appendComment(stack[currentPtr]->node, buf, start, length);
   return;
 }
 
 void 
 nsHtml5TreeBuilder::characters(const PRUnichar* buf, PRInt32 start, PRInt32 length)
 {
+  if (tokenizer->isViewingXmlSource()) {
+    return;
+  }
   if (needToDropLF) {
     needToDropLF = false;
     if (buf[start] == '\n') {
       start++;
       length--;
       if (!length) {
         return;
       }