parser/html/nsHtml5Tokenizer.h
author Henri Sivonen <hsivonen@hsivonen.fi>
Wed, 05 Mar 2014 21:38:50 +0200
changeset 172305 aeef9a68e4a5042a2654cf823ef349bda7f2b1c2
parent 172301 e4c2bb65c694e39555ef6d2891e590f1dceefe45
child 259276 7b3c5dd7ad30fde4bf6dad4a850ebf90a6f2511e
permissions -rw-r--r--
Bug 959150 part 5 - Avoid reallocating the attribute holder when parsing with nsHtml5StringParser. r=smaug.

/*
 * Copyright (c) 2005-2007 Henri Sivonen
 * Copyright (c) 2007-2013 Mozilla Foundation
 * Portions of comments Copyright 2004-2010 Apple Computer, Inc., Mozilla 
 * Foundation, and Opera Software ASA.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a 
 * copy of this software and associated documentation files (the "Software"), 
 * to deal in the Software without restriction, including without limitation 
 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
 * and/or sell copies of the Software, and to permit persons to whom the 
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in 
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
 * DEALINGS IN THE SOFTWARE.
 */

/*
 * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
 * Please edit Tokenizer.java instead and regenerate.
 */

#ifndef nsHtml5Tokenizer_h
#define nsHtml5Tokenizer_h

#include "nsIAtom.h"
#include "nsHtml5AtomTable.h"
#include "nsString.h"
#include "nsIContent.h"
#include "nsTraceRefcnt.h"
#include "jArray.h"
#include "nsHtml5DocumentMode.h"
#include "nsHtml5ArrayCopy.h"
#include "nsHtml5NamedCharacters.h"
#include "nsHtml5NamedCharactersAccel.h"
#include "nsHtml5Atoms.h"
#include "nsAHtml5TreeBuilderState.h"
#include "nsHtml5Macros.h"
#include "nsHtml5Highlighter.h"
#include "nsHtml5TokenizerLoopPolicies.h"

class nsHtml5StreamParser;

class nsHtml5TreeBuilder;
class nsHtml5MetaScanner;
class nsHtml5AttributeName;
class nsHtml5ElementName;
class nsHtml5HtmlAttributes;
class nsHtml5UTF16Buffer;
class nsHtml5StateSnapshot;
class nsHtml5Portability;


class nsHtml5Tokenizer
{
  private:
    static char16_t LT_GT[];
    static char16_t LT_SOLIDUS[];
    static char16_t RSQB_RSQB[];
    static char16_t REPLACEMENT_CHARACTER[];
    static char16_t LF[];
    static char16_t CDATA_LSQB[];
    static char16_t OCTYPE[];
    static char16_t UBLIC[];
    static char16_t YSTEM[];
    static staticJArray<char16_t,int32_t> TITLE_ARR;
    static staticJArray<char16_t,int32_t> SCRIPT_ARR;
    static staticJArray<char16_t,int32_t> STYLE_ARR;
    static staticJArray<char16_t,int32_t> PLAINTEXT_ARR;
    static staticJArray<char16_t,int32_t> XMP_ARR;
    static staticJArray<char16_t,int32_t> TEXTAREA_ARR;
    static staticJArray<char16_t,int32_t> IFRAME_ARR;
    static staticJArray<char16_t,int32_t> NOEMBED_ARR;
    static staticJArray<char16_t,int32_t> NOSCRIPT_ARR;
    static staticJArray<char16_t,int32_t> NOFRAMES_ARR;
  protected:
    nsHtml5TreeBuilder* tokenHandler;
    nsHtml5StreamParser* encodingDeclarationHandler;
    bool lastCR;
    int32_t stateSave;
  private:
    int32_t returnStateSave;
  protected:
    int32_t index;
  private:
    bool forceQuirks;
    char16_t additional;
    int32_t entCol;
    int32_t firstCharKey;
    int32_t lo;
    int32_t hi;
    int32_t candidate;
    int32_t strBufMark;
    int32_t prevValue;
  protected:
    int32_t value;
  private:
    bool seenDigits;
  protected:
    int32_t cstart;
  private:
    nsString* publicId;
    nsString* systemId;
    autoJArray<char16_t,int32_t> strBuf;
    int32_t strBufLen;
    autoJArray<char16_t,int32_t> longStrBuf;
    int32_t longStrBufLen;
    autoJArray<char16_t,int32_t> bmpChar;
    autoJArray<char16_t,int32_t> astralChar;
  protected:
    nsHtml5ElementName* endTagExpectation;
  private:
    jArray<char16_t,int32_t> endTagExpectationAsArray;
  protected:
    bool endTag;
  private:
    nsHtml5ElementName* tagName;
  protected:
    nsHtml5AttributeName* attributeName;
  private:
    nsIAtom* doctypeName;
    nsString* publicIdentifier;
    nsString* systemIdentifier;
    nsHtml5HtmlAttributes* attributes;
    bool newAttributesEachTime;
    bool shouldSuspend;
  protected:
    bool confident;
  private:
    int32_t line;
    nsHtml5AtomTable* interner;
    bool viewingXmlSource;
  public:
    nsHtml5Tokenizer(nsHtml5TreeBuilder* tokenHandler, bool viewingXmlSource);
    void setInterner(nsHtml5AtomTable* interner);
    void initLocation(nsString* newPublicId, nsString* newSystemId);
    bool isViewingXmlSource();
    void setStateAndEndTagExpectation(int32_t specialTokenizerState, nsIAtom* endTagExpectation);
    void setStateAndEndTagExpectation(int32_t specialTokenizerState, nsHtml5ElementName* endTagExpectation);
  private:
    void endTagExpectationToArray();
  public:
    void setLineNumber(int32_t line);
    inline int32_t getLineNumber()
    {
      return line;
    }

    nsHtml5HtmlAttributes* emptyAttributes();
  private:
    inline void clearStrBufAndAppend(char16_t c)
    {
      strBuf[0] = c;
      strBufLen = 1;
    }

    inline void clearStrBuf()
    {
      strBufLen = 0;
    }

    void appendStrBuf(char16_t c);
  protected:
    nsString* strBufToString();
  private:
    void strBufToDoctypeName();
    void emitStrBuf();
    inline void clearLongStrBuf()
    {
      longStrBufLen = 0;
    }

    inline void clearLongStrBufAndAppend(char16_t c)
    {
      longStrBuf[0] = c;
      longStrBufLen = 1;
    }

    void appendLongStrBuf(char16_t c);
    inline void appendSecondHyphenToBogusComment()
    {
      appendLongStrBuf('-');
    }

    inline void adjustDoubleHyphenAndAppendToLongStrBufAndErr(char16_t c)
    {
      errConsecutiveHyphens();
      appendLongStrBuf(c);
    }

    void appendLongStrBuf(char16_t* buffer, int32_t offset, int32_t length);
    inline void appendStrBufToLongStrBuf()
    {
      appendLongStrBuf(strBuf, 0, strBufLen);
    }

    nsString* longStrBufToString();
    void emitComment(int32_t provisionalHyphens, int32_t pos);
  protected:
    void flushChars(char16_t* buf, int32_t pos);
  private:
    void strBufToElementNameString();
    int32_t emitCurrentTagToken(bool selfClosing, int32_t pos);
    void attributeNameComplete();
    void addAttributeWithoutValue();
    void addAttributeWithValue();
  public:
    void start();
    bool tokenizeBuffer(nsHtml5UTF16Buffer* buffer);
  private:
    template<class P> int32_t stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* buf, bool reconsume, int32_t returnState, int32_t endPos);
    void initDoctypeFields();
    inline void adjustDoubleHyphenAndAppendToLongStrBufCarriageReturn()
    {
      silentCarriageReturn();
      adjustDoubleHyphenAndAppendToLongStrBufAndErr('\n');
    }

    inline void adjustDoubleHyphenAndAppendToLongStrBufLineFeed()
    {
      silentLineFeed();
      adjustDoubleHyphenAndAppendToLongStrBufAndErr('\n');
    }

    inline void appendLongStrBufLineFeed()
    {
      silentLineFeed();
      appendLongStrBuf('\n');
    }

    inline void appendLongStrBufCarriageReturn()
    {
      silentCarriageReturn();
      appendLongStrBuf('\n');
    }

  protected:
    inline void silentCarriageReturn()
    {
      ++line;
      lastCR = true;
    }

    inline void silentLineFeed()
    {
      ++line;
    }

  private:
    void emitCarriageReturn(char16_t* buf, int32_t pos);
    void emitReplacementCharacter(char16_t* buf, int32_t pos);
    void emitPlaintextReplacementCharacter(char16_t* buf, int32_t pos);
    void setAdditionalAndRememberAmpersandLocation(char16_t add);
    void bogusDoctype();
    void bogusDoctypeWithoutQuirks();
    void emitOrAppendStrBuf(int32_t returnState);
    void handleNcrValue(int32_t returnState);
  public:
    void eof();
  private:
    void emitDoctypeToken(int32_t pos);
  protected:
    inline char16_t checkChar(char16_t* buf, int32_t pos)
    {
      return buf[pos];
    }

  public:
    bool internalEncodingDeclaration(nsString* internalCharset);
  private:
    void emitOrAppendTwo(const char16_t* val, int32_t returnState);
    void emitOrAppendOne(const char16_t* val, int32_t returnState);
  public:
    void end();
    void requestSuspension();
    bool isInDataState();
    void resetToDataState();
    void loadState(nsHtml5Tokenizer* other);
    void initializeWithoutStarting();
    void setEncodingDeclarationHandler(nsHtml5StreamParser* encodingDeclarationHandler);
    ~nsHtml5Tokenizer();
    static void initializeStatics();
    static void releaseStatics();

#include "nsHtml5TokenizerHSupplement.h"
};

#define NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK ~1
#define NS_HTML5TOKENIZER_DATA 0
#define NS_HTML5TOKENIZER_RCDATA 1
#define NS_HTML5TOKENIZER_SCRIPT_DATA 2
#define NS_HTML5TOKENIZER_RAWTEXT 3
#define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED 4
#define NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_DOUBLE_QUOTED 5
#define NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_SINGLE_QUOTED 6
#define NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_UNQUOTED 7
#define NS_HTML5TOKENIZER_PLAINTEXT 8
#define NS_HTML5TOKENIZER_TAG_OPEN 9
#define NS_HTML5TOKENIZER_CLOSE_TAG_OPEN 10
#define NS_HTML5TOKENIZER_TAG_NAME 11
#define NS_HTML5TOKENIZER_BEFORE_ATTRIBUTE_NAME 12
#define NS_HTML5TOKENIZER_ATTRIBUTE_NAME 13
#define NS_HTML5TOKENIZER_AFTER_ATTRIBUTE_NAME 14
#define NS_HTML5TOKENIZER_BEFORE_ATTRIBUTE_VALUE 15
#define NS_HTML5TOKENIZER_AFTER_ATTRIBUTE_VALUE_QUOTED 16
#define NS_HTML5TOKENIZER_BOGUS_COMMENT 17
#define NS_HTML5TOKENIZER_MARKUP_DECLARATION_OPEN 18
#define NS_HTML5TOKENIZER_DOCTYPE 19
#define NS_HTML5TOKENIZER_BEFORE_DOCTYPE_NAME 20
#define NS_HTML5TOKENIZER_DOCTYPE_NAME 21
#define NS_HTML5TOKENIZER_AFTER_DOCTYPE_NAME 22
#define NS_HTML5TOKENIZER_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER 23
#define NS_HTML5TOKENIZER_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED 24
#define NS_HTML5TOKENIZER_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED 25
#define NS_HTML5TOKENIZER_AFTER_DOCTYPE_PUBLIC_IDENTIFIER 26
#define NS_HTML5TOKENIZER_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER 27
#define NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED 28
#define NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED 29
#define NS_HTML5TOKENIZER_AFTER_DOCTYPE_SYSTEM_IDENTIFIER 30
#define NS_HTML5TOKENIZER_BOGUS_DOCTYPE 31
#define NS_HTML5TOKENIZER_COMMENT_START 32
#define NS_HTML5TOKENIZER_COMMENT_START_DASH 33
#define NS_HTML5TOKENIZER_COMMENT 34
#define NS_HTML5TOKENIZER_COMMENT_END_DASH 35
#define NS_HTML5TOKENIZER_COMMENT_END 36
#define NS_HTML5TOKENIZER_COMMENT_END_BANG 37
#define NS_HTML5TOKENIZER_NON_DATA_END_TAG_NAME 38
#define NS_HTML5TOKENIZER_MARKUP_DECLARATION_HYPHEN 39
#define NS_HTML5TOKENIZER_MARKUP_DECLARATION_OCTYPE 40
#define NS_HTML5TOKENIZER_DOCTYPE_UBLIC 41
#define NS_HTML5TOKENIZER_DOCTYPE_YSTEM 42
#define NS_HTML5TOKENIZER_AFTER_DOCTYPE_PUBLIC_KEYWORD 43
#define NS_HTML5TOKENIZER_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS 44
#define NS_HTML5TOKENIZER_AFTER_DOCTYPE_SYSTEM_KEYWORD 45
#define NS_HTML5TOKENIZER_CONSUME_CHARACTER_REFERENCE 46
#define NS_HTML5TOKENIZER_CONSUME_NCR 47
#define NS_HTML5TOKENIZER_CHARACTER_REFERENCE_TAIL 48
#define NS_HTML5TOKENIZER_HEX_NCR_LOOP 49
#define NS_HTML5TOKENIZER_DECIMAL_NRC_LOOP 50
#define NS_HTML5TOKENIZER_HANDLE_NCR_VALUE 51
#define NS_HTML5TOKENIZER_HANDLE_NCR_VALUE_RECONSUME 52
#define NS_HTML5TOKENIZER_CHARACTER_REFERENCE_HILO_LOOKUP 53
#define NS_HTML5TOKENIZER_SELF_CLOSING_START_TAG 54
#define NS_HTML5TOKENIZER_CDATA_START 55
#define NS_HTML5TOKENIZER_CDATA_SECTION 56
#define NS_HTML5TOKENIZER_CDATA_RSQB 57
#define NS_HTML5TOKENIZER_CDATA_RSQB_RSQB 58
#define NS_HTML5TOKENIZER_SCRIPT_DATA_LESS_THAN_SIGN 59
#define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPE_START 60
#define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPE_START_DASH 61
#define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_DASH 62
#define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_DASH_DASH 63
#define NS_HTML5TOKENIZER_BOGUS_COMMENT_HYPHEN 64
#define NS_HTML5TOKENIZER_RAWTEXT_RCDATA_LESS_THAN_SIGN 65
#define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN 66
#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPE_START 67
#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED 68
#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN 69
#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_DASH 70
#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH 71
#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPE_END 72
#define NS_HTML5TOKENIZER_PROCESSING_INSTRUCTION 73
#define NS_HTML5TOKENIZER_PROCESSING_INSTRUCTION_QUESTION_MARK 74
#define NS_HTML5TOKENIZER_LEAD_OFFSET (0xD800 - (0x10000 >> 10))
#define NS_HTML5TOKENIZER_BUFFER_GROW_BY 1024


#endif