parser/html/nsHtml5Tokenizer.h
author Franziskus Kiefer <franziskuskiefer@gmail.com>
Fri, 01 Jun 2018 09:44:01 +0200
changeset 420853 4f9eec6361279d8657ffc4e6ef5c84e8f5d08c56
parent 408498 6af1f5ac596b4aa0a87f8a8151395c692eb81a58
child 441925 0ff0b54b9ec7c3e99acf8987b63995dbdce3e64a
permissions -rw-r--r--
Bug 1460617 - land NSS 8232a58332dd UPGRADE_NSS_RELEASE, r=me

/*
 * Copyright (c) 2005-2007 Henri Sivonen
 * Copyright (c) 2007-2015 Mozilla Foundation
 * Portions of comments Copyright 2004-2010 Apple Computer, Inc., Mozilla
 * Foundation, and Opera Software ASA.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/*
 * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
 * Please edit Tokenizer.java instead and regenerate.
 */

#ifndef nsHtml5Tokenizer_h
#define nsHtml5Tokenizer_h

#include "nsAtom.h"
#include "nsHtml5AtomTable.h"
#include "nsHtml5String.h"
#include "nsIContent.h"
#include "nsTraceRefcnt.h"
#include "jArray.h"
#include "nsHtml5DocumentMode.h"
#include "nsHtml5ArrayCopy.h"
#include "nsHtml5NamedCharacters.h"
#include "nsHtml5NamedCharactersAccel.h"
#include "nsGkAtoms.h"
#include "nsAHtml5TreeBuilderState.h"
#include "nsHtml5Macros.h"
#include "nsHtml5Highlighter.h"
#include "nsHtml5TokenizerLoopPolicies.h"

class nsHtml5StreamParser;

class nsHtml5AttributeName;
class nsHtml5ElementName;
class nsHtml5TreeBuilder;
class nsHtml5MetaScanner;
class nsHtml5UTF16Buffer;
class nsHtml5StateSnapshot;
class nsHtml5Portability;

/**
 * Declaration of the HTML5 tokenizer class.
 *
 * This header is generated (see the notice at the top of the file), so only
 * the inline helpers have bodies here; every other member is defined in
 * another translation unit. The class holds a pointer to an
 * nsHtml5TreeBuilder (tokenHandler), two growable character buffers
 * (strBuf and charRefBuf) with explicit length counters, and a line counter.
 */
class nsHtml5Tokenizer
{
private:
  // ~1 clears only the lowest bit. Among the state values declared below,
  // DATA (0) and RCDATA (1) are the only ones that become 0 under this mask.
  static const int32_t DATA_AND_RCDATA_MASK = ~1;

public:
  // Integer identifiers named after tokenizer states. The values are
  // consecutive, from DATA = 0 through
  // PROCESSING_INSTRUCTION_QUESTION_MARK = 74.
  // NOTE(review): presumably these are the values carried by the
  // `state` / `returnState` / `specialTokenizerState` parameters declared
  // further down -- confirm against the .cpp.
  static const int32_t DATA = 0;

  static const int32_t RCDATA = 1;

  static const int32_t SCRIPT_DATA = 2;

  static const int32_t RAWTEXT = 3;

  static const int32_t SCRIPT_DATA_ESCAPED = 4;

  static const int32_t ATTRIBUTE_VALUE_DOUBLE_QUOTED = 5;

  static const int32_t ATTRIBUTE_VALUE_SINGLE_QUOTED = 6;

  static const int32_t ATTRIBUTE_VALUE_UNQUOTED = 7;

  static const int32_t PLAINTEXT = 8;

  static const int32_t TAG_OPEN = 9;

  static const int32_t CLOSE_TAG_OPEN = 10;

  static const int32_t TAG_NAME = 11;

  static const int32_t BEFORE_ATTRIBUTE_NAME = 12;

  static const int32_t ATTRIBUTE_NAME = 13;

  static const int32_t AFTER_ATTRIBUTE_NAME = 14;

  static const int32_t BEFORE_ATTRIBUTE_VALUE = 15;

  static const int32_t AFTER_ATTRIBUTE_VALUE_QUOTED = 16;

  static const int32_t BOGUS_COMMENT = 17;

  static const int32_t MARKUP_DECLARATION_OPEN = 18;

  static const int32_t DOCTYPE = 19;

  static const int32_t BEFORE_DOCTYPE_NAME = 20;

  static const int32_t DOCTYPE_NAME = 21;

  static const int32_t AFTER_DOCTYPE_NAME = 22;

  static const int32_t BEFORE_DOCTYPE_PUBLIC_IDENTIFIER = 23;

  static const int32_t DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED = 24;

  static const int32_t DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED = 25;

  static const int32_t AFTER_DOCTYPE_PUBLIC_IDENTIFIER = 26;

  static const int32_t BEFORE_DOCTYPE_SYSTEM_IDENTIFIER = 27;

  static const int32_t DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED = 28;

  static const int32_t DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED = 29;

  static const int32_t AFTER_DOCTYPE_SYSTEM_IDENTIFIER = 30;

  static const int32_t BOGUS_DOCTYPE = 31;

  static const int32_t COMMENT_START = 32;

  static const int32_t COMMENT_START_DASH = 33;

  static const int32_t COMMENT = 34;

  static const int32_t COMMENT_END_DASH = 35;

  static const int32_t COMMENT_END = 36;

  static const int32_t COMMENT_END_BANG = 37;

  static const int32_t NON_DATA_END_TAG_NAME = 38;

  static const int32_t MARKUP_DECLARATION_HYPHEN = 39;

  // The three names below drop the first letter of DOCTYPE / PUBLIC /
  // SYSTEM, mirroring the OCTYPE, UBLIC and YSTEM arrays declared later.
  // NOTE(review): presumably each state matches the remainder of the
  // keyword after its first letter has been consumed -- confirm in the .cpp.
  static const int32_t MARKUP_DECLARATION_OCTYPE = 40;

  static const int32_t DOCTYPE_UBLIC = 41;

  static const int32_t DOCTYPE_YSTEM = 42;

  static const int32_t AFTER_DOCTYPE_PUBLIC_KEYWORD = 43;

  static const int32_t BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS = 44;

  static const int32_t AFTER_DOCTYPE_SYSTEM_KEYWORD = 45;

  static const int32_t CONSUME_CHARACTER_REFERENCE = 46;

  static const int32_t CONSUME_NCR = 47;

  static const int32_t CHARACTER_REFERENCE_TAIL = 48;

  static const int32_t HEX_NCR_LOOP = 49;

  // NOTE(review): "NRC" looks like a transposition of "NCR" (compare
  // HEX_NCR_LOOP above). The name is public, so it is left unchanged here.
  static const int32_t DECIMAL_NRC_LOOP = 50;

  static const int32_t HANDLE_NCR_VALUE = 51;

  static const int32_t HANDLE_NCR_VALUE_RECONSUME = 52;

  static const int32_t CHARACTER_REFERENCE_HILO_LOOKUP = 53;

  static const int32_t SELF_CLOSING_START_TAG = 54;

  static const int32_t CDATA_START = 55;

  static const int32_t CDATA_SECTION = 56;

  static const int32_t CDATA_RSQB = 57;

  static const int32_t CDATA_RSQB_RSQB = 58;

  static const int32_t SCRIPT_DATA_LESS_THAN_SIGN = 59;

  static const int32_t SCRIPT_DATA_ESCAPE_START = 60;

  static const int32_t SCRIPT_DATA_ESCAPE_START_DASH = 61;

  static const int32_t SCRIPT_DATA_ESCAPED_DASH = 62;

  static const int32_t SCRIPT_DATA_ESCAPED_DASH_DASH = 63;

  static const int32_t BOGUS_COMMENT_HYPHEN = 64;

  static const int32_t RAWTEXT_RCDATA_LESS_THAN_SIGN = 65;

  static const int32_t SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN = 66;

  static const int32_t SCRIPT_DATA_DOUBLE_ESCAPE_START = 67;

  static const int32_t SCRIPT_DATA_DOUBLE_ESCAPED = 68;

  static const int32_t SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN = 69;

  static const int32_t SCRIPT_DATA_DOUBLE_ESCAPED_DASH = 70;

  static const int32_t SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH = 71;

  static const int32_t SCRIPT_DATA_DOUBLE_ESCAPE_END = 72;

  static const int32_t PROCESSING_INSTRUCTION = 73;

  static const int32_t PROCESSING_INSTRUCTION_QUESTION_MARK = 74;

private:
  // Evaluates to 0xD800 - 0x40 = 0xD7C0.
  // NOTE(review): presumably the UTF-16 lead surrogate for an astral code
  // point cp is computed as LEAD_OFFSET + (cp >> 10) -- confirm in the .cpp.
  static const int32_t LEAD_OFFSET = (0xD800 - (0x10000 >> 10));

  // Static character arrays. They are only declared here, so their sizes and
  // contents come from definitions in another translation unit.
  static char16_t LT_GT[];
  static char16_t LT_SOLIDUS[];
  static char16_t RSQB_RSQB[];
  static char16_t REPLACEMENT_CHARACTER[];
  static char16_t LF[];
  static char16_t CDATA_LSQB[];
  static char16_t OCTYPE[];
  static char16_t UBLIC[];
  static char16_t YSTEM[];
  // Static arrays named after HTML elements.
  // NOTE(review): presumably filled in by initializeStatics() and released
  // by releaseStatics() (both declared below) -- confirm in the .cpp.
  static staticJArray<char16_t, int32_t> TITLE_ARR;
  static staticJArray<char16_t, int32_t> SCRIPT_ARR;
  static staticJArray<char16_t, int32_t> STYLE_ARR;
  static staticJArray<char16_t, int32_t> PLAINTEXT_ARR;
  static staticJArray<char16_t, int32_t> XMP_ARR;
  static staticJArray<char16_t, int32_t> TEXTAREA_ARR;
  static staticJArray<char16_t, int32_t> IFRAME_ARR;
  static staticJArray<char16_t, int32_t> NOEMBED_ARR;
  static staticJArray<char16_t, int32_t> NOSCRIPT_ARR;
  static staticJArray<char16_t, int32_t> NOFRAMES_ARR;

  // The access specifier alternates between the data members below.
  // Declaration order determines object layout and initialisation order, so
  // the members must not be regrouped by access level.
protected:
  nsHtml5TreeBuilder* tokenHandler;
  nsHtml5StreamParser* encodingDeclarationHandler;
  // Set to true by silentCarriageReturn().
  bool lastCR;
  int32_t stateSave;

private:
  int32_t returnStateSave;

protected:
  int32_t index;

private:
  bool forceQuirks;
  char16_t additional;
  int32_t entCol;
  int32_t firstCharKey;
  int32_t lo;
  int32_t hi;
  int32_t candidate;
  int32_t charRefBufMark;

protected:
  int32_t value;

private:
  bool seenDigits;

protected:
  int32_t cstart;

private:
  nsHtml5String publicId;
  nsHtml5String systemId;
  // Character buffer written by appendStrBuf(); strBufLen is the index of
  // the next free slot (i.e. the number of characters currently stored).
  autoJArray<char16_t, int32_t> strBuf;
  int32_t strBufLen;
  // Character buffer written by appendCharRefBuf(); charRefBufLen is the
  // index of the next free slot.
  autoJArray<char16_t, int32_t> charRefBuf;
  int32_t charRefBufLen;
  autoJArray<char16_t, int32_t> bmpChar;
  autoJArray<char16_t, int32_t> astralChar;

protected:
  nsHtml5ElementName* endTagExpectation;

private:
  jArray<char16_t, int32_t> endTagExpectationAsArray;

protected:
  bool endTag;

private:
  bool containsHyphen;
  nsHtml5ElementName* tagName;
  nsHtml5ElementName* nonInternedTagName;

protected:
  nsHtml5AttributeName* attributeName;

private:
  nsHtml5AttributeName* nonInternedAttributeName;
  nsAtom* doctypeName;
  nsHtml5String publicIdentifier;
  nsHtml5String systemIdentifier;
  nsHtml5HtmlAttributes* attributes;
  bool newAttributesEachTime;
  bool shouldSuspend;

protected:
  bool confident;

private:
  // Line counter: incremented by silentCarriageReturn() and
  // silentLineFeed(), returned by getLineNumber().
  int32_t line;
  int32_t attributeLine;
  nsHtml5AtomTable* interner;
  bool viewingXmlSource;

public:
  nsHtml5Tokenizer(nsHtml5TreeBuilder* tokenHandler, bool viewingXmlSource);
  void setInterner(nsHtml5AtomTable* interner);
  void initLocation(nsHtml5String newPublicId, nsHtml5String newSystemId);
  bool isViewingXmlSource();
  // Two overloads that differ only in how the expected end tag is
  // identified: by atom or by element-name object.
  void setStateAndEndTagExpectation(int32_t specialTokenizerState,
                                    nsAtom* endTagExpectation);
  void setStateAndEndTagExpectation(int32_t specialTokenizerState,
                                    nsHtml5ElementName* endTagExpectation);

private:
  void endTagExpectationToArray();

public:
  void setLineNumber(int32_t line);
  // Returns the current value of the `line` counter.
  inline int32_t getLineNumber() { return line; }

  nsHtml5HtmlAttributes* emptyAttributes();

private:
  // Appends `c` to charRefBuf and advances charRefBufLen.
  // Unlike appendStrBuf(), this helper never grows the buffer: a full
  // buffer trips MOZ_RELEASE_ASSERT instead of writing out of bounds.
  inline void appendCharRefBuf(char16_t c)
  {
    MOZ_RELEASE_ASSERT(charRefBufLen < charRefBuf.length,
                       "Attempted to overrun charRefBuf!");
    charRefBuf[charRefBufLen++] = c;
  }

  void emitOrAppendCharRefBuf(int32_t returnState);
  // Marks strBuf as empty by resetting its length counter; the stored
  // characters themselves are not touched.
  inline void clearStrBufAfterUse() { strBufLen = 0; }

  // Asserts that strBuf was already emptied by its previous user, then
  // resets the length counter anyway.
  inline void clearStrBufBeforeUse()
  {
    MOZ_ASSERT(!strBufLen, "strBufLen not reset after previous use!");
    strBufLen = 0;
  }

  // Empties strBuf, asserting that it holds exactly one character and that
  // the character is '-'.
  inline void clearStrBufAfterOneHyphen()
  {
    MOZ_ASSERT(strBufLen == 1, "strBufLen length not one!");
    MOZ_ASSERT(strBuf[0] == '-', "strBuf does not start with a hyphen!");
    strBufLen = 0;
  }

  // Appends `c` to strBuf and advances strBufLen.
  // The MOZ_ASSERT states the expected invariant: callers have already
  // reserved room. The check that follows still handles a full buffer by
  // requesting one more slot and crashing if that request fails, so the
  // store below never runs on a full buffer.
  // NOTE(review): EnsureBufferSpace is not declared in the visible part of
  // this class; presumably it comes from nsHtml5TokenizerHSupplement.h,
  // which is included at the end of the class body -- confirm.
  inline void appendStrBuf(char16_t c)
  {
    MOZ_ASSERT(strBufLen < strBuf.length,
               "Previous buffer length insufficient.");
    if (MOZ_UNLIKELY(strBufLen == strBuf.length)) {
      if (MOZ_UNLIKELY(!EnsureBufferSpace(1))) {
        MOZ_CRASH("Unable to recover from buffer reallocation failure");
      }
    }
    strBuf[strBufLen++] = c;
  }

protected:
  nsHtml5String strBufToString();

private:
  void strBufToDoctypeName();
  void emitStrBuf();
  // Appends a single '-' to strBuf.
  inline void appendSecondHyphenToBogusComment() { appendStrBuf('-'); }

  // Calls errConsecutiveHyphens() and then appends `c` to strBuf.
  // NOTE(review): errConsecutiveHyphens is not declared in the visible part
  // of this class; presumably it comes from the supplement header -- confirm.
  inline void adjustDoubleHyphenAndAppendToStrBufAndErr(char16_t c)
  {
    errConsecutiveHyphens();
    appendStrBuf(c);
  }

  void appendStrBuf(char16_t* buffer, int32_t offset, int32_t length);
  // Hands charRefBuf, offset 0 and charRefBufLen to the buffer overload of
  // appendStrBuf(), then marks charRefBuf as empty.
  inline void appendCharRefBufToStrBuf()
  {
    appendStrBuf(charRefBuf, 0, charRefBufLen);
    charRefBufLen = 0;
  }

  void emitComment(int32_t provisionalHyphens, int32_t pos);

protected:
  void flushChars(char16_t* buf, int32_t pos);

private:
  void strBufToElementNameString();
  int32_t emitCurrentTagToken(bool selfClosing, int32_t pos);
  void attributeNameComplete();
  void addAttributeWithoutValue();
  void addAttributeWithValue();

public:
  void start();
  bool tokenizeBuffer(nsHtml5UTF16Buffer* buffer);

private:
  // Template member parameterised on a type P.
  // NOTE(review): presumably P is one of the policies from
  // nsHtml5TokenizerLoopPolicies.h, which this header includes -- confirm.
  template<class P>
  int32_t stateLoop(int32_t state,
                    char16_t c,
                    int32_t pos,
                    char16_t* buf,
                    bool reconsume,
                    int32_t returnState,
                    int32_t endPos);
  void initDoctypeFields();
  // Records a carriage return (line count and lastCR flag), then reports
  // the consecutive-hyphen error and appends '\n' -- not '\r' -- to strBuf.
  inline void adjustDoubleHyphenAndAppendToStrBufCarriageReturn()
  {
    silentCarriageReturn();
    adjustDoubleHyphenAndAppendToStrBufAndErr('\n');
  }

  // Counts a line feed, then reports the consecutive-hyphen error and
  // appends '\n' to strBuf.
  inline void adjustDoubleHyphenAndAppendToStrBufLineFeed()
  {
    silentLineFeed();
    adjustDoubleHyphenAndAppendToStrBufAndErr('\n');
  }

  // Counts a line feed and appends '\n' to strBuf.
  inline void appendStrBufLineFeed()
  {
    silentLineFeed();
    appendStrBuf('\n');
  }

  // Records a carriage return and appends '\n' -- not '\r' -- to strBuf.
  inline void appendStrBufCarriageReturn()
  {
    silentCarriageReturn();
    appendStrBuf('\n');
  }

protected:
  // Increments the line counter and sets lastCR.
  // NOTE(review): presumably lastCR lets a following line feed be treated
  // as part of the same line break -- confirm in the .cpp.
  inline void silentCarriageReturn()
  {
    ++line;
    lastCR = true;
  }

  // Increments the line counter; lastCR is left unchanged.
  inline void silentLineFeed() { ++line; }

private:
  void emitCarriageReturn(char16_t* buf, int32_t pos);
  void emitReplacementCharacter(char16_t* buf, int32_t pos);
  void emitPlaintextReplacementCharacter(char16_t* buf, int32_t pos);
  void setAdditionalAndRememberAmpersandLocation(char16_t add);
  void bogusDoctype();
  void bogusDoctypeWithoutQuirks();
  void handleNcrValue(int32_t returnState);

public:
  void eof();

private:
  void emitDoctypeToken(int32_t pos);

protected:
  // Returns buf[pos]. No bounds check is performed here, so the caller must
  // pass a valid index.
  inline char16_t checkChar(char16_t* buf, int32_t pos) { return buf[pos]; }

public:
  bool internalEncodingDeclaration(nsHtml5String internalCharset);

private:
  void emitOrAppendTwo(const char16_t* val, int32_t returnState);
  void emitOrAppendOne(const char16_t* val, int32_t returnState);

public:
  void end();
  void requestSuspension();
  bool isInDataState();
  void resetToDataState();
  void loadState(nsHtml5Tokenizer* other);
  void initializeWithoutStarting();
  void setEncodingDeclarationHandler(
    nsHtml5StreamParser* encodingDeclarationHandler);
  ~nsHtml5Tokenizer();
  static void initializeStatics();
  static void releaseStatics();

  // Textually splices the supplement header into the class body, so any
  // declarations it contains become members of nsHtml5Tokenizer with the
  // access level in effect here (public).
#include "nsHtml5TokenizerHSupplement.h"
};

#endif