parser/html/nsHtml5Highlighter.h
author Franziskus Kiefer <franziskuskiefer@gmail.com>
Fri, 01 Jun 2018 09:44:01 +0200
changeset 420853 4f9eec6361279d8657ffc4e6ef5c84e8f5d08c56
parent 408498 6af1f5ac596b4aa0a87f8a8151395c692eb81a58
child 448947 6f3709b3878117466168c40affa7bca0b60cf75b
permissions -rw-r--r--
Bug 1460617 - land NSS 8232a58332dd UPGRADE_NSS_RELEASE, r=me

/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef nsHtml5Highlighter_h
#define nsHtml5Highlighter_h

#include "nsCOMPtr.h"
#include "nsHtml5TreeOperation.h"
#include "nsHtml5UTF16Buffer.h"
#include "nsHtml5TreeOperation.h"
#include "nsAHtml5TreeOpSink.h"

/**
 * Length constant for a handle array.
 *
 * NOTE(review): presumably the number of nsIContent* slots in each array
 * held by nsHtml5Highlighter::mHandles -- the use site is not in this
 * header, so confirm against the implementation file.
 */
#define NS_HTML5_HIGHLIGHTER_HANDLE_ARRAY_LENGTH 512

/**
 * A state machine for generating HTML for display in View Source based on
 * the transitions the tokenizer makes on the source being viewed.
 *
 * The output is produced as tree operations that are queued in mOpQueue and
 * handed to the sink given to the constructor when FlushOps() is called.
 *
 * This header only declares the interface; the member functions and the
 * static strings are defined elsewhere.
 */
class nsHtml5Highlighter
{
public:
  /**
   * The constructor.
   *
   * @param aOpSink the sink for the tree ops generated by this highlighter
   */
  explicit nsHtml5Highlighter(nsAHtml5TreeOpSink* aOpSink);

  /**
   * The destructor.
   */
  ~nsHtml5Highlighter();

  /**
   * Starts the generated document.
   *
   * @param aTitle a title string; NOTE(review): presumably the title of the
   *        generated document -- confirm against the implementation
   */
  void Start(const nsAutoString& aTitle);

  /**
   * Report a tokenizer state transition.
   *
   * @param aState the state being transitioned to
   * @param aReconsume whether this is a reconsuming transition
   * @param aPos the tokenizer's current position into the buffer
   * @return a state value; NOTE(review): presumably the state the tokenizer
   *         should continue in -- confirm against the implementation
   */
  int32_t Transition(int32_t aState, bool aReconsume, int32_t aPos);

  /**
   * Report end of file.
   */
  void End();

  /**
   * Set the current buffer being tokenized.
   *
   * @param aBuffer the buffer being tokenized
   */
  void SetBuffer(nsHtml5UTF16Buffer* aBuffer);

  /**
   * Let go of the buffer being tokenized but first, flush text from it.
   *
   * @param aPos the first UTF-16 code unit not to flush
   */
  void DropBuffer(int32_t aPos);

  /**
   * Flush the tree ops into the sink.
   *
   * @return true if there were ops to flush
   */
  bool FlushOps();

  /**
   * Linkify the current attribute value if the attribute name is one of
   * known URL attributes. (When executing tree ops, javascript: URLs will
   * not be linkified, though.)
   *
   * @param aName the name of the attribute
   * @param aValue the value of the attribute
   */
  void MaybeLinkifyAttributeValue(nsHtml5AttributeName* aName,
                                  nsHtml5String aValue);

  /**
   * Inform the highlighter that the tokenizer successfully completed a
   * named character reference.
   */
  void CompletedNamedCharacterReference();

  /**
   * Adds an error annotation to the node that's currently on top of
   * mStack.
   *
   * @param aMsgId the id of the message in the property file
   */
  void AddErrorToCurrentNode(const char* aMsgId);

  /**
   * Adds an error annotation to the node that corresponds to the most
   * recently opened markup declaration/tag span, character reference or
   * run of text.
   *
   * @param aMsgId the id of the message in the property file
   */
  void AddErrorToCurrentRun(const char* aMsgId);

  /**
   * Adds an error annotation to the node that corresponds to the most
   * recently opened markup declaration/tag span, character reference or
   * run of text with one atom to use when formatting the message.
   *
   * @param aMsgId the id of the message in the property file
   * @param aName the atom
   */
  void AddErrorToCurrentRun(const char* aMsgId, nsAtom* aName);

  /**
   * Adds an error annotation to the node that corresponds to the most
   * recently opened markup declaration/tag span, character reference or
   * run of text with two atoms to use when formatting the message.
   *
   * @param aMsgId the id of the message in the property file
   * @param aName the first atom
   * @param aOther the second atom
   */
  void AddErrorToCurrentRun(const char* aMsgId, nsAtom* aName, nsAtom* aOther);

  /**
   * Adds an error annotation to the node that corresponds to the most
   * recent potentially character reference-starting ampersand.
   *
   * @param aMsgId the id of the message in the property file
   */
  void AddErrorToCurrentAmpersand(const char* aMsgId);

  /**
   * Adds an error annotation to the node that corresponds to the most
   * recent potentially self-closing slash.
   *
   * @param aMsgId the id of the message in the property file
   */
  void AddErrorToCurrentSlash(const char* aMsgId);

  /**
   * Enqueues a tree op for adding a base to the URLs that use the
   * view-source: scheme.
   *
   * @param aValue the base URL to add
   */
  void AddBase(nsHtml5String aValue);

private:
  /**
   * Starts a span with no class.
   */
  void StartSpan();

  /**
   * Starts a <span> and sets the class attribute on it.
   *
   * @param aClass the class to set (MUST be a static string that does not
   *        need to be released!)
   */
  void StartSpan(const char16_t* aClass);

  /**
   * End the current <span> or <a> in the highlighter output.
   */
  void EndSpanOrA();

  /**
   * Starts a wrapper around a run of characters.
   */
  void StartCharacters();

  /**
   * Ends a wrapper around a run of characters. NOTE(review): going by the
   * name, this also begins a markup run -- confirm against the
   * implementation.
   */
  void EndCharactersAndStartMarkupRun();

  /**
   * Starts an <a>.
   */
  void StartA();

  /**
   * Flushes characters up to but not including the current one.
   */
  void FlushChars();

  /**
   * Flushes characters up to and including the current one.
   */
  void FlushCurrent();

  /**
   * Finishes highlighting a tag in the input data by closing the open
   * <span> and <a> elements in the highlighter output and then starts
   * another <span> for highlighting any characters that may appear next.
   */
  void FinishTag();

  /**
   * Adds a class attribute to the current node.
   *
   * @param aClass the class to set (MUST be a static string that does not
   *        need to be released!)
   */
  void AddClass(const char16_t* aClass);

  /**
   * Allocates a handle for an element.
   *
   * See the documentation for nsHtml5TreeBuilder::AllocateContentHandle()
   * in nsHtml5TreeBuilderHSupplement.h.
   *
   * @return the handle
   */
  nsIContent** AllocateContentHandle();

  /**
   * Enqueues an element creation tree operation.
   *
   * @param aName the name of the element
   * @param aAttributes the attribute holder (ownership will be taken) or
   *        nullptr for no attributes
   * @param aIntendedParent the intended parent node for the created element
   * @param aCreator the content creator function
   * @return the handle for the element that will be created
   */
  nsIContent** CreateElement(nsAtom* aName,
                             nsHtml5HtmlAttributes* aAttributes,
                             nsIContent** aIntendedParent,
                             mozilla::dom::HTMLContentCreatorFunction aCreator);

  /**
   * Gets the handle for the current node. May be called only after the
   * root element has been set.
   *
   * @return the handle for the current node
   */
  nsIContent** CurrentNode();

  /**
   * Create an element and push it (its handle) on the stack.
   *
   * @param aName the name of the element
   * @param aAttributes the attribute holder (ownership will be taken) or
   *        nullptr for no attributes
   * @param aCreator the content creator function
   */
  void Push(nsAtom* aName,
            nsHtml5HtmlAttributes* aAttributes,
            mozilla::dom::HTMLContentCreatorFunction aCreator);

  /**
   * Pops the current node off the stack.
   */
  void Pop();

  /**
   * Appends text content to the current node.
   *
   * @param aBuffer the buffer to copy from
   * @param aStart the index of the first code unit to copy
   * @param aLength the number of code units to copy
   */
  void AppendCharacters(const char16_t* aBuffer,
                        int32_t aStart,
                        int32_t aLength);

  /**
   * Enqueues a tree op for adding an href attribute with the view-source:
   * URL scheme to the current node.
   *
   * @param aValue the (potentially relative) URL to link to
   */
  void AddViewSourceHref(nsHtml5String aValue);

  /**
   * The state we are transitioning away from.
   */
  int32_t mState;

  /**
   * The index of the first UTF-16 code unit in mBuffer that hasn't been
   * flushed yet.
   */
  int32_t mCStart;

  /**
   * The position of the code unit in mBuffer that caused the current
   * transition.
   */
  int32_t mPos;

  /**
   * The current line number.
   */
  int32_t mLineNumber;

  /**
   * The number of inline elements open inside the <pre> excluding the
   * span potentially wrapping a run of characters.
   */
  int32_t mInlinesOpen;

  /**
   * Whether there's a span wrapping a run of characters (excluding CDATA
   * section) open.
   */
  bool mInCharacters;

  /**
   * The current buffer being tokenized. Set through SetBuffer() and let go
   * of through DropBuffer().
   */
  nsHtml5UTF16Buffer* mBuffer;

  /**
   * The outgoing tree op queue.
   */
  nsTArray<nsHtml5TreeOperation> mOpQueue;

  /**
   * The tree op stage for the tree op executor.
   */
  nsAHtml5TreeOpSink* mOpSink;

  /**
   * The most recently opened markup declaration/tag or run of characters.
   */
  nsIContent** mCurrentRun;

  /**
   * The most recent ampersand in a place where character references were
   * allowed.
   */
  nsIContent** mAmpersand;

  /**
   * The most recent slash that might become a self-closing slash.
   */
  nsIContent** mSlash;

  /**
   * Memory for element handles.
   */
  mozilla::UniquePtr<nsIContent* []> mHandles;

  /**
   * Number of handles used in mHandles.
   */
  int32_t mHandlesUsed;

  /**
   * A holder for old contents of mHandles.
   */
  nsTArray<mozilla::UniquePtr<nsIContent* []>> mOldHandles;

  /**
   * The element stack.
   */
  nsTArray<nsIContent**> mStack;

  // The static strings below are declared here and defined elsewhere.
  // NOTE(review): presumably these are the static class-name strings passed
  // as aClass to StartSpan()/AddClass() -- confirm against the
  // implementation.

  /**
   * The string "comment"
   */
  static char16_t sComment[];

  /**
   * The string "cdata"
   */
  static char16_t sCdata[];

  /**
   * The string "start-tag"
   */
  static char16_t sStartTag[];

  /**
   * The string "attribute-name"
   */
  static char16_t sAttributeName[];

  /**
   * The string "attribute-value"
   */
  static char16_t sAttributeValue[];

  /**
   * The string "end-tag"
   */
  static char16_t sEndTag[];

  /**
   * The string "doctype"
   */
  static char16_t sDoctype[];

  /**
   * The string "entity"
   */
  static char16_t sEntity[];

  /**
   * The string "pi"
   */
  static char16_t sPi[];

  /**
   * Whether a base has already been seen once.
   */
  bool mSeenBase;
};

#endif // nsHtml5Highlighter_h