parser/html/nsHtml5Highlighter.h
author Nicholas Nethercote <nnethercote@mozilla.com>
Tue, 03 Oct 2017 09:05:19 +1100
changeset 385076 67a8e12324569dd730347187e2ffccae486c758b
parent 374261 048ed01b19e02008b7f8161d70bbf2dbcab15be4
child 408498 6af1f5ac596b4aa0a87f8a8151395c692eb81a58
permissions -rw-r--r--
Bug 1400460 - Rename nsIAtom as nsAtom. r=hiro. (Path is actually r=froydnj.) Bug 1400459 devirtualized nsIAtom so that it is no longer a subclass of nsISupports. This means that nsAtom is now a better name for it than nsIAtom. MozReview-Commit-ID: 91U22X2NydP

/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef nsHtml5Highlighter_h
#define nsHtml5Highlighter_h

#include "nsCOMPtr.h"
#include "nsHtml5TreeOperation.h"
#include "nsHtml5UTF16Buffer.h"
#include "nsHtml5TreeOperation.h"
#include "nsAHtml5TreeOpSink.h"

/**
 * Length of a handle array used by the highlighter.
 *
 * NOTE(review): presumably the number of nsIContent* slots in each array
 * owned by nsHtml5Highlighter::mHandles / mOldHandles -- the header alone
 * does not show the use site, so confirm against the implementation file.
 */
#define NS_HTML5_HIGHLIGHTER_HANDLE_ARRAY_LENGTH 512

/**
 * A state machine for generating HTML for display in View Source based on
 * the transitions the tokenizer makes on the source being viewed.
 *
 * The generated output is expressed as tree operations that are queued in
 * mOpQueue and handed to the nsAHtml5TreeOpSink supplied at construction
 * when FlushOps() is called.
 */
class nsHtml5Highlighter
{
  public:
    /**
     * The constructor.
     *
     * @param aOpSink the sink for the tree ops generated by this highlighter
     */
    explicit nsHtml5Highlighter(nsAHtml5TreeOpSink* aOpSink);

    /**
     * The destructor.
     */
    ~nsHtml5Highlighter();

    /**
     * Starts the generated document.
     *
     * @param aTitle a string for the generated document (presumably its
     *        title, going by the parameter name -- confirm in the
     *        implementation)
     */
    void Start(const nsAutoString& aTitle);

    /**
     * Report a tokenizer state transition.
     *
     * @param aState the state being transitioned to
     * @param aReconsume whether this is a reconsuming transition
     * @param aPos the tokenizer's current position into the buffer
     * @return an int32_t whose meaning is not visible from this header
     *         (NOTE(review): presumably the state the tokenizer should
     *         continue in -- confirm against the implementation)
     */
    int32_t Transition(int32_t aState, bool aReconsume, int32_t aPos);

    /**
     * Report end of file.
     */
    void End();

    /**
     * Set the current buffer being tokenized.
     *
     * @param aBuffer the buffer being tokenized
     */
    void SetBuffer(nsHtml5UTF16Buffer* aBuffer);

    /**
     * Let go of the buffer being tokenized but first, flush text from it.
     *
     * @param aPos the first UTF-16 code unit not to flush
     */
    void DropBuffer(int32_t aPos);

    /**
     * Flush the tree ops into the sink.
     *
     * @return true if there were ops to flush
     */
    bool FlushOps();

    /**
     * Linkify the current attribute value if the attribute name is one of
     * known URL attributes. (When executing tree ops, javascript: URLs will
     * not be linkified, though.)
     *
     * @param aName the name of the attribute
     * @param aValue the value of the attribute
     */
    void MaybeLinkifyAttributeValue(nsHtml5AttributeName* aName,
                                    nsHtml5String aValue);

    /**
     * Inform the highlighter that the tokenizer successfully completed a
     * named character reference.
     */
    void CompletedNamedCharacterReference();

    /**
     * Adds an error annotation to the node that's currently on top of
     * mStack.
     *
     * @param aMsgId the id of the message in the property file
     */
    void AddErrorToCurrentNode(const char* aMsgId);

    /**
     * Adds an error annotation to the node that corresponds to the most
     * recently opened markup declaration/tag span, character reference or
     * run of text.
     *
     * @param aMsgId the id of the message in the property file
     */
    void AddErrorToCurrentRun(const char* aMsgId);

    /**
     * Adds an error annotation to the node that corresponds to the most
     * recently opened markup declaration/tag span, character reference or
     * run of text with one atom to use when formatting the message.
     *
     * @param aMsgId the id of the message in the property file
     * @param aName the atom
     */
    void AddErrorToCurrentRun(const char* aMsgId, nsAtom* aName);

    /**
     * Adds an error annotation to the node that corresponds to the most
     * recently opened markup declaration/tag span, character reference or
     * run of text with two atoms to use when formatting the message.
     *
     * @param aMsgId the id of the message in the property file
     * @param aName the first atom
     * @param aOther the second atom
     */
    void AddErrorToCurrentRun(const char* aMsgId,
                              nsAtom* aName,
                              nsAtom* aOther);

    /**
     * Adds an error annotation to the node that corresponds to the most
     * recent potentially character reference-starting ampersand.
     *
     * @param aMsgId the id of the message in the property file
     */
    void AddErrorToCurrentAmpersand(const char* aMsgId);

    /**
     * Adds an error annotation to the node that corresponds to the most
     * recent potentially self-closing slash.
     *
     * @param aMsgId the id of the message in the property file
     */
    void AddErrorToCurrentSlash(const char* aMsgId);

    /**
     * Enqueues a tree op for adding a base to the URLs that use the
     * view-source: URL scheme.
     *
     * @param aValue the base URL to add
     */
    void AddBase(nsHtml5String aValue);

  private:

    /**
     * Starts a span with no class.
     */
    void StartSpan();

    /**
     * Starts a <span> and sets the class attribute on it.
     *
     * @param aClass the class to set (MUST be a static string that does not
     *        need to be released!)
     */
    void StartSpan(const char16_t* aClass);

    /**
     * End the current <span> or <a> in the highlighter output.
     */
    void EndSpanOrA();

    /**
     * Starts a wrapper around a run of characters.
     */
    void StartCharacters();

    /**
     * Ends a wrapper around a run of characters and, as the method name
     * indicates, starts a markup run. (NOTE(review): the exact bookkeeping
     * done when starting the markup run is not visible from this header.)
     */
    void EndCharactersAndStartMarkupRun();

    /**
     * Starts an <a>.
     */
    void StartA();

    /**
     * Flushes characters up to but not including the current one.
     */
    void FlushChars();

    /**
     * Flushes characters up to and including the current one.
     */
    void FlushCurrent();

    /**
     * Finishes highlighting a tag in the input data by closing the open
     * <span> and <a> elements in the highlighter output and then starts
     * another <span> for potentially highlighting characters potentially
     * appearing next.
     */
    void FinishTag();

    /**
     * Adds a class attribute to the current node.
     *
     * @param aClass the class to set (MUST be a static string that does not
     *        need to be released!)
     */
    void AddClass(const char16_t* aClass);

    /**
     * Allocates a handle for an element.
     *
     * See the documentation for nsHtml5TreeBuilder::AllocateContentHandle()
     * in nsHtml5TreeBuilderHSupplement.h.
     *
     * @return the handle
     */
    nsIContent** AllocateContentHandle();

    /**
     * Enqueues an element creation tree operation.
     *
     * @param aName the name of the element
     * @param aAttributes the attribute holder (ownership will be taken) or
     *        nullptr for no attributes
     * @param aIntendedParent the intended parent node for the created element
     * @param aCreator the content creator function
     * @return the handle for the element that will be created
     */
    nsIContent** CreateElement(
      nsAtom* aName,
      nsHtml5HtmlAttributes* aAttributes,
      nsIContent** aIntendedParent,
      mozilla::dom::HTMLContentCreatorFunction aCreator);

    /**
     * Gets the handle for the current node. May be called only after the
     * root element has been set.
     *
     * @return the handle for the current node
     */
    nsIContent** CurrentNode();

    /**
     * Create an element and push it (its handle) on the stack.
     *
     * @param aName the name of the element
     * @param aAttributes the attribute holder (ownership will be taken) or
     *        nullptr for no attributes
     * @param aCreator the content creator function
     */
    void Push(nsAtom* aName,
              nsHtml5HtmlAttributes* aAttributes,
              mozilla::dom::HTMLContentCreatorFunction aCreator);

    /**
     * Pops the current node off the stack.
     */
    void Pop();

    /**
     * Appends text content to the current node.
     *
     * @param aBuffer the buffer to copy from
     * @param aStart the index of the first code unit to copy
     * @param aLength the number of code units to copy
     */
    void AppendCharacters(const char16_t* aBuffer,
                          int32_t aStart,
                          int32_t aLength);

    /**
     * Enqueues a tree op for adding an href attribute with the view-source:
     * URL scheme to the current node.
     *
     * @param aValue the (potentially relative) URL to link to
     */
    void AddViewSourceHref(nsHtml5String aValue);

    /**
     * The state we are transitioning away from.
     */
    int32_t mState;

    /**
     * The index of the first UTF-16 code unit in mBuffer that hasn't been
     * flushed yet.
     */
    int32_t mCStart;

    /**
     * The position of the code unit in mBuffer that caused the current
     * transition.
     */
    int32_t mPos;

    /**
     * The current line number.
     */
    int32_t mLineNumber;

    /**
     * The number of inline elements open inside the <pre> excluding the
     * span potentially wrapping a run of characters.
     */
    int32_t mInlinesOpen;

    /**
     * Whether there's a span wrapping a run of characters (excluding CDATA
     * section) open.
     */
    bool mInCharacters;

    /**
     * The current buffer being tokenized. Held as a raw pointer; see
     * SetBuffer() and DropBuffer() for how it is set and let go of.
     */
    nsHtml5UTF16Buffer* mBuffer;

    /**
     * The outgoing tree op queue.
     */
    nsTArray<nsHtml5TreeOperation> mOpQueue;

    /**
     * The tree op stage for the tree op executor.
     */
    nsAHtml5TreeOpSink* mOpSink;

    /**
     * The most recently opened markup declaration/tag or run of characters.
     */
    nsIContent** mCurrentRun;

    /**
     * The most recent ampersand in a place where character references were
     * allowed.
     */
    nsIContent** mAmpersand;

    /**
     * The most recent slash that might become a self-closing slash.
     */
    nsIContent** mSlash;

    /**
     * Memory for element handles.
     */
    mozilla::UniquePtr<nsIContent*[]> mHandles;

    /**
     * Number of handles used in mHandles.
     */
    int32_t mHandlesUsed;

    /**
     * A holder for old contents of mHandles.
     */
    nsTArray<mozilla::UniquePtr<nsIContent*[]>> mOldHandles;

    /**
     * The element stack.
     */
    nsTArray<nsIContent**> mStack;

    // The static strings below are declared here and defined elsewhere.
    // NOTE(review): presumably these are the static class-name strings
    // passed as aClass to StartSpan()/AddClass() -- confirm in the
    // implementation.

    /**
     * The string "comment"
     */
    static char16_t sComment[];

    /**
     * The string "cdata"
     */
    static char16_t sCdata[];

    /**
     * The string "start-tag"
     */
    static char16_t sStartTag[];

    /**
     * The string "attribute-name"
     */
    static char16_t sAttributeName[];

    /**
     * The string "attribute-value"
     */
    static char16_t sAttributeValue[];

    /**
     * The string "end-tag"
     */
    static char16_t sEndTag[];

    /**
     * The string "doctype"
     */
    static char16_t sDoctype[];

    /**
     * The string "entity"
     */
    static char16_t sEntity[];

    /**
     * The string "pi"
     */
    static char16_t sPi[];

    /**
     * Whether a base has already been seen once.
     */
     bool mSeenBase;
};

#endif // nsHtml5Highlighter_h