parser/xml/nsSAXXMLReader.cpp
author Florian Queze <florian@queze.net>
Fri, 14 Apr 2017 18:29:12 +0200
changeset 353355 cd981920d0ef8adf66e504b718a4208dc03c7a4c
parent 348941 ed03003277e0cfb0cc09c7b9710f852fcec235e0
child 364546 6e0dd372f23d2f0856ae6c98118cdcf59abd5617
permissions -rw-r--r--
Bug 1355161 - script-generated patch to replace .{currentThread,mainThread}.dispatch(..., Ci.nsIThread.DISPATCH_NORMAL) with .dispatchToMainThread(...), r=froydnj.

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "nsIInputStream.h"
#include "nsNetCID.h"
#include "nsNetUtil.h"
#include "NullPrincipal.h"
#include "nsIParser.h"
#include "nsParserCIID.h"
#include "nsStreamUtils.h"
#include "nsStringStream.h"
#include "nsIScriptError.h"
#include "nsSAXAttributes.h"
#include "nsSAXLocator.h"
#include "nsSAXXMLReader.h"
#include "nsCharsetSource.h"

#include "mozilla/dom/EncodingUtils.h"

using mozilla::dom::EncodingUtils;

#define XMLNS_URI "http://www.w3.org/2000/xmlns/"

static NS_DEFINE_CID(kParserCID, NS_PARSER_CID);

NS_IMPL_CYCLE_COLLECTION(nsSAXXMLReader,
                         mContentHandler,
                         mDTDHandler,
                         mErrorHandler,
                         mLexicalHandler,
                         mDeclarationHandler,
                         mBaseURI,
                         mListener,
                         mParserObserver)
NS_IMPL_CYCLE_COLLECTING_ADDREF(nsSAXXMLReader)
NS_IMPL_CYCLE_COLLECTING_RELEASE(nsSAXXMLReader)
NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsSAXXMLReader)
  NS_INTERFACE_MAP_ENTRY(nsISAXXMLReader)
  NS_INTERFACE_MAP_ENTRY(nsIExpatSink)
  NS_INTERFACE_MAP_ENTRY(nsIExtendedExpatSink)
  NS_INTERFACE_MAP_ENTRY(nsIContentSink)
  NS_INTERFACE_MAP_ENTRY(nsIRequestObserver)
  NS_INTERFACE_MAP_ENTRY(nsIStreamListener)
  NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISAXXMLReader)
NS_INTERFACE_MAP_END

nsSAXXMLReader::nsSAXXMLReader() :
    mIsAsyncParse(false),
    mEnableNamespacePrefixes(false)
{
}

// nsIContentSink
NS_IMETHODIMP
nsSAXXMLReader::WillBuildModel(nsDTDMode)
{
  if (mContentHandler)
    return mContentHandler->StartDocument();

  return NS_OK;
}

NS_IMETHODIMP
nsSAXXMLReader::DidBuildModel(bool aTerminated)
{
  if (mContentHandler)
    return mContentHandler->EndDocument();

  return NS_OK;
}

NS_IMETHODIMP
nsSAXXMLReader::SetParser(nsParserBase *aParser)
{
  return NS_OK;
}

// nsIExtendedExpatSink
NS_IMETHODIMP
nsSAXXMLReader::HandleStartElement(const char16_t *aName,
                                   const char16_t **aAtts,
                                   uint32_t aAttsCount,
                                   uint32_t aLineNumber)
{
  if (!mContentHandler)
    return NS_OK;

  RefPtr<nsSAXAttributes> atts = new nsSAXAttributes();
  if (!atts)
    return NS_ERROR_OUT_OF_MEMORY;
  nsAutoString uri, localName, qName;
  for (; *aAtts; aAtts += 2) {
    SplitExpatName(aAtts[0], uri, localName, qName);
    // XXX don't have attr type information
    NS_NAMED_LITERAL_STRING(cdataType, "CDATA");
    // could support xmlns reporting, it's a standard SAX feature
    if (mEnableNamespacePrefixes || !uri.EqualsLiteral(XMLNS_URI)) {
      NS_ASSERTION(aAtts[1], "null passed to handler");
      atts->AddAttribute(uri, localName, qName, cdataType,
                         nsDependentString(aAtts[1]));
    }
  }

  // Deal with the element name
  SplitExpatName(aName, uri, localName, qName);
  return mContentHandler->StartElement(uri, localName, qName, atts);
}

NS_IMETHODIMP
nsSAXXMLReader::HandleEndElement(const char16_t *aName)
{
  if (mContentHandler) {
    nsAutoString uri, localName, qName;
    SplitExpatName(aName, uri, localName, qName);
    return mContentHandler->EndElement(uri, localName, qName);
  }
  return NS_OK;
}

NS_IMETHODIMP
nsSAXXMLReader::HandleComment(const char16_t *aName)
{
  NS_ASSERTION(aName, "null passed to handler");
  if (mLexicalHandler)
    return mLexicalHandler->Comment(nsDependentString(aName));
 
  return NS_OK;
}

NS_IMETHODIMP
nsSAXXMLReader::HandleCDataSection(const char16_t *aData,
                                   uint32_t aLength)
{
  nsresult rv;
  if (mLexicalHandler) {
    rv = mLexicalHandler->StartCDATA();
    NS_ENSURE_SUCCESS(rv, rv);
  }

  if (mContentHandler) {
    rv = mContentHandler->Characters(Substring(aData, aData+aLength));
    NS_ENSURE_SUCCESS(rv, rv);
  }

  if (mLexicalHandler) {
    rv = mLexicalHandler->EndCDATA();
    NS_ENSURE_SUCCESS(rv, rv);
  }

  return NS_OK;
}

NS_IMETHODIMP
nsSAXXMLReader::HandleStartDTD(const char16_t *aName,
                               const char16_t *aSystemId,
                               const char16_t *aPublicId)
{
  char16_t nullChar = char16_t(0);
  if (!aName)
    aName = &nullChar;
  if (!aSystemId)
    aSystemId = &nullChar;
  if (!aPublicId)
    aPublicId = &nullChar;

  mSystemId = aSystemId;
  mPublicId = aPublicId;
  if (mLexicalHandler) {
    return mLexicalHandler->StartDTD(nsDependentString(aName),
                                     nsDependentString(aPublicId),
                                     nsDependentString(aSystemId));
  }

  return NS_OK;
}

NS_IMETHODIMP
nsSAXXMLReader::HandleDoctypeDecl(const nsAString & aSubset,
                                  const nsAString & aName,
                                  const nsAString & aSystemId,
                                  const nsAString & aPublicId,
                                  nsISupports* aCatalogData)
{
  if (mLexicalHandler)
    return mLexicalHandler->EndDTD();

  return NS_OK;
}

NS_IMETHODIMP
nsSAXXMLReader::HandleCharacterData(const char16_t *aData,
                                    uint32_t aLength)
{
  if (mContentHandler)
    return mContentHandler->Characters(Substring(aData, aData+aLength));

  return NS_OK;
}

NS_IMETHODIMP
nsSAXXMLReader::HandleStartNamespaceDecl(const char16_t *aPrefix,
                                         const char16_t *aUri)
{
  if (!mContentHandler)
    return NS_OK;
  
  char16_t nullChar = char16_t(0);
  if (!aPrefix)
    aPrefix = &nullChar;
  if (!aUri)
    aUri = &nullChar;

  return mContentHandler->StartPrefixMapping(nsDependentString(aPrefix),
                                             nsDependentString(aUri));
}

NS_IMETHODIMP
nsSAXXMLReader::HandleEndNamespaceDecl(const char16_t *aPrefix)
{
  if (!mContentHandler)
    return NS_OK;
  
  if (aPrefix)
    return mContentHandler->EndPrefixMapping(nsDependentString(aPrefix));

  return mContentHandler->EndPrefixMapping(EmptyString());
}

NS_IMETHODIMP
nsSAXXMLReader::HandleProcessingInstruction(const char16_t *aTarget,
                                            const char16_t *aData)
{
  NS_ASSERTION(aTarget && aData, "null passed to handler");
  if (mContentHandler) {
    return mContentHandler->ProcessingInstruction(nsDependentString(aTarget),
                                                  nsDependentString(aData));
  }

  return NS_OK;
}

NS_IMETHODIMP
nsSAXXMLReader::HandleNotationDecl(const char16_t *aNotationName,
                                   const char16_t *aSystemId,
                                   const char16_t *aPublicId)
{
  NS_ASSERTION(aNotationName, "null passed to handler");
  if (mDTDHandler) {
    char16_t nullChar = char16_t(0);
    if (!aSystemId)
      aSystemId = &nullChar;
    if (!aPublicId)
      aPublicId = &nullChar;

    return mDTDHandler->NotationDecl(nsDependentString(aNotationName),
                                     nsDependentString(aSystemId),
                                     nsDependentString(aPublicId));
  }

  return NS_OK;
}

NS_IMETHODIMP
nsSAXXMLReader::HandleUnparsedEntityDecl(const char16_t *aEntityName,
                                         const char16_t *aSystemId,
                                         const char16_t *aPublicId,
                                         const char16_t *aNotationName)
{
  NS_ASSERTION(aEntityName && aNotationName, "null passed to handler");
  if (mDTDHandler) {
    char16_t nullChar = char16_t(0);
    if (!aSystemId)
      aSystemId = &nullChar;
    if (!aPublicId)
      aPublicId = &nullChar;

    return mDTDHandler->UnparsedEntityDecl(nsDependentString(aEntityName),
                                           nsDependentString(aSystemId),
                                           nsDependentString(aPublicId),
                                           nsDependentString(aNotationName));
  }

  return NS_OK;
}

NS_IMETHODIMP
nsSAXXMLReader::HandleXMLDeclaration(const char16_t *aVersion,
                                     const char16_t *aEncoding,
                                     int32_t aStandalone)
{
  NS_ASSERTION(aVersion, "null passed to handler");
  if (mDeclarationHandler) {
    char16_t nullChar = char16_t(0);
    if (!aEncoding)
      aEncoding = &nullChar;
    mDeclarationHandler->HandleXMLDeclaration(nsDependentString(aVersion),
                                              nsDependentString(aEncoding),
                                              aStandalone > 0);
  }
  return NS_OK;
}

NS_IMETHODIMP
nsSAXXMLReader::ReportError(const char16_t* aErrorText,
                            const char16_t* aSourceText,
                            nsIScriptError *aError,
                            bool *_retval)
{
  NS_PRECONDITION(aError && aSourceText && aErrorText, "Check arguments!!!");
  // Normally, the expat driver should report the error.
  *_retval = true;

  if (mErrorHandler) {
    uint32_t lineNumber;
    nsresult rv = aError->GetLineNumber(&lineNumber);
    NS_ENSURE_SUCCESS(rv, rv);

    uint32_t columnNumber;
    rv = aError->GetColumnNumber(&columnNumber);
    NS_ENSURE_SUCCESS(rv, rv);

    nsCOMPtr<nsISAXLocator> locator = new nsSAXLocator(mPublicId,
                                                       mSystemId,
                                                       lineNumber,
                                                       columnNumber);
    if (!locator)
      return NS_ERROR_OUT_OF_MEMORY;

    rv = mErrorHandler->FatalError(locator, nsDependentString(aErrorText));
    if (NS_SUCCEEDED(rv)) {
      // The error handler has handled the script error.  Don't log to console.
      *_retval = false;
    }
  }

  return NS_OK;
}

// nsISAXXMLReader

NS_IMETHODIMP
nsSAXXMLReader::GetBaseURI(nsIURI **aBaseURI)
{
  NS_IF_ADDREF(*aBaseURI = mBaseURI);
  return NS_OK;
}

NS_IMETHODIMP
nsSAXXMLReader::SetBaseURI(nsIURI *aBaseURI)
{
  mBaseURI = aBaseURI;
  return NS_OK;
}

NS_IMETHODIMP
nsSAXXMLReader::GetContentHandler(nsISAXContentHandler **aContentHandler)
{
  NS_IF_ADDREF(*aContentHandler = mContentHandler);
  return NS_OK;
}

NS_IMETHODIMP
nsSAXXMLReader::SetContentHandler(nsISAXContentHandler *aContentHandler)
{
  mContentHandler = aContentHandler;
  return NS_OK;
}

NS_IMETHODIMP
nsSAXXMLReader::GetDtdHandler(nsISAXDTDHandler **aDtdHandler)
{
  NS_IF_ADDREF(*aDtdHandler = mDTDHandler);
  return NS_OK;
}

NS_IMETHODIMP
nsSAXXMLReader::SetDtdHandler(nsISAXDTDHandler *aDtdHandler)
{
  mDTDHandler = aDtdHandler;
  return NS_OK;
}

NS_IMETHODIMP
nsSAXXMLReader::GetErrorHandler(nsISAXErrorHandler **aErrorHandler)
{
  NS_IF_ADDREF(*aErrorHandler = mErrorHandler);
  return NS_OK;
}

NS_IMETHODIMP
nsSAXXMLReader::SetErrorHandler(nsISAXErrorHandler *aErrorHandler)
{
  mErrorHandler = aErrorHandler;
  return NS_OK;
}

NS_IMETHODIMP
nsSAXXMLReader::SetFeature(const nsAString &aName, bool aValue)
{
  if (aName.EqualsLiteral("http://xml.org/sax/features/namespace-prefixes")) {
    mEnableNamespacePrefixes = aValue;
    return NS_OK;
  }
  return NS_ERROR_NOT_IMPLEMENTED;
}

NS_IMETHODIMP
nsSAXXMLReader::GetFeature(const nsAString &aName, bool *aResult)
{
  if (aName.EqualsLiteral("http://xml.org/sax/features/namespace-prefixes")) {
    *aResult = mEnableNamespacePrefixes;
    return NS_OK;
  }
  return NS_ERROR_NOT_IMPLEMENTED;
}

NS_IMETHODIMP
nsSAXXMLReader::GetDeclarationHandler(nsIMozSAXXMLDeclarationHandler **aDeclarationHandler) {
  NS_IF_ADDREF(*aDeclarationHandler = mDeclarationHandler);
  return NS_OK;
}

NS_IMETHODIMP
nsSAXXMLReader::SetDeclarationHandler(nsIMozSAXXMLDeclarationHandler *aDeclarationHandler) {
  mDeclarationHandler = aDeclarationHandler;
  return NS_OK;
}

NS_IMETHODIMP
nsSAXXMLReader::GetLexicalHandler(nsISAXLexicalHandler **aLexicalHandler)
{
  NS_IF_ADDREF(*aLexicalHandler = mLexicalHandler);
  return NS_OK;
}

NS_IMETHODIMP
nsSAXXMLReader::SetLexicalHandler(nsISAXLexicalHandler *aLexicalHandler)
{
  mLexicalHandler = aLexicalHandler;
  return NS_OK;
}

NS_IMETHODIMP
nsSAXXMLReader::SetProperty(const nsAString &aName, nsISupports* aValue)
{
  return NS_ERROR_NOT_IMPLEMENTED;
}

NS_IMETHODIMP
nsSAXXMLReader::GetProperty(const nsAString &aName, bool *aResult)
{
  return NS_ERROR_NOT_IMPLEMENTED;
}

NS_IMETHODIMP
nsSAXXMLReader::ParseFromString(const nsAString &aStr,
                                const char *aContentType)
{
  // Don't call this in the middle of an async parse
  NS_ENSURE_TRUE(!mIsAsyncParse, NS_ERROR_FAILURE);

  NS_ConvertUTF16toUTF8 data(aStr);

  // The new stream holds a reference to the buffer
  nsCOMPtr<nsIInputStream> stream;
  nsresult rv = NS_NewByteInputStream(getter_AddRefs(stream),
                                      data.get(), data.Length(),
                                      NS_ASSIGNMENT_DEPEND);
  NS_ENSURE_SUCCESS(rv, rv);
  return ParseFromStream(stream, "UTF-8", aContentType);
}

NS_IMETHODIMP
nsSAXXMLReader::ParseFromStream(nsIInputStream *aStream,
                                const char *aCharset,
                                const char *aContentType)
{
  // Don't call this in the middle of an async parse
  NS_ENSURE_TRUE(!mIsAsyncParse, NS_ERROR_FAILURE);

  NS_ENSURE_ARG(aStream);
  NS_ENSURE_ARG(aContentType);

  // Put the nsCOMPtr out here so we hold a ref to the stream as needed
  nsresult rv;
  nsCOMPtr<nsIInputStream> bufferedStream;
  if (!NS_InputStreamIsBuffered(aStream)) {
    rv = NS_NewBufferedInputStream(getter_AddRefs(bufferedStream),
                                   aStream, 4096);
    NS_ENSURE_SUCCESS(rv, rv);
    aStream = bufferedStream;
  }
 
  rv = EnsureBaseURI();
  NS_ENSURE_SUCCESS(rv, rv);

  nsCOMPtr<nsIPrincipal> nullPrincipal = NullPrincipal::Create();

  // The following channel is never openend, so it does not matter what
  // securityFlags we pass; let's follow the principle of least privilege.
  nsCOMPtr<nsIChannel> parserChannel;
  rv = NS_NewInputStreamChannel(getter_AddRefs(parserChannel),
                                mBaseURI,
                                aStream,
                                nullPrincipal,
                                nsILoadInfo::SEC_REQUIRE_SAME_ORIGIN_DATA_IS_BLOCKED,
                                nsIContentPolicy::TYPE_OTHER,
                                nsDependentCString(aContentType));
  if (!parserChannel || NS_FAILED(rv))
    return NS_ERROR_FAILURE;

  if (aCharset)
    parserChannel->SetContentCharset(nsDependentCString(aCharset));

  rv = InitParser(nullptr, parserChannel);
  NS_ENSURE_SUCCESS(rv, rv);

  rv = mListener->OnStartRequest(parserChannel, nullptr);
  if (NS_FAILED(rv))
    parserChannel->Cancel(rv);

  /* When parsing a new document, we need to clear the XML identifiers.
     HandleStartDTD will set these values from the DTD declaration tag.
     We won't have them, of course, if there's a well-formedness error
     before the DTD tag (such as a space before an XML declaration).
   */
  mSystemId.Truncate();
  mPublicId.Truncate();

  nsresult status;
  parserChannel->GetStatus(&status);
  
  uint64_t offset = 0;
  while (NS_SUCCEEDED(rv) && NS_SUCCEEDED(status)) {
    uint64_t available;
    rv = aStream->Available(&available);
    if (rv == NS_BASE_STREAM_CLOSED) {
      rv = NS_OK;
      available = 0;
    }
    if (NS_FAILED(rv)) {
      parserChannel->Cancel(rv);
      break;
    }
    if (! available)
      break; // blocking input stream has none available when done

    if (available > UINT32_MAX)
      available = UINT32_MAX;

    rv = mListener->OnDataAvailable(parserChannel, nullptr,
                                    aStream,
                                    offset,
                                    (uint32_t)available);
    if (NS_SUCCEEDED(rv))
      offset += available;
    else
      parserChannel->Cancel(rv);
    parserChannel->GetStatus(&status);
  }
  rv = mListener->OnStopRequest(parserChannel, nullptr, status);
  mListener = nullptr;

  return rv;
}

NS_IMETHODIMP
nsSAXXMLReader::ParseAsync(nsIRequestObserver *aObserver)
{
  mParserObserver = aObserver;
  mIsAsyncParse = true;
  return NS_OK;
}

// nsIRequestObserver

NS_IMETHODIMP
nsSAXXMLReader::OnStartRequest(nsIRequest *aRequest, nsISupports *aContext)
{
  NS_ENSURE_TRUE(mIsAsyncParse, NS_ERROR_FAILURE);
  nsresult rv;
  rv = EnsureBaseURI();
  NS_ENSURE_SUCCESS(rv, rv);
  nsCOMPtr<nsIChannel> channel = do_QueryInterface(aRequest);
  rv = InitParser(mParserObserver, channel);
  NS_ENSURE_SUCCESS(rv, rv);
  // we don't need or want this anymore
  mParserObserver = nullptr;
  return mListener->OnStartRequest(aRequest, aContext);
}

NS_IMETHODIMP
nsSAXXMLReader::OnStopRequest(nsIRequest *aRequest, nsISupports *aContext,
                              nsresult status)
{
  NS_ENSURE_TRUE(mIsAsyncParse, NS_ERROR_FAILURE);
  NS_ENSURE_STATE(mListener);
  nsresult rv = mListener->OnStopRequest(aRequest, aContext, status);
  mListener = nullptr;
  mIsAsyncParse = false;
  return rv;
}

// nsIStreamListener

NS_IMETHODIMP
nsSAXXMLReader::OnDataAvailable(nsIRequest *aRequest, nsISupports *aContext,
                                nsIInputStream *aInputStream, uint64_t offset,
                                uint32_t count)
{
  NS_ENSURE_TRUE(mIsAsyncParse, NS_ERROR_FAILURE);
  NS_ENSURE_STATE(mListener);
  return mListener->OnDataAvailable(aRequest, aContext, aInputStream, offset,
                                    count);
}

nsresult
nsSAXXMLReader::InitParser(nsIRequestObserver *aObserver, nsIChannel *aChannel)
{
  nsresult rv;

  // setup the parser
  nsCOMPtr<nsIParser> parser = do_CreateInstance(kParserCID, &rv);
  NS_ENSURE_SUCCESS(rv, rv);

  parser->SetContentSink(this);

  int32_t charsetSource = kCharsetFromDocTypeDefault;
  nsAutoCString charset(NS_LITERAL_CSTRING("UTF-8"));
  TryChannelCharset(aChannel, charsetSource, charset);
  parser->SetDocumentCharset(charset, charsetSource);

  rv = parser->Parse(mBaseURI, aObserver);
  NS_ENSURE_SUCCESS(rv, rv);

  mListener = do_QueryInterface(parser, &rv);

  return rv;
}

// from nsDocument.cpp
bool
nsSAXXMLReader::TryChannelCharset(nsIChannel *aChannel,
                                  int32_t& aCharsetSource,
                                  nsACString& aCharset)
{
  if (aCharsetSource >= kCharsetFromChannel)
    return true;
  
  if (aChannel) {
    nsAutoCString charsetVal;
    nsresult rv = aChannel->GetContentCharset(charsetVal);
    if (NS_SUCCEEDED(rv)) {
      nsAutoCString preferred;
      if (!EncodingUtils::FindEncodingForLabel(charsetVal, preferred))
        return false;

      aCharset = preferred;
      aCharsetSource = kCharsetFromChannel;
      return true;
    }
  }

  return false;
}

nsresult
nsSAXXMLReader::EnsureBaseURI()
{
  if (mBaseURI) 
    return NS_OK;

  return NS_NewURI(getter_AddRefs(mBaseURI), "about:blank");
}

nsresult
nsSAXXMLReader::SplitExpatName(const char16_t *aExpatName,
                               nsString &aURI,
                               nsString &aLocalName,
                               nsString &aQName)
{
  /**
   * Adapted from RDFContentSinkImpl
   *
   * Expat can send the following:
   *    localName
   *    namespaceURI<separator>localName
   *    namespaceURI<separator>localName<separator>prefix
   *
   * and we use 0xFFFF for the <separator>.
   *
   */

  NS_ASSERTION(aExpatName, "null passed to handler");
  nsDependentString expatStr(aExpatName);
  int32_t break1, break2 = kNotFound;
  break1 = expatStr.FindChar(char16_t(0xFFFF));

  if (break1 == kNotFound) {
    aLocalName = expatStr; // no namespace
    aURI.Truncate();
    aQName = expatStr;
  } else {
    aURI = StringHead(expatStr, break1);
    break2 = expatStr.FindChar(char16_t(0xFFFF), break1 + 1);
    if (break2 == kNotFound) { // namespace, but no prefix
      aLocalName = Substring(expatStr, break1 + 1);
      aQName = aLocalName;
    } else { // namespace with prefix
      aLocalName = Substring(expatStr, break1 + 1, break2 - break1 - 1);
      aQName = Substring(expatStr, break2 + 1) +
        NS_LITERAL_STRING(":") + aLocalName;
    }
  }

  return NS_OK;
}