Bug 1519097 - Implement the Hashbang Grammar proposal in JavaScript to allow a '#!' line at the very start of a script/module to be treated as beginning a single-line comment. r=arai
author Jeff Walden <jwalden@mit.edu>
Mon, 14 Jan 2019 23:01:14 -0500
changeset 461912 3ea769ff9a9a04270cd445f6acd4ea0714650eb1
parent 461911 b7f57b3558916ec78e96b05566faa0e12083e972
child 461913 53c9ecc53bbfb67ba5d90271398bf54cb80a5f25
push id 35631
push user rgurzau@mozilla.com
push date Fri, 01 Mar 2019 13:06:03 +0000
treeherder mozilla-central@d4e19870e27f [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers arai
bugs 1519097
milestone 67.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1519097 - Implement the Hashbang Grammar proposal in JavaScript to allow a '#!' line at the very start of a script/module to be treated as beginning a single-line comment. r=arai
js/src/frontend/ParseContext.h
js/src/frontend/Parser.cpp
js/src/frontend/SharedContext.h
js/src/frontend/TokenStream.cpp
js/src/frontend/TokenStream.h
js/src/shell/js.cpp
js/src/tests/jstests.list
js/src/tests/test262-update.py
js/src/tests/test262/language/comments/hashbang/eval-indirect.js
js/src/tests/test262/language/comments/hashbang/eval.js
js/src/tests/test262/language/comments/hashbang/function-body.js
js/src/tests/test262/language/comments/hashbang/function-constructor.js
js/src/tests/test262/language/comments/hashbang/no-line-separator.js
js/src/tests/test262/language/comments/hashbang/statement-block.js
--- a/js/src/frontend/ParseContext.h
+++ b/js/src/frontend/ParseContext.h
@@ -557,16 +557,22 @@ class ParseContext : public Nestable<Par
   //
   bool atBodyLevel() { return !innermostStatement_; }
 
   bool atGlobalLevel() { return atBodyLevel() && sc_->isGlobalContext(); }
 
   // True if we are at the topmost level of a module only.
   bool atModuleLevel() { return atBodyLevel() && sc_->isModuleContext(); }
 
+  // True if we are at the topmost level of an entire script or module.  For
+  // example, in the comment on |atBodyLevel()| above, we would encounter |f1|
+  // and the outermost |if (cond)| at top level, and everything else would not
+  // be at top level.
+  bool atTopLevel() { return atBodyLevel() && sc_->isTopLevelContext(); }
+
   void setIsStandaloneFunctionBody() { isStandaloneFunctionBody_ = true; }
 
   bool isStandaloneFunctionBody() const { return isStandaloneFunctionBody_; }
 
   void setSuperScopeNeedsHomeObject() {
     MOZ_ASSERT(sc_->allowSuperProperty());
     superScopeNeedsHomeObject_ = true;
   }
--- a/js/src/frontend/Parser.cpp
+++ b/js/src/frontend/Parser.cpp
@@ -3396,16 +3396,22 @@ GeneralParser<ParseHandler, Unit>::state
   if (!stmtList) {
     return null();
   }
 
   bool canHaveDirectives = pc_->atBodyLevel();
   if (canHaveDirectives) {
     anyChars.clearSawOctalEscape();
   }
+
+  bool canHaveHashbangComment = pc_->atTopLevel();
+  if (canHaveHashbangComment) {
+    tokenStream.consumeOptionalHashbangComment();
+  }
+
   bool afterReturn = false;
   bool warnedAboutStatementsAfterReturn = false;
   uint32_t statementBegin = 0;
   for (;;) {
     TokenKind tt = TokenKind::Eof;
     if (!tokenStream.peekToken(&tt, TokenStream::Operand)) {
       if (anyChars.isEOF()) {
         isUnexpectedEOF_ = true;
--- a/js/src/frontend/SharedContext.h
+++ b/js/src/frontend/SharedContext.h
@@ -179,16 +179,29 @@ class SharedContext {
   inline FunctionBox* asFunctionBox();
   bool isModuleContext() const { return kind_ == Kind::Module; }
   inline ModuleSharedContext* asModuleContext();
   bool isGlobalContext() const { return kind_ == Kind::Global; }
   inline GlobalSharedContext* asGlobalContext();
   bool isEvalContext() const { return kind_ == Kind::Eval; }
   inline EvalSharedContext* asEvalContext();
 
+  bool isTopLevelContext() const {
+    switch (kind_) {
+      case Kind::Module:
+      case Kind::Global:
+      case Kind::Eval:
+        return true;
+      case Kind::FunctionBox:
+        break;
+    }
+    MOZ_ASSERT(kind_ == Kind::FunctionBox);
+    return false;
+  }
+
   ThisBinding thisBinding() const { return thisBinding_; }
 
   bool allowNewTarget() const { return allowNewTarget_; }
   bool allowSuperProperty() const { return allowSuperProperty_; }
   bool allowSuperCall() const { return allowSuperCall_; }
   bool inWith() const { return inWith_; }
   bool needsThisTDZChecks() const { return needsThisTDZChecks_; }
 
--- a/js/src/frontend/TokenStream.cpp
+++ b/js/src/frontend/TokenStream.cpp
@@ -2375,16 +2375,44 @@ MOZ_MUST_USE bool TokenStreamSpecific<Un
       return false;
     }
   }
   newBigIntToken(start, modifier, out);
   return true;
 }
 
 template <typename Unit, class AnyCharsAccess>
+void GeneralTokenStreamChars<Unit,
+                             AnyCharsAccess>::consumeOptionalHashbangComment() {
+  MOZ_ASSERT(this->sourceUnits.atStart(),
+             "HashBangComment can only appear immediately at the start of a "
+             "Script or Module");
+
+  // HashbangComment ::
+  //   #!  SingleLineCommentChars_opt
+
+  if (!matchCodeUnit('#')) {
+    // HashbangComment is optional at start of Script or Module.
+    return;
+  }
+
+  if (!matchCodeUnit('!')) {
+    // # not followed by ! at start of Script or Module is an error, but normal
+    // parsing code will handle that error just fine if we let it.
+    ungetCodeUnit('#');
+    return;
+  }
+
+  // This doesn't consume a concluding LineTerminator, and it stops consuming
+  // just before any encoding error.  The subsequent |getToken| call will call
+  // |getTokenInternal| below which will handle these possibilities.
+  this->sourceUnits.consumeRestOfSingleLineComment();
+}
+
+template <typename Unit, class AnyCharsAccess>
 MOZ_MUST_USE bool TokenStreamSpecific<Unit, AnyCharsAccess>::getTokenInternal(
     TokenKind* const ttp, const Modifier modifier) {
   // Assume we'll fail: success cases will overwrite this.
 #ifdef DEBUG
   *ttp = TokenKind::Limit;
 #endif
   MOZ_MAKE_MEM_UNDEFINED(ttp, sizeof(*ttp));
 
--- a/js/src/frontend/TokenStream.h
+++ b/js/src/frontend/TokenStream.h
@@ -1906,16 +1906,17 @@ class GeneralTokenStreamChars : public S
   }
 
   uint32_t matchUnicodeEscape(uint32_t* codePoint);
   uint32_t matchExtendedUnicodeEscape(uint32_t* codePoint);
 
  protected:
   using CharsBase::addLineOfContext;
   using CharsBase::fillCharBufferFromSourceNormalizingAsciiLineBreaks;
+  using CharsBase::matchCodeUnit;
   using CharsBase::matchLineTerminator;
   using TokenStreamCharsShared::drainCharBufferIntoAtom;
   using TokenStreamCharsShared::isAsciiCodePoint;
   // Deliberately don't |using CharsBase::sourceUnits| because of bug 1472569.
   // :-(
   using CharsBase::toUnit;
 
   using typename CharsBase::SourceUnits;
@@ -2091,16 +2092,23 @@ class GeneralTokenStreamChars : public S
     if (err->lineNumber != anyCharsAccess().lineno) {
       return true;
     }
 
     return addLineOfContext(err, offset);
   }
 
  public:
+  /**
+   * Consume any hashbang comment at the start of a Script or Module, if one is
+   * present.  Stops consuming just before any terminating LineTerminator or
+   * before an encoding error is encountered.
+   */
+  void consumeOptionalHashbangComment();
+
   JSAtom* getRawTemplateStringAtom() {
     TokenStreamAnyChars& anyChars = anyCharsAccess();
 
     MOZ_ASSERT(anyChars.currentToken().type == TokenKind::TemplateHead ||
                anyChars.currentToken().type == TokenKind::NoSubsTemplate);
     const Unit* cur =
         this->sourceUnits.codeUnitPtrAt(anyChars.currentToken().pos.begin + 1);
     const Unit* end;
--- a/js/src/shell/js.cpp
+++ b/js/src/shell/js.cpp
@@ -836,28 +836,16 @@ enum class CompileUtf8 {
   DontInflate,
 };
 
 static MOZ_MUST_USE bool RunFile(JSContext* cx, const char* filename,
                                  FILE* file, CompileUtf8 compileMethod,
                                  bool compileOnly) {
   SkipUTF8BOM(file);
 
-  // To support the UNIX #! shell hack, gobble the first line if it starts
-  // with '#'.
-  int ch = fgetc(file);
-  if (ch == '#') {
-    while ((ch = fgetc(file)) != EOF) {
-      if (ch == '\n' || ch == '\r') {
-        break;
-      }
-    }
-  }
-  ungetc(ch, file);
-
   int64_t t1 = PRMJ_Now();
   RootedScript script(cx);
 
   {
     CompileOptions options(cx);
     options.setIntroductionType("js shell file")
         .setFileAndLine(filename, 1)
         .setIsRunOnce(true)
--- a/js/src/tests/jstests.list
+++ b/js/src/tests/jstests.list
@@ -1248,25 +1248,19 @@ skip script test262/harness/detachArrayB
 # The hashbang tests are usually raw tests -- which means they must be run
 # with no alterations.  (This because "#!" only denotes a single-line comment if
 # it appears at the very start of the script, and nowhere else.)  But jstests.py
 # expects to be told how tests should run, and what to expect from them, per the
 # // |reftest| comment on the first line -- exactly where the "#!" must appear
 # if it is to appear.  (Same for the various fake hashbangs that aren't
 # *actually* hashbangs, if they are to test what they were intended to test.)
 #
-# So for now, these tests must be explicitly skipped.  When hashbang support
-# lands, tests from below that aren't negative tests (that would require a
-# // |reftest| comment) can be enabled.  And whenever we figure out a way to
-# import even the negative tests to run them without alteration, we can enable
-# the rest of these.
-#
-# Bug 1519097 will add hashbang support and trim this list.  No bug has yet been
-# filed to deal with figuring out how to sensibly import the remaining scripts
-# here that will continue to fail after that.
+# So for now -- until we can figure out a way to encode "this is a negative
+# test" in a way that doesn't require changing the raw source of the test --
+# these tests must be explicitly skipped.  See bug 1531202.
 skip script test262/language/comments/hashbang/escaped-hashbang.js
 skip script test262/language/comments/hashbang/preceding-hashbang.js
 skip script test262/language/comments/hashbang/escaped-hash-043.js
 skip script test262/language/comments/hashbang/preceding-line-comment.js
 skip script test262/language/comments/hashbang/escaped-hash-x23.js
 skip script test262/language/comments/hashbang/escaped-hash-u0023.js
 skip script test262/language/comments/hashbang/escaped-bang-u0021.js
 skip script test262/language/comments/hashbang/preceding-empty-statement.js
--- a/js/src/tests/test262-update.py
+++ b/js/src/tests/test262-update.py
@@ -28,17 +28,16 @@ UNSUPPORTED_FEATURES = set([
     "class-static-methods-private",
     "regexp-dotall",
     "regexp-lookbehind",
     "regexp-named-groups",
     "regexp-unicode-property-escapes",
     "numeric-separator-literal",
     "Intl.Locale",
     "global",
-    "hashbang",
     "export-star-as-namespace-from-module",
     "Intl.ListFormat",
     "Intl.Segmenter",
     "Intl.NumberFormat-unified",
 ])
 FEATURE_CHECK_NEEDED = {
     "Atomics": "!this.hasOwnProperty('Atomics')",
     "BigInt": "!this.hasOwnProperty('BigInt')",
--- a/js/src/tests/test262/language/comments/hashbang/eval-indirect.js
+++ b/js/src/tests/test262/language/comments/hashbang/eval-indirect.js
@@ -1,9 +1,8 @@
-// |reftest| skip -- hashbang is not supported
 /*---
 esid: pending
 description: >
     Hashbang comments should be available in Script evaluator contexts. (indirect eval)
 info: |
     HashbangComment::
       #! SingleLineCommentChars[opt]
 features: [hashbang]
--- a/js/src/tests/test262/language/comments/hashbang/eval.js
+++ b/js/src/tests/test262/language/comments/hashbang/eval.js
@@ -1,9 +1,8 @@
-// |reftest| skip -- hashbang is not supported
 /*---
 esid: pending
 description: >
     Hashbang comments should be available in Script evaluator contexts. (direct eval)
 info: |
     HashbangComment::
       #! SingleLineCommentChars[opt]
 features: [hashbang]
--- a/js/src/tests/test262/language/comments/hashbang/function-body.js
+++ b/js/src/tests/test262/language/comments/hashbang/function-body.js
@@ -1,9 +1,9 @@
-// |reftest| skip error:SyntaxError -- hashbang is not supported
+// |reftest| error:SyntaxError
 /*---
 esid: pending
 description: >
     Hashbang comments should only be allowed at the start of source texts and should not be allowed within function bodies.
 info: |
     HashbangComment::
       #! SingleLineCommentChars[opt]
 negative:
--- a/js/src/tests/test262/language/comments/hashbang/function-constructor.js
+++ b/js/src/tests/test262/language/comments/hashbang/function-constructor.js
@@ -1,9 +1,8 @@
-// |reftest| skip -- hashbang is not supported
 /*---
 esid: pending
 description: >
     Hashbang comments should not be allowed in function evaluator contexts.
 info: |
     HashbangComment::
       #! SingleLineCommentChars[opt]
 features: [hashbang]
--- a/js/src/tests/test262/language/comments/hashbang/no-line-separator.js
+++ b/js/src/tests/test262/language/comments/hashbang/no-line-separator.js
@@ -1,9 +1,8 @@
-// |reftest| skip -- hashbang is not supported
 /*---
 esid: pending
 description: >
     Hashbang comments should not require a newline afterwards
 info: |
     HashbangComment::
       #! SingleLineCommentChars[opt]
 features: [hashbang]
--- a/js/src/tests/test262/language/comments/hashbang/statement-block.js
+++ b/js/src/tests/test262/language/comments/hashbang/statement-block.js
@@ -1,9 +1,9 @@
-// |reftest| skip error:SyntaxError -- hashbang is not supported
+// |reftest| error:SyntaxError
 /*---
 esid: pending
 description: >
     Hashbang comments should only be allowed at the start of source texts and should not be allowed within blocks.
 info: |
     HashbangComment::
       #! SingleLineCommentChars[opt]
 negative: