Bug 1551916 - Optimize column-number computations for offsets more than |ColumnChunkLength = 128| code units into a line by saving column information at 128-unit increments (rounded down to the nearest code point start) so that at most (length of... r=arai
authorJeff Walden <jwalden@mit.edu>
Fri, 17 May 2019 03:21:04 +0000
changeset 536171 70c4b663298ae5c4b287d020cc0842625fe6f52c
parent 536170 04cce27de4ac16500981c23558b355cfe4ddc186
child 536172 9ae652e07ac2e3481d3cf8ce27a8f12d81d8e79c
push id2082
push userffxbld-merge
push dateMon, 01 Jul 2019 08:34:18 +0000
treeherdermozilla-release@2fb19d0466d2 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersarai
bugs1551916
milestone68.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1551916 - Optimize column-number computations for offsets more than |ColumnChunkLength = 128| code units into a line by saving column information at 128-unit increments (rounded down to the nearest code point start) so that at most (length of... r=arai ...longest code point encoding - 1) + ColumnChunkLength - 1 units must be observed when computing a column number. r=arai Depends on D31301 Differential Revision: https://phabricator.services.mozilla.com/D31302
js/src/frontend/TokenStream.cpp
js/src/frontend/TokenStream.h
js/src/tests/non262/syntax/column-numbers-in-long-lines.js
--- a/js/src/frontend/TokenStream.cpp
+++ b/js/src/frontend/TokenStream.cpp
@@ -490,16 +490,19 @@ TokenStreamAnyChars::SourceCoords::LineT
 TokenStreamAnyChars::SourceCoords::lineToken(uint32_t offset) const {
   return LineToken(indexFromOffset(offset), offset);
 }
 
 TokenStreamAnyChars::TokenStreamAnyChars(JSContext* cx,
                                          const ReadOnlyCompileOptions& options,
                                          StrictModeGetter* smg)
     : srcCoords(cx, options.lineno, options.scriptSourceOffset),
+#if JS_COLUMN_DIMENSION_IS_CODE_POINTS()
+      longLineColumnInfo_(cx),
+#endif  // JS_COLUMN_DIMENSION_IS_CODE_POINTS()
       options_(options),
       tokens(),
       cursor_(0),
       lookahead(),
       lineno(options.lineno),
       flags(),
       linebase(0),
       prevLinebase(size_t(-1)),
@@ -684,52 +687,235 @@ inline void SourceUnits<Utf8Unit>::asser
   MOZ_ASSERT(peeked.lengthInUnits() <= 4);
   for (uint8_t i = 0; i < peeked.lengthInUnits(); i++) {
     MOZ_ASSERT(expectedUnits[i] == ptr[i].toUint8());
   }
 }
 
 #endif  // DEBUG
 
+#if JS_COLUMN_DIMENSION_IS_CODE_POINTS()
+
+static MOZ_ALWAYS_INLINE void RetractPointerToCodePointBoundary(
+    const Utf8Unit** ptr, const Utf8Unit* limit) {
+  MOZ_ASSERT(*ptr <= limit);
+
+  // |limit| is a code point boundary.
+  if (MOZ_UNLIKELY(*ptr == limit)) {
+    return;
+  }
+
+  // Otherwise rewind past trailing units to the start of the code point.
+#  ifdef DEBUG
+  size_t retracted = 0;
+#  endif
+  while (MOZ_UNLIKELY(IsTrailingUnit((*ptr)[0]))) {
+    --*ptr;
+#  ifdef DEBUG
+    retracted++;
+#  endif
+  }
+
+  MOZ_ASSERT(retracted < 4,
+             "the longest UTF-8 code point is four units, so this should never "
+             "retract more than three units");
+}
+
+static MOZ_ALWAYS_INLINE void RetractPointerToCodePointBoundary(
+    const char16_t** ptr, const char16_t* limit) {
+  MOZ_ASSERT(*ptr <= limit);
+
+  // |limit| is a code point boundary.
+  if (MOZ_UNLIKELY(*ptr == limit)) {
+    return;
+  }
+
+  // Otherwise the pointer must be retracted by one iff it splits a two-unit
+  // code point.
+  if (MOZ_UNLIKELY(unicode::IsTrailSurrogate((*ptr)[0]))) {
+    // Outside test suites testing garbage WTF-16, it's basically guaranteed
+    // here that |(*ptr)[-1] (*ptr)[0]| is a surrogate pair.
+    if (MOZ_LIKELY(unicode::IsLeadSurrogate((*ptr)[-1]))) {
+      --*ptr;
+    }
+  }
+}
+
 template <typename Unit>
-static size_t ComputeColumn(const Unit* begin, const Unit* end) {
-#if JS_COLUMN_DIMENSION_IS_CODE_POINTS()
-  return unicode::CountCodePoints(begin, end);
-#else
-  return PointerRangeSize(begin, end);
+uint32_t TokenStreamAnyChars::computePartialColumn(
+    const LineToken lineToken, const uint32_t offset,
+    const SourceUnits<Unit>& sourceUnits) const {
+  lineToken.assertConsistentOffset(offset);
+
+  const uint32_t line = lineNumber(lineToken);
+  const uint32_t start = srcCoords.lineStart(lineToken);
+
+  // Reset the previous offset/column cache for this line, if the previous
+  // lookup wasn't on this line.
+  if (line != lineOfLastColumnComputation_) {
+    lineOfLastColumnComputation_ = line;
+    lastChunkVectorForLine_ = nullptr;
+    lastOffsetOfComputedColumn_ = start;
+    lastComputedColumn_ = 0;
+  }
+
+  // Compute and return the final column number from a partial offset/column,
+  // using the last-cached offset/column if they're more optimal.
+  auto ColumnFromPartial = [this, offset, &sourceUnits](uint32_t partialOffset,
+                                                        uint32_t partialCols) {
+    MOZ_ASSERT(partialOffset <= offset);
+
+    // If the last lookup on this line was closer to |offset|, use it.
+    if (partialOffset < this->lastOffsetOfComputedColumn_ &&
+        this->lastOffsetOfComputedColumn_ <= offset) {
+      partialOffset = this->lastOffsetOfComputedColumn_;
+      partialCols = this->lastComputedColumn_;
+    }
+
+    const Unit* begin = sourceUnits.codeUnitPtrAt(partialOffset);
+    const Unit* end = sourceUnits.codeUnitPtrAt(offset);
+
+    partialOffset += PointerRangeSize(begin, end);
+    partialCols += AssertedCast<uint32_t>(unicode::CountCodePoints(begin, end));
+
+    this->lastOffsetOfComputedColumn_ = partialOffset;
+    this->lastComputedColumn_ = partialCols;
+    return partialCols;
+  };
+
+  const uint32_t offsetInLine = offset - start;
+
+  // The index within a relevant |Vector<uint32_t>| of the nearest chunk
+  // info...if it's been computed at all.
+  const uint32_t chunkIndex = offsetInLine / ColumnChunkLength;
+
+  // Compute the column from the start of the line if chunk information would
+  // direct us to the start of the line -- including if the line's too short to
+  // be chunked.
+  if (chunkIndex == 0) {
+    return ColumnFromPartial(start, 0);
+  }
+
+  // If this line has no chunk vector yet, insert one in the hash map.  (The
+  // required index is allocated and filled further down.)
+  if (!lastChunkVectorForLine_) {
+    auto ptr = longLineColumnInfo_.lookupForAdd(line);
+    if (!ptr) {
+      // This could rehash and invalidate a cached vector pointer, but the outer
+      // condition means we don't have a cached pointer.
+      if (!longLineColumnInfo_.add(ptr, line, Vector<uint32_t>(cx))) {
+        // In case of OOM, just count columns from the start of the line.
+        cx->recoverFromOutOfMemory();
+        return ColumnFromPartial(start, 0);
+      }
+    }
+
+    // Note that adding elements to this vector won't invalidate this pointer.
+    lastChunkVectorForLine_ = &ptr->value();
+  }
+
+  const Unit* const limit = sourceUnits.codeUnitPtrAt(offset);
+
+  auto RetractedOffsetOfChunk = [
+#  ifdef DEBUG
+                                    this,
+#  endif
+                                    start, limit,
+                                    &sourceUnits](uint32_t index) {
+    MOZ_ASSERT(index < this->lastChunkVectorForLine_->length());
+
+    uint32_t naiveOffset = start + index * ColumnChunkLength;
+    const Unit* naivePtr = sourceUnits.codeUnitPtrAt(naiveOffset);
+
+    const Unit* actualPtr = naivePtr;
+    RetractPointerToCodePointBoundary(&actualPtr, limit);
+
+    return naiveOffset - PointerRangeSize(actualPtr, naivePtr);
+  };
+
+  uint32_t partialOffset;
+  uint32_t partialColumn;
+
+  auto entriesLen = AssertedCast<uint32_t>(lastChunkVectorForLine_->length());
+  if (entriesLen <= chunkIndex) {
+    // Extend the vector from its last entry or the start of the line.  (This is
+    // also a suitable partial start point if we must recover from OOM.)
+    if (entriesLen > 0) {
+      partialOffset = RetractedOffsetOfChunk(entriesLen - 1);
+      partialColumn = (*lastChunkVectorForLine_)[entriesLen - 1];
+    } else {
+      partialOffset = start;
+      partialColumn = 0;
+    }
+
+    if (!lastChunkVectorForLine_->reserve(chunkIndex + 1)) {
+      // As earlier, just start from the greatest offset/column in case of OOM.
+      cx->recoverFromOutOfMemory();
+      return ColumnFromPartial(partialOffset, partialColumn);
+    }
+
+    // OOM is no longer possible now.  \o/
+
+    // The vector always begins with the column of the line start, i.e. zero.
+    if (entriesLen == 0) {
+      lastChunkVectorForLine_->infallibleAppend(0);
+      entriesLen++;
+    }
+
+    do {
+      const Unit* const begin = sourceUnits.codeUnitPtrAt(partialOffset);
+      const Unit* chunkLimit = sourceUnits.codeUnitPtrAt(
+          start + std::min(entriesLen * ColumnChunkLength, offsetInLine));
+
+      MOZ_ASSERT(begin < chunkLimit);
+      MOZ_ASSERT(chunkLimit <= limit);
+
+      static_assert(ColumnChunkLength > SourceUnitTraits<Unit>::maxUnitsLength,
+                    "chunk length in code units must be able to contain the "
+                    "largest encoding of a code point, for retracting below to "
+                    "never underflow");
+
+      // Prior tokenizing ensured that [begin, limit) is validly encoded, and
+      // |begin < chunkLimit|, so any retraction here can't underflow.
+      RetractPointerToCodePointBoundary(&chunkLimit, limit);
+
+      MOZ_ASSERT(begin < chunkLimit);
+      MOZ_ASSERT(chunkLimit <= limit);
+
+      partialOffset += PointerRangeSize(begin, chunkLimit);
+      partialColumn += unicode::CountCodePoints(begin, chunkLimit);
+
+      lastChunkVectorForLine_->infallibleAppend(partialColumn);
+      entriesLen++;
+    } while (entriesLen < chunkIndex + 1);
+  } else {
+    partialOffset = RetractedOffsetOfChunk(chunkIndex);
+    partialColumn = (*lastChunkVectorForLine_)[chunkIndex];
+  }
+
+  return ColumnFromPartial(partialOffset, partialColumn);
+}
+
 #endif  // JS_COLUMN_DIMENSION_IS_CODE_POINTS()
-}
 
 template <typename Unit, class AnyCharsAccess>
 uint32_t GeneralTokenStreamChars<Unit, AnyCharsAccess>::computeColumn(
     LineToken lineToken, uint32_t offset) const {
   lineToken.assertConsistentOffset(offset);
 
   const TokenStreamAnyChars& anyChars = anyCharsAccess();
 
-  uint32_t lineNumber = anyChars.srcCoords.lineNumber(lineToken);
-
-  uint32_t beginOffset;
-  uint32_t partialCols;
-  if (lineNumber == lastLineForColumn_ && lastOffsetForColumn_ <= offset) {
-    beginOffset = lastOffsetForColumn_;
-    partialCols = lastColumn_;
-  } else {
-    beginOffset = anyChars.lineStart(lineToken);
-    partialCols = 0;
-  }
-
-  const Unit* begin = this->sourceUnits.codeUnitPtrAt(beginOffset);
-  const Unit* end = this->sourceUnits.codeUnitPtrAt(offset);
-
-  partialCols += AssertedCast<uint32_t>(ComputeColumn(begin, end));
-
-  lastLineForColumn_ = lineNumber;
-  lastOffsetForColumn_ = offset;
-  lastColumn_ = partialCols;
+  uint32_t partialCols =
+#if JS_COLUMN_DIMENSION_IS_CODE_POINTS()
+      anyChars.computePartialColumn(lineToken, offset, this->sourceUnits)
+#else
+      offset - anyChars.lineStart(lineToken)
+#endif  // JS_COLUMN_DIMENSION_IS_CODE_POINTS()
+      ;
+
   return (lineToken.isFirstLine() ? anyChars.options_.column : 0) + partialCols;
 }
 
 template <typename Unit, class AnyCharsAccess>
 void GeneralTokenStreamChars<Unit, AnyCharsAccess>::computeLineAndColumn(
     uint32_t offset, uint32_t* line, uint32_t* column) const {
   const TokenStreamAnyChars& anyChars = anyCharsAccess();
 
--- a/js/src/frontend/TokenStream.h
+++ b/js/src/frontend/TokenStream.h
@@ -202,16 +202,17 @@
 #include <stdio.h>
 
 #include "jspubtd.h"
 
 #include "frontend/ErrorReporter.h"
 #include "frontend/Token.h"
 #include "frontend/TokenKind.h"
 #include "js/CompileOptions.h"
+#include "js/HashTable.h"    // js::HashMap
 #include "js/RegExpFlags.h"  // JS::RegExpFlags
 #include "js/UniquePtr.h"
 #include "js/Vector.h"
 #include "util/Text.h"
 #include "util/Unicode.h"
 #include "vm/ErrorReporting.h"
 #include "vm/JSAtom.h"
 #include "vm/StringType.h"
@@ -313,16 +314,19 @@ class MOZ_STACK_CLASS TokenStreamPositio
   unsigned lineno;
   size_t linebase;
   size_t prevLinebase;
   Token currentToken;
   unsigned lookahead;
   Token lookaheadTokens[TokenStreamShared::maxLookahead];
 } JS_HAZ_ROOTED;
 
+template <typename Unit>
+class SourceUnits;
+
 // Column numbers *ought* be in terms of counts of code points, but in the past
 // we counted code units.  Set this to 0 to keep returning counts of code units
 // (even for UTF-8, which is clearly wrong, but we don't ship UTF-8 yet so this
 // is fine until we can fix users that depend on code-unit counting).
 #define JS_COLUMN_DIMENSION_IS_CODE_POINTS() 0
 
 class TokenStreamAnyChars : public TokenStreamShared {
  public:
@@ -596,16 +600,18 @@ class TokenStreamAnyChars : public Token
 
     /** Compute the line number for the given token. */
     uint32_t lineNumber(LineToken lineToken) const {
       return lineNumberFromIndex(lineToken.index);
     }
 
     /** Return the offset of the start of the line for |lineToken|. */
     uint32_t lineStart(LineToken lineToken) const {
+      MOZ_ASSERT(lineToken.index + 1 < lineStartOffsets_.length(),
+                 "recorded line-start information must be available");
       return lineStartOffsets_[lineToken.index];
     }
   };
 
   SourceCoords srcCoords;
 
   JSContext* context() const { return cx; }
 
@@ -634,16 +640,74 @@ class TokenStreamAnyChars : public Token
    * 2) line-of-context-related fields and return true.  The caller *must*
    * fill in the line/column number; filling the line of context is optional.
    */
   bool fillExceptingContext(ErrorMetadata* err, uint32_t offset);
 
   MOZ_ALWAYS_INLINE void updateFlagsForEOL() { flags.isDirtyLine = false; }
 
  private:
+#if JS_COLUMN_DIMENSION_IS_CODE_POINTS()
+  /**
+   * Compute the "partial" column number in Unicode code points of the absolute
+   * |offset| within source text on the line of |lineToken| (which must have
+   * been computed from |offset|).
+   *
+   * A partial column number on a line that isn't the first line is just the
+   * actual column number.  But a partial column number on the first line is the
+   * column number *ignoring the initial line/column of the script*.  For
+   * example, consider this HTML with line/column number keys:
+   *
+   *                 1         2            3
+   *       0123456789012345678901234   567890
+   *     ------------------------------------
+   *   1 | <html>
+   *   2 | <head>
+   *   3 |   <script>var x = 3;  x &lt; 4;
+   *   4 | const y = 7;</script>
+   *   5 | </head>
+   *   6 | <body></body>
+   *   7 | </html>
+   *
+   * The script would be compiled specifying initial (line, column) of (3, 10)
+   * using |JS::ReadOnlyCompileOptions::{lineno,column}|.  And the column
+   * reported by |computeColumn| for the "v" of |var| would be 10.  But the
+   * partial column number of the "v" in |var|, that this function returns,
+   * would be 0.  On the other hand, the column reported by |computeColumn| and
+   * the partial column number returned by this function for the "c" in |const|
+   * would both be 0, because it's not in the first line of source text.
+   *
+   * The partial column is with respect *only* to the JavaScript source text as
+   * SpiderMonkey sees it.  In the example, the "&lt;" is converted to "<" by
+   * the browser before SpiderMonkey would see it.  So the partial column of the
+   * "4" in the inequality would be 16, not 19.
+   *
+   * Code points are not all equal length, so counting requires *some* kind of
+   * linear-time counting from the start of the line.  This function attempts
+   * various tricks to reduce this cost.  If these optimizations succeed,
+   * repeated calls to this function on a line will pay a one-time cost linear
+   * in the length of the line, then each call pays a separate constant-time
+   * cost.  If the optimizations do not succeed, this function works in time
+   * linear in the length of the line.
+   *
+   * It's unusual for a function in *this* class to be |Unit|-templated, but
+   * while this operation manages |Unit|-agnostic fields in this class and in
+   * |srcCoords|, it must *perform* |Unit|-sensitive computations to fill them.
+   * And this is the best place to do that.
+   */
+  template <typename Unit>
+  uint32_t computePartialColumn(const LineToken lineToken,
+                                const uint32_t offset,
+                                const SourceUnits<Unit>& sourceUnits) const;
+#endif  // JS_COLUMN_DIMENSION_IS_CODE_POINTS()
+
+  /**
+   * Update line/column information for the start of a new line at
+   * |lineStartOffset|.
+   */
   MOZ_MUST_USE MOZ_ALWAYS_INLINE bool internalUpdateLineInfoForEOL(
       uint32_t lineStartOffset);
 
  public:
   const Token& nextToken() const {
     MOZ_ASSERT(hasLookahead());
     return tokens[nextCursor()];
   }
@@ -681,16 +745,32 @@ class TokenStreamAnyChars : public Token
   // ErrorReportMixin methods in TokenStream class.
   void reportErrorNoOffset(unsigned errorNumber, ...);
   void reportErrorNoOffsetVA(unsigned errorNumber, va_list* args);
 
   const JS::ReadOnlyCompileOptions& options() const { return options_; }
 
   const char* getFilename() const { return filename_; }
 
+ private:
+#if JS_COLUMN_DIMENSION_IS_CODE_POINTS()
+  static constexpr uint32_t ColumnChunkLength = 128;
+
+  /**
+   * Line number (of lines at least |ColumnChunkLength| code units long) to
+   * a sequence of the column numbers at |ColumnChunkLength| boundaries rewound
+   * (if needed) to the nearest code point boundary.
+   *
+   * Entries appear in this map only when a column computation of sufficient
+   * distance is performed on a line, and the vectors are lazily filled as
+   * greater offsets within lines require column computations.
+   */
+  mutable HashMap<uint32_t, Vector<uint32_t>> longLineColumnInfo_;
+#endif  // JS_COLUMN_DIMENSION_IS_CODE_POINTS()
+
  protected:
   // Options used for parsing/tokenizing.
   const JS::ReadOnlyCompileOptions& options_;
 
   Token tokens[ntokens];  // circular token buffer
  private:
   unsigned cursor_;  // index of last parsed token
  protected:
@@ -725,16 +805,39 @@ class TokenStreamAnyChars : public Token
    *       locality.)  Don't worry!  Initialization time for each TokenStream
    *       is trivial.  See bug 639420.
    */
   bool isExprEnding[size_t(TokenKind::Limit)] = {};  // all-false initially
 
   JSContext* const cx;
   bool mutedErrors;
   StrictModeGetter* strictModeGetter;  // used to test for strict mode
+
+#if JS_COLUMN_DIMENSION_IS_CODE_POINTS()
+  // Computing accurate column numbers requires at *some* point linearly
+  // iterating through prior source units in the line to properly account for
+  // multi-unit code points.  This is quadratic if counting happens repeatedly.
+  //
+  // But usually we need columns for advancing offsets through scripts.  By
+  // caching the last ((line number, offset) => relative column) mapping (in
+  // similar manner to how |SourceUnits::lastIndex_| is used to cache
+  // (offset => line number) mappings) we can usually avoid re-iterating through
+  // the common line prefix.
+  //
+  // Additionally, we avoid hash table lookup costs by caching the
+  // |Vector<uint32_t>*| for the line of the last lookup.  (|nullptr| means we
+  // have to look it up -- or it hasn't been created yet.)  This pointer is
+  // invalidated when a lookup on a new line occurs, but as it's not a pointer
+  // at literal element data, it's *not* invalidated when new entries are added
+  // to such a vector.
+  mutable uint32_t lineOfLastColumnComputation_ = UINT32_MAX;
+  mutable Vector<uint32_t>* lastChunkVectorForLine_ = nullptr;
+  mutable uint32_t lastOffsetOfComputedColumn_ = UINT32_MAX;
+  mutable uint32_t lastComputedColumn_ = 0;
+#endif  // JS_COLUMN_DIMENSION_IS_CODE_POINTS()
 };
 
 constexpr char16_t CodeUnitValue(char16_t unit) { return unit; }
 
 constexpr uint8_t CodeUnitValue(mozilla::Utf8Unit unit) {
   return unit.toUint8();
 }
 
@@ -1672,32 +1775,24 @@ class GeneralTokenStreamChars : public S
   TokenStreamSpecific* asSpecific() {
     static_assert(
         mozilla::IsBaseOf<GeneralTokenStreamChars, TokenStreamSpecific>::value,
         "static_cast below presumes an inheritance relationship");
 
     return static_cast<TokenStreamSpecific*>(this);
   }
 
- private:
-  // Computing accurate column numbers requires linearly iterating through all
-  // source units in the line to account for multi-unit code points; on long
-  // lines requiring many column computations, this becomes quadratic.
-  //
-  // However, because usually we need columns for advancing offsets through
-  // scripts, caching the last ((line number, offset) => relative column)
-  // mapping -- in similar manner to how |SourceUnits::lastIndex_| is used to
-  // cache (offset => line number) mappings -- lets us avoid re-iterating
-  // through the line prefix in most cases.
-
-  mutable uint32_t lastLineForColumn_ = UINT32_MAX;
-  mutable uint32_t lastOffsetForColumn_ = UINT32_MAX;
-  mutable uint32_t lastColumn_ = 0;
-
  protected:
+  /**
+   * Compute the column number in Unicode code points of the absolute |offset|
+   * within source text on the line corresponding to |lineToken|.
+   *
+   * |offset| must be a code point boundary, preceded only by validly-encoded
+   * source units.  (It doesn't have to be *followed* by valid source units.)
+   */
   uint32_t computeColumn(LineToken lineToken, uint32_t offset) const;
   void computeLineAndColumn(uint32_t offset, uint32_t* line,
                             uint32_t* column) const;
 
   /**
    * Fill in |err| completely, except for line-of-context information.
    *
    * Return true if the caller can compute a line of context from the token
new file mode 100644
--- /dev/null
+++ b/js/src/tests/non262/syntax/column-numbers-in-long-lines.js
@@ -0,0 +1,403 @@
+// |reftest| skip-if(!this.hasOwnProperty('Reflect')||!Reflect.parse) -- uses Reflect.parse(..., { loc: true}) to trigger the column-computing API
+/*
+ * Any copyright is dedicated to the Public Domain.
+ * http://creativecommons.org/licenses/publicdomain/
+ */
+
+//-----------------------------------------------------------------------------
+var BUGNUMBER = 1551916;
+var summary =
+  "Optimize computing a column number as count of code points by caching " +
+  "column numbers (and whether each chunk might contain anything multi-unit) " +
+  "and counting forward from them";
+
+print(BUGNUMBER + ": " + summary);
+
+/**************
+ * BEGIN TEST *
+ **************/
+
+// Various testing of column-number computations, with respect to counting as
+// code points or units, for very long lines.
+//
+// This test should be valid regardless whether
+// |JS_COLUMN_DIMENSION_IS_CODE_POINTS()| is 0 or 1.  It also *should* pass no
+// matter what valid value |TokenStreamAnyChars::ColumnChunkLength| takes (it
+// must be at least 4 so that the maximum-length code point in UTF-8/16 will fit
+// in a single chunk), because the value of that constant should be externally
+// invisible save for perf effects. (As a result, recompiling and running this
+// test with a variety of different values assigned to that constant is a good
+// smoke-test of column number computation, that doesn't require having to
+// closely inspect any column-related code.)
+//
+// However, this test is structured on the assumption that that constant has the
+// value 128, in order to exercise in targeted fashion various column number
+// computation edge cases.
+//
+// All this testing *could* be written to not be done with |Reflect.parse| --
+// backwards column computations happen even when compiling normal code, in some
+// cases.  But it's much more the exception than the rule.  And |Reflect.parse|
+// has *very* predictable column-computation operations (basically start/end
+// coordinates are computed immediately when the end of an AST node is reached)
+// that make it easier to recognize what the exact pattern of computations for
+// which offsets will look like.
+
+// Helper function for checking node location tersely.
+function checkLoc(node, expectedStart, expectedEnd)
+{
+  let start = node.loc.start;
+
+  assertEq(start.line, expectedStart[0],
+           "start line number must be as expected");
+  assertEq(start.column, expectedStart[1],
+           "start column number must be as expected");
+
+  let end = node.loc.end;
+
+  assertEq(end.line, expectedEnd[0], "end line number must be as expected");
+  assertEq(end.column, expectedEnd[1],
+           "end column number must be as expected");
+}
+
+function lengthInCodePoints(str)
+{
+  return [...str].length;
+}
+
+// True if column numbers are counts of code points, false otherwise.  This
+// constant can be used to short-circuit testing that isn't point/unit-agnostic.
+const columnsAreCodePoints = (function()
+{
+  var columnTypes = [];
+
+  function checkColumn(actual, expectedPoints, expectedUnits)
+  {
+    if (actual === expectedPoints)
+      columnTypes.push("p");
+    else if (actual === expectedUnits)
+      columnTypes.push("u");
+    else
+      columnTypes.push("x");
+  }
+
+  var script = Reflect.parse('"😱😱😱😱";', { loc: true });
+  assertEq(script.type, "Program");
+  assertEq(script.loc.start.line, 1);
+  assertEq(script.loc.end.line, 1);
+  assertEq(script.loc.start.column, 0);
+  checkColumn(script.loc.end.column, 7, 11);
+
+  var body = script.body;
+  assertEq(body.length, 1);
+
+  var stmt = body[0];
+  assertEq(stmt.type, "ExpressionStatement");
+  assertEq(stmt.loc.start.line, 1);
+  assertEq(stmt.loc.end.line, 1);
+  assertEq(stmt.loc.start.column, 0);
+  checkColumn(stmt.loc.end.column, 7, 11);
+
+  var expr = stmt.expression;
+  assertEq(expr.type, "Literal");
+  assertEq(expr.value, "😱😱😱😱");
+  assertEq(expr.loc.start.line, 1);
+  assertEq(expr.loc.end.line, 1);
+  assertEq(expr.loc.start.column, 0);
+  checkColumn(expr.loc.end.column, 6, 10);
+
+  var checkResult = columnTypes.join(",");
+
+  assertEq(checkResult === "p,p,p" || checkResult === "u,u,u", true,
+           "columns must be wholly code points or units: " + checkResult);
+
+  return checkResult === "p,p,p";
+})();
+
+// Start with some basic sanity-testing, without regard to exactly when, how, or
+// in what order (offset => column) computations are performed.
+function testSimple()
+{
+  if (!columnsAreCodePoints)
+    return;
+
+  // Array elements within the full |simpleCode| string constructed below are
+  // one-element arrays containing the string "😱😱#x" where "#" is the
+  // character that, in C++, could be written as |'(' + i| where |i| is the
+  // index of the array within the outer array.
+  let simpleCodeArray =
+    [
+      'var Q = [[',  // column 0, offset 0
+      // REPEAT
+      '"😱😱(x"],["',  // column 10, offset 10
+      '😱😱)x"],["😱',  // column 20, offset 22
+      '😱*x"],["😱😱',  // column 30, offset 35
+      '+x"],["😱😱,',  // column 40, offset 48
+      'x"],["😱😱-x',  // column 50, offset 60
+      '"],["😱😱.x"',  // column 60, offset 72
+      '],["😱😱/x"]',  // column 70, offset 84
+      ',["😱😱0x"],',  // column 80, offset 96
+      '["😱😱1x"],[',  // column 90, offset 108
+      // REPEAT
+      '"😱😱2x"],["',  // column 100, offset 120 -- chunk limit between "]
+      '😱😱3x"],["😱',  // column 110, offset 132
+      '😱4x"],["😱😱',  // column 120, offset 145
+      '5x"],["😱😱6',  // column 130, offset 158
+      'x"],["😱😱7x',  // column 140, offset 170
+      '"],["😱😱8x"',  // column 150, offset 182
+      '],["😱😱9x"]',  // column 160, offset 194
+      ',["😱😱:x"],',  // column 170, offset 206
+      '["😱😱;x"],[',  // column 180, offset 218
+      // REPEAT
+      '"😱😱<x"],["',  // column 190, offset 230
+      '😱😱=x"],["😱',  // column 200, offset 242
+      '😱>x"],["😱😱',  // column 210, offset 255 -- chunk limit splits first 😱
+      '?x"],["😱😱@',  // column 220, offset 268
+      'x"],["😱😱Ax',  // column 230, offset 280
+      '"],["😱😱Bx"',  // column 240, offset 292
+      '],["😱😱Cx"]',  // column 250, offset 304
+      ',["😱😱Dx"],',  // column 260, offset 316
+      '["😱😱Ex"],[',  // column 270, offset 328
+      // REPEAT
+      '"😱😱Fx"],["',  // column 280, offset 340
+      '😱😱Gx"],["😱',  // column 290, offset 352
+      '😱Hx"],["😱😱',  // column 300, offset 365
+      'Ix"],["😱😱J',  // column 310, offset 378 -- chunk limit between ["
+      'x"],["😱😱Kx',  // column 320, offset 390
+      '"],["😱😱Lx"',  // column 330, offset 402
+      '],["😱😱Mx"]',  // column 340, offset 414
+      ',["😱😱Nx"],',  // column 350, offset 426
+      '["😱😱Ox"]];',  // column 360 (10 long), offset 438 (+12 to end)
+    ];
+  let simpleCode = simpleCodeArray.join("");
+
+  // |simpleCode| overall contains this many code points.  (This number is
+  // chosen to be several |TokenStreamAnyChars::ColumnChunkLength = 128| chunks
+  // long so that long-line handling is exercised, and the relevant vector
+  // increased in length, for more than one chunk [which would be too short to
+  // trigger chunking] and for more than two chunks [so that vector extension
+  // will eventually occur].)
+  const CodePointLength = 370;
+
+  assertEq(lengthInCodePoints(simpleCode), CodePointLength,
+           "code point count should be correct");
+
+  // |simpleCodeArray| contains this many REPEAT-delimited cycles.
+  const RepetitionNumber = 4;
+
+  // Each cycle consists of this many elements.
+  const ElementsPerCycle = 9;
+
+  // Each element in a cycle has at least this many 😱.
+  const MinFaceScreamingPerElementInCycle = 2;
+
+  // Each cycle consists of many elements with three 😱.
+  const ElementsInCycleWithThreeFaceScreaming = 2;
+
+  // Compute the overall number of UTF-16 code units.  (UTF-16 because this is a
+  // JS string as input.)
+  const OverallCodeUnitCount =
+    CodePointLength +
+    RepetitionNumber * (ElementsPerCycle * MinFaceScreamingPerElementInCycle +
+                        ElementsInCycleWithThreeFaceScreaming);
+
+  // Code units != code points.
+  assertEq(OverallCodeUnitCount > CodePointLength, true,
+           "string contains code points outside BMP, so length in units " +
+           "exceeds length in points");
+
+  // The overall computed number of code units has this exact numeric value.
+  assertEq(OverallCodeUnitCount, 450,
+           "code unit count computation produces this value");
+
+  // The overall computed number of code units matches the string length.
+  assertEq(simpleCode.length, OverallCodeUnitCount, "string length must match");
+
+  // Evaluate the string.
+  var Q;
+  eval(simpleCode);
+
+  // Verify characteristics of the resulting execution.
+  assertEq(Array.isArray(Q), true);
+
+  const NumArrayElements = 40;
+  assertEq(Q.length, NumArrayElements);
+  Q.forEach((v, i) => {
+    assertEq(Array.isArray(v), true);
+    assertEq(v.length, 1);
+    assertEq(v[0], "😱😱" + String.fromCharCode('('.charCodeAt(0) + i) + "x");
+  });
+
+  let parseTree = Reflect.parse(simpleCode, { loc: true });
+
+  // Check the overall script.
+  assertEq(parseTree.type, "Program");
+  checkLoc(parseTree, [1, 0], [1, 370]);
+  assertEq(parseTree.body.length, 1);
+
+  // Check the coordinates of the declaration.
+  let varDecl = parseTree.body[0];
+  assertEq(varDecl.type, "VariableDeclaration");
+  checkLoc(varDecl, [1, 0], [1, 369]);
+
+  // ...and its initializing expression.
+  let varInit = varDecl.declarations[0].init;
+  assertEq(varInit.type, "ArrayExpression");
+  checkLoc(varInit, [1, 8], [1, 369]);
+
+  // ...and then every literal inside it.
+  assertEq(varInit.elements.length, NumArrayElements, "array literal length");
+
+  const ItemLength = lengthInCodePoints('["😱😱#x"],');
+  assertEq(ItemLength, 9, "item length check");
+
+  for (let i = 0; i < NumArrayElements; i++)
+  {
+    let elem = varInit.elements[i];
+    assertEq(elem.type, "ArrayExpression");
+
+    let startCol = 9 + i * ItemLength;
+    let endCol = startCol + ItemLength - 1;
+    checkLoc(elem, [1, startCol], [1, endCol]);
+
+    let arrayElems = elem.elements;
+    assertEq(arrayElems.length, 1);
+
+    let str = arrayElems[0];
+    assertEq(str.type, "Literal");
+    assertEq(str.value,
+             "😱😱" + String.fromCharCode('('.charCodeAt(0) + i) + "x");
+    checkLoc(str, [1, startCol + 1], [1, endCol - 1]);
+  }
+}
+testSimple();
+
+// Test |ChunkInfo::unitsType() == UnitsType::GuaranteedSingleUnit| -- not that
+// it should be observable, precisely, but effects of mis-applying or
+// miscomputing it would in principle be observable if such were happening.
+// This test also is intended to to be useful for (manually, in a debugger)
+// verifying that the optimization is computed and kicks in correctly.
+function testGuaranteedSingleUnit()
+{
+  if (!columnsAreCodePoints)
+    return;
+
+  // Begin a few array literals in a first chunk to test column computation in
+  // that first chunk.
+  //
+  // End some of them in the first chunk to test columns *before* we know we
+  // have a long line.
+  //
+  // End one array *outside* the first chunk to test a computation inside a
+  // first chunk *after* we know we have a long line and have computed a first
+  // chunk.
+  let mixedChunksCode = "var Z = [ [ [],"; // column 0, offset 0
+  assertEq(mixedChunksCode.length, 15);
+  assertEq(lengthInCodePoints(mixedChunksCode), 15);
+
+  mixedChunksCode +=
+    " ".repeat(128 - mixedChunksCode.length); // column 15, offset 15
+  assertEq(mixedChunksCode.length, 128);
+  assertEq(lengthInCodePoints(mixedChunksCode), 128);
+
+  // Fill out a second chunk as also single-unit, with an outer array literal
+  // that begins in this chunk but finishes in the next (to test column
+  // computation in a prior, guaranteed-single-unit chunk).
+  mixedChunksCode += "[" + "[],".repeat(42) + " "; // column 128, offset 128
+  assertEq(mixedChunksCode.length, 256);
+  assertEq(lengthInCodePoints(mixedChunksCode), 256);
+
+  // Add a third chunk with one last empty nested array literal (so that we
+  // tack on another chunk, and conclude the second chunk is single-unit, before
+  // closing the enclosing array literal).  Then close the enclosing array
+  // literal.  Finally start a new string literal element containing
+  // multi-unit code points.  For good measure, make the chunk *end* in the
+  // middle of such a code point, so that the relevant chunk limit must be
+  // retracted one code unit.
+  mixedChunksCode += "[] ], '" + "😱".repeat(61); // column 256, offset 256
+  assertEq(mixedChunksCode.length, 384 + 1);
+  assertEq(lengthInCodePoints(mixedChunksCode), 324);
+
+  // Wrap things up.  Terminate the string, then terminate the nested array
+  // literal to trigger a column computation within the first chunk that can
+  // benefit from knowing the first chunk is all single-unit.  Next add a *new*
+  // element to the outermost array, a string literal that contains a line
+  // terminator.  The terminator invalidates the column computation cache, so
+  // when the outermost array is closed, location info for it will not hit the
+  // cache.  Finally, tack on the terminating semicolon for good measure.
+  mixedChunksCode += "' ], '\u2028' ];"; // column 324, offset 385
+  assertEq(mixedChunksCode.length, 396);
+  assertEq(lengthInCodePoints(mixedChunksCode), 335);
+
+  let parseTree = Reflect.parse(mixedChunksCode, { loc: true });
+
+  // Check the overall script.
+  assertEq(parseTree.type, "Program");
+  checkLoc(parseTree, [1, 0], [2, 4]);
+  assertEq(parseTree.body.length, 1);
+
+  // Check the coordinates of the declaration.
+  let varDecl = parseTree.body[0];
+  assertEq(varDecl.type, "VariableDeclaration");
+  checkLoc(varDecl, [1, 0], [2, 3]);
+
+  // ...and its initializing expression.
+  let varInit = varDecl.declarations[0].init;
+  assertEq(varInit.type, "ArrayExpression");
+  checkLoc(varInit, [1, 8], [2, 3]);
+
+  let outerArrayElements = varInit.elements;
+  assertEq(outerArrayElements.length, 2);
+
+  {
+    // Next the first element, the array inside the initializing expression.
+    let nestedArray = varInit.elements[0];
+    assertEq(nestedArray.type, "ArrayExpression");
+    checkLoc(nestedArray, [1, 10], [1, 327]);
+
+    // Now inside that nested array.
+    let nestedArrayElements = nestedArray.elements;
+    assertEq(nestedArrayElements.length, 3);
+
+    // First the [] in chunk #0
+    let emptyArray = nestedArrayElements[0];
+    assertEq(emptyArray.type, "ArrayExpression");
+    assertEq(emptyArray.elements.length, 0);
+    checkLoc(emptyArray, [1, 12], [1, 14]);
+
+    // Then the big array of empty arrays starting in chunk #1 and ending just
+    // barely in chunk #2.
+    let bigArrayOfEmpties = nestedArrayElements[1];
+    assertEq(bigArrayOfEmpties.type, "ArrayExpression");
+    assertEq(bigArrayOfEmpties.elements.length, 42 + 1);
+    bigArrayOfEmpties.elements.forEach((elem, i) => {
+      assertEq(elem.type, "ArrayExpression");
+      assertEq(elem.elements.length, 0);
+      if (i !== 42)
+        checkLoc(elem, [1, 129 + i * 3], [1, 131 + i * 3]);
+      else
+        checkLoc(elem, [1, 256], [1, 258]); // last element was hand-placed
+    });
+
+    // Then the string literal of multi-unit code points beginning in chunk #2
+    // and ending just into chunk #3 on a second line.
+    let multiUnitStringLiteral = nestedArrayElements[2];
+    assertEq(multiUnitStringLiteral.type, "Literal");
+    assertEq(multiUnitStringLiteral.value, "😱".repeat(61));
+    checkLoc(multiUnitStringLiteral, [1, 262], [1, 325]);
+  }
+
+  {
+    // Finally, the string literal containing a line terminator as element in
+    // the outermost array.
+    let stringLiteralWithEmbeddedTerminator = outerArrayElements[1];
+    assertEq(stringLiteralWithEmbeddedTerminator.type, "Literal");
+    assertEq(stringLiteralWithEmbeddedTerminator.value, "\u2028");
+    checkLoc(stringLiteralWithEmbeddedTerminator, [1, 329], [2, 1]);
+  }
+}
+testGuaranteedSingleUnit();
+
+if (typeof reportCompare === "function")
+  reportCompare(true, true);
+
+print("Testing completed");