Bug 1056516 - let auto hyphen honor manual hyphen when hyphens:auto is set. r=jfkthame
author: Jeremy Chen <jeremychen@mozilla.com>
Mon, 13 Mar 2017 12:54:05 +0800
changeset 497506 31ceca5176daa892a27ff4fc4ca4db292f342aad
parent 497505 35671fd90af7cc1189587721004fb3d6a9f7fa75
child 497507 f50781ae2a854c844a7bd38c4d80b7c93cd22a11
push id: 48933
push user: bmo:james@hoppipolla.co.uk
push date: Mon, 13 Mar 2017 13:53:04 +0000
reviewers: jfkthame
bugs: 1056516
milestone: 55.0a1
Bug 1056516 - let auto hyphen honor manual hyphen when hyphens:auto is set. r=jfkthame MozReview-Commit-ID: F3ALAJRHned
gfx/thebes/gfxTextRun.cpp
gfx/thebes/gfxTextRun.h
layout/generic/nsTextFrame.cpp
--- a/gfx/thebes/gfxTextRun.cpp
+++ b/gfx/thebes/gfxTextRun.cpp
@@ -836,16 +836,83 @@ gfxTextRun::MeasureText(Range aRange,
             iter.GetGlyphRun()->mOrientation, &accumulatedMetrics);
     }
 
     return accumulatedMetrics;
 }
 
 #define MEASUREMENT_BUFFER_SIZE 100
 
+void
+gfxTextRun::ClassifyAutoHyphenations(uint32_t aStart, Range aRange,
+                                     nsTArray<HyphenType>& aHyphenBuffer,
+                                     HyphenationState* aWordState)
+{
+  NS_PRECONDITION(aRange.end - aStart <= aHyphenBuffer.Length() &&
+                  aRange.start >= aStart, "Range out of bounds");
+  MOZ_ASSERT(aWordState->mostRecentBoundary >= aStart,
+             "Unexpected aMostRecentWordBoundary!!");
+  // Rescan from the current word's start (it may precede aRange.start).
+  uint32_t start = std::min<uint32_t>(aRange.start, aWordState->mostRecentBoundary);
+
+  for (uint32_t i = start; i < aRange.end; ++i) {
+    if (aHyphenBuffer[i - aStart] == HyphenType::Explicit &&
+        !aWordState->hasExplicitHyphen) {
+      aWordState->hasExplicitHyphen = true;
+    }
+    if (!aWordState->hasManualHyphen &&
+        (aHyphenBuffer[i - aStart] == HyphenType::Soft ||
+         aHyphenBuffer[i - aStart] == HyphenType::Explicit)) {
+      aWordState->hasManualHyphen = true;
+      // This is the first manual hyphen in the current word. Only now do we
+      // know that the word contains a manual hyphen, so we need to go back
+      // and reclassify any auto hyphens between the start of the current
+      // word and this manual hyphen.
+      if (aWordState->hasAutoHyphen) {
+        for (uint32_t j = aWordState->mostRecentBoundary; j < i; j++) {
+          if (aHyphenBuffer[j - aStart] == HyphenType::AutoWithoutManualInSameWord) {
+            aHyphenBuffer[j - aStart] = HyphenType::AutoWithManualInSameWord;
+          }
+        }
+      }
+    }
+    if (aHyphenBuffer[i - aStart] == HyphenType::AutoWithoutManualInSameWord) {
+      if (!aWordState->hasAutoHyphen) {
+        aWordState->hasAutoHyphen = true;
+      }
+      if (aWordState->hasManualHyphen) {
+        aHyphenBuffer[i - aStart] = HyphenType::AutoWithManualInSameWord;
+      }
+    }
+
+    // If we're at a word boundary, reset the per-word state.
+    if (mCharacterGlyphs[i].CharIsSpace() ||
+        mCharacterGlyphs[i].CharIsTab() ||
+        mCharacterGlyphs[i].CharIsNewline() ||
+        // There is no boundary character at the end of the string, so treat
+        // the last character of the text run as a word boundary too.
+        i == GetLength() - 1) {
+      // Only at the end of a word do we know whether to keep or clear its
+      // explicit hyphens, because that depends on whether the word contains
+      // at least one auto hyphen; if it contains none, clear them to None.
+      if (!aWordState->hasAutoHyphen && aWordState->hasExplicitHyphen) {
+        for (uint32_t j = aWordState->mostRecentBoundary; j <= i; j++) {
+          if (aHyphenBuffer[j - aStart] == HyphenType::Explicit) {
+            aHyphenBuffer[j - aStart] = HyphenType::None;
+          }
+        }
+      }
+      aWordState->mostRecentBoundary = i;
+      aWordState->hasManualHyphen = false;
+      aWordState->hasAutoHyphen = false;
+      aWordState->hasExplicitHyphen = false;
+    }
+  }
+}
+
 uint32_t
 gfxTextRun::BreakAndMeasureText(uint32_t aStart, uint32_t aMaxLength,
                                 bool aLineBreakBefore, gfxFloat aWidth,
                                 PropertyProvider *aProvider,
                                 SuppressBreak aSuppressBreak,
                                 gfxFloat *aTrimWhitespace,
                                 bool aWhitespaceCanHang,
                                 Metrics *aMetrics,
@@ -863,46 +930,60 @@ gfxTextRun::BreakAndMeasureText(uint32_t
     Range bufferRange(aStart, aStart +
         std::min<uint32_t>(aMaxLength, MEASUREMENT_BUFFER_SIZE));
     PropertyProvider::Spacing spacingBuffer[MEASUREMENT_BUFFER_SIZE];
     bool haveSpacing = aProvider && (mFlags & gfxTextRunFactory::TEXT_ENABLE_SPACING) != 0;
     if (haveSpacing) {
         GetAdjustedSpacing(this, bufferRange, aProvider, spacingBuffer);
     }
     AutoTArray<HyphenType, 4096> hyphenBuffer;
+    HyphenationState wordState;
+    wordState.mostRecentBoundary = aStart;
     bool haveHyphenation = aProvider &&
         (aProvider->GetHyphensOption() == StyleHyphens::Auto ||
          (aProvider->GetHyphensOption() == StyleHyphens::Manual &&
           (mFlags & gfxTextRunFactory::TEXT_ENABLE_HYPHEN_BREAKS) != 0));
     if (haveHyphenation) {
         if (hyphenBuffer.AppendElements(bufferRange.Length(), fallible)) {
             aProvider->GetHyphenationBreaks(bufferRange, hyphenBuffer.Elements());
+            if (aProvider->GetHyphensOption() == StyleHyphens::Auto) {
+                ClassifyAutoHyphenations(aStart, bufferRange, hyphenBuffer,
+                                         &wordState);
+            }
         } else {
             haveHyphenation = false;
         }
     }
 
     gfxFloat width = 0;
     gfxFloat advance = 0;
     // The number of space characters that can be trimmed or hang at a soft-wrap
     uint32_t trimmableChars = 0;
     // The amount of space removed by ignoring trimmableChars
     gfxFloat trimmableAdvance = 0;
     int32_t lastBreak = -1;
     int32_t lastBreakTrimmableChars = -1;
     gfxFloat lastBreakTrimmableAdvance = -1;
+    // Cache the last candidate break
+    int32_t lastCandidateBreak = -1;
+    int32_t lastCandidateBreakTrimmableChars = -1;
+    gfxFloat lastCandidateBreakTrimmableAdvance = -1;
+    bool lastCandidateBreakUsedHyphenation = false;
+    gfxBreakPriority lastCandidateBreakPriority = gfxBreakPriority::eNoBreak;
     bool aborted = false;
     uint32_t end = aStart + aMaxLength;
     bool lastBreakUsedHyphenation = false;
-
     Range ligatureRange(aStart, end);
     ShrinkToLigatureBoundaries(&ligatureRange);
 
-    uint32_t i;
-    for (i = aStart; i < end; ++i) {
+    // We may need to move `i` backwards in the following loop, and re-scan
+    // part of the textrun; we'll use `rescanLimit` so we can tell when that
+    // is happening: if `i < rescanLimit` then we're rescanning.
+    uint32_t rescanLimit = aStart;
+    for (uint32_t i = aStart; i < end; ++i) {
         if (i >= bufferRange.end) {
             // Fetch more spacing and hyphenation data
             uint32_t oldHyphenBufferLength = hyphenBuffer.Length();
             bufferRange.start = i;
             bufferRange.end = std::min(aStart + aMaxLength,
                                        i + MEASUREMENT_BUFFER_SIZE);
             // For spacing, we always overwrite the old data with the newly
             // fetched one. However, for hyphenation, hyphenation data sometimes
@@ -915,62 +996,106 @@ gfxTextRun::BreakAndMeasureText(uint32_t
             // when getting more data.
             if (haveSpacing) {
                 GetAdjustedSpacing(this, bufferRange, aProvider, spacingBuffer);
             }
             if (haveHyphenation) {
                 if (hyphenBuffer.AppendElements(bufferRange.Length(), fallible)) {
                     aProvider->GetHyphenationBreaks(
                         bufferRange, hyphenBuffer.Elements() + oldHyphenBufferLength);
+                    if (aProvider->GetHyphensOption() == StyleHyphens::Auto) {
+                        uint32_t prevMostRecentWordBoundary = wordState.mostRecentBoundary;
+                        ClassifyAutoHyphenations(aStart, bufferRange, hyphenBuffer,
+                                                 &wordState);
+                        // If the buffer boundary is in the middle of a word,
+                        // we need to go back to the start of the current word
+                        // so that we can correct the wrong candidates that we
+                        // set in the previous iterations of the loop.
+                        if (prevMostRecentWordBoundary < oldHyphenBufferLength) {
+                            rescanLimit = i;
+                            i = prevMostRecentWordBoundary - 1;
+                            continue;
+                        }
+                    }
                 } else {
                     haveHyphenation = false;
                 }
             }
         }
 
         // There can't be a word-wrap break opportunity at the beginning of the
         // line: if the width is too small for even one character to fit, it
         // could be the first and last break opportunity on the line, and that
         // would trigger an infinite loop.
         if (aSuppressBreak != eSuppressAllBreaks &&
             (aSuppressBreak != eSuppressInitialBreak || i > aStart)) {
             bool atNaturalBreak = mCharacterGlyphs[i].CanBreakBefore() == 1;
             bool atHyphenationBreak = !atNaturalBreak && haveHyphenation &&
                 hyphenBuffer[i - aStart] != HyphenType::None;
+            bool atAutoHyphenWithManualHyphenInSameWord = atHyphenationBreak &&
+                hyphenBuffer[i - aStart] == HyphenType::AutoWithManualInSameWord;
             bool atBreak = atNaturalBreak || atHyphenationBreak;
             bool wordWrapping =
                 aCanWordWrap && mCharacterGlyphs[i].IsClusterStart() &&
                 *aBreakPriority <= gfxBreakPriority::eWordWrapBreak;
 
             if (atBreak || wordWrapping) {
                 gfxFloat hyphenatedAdvance = advance;
                 if (atHyphenationBreak) {
                     hyphenatedAdvance += aProvider->GetHyphenWidth();
                 }
 
-                if (lastBreak < 0 || width + hyphenatedAdvance - trimmableAdvance <= aWidth) {
+                if (lastBreak < 0 ||
+                    width + hyphenatedAdvance - trimmableAdvance <= aWidth) {
                     // We can break here.
                     lastBreak = i;
                     lastBreakTrimmableChars = trimmableChars;
                     lastBreakTrimmableAdvance = trimmableAdvance;
                     lastBreakUsedHyphenation = atHyphenationBreak;
                     *aBreakPriority = atBreak ? gfxBreakPriority::eNormalBreak
                                               : gfxBreakPriority::eWordWrapBreak;
                 }
 
                 width += advance;
                 advance = 0;
                 if (width - trimmableAdvance > aWidth) {
                     // No more text fits. Abort
                     aborted = true;
                     break;
                 }
+                // There are various kinds of break opportunities:
+                // 1. word wrap break,
+                // 2. natural break,
+                // 3. manual hyphenation break,
+                // 4. auto hyphenation break without any manual hyphenation
+                //    in the same word,
+                // 5. auto hyphenation break with another manual hyphenation
+                //    in the same word.
+                // Allow all of them except the last one to be a candidate,
+                // which ensures that we don't use an automatic hyphenation
+                // opportunity within a word that also contains a manual
+                // hyphenation, unless it is the only choice.
+                if (wordWrapping ||
+                    !atAutoHyphenWithManualHyphenInSameWord) {
+                    lastCandidateBreak = lastBreak;
+                    lastCandidateBreakTrimmableChars = lastBreakTrimmableChars;
+                    lastCandidateBreakTrimmableAdvance = lastBreakTrimmableAdvance;
+                    lastCandidateBreakUsedHyphenation = lastBreakUsedHyphenation;
+                    lastCandidateBreakPriority = *aBreakPriority;
+                }
             }
         }
 
+        // If we're re-scanning part of a word (to re-process potential
+        // hyphenation types) then we don't want to accumulate widths again
+        // for the characters that were already added to `advance`.
+        if (i < rescanLimit) {
+            continue;
+        }
+
         gfxFloat charAdvance;
         if (i >= ligatureRange.start && i < ligatureRange.end) {
             charAdvance = GetAdvanceForGlyphs(Range(i, i + 1));
             if (haveSpacing) {
                 PropertyProvider::Spacing *space =
                     &spacingBuffer[i - bufferRange.start];
                 charAdvance += space->mBefore + space->mAfter;
             }
@@ -999,16 +1124,23 @@ gfxTextRun::BreakAndMeasureText(uint32_t
     // 1) all the text fit (width <= aWidth)
     // 2) some of the text fit up to a break opportunity (width > aWidth && lastBreak >= 0)
     // 3) none of the text fits before a break opportunity (width > aWidth && lastBreak < 0)
     uint32_t charsFit;
     bool usedHyphenation = false;
     if (width - trimmableAdvance <= aWidth) {
         charsFit = aMaxLength;
     } else if (lastBreak >= 0) {
+        if (lastCandidateBreak >= 0 && lastCandidateBreak != lastBreak) {
+            lastBreak = lastCandidateBreak;
+            lastBreakTrimmableChars = lastCandidateBreakTrimmableChars;
+            lastBreakTrimmableAdvance = lastCandidateBreakTrimmableAdvance;
+            lastBreakUsedHyphenation = lastCandidateBreakUsedHyphenation;
+            *aBreakPriority = lastCandidateBreakPriority;
+        }
         charsFit = lastBreak - aStart;
         trimmableChars = lastBreakTrimmableChars;
         trimmableAdvance = lastBreakTrimmableAdvance;
         usedHyphenation = lastBreakUsedHyphenation;
     } else {
         charsFit = aMaxLength;
     }
 
--- a/gfx/thebes/gfxTextRun.h
+++ b/gfx/thebes/gfxTextRun.h
@@ -172,18 +172,27 @@ public:
      * @return true if this changed the linebreaks, false if the new line
      * breaks are the same as the old
      */
     virtual bool SetPotentialLineBreaks(Range aRange,
                                         const uint8_t* aBreakBefore);
 
     enum class HyphenType : uint8_t {
       None,
-      Manual,
-      Auto
+      Explicit, // break at a literal hyphen/dash character in the text
+      Soft, // manual break not at a literal hyphen; presumably U+00AD — TODO confirm
+      AutoWithManualInSameWord, // auto break in a word that also has a Soft/Explicit one
+      AutoWithoutManualInSameWord // auto break in a word with no Soft/Explicit one
+    };
+
+    struct HyphenationState { // per-word scan state for ClassifyAutoHyphenations
+      uint32_t mostRecentBoundary = 0; // offset of the most recent word boundary seen
+      bool     hasManualHyphen = false; // current word has a Soft or Explicit hyphen
+      bool     hasExplicitHyphen = false; // current word has an Explicit hyphen
+      bool     hasAutoHyphen = false; // current word has an auto hyphenation point
     };
 
     /**
      * Layout provides PropertyProvider objects. These allow detection of
      * potential line break points and computation of spacing. We pass the data
      * this way to allow lazy data acquisition; for example BreakAndMeasureText
      * will want to only ask for properties of text it's actually looking at.
      *
@@ -339,16 +348,20 @@ public:
     enum SuppressBreak {
       eNoSuppressBreak,
       // Measure the range of text as if there is no break before it.
       eSuppressInitialBreak,
       // Measure the range of text as if it contains no break
       eSuppressAllBreaks
     };
 
+    void ClassifyAutoHyphenations(uint32_t aStart, Range aRange,
+                                  nsTArray<HyphenType>& aHyphenBuffer,
+                                  HyphenationState* aWordState);
+
     /**
      * Finds the longest substring that will fit into the given width.
      * Uses GetHyphenationBreaks and GetSpacing from aBreakProvider.
      * Guarantees the following:
      * -- 0 <= result <= aMaxLength
      * -- result is the maximal value of N such that either
      *       N < aMaxLength && line break at N && GetAdvanceWidth(aStart, N) <= aWidth
      *   OR  N < aMaxLength && hyphen break at N && GetAdvanceWidth(aStart, N) + GetHyphenWidth() <= aWidth
--- a/layout/generic/nsTextFrame.cpp
+++ b/layout/generic/nsTextFrame.cpp
@@ -3177,17 +3177,16 @@ public:
 
   /**
    * Compute the justification information in given DOM range, return
    * justification info and assignments if requested.
    */
   JustificationInfo ComputeJustification(
     Range aRange, nsTArray<JustificationAssignment>* aAssignments = nullptr);
 
-  const nsStyleText* StyleText() { return mTextStyle; }
   nsTextFrame* GetFrame() { return mFrame; }
   // This may not be equal to the frame offset/length in because we may have
   // adjusted for whitespace trimming according to the state bits set in the frame
   // (for the static provider)
   const gfxSkipCharsIterator& GetStart() const { return mStart; }
   // May return INT32_MAX if that was given to the constructor
   uint32_t GetOriginalLength() const {
     NS_ASSERTION(mLength != INT32_MAX, "Length not known");
@@ -3592,16 +3591,26 @@ gfxFloat
 PropertyProvider::GetHyphenWidth()
 {
   if (mHyphenWidth < 0) {
     mHyphenWidth = GetFontGroup()->GetHyphenWidth(this);
   }
   return mHyphenWidth + mLetterSpacing;
 }
 
+static inline bool
+IS_HYPHEN(char16_t u) // true if u is one of the hyphen/dash characters below
+{
+  return (u == char16_t('-') || // HYPHEN-MINUS
+          u == 0x058A || // ARMENIAN HYPHEN
+          u == 0x2010 || // HYPHEN
+          u == 0x2012 || // FIGURE DASH
+          u == 0x2013);  // EN DASH
+}
+
 void
 PropertyProvider::GetHyphenationBreaks(Range aRange, HyphenType* aBreakBefore)
 {
   NS_PRECONDITION(IsInBounds(mStart, mLength, aRange), "Range out of bounds");
   NS_PRECONDITION(mLength != INT32_MAX, "Can't call this with undefined length");
 
   if (!mTextStyle->WhiteSpaceCanWrap(mFrame) ||
       mTextStyle->mHyphens == StyleHyphens::None)
@@ -3637,26 +3646,34 @@ PropertyProvider::GetHyphenationBreaks(R
       memset(aBreakBefore + runOffsetInSubstring,
              static_cast<uint8_t>(HyphenType::None),
              run.GetRunLength() * sizeof(HyphenType));
       // Don't allow hyphen breaks at the start of the line
       aBreakBefore[runOffsetInSubstring] =
           allowHyphenBreakBeforeNextChar &&
           (!(mFrame->GetStateBits() & TEXT_START_OF_LINE) ||
            run.GetSkippedOffset() > mStart.GetSkippedOffset())
-          ? HyphenType::Manual
+          ? HyphenType::Soft
           : HyphenType::None;
       allowHyphenBreakBeforeNextChar = false;
     }
   }
 
   if (mTextStyle->mHyphens == StyleHyphens::Auto) {
     for (uint32_t i = 0; i < aRange.Length(); ++i) {
-      if (mTextRun->CanHyphenateBefore(aRange.start + i)) {
-        aBreakBefore[i] = HyphenType::Auto;
+      int32_t fragIndex = mFrag->GetLength() > aRange.end ?
+                          aRange.start + i : i;
+      if (IS_HYPHEN(mFrag->CharAt(fragIndex))) {
+        aBreakBefore[i] = HyphenType::Explicit;
+        continue;
+      }
+
+      if (mTextRun->CanHyphenateBefore(aRange.start + i) &&
+          aBreakBefore[i] == HyphenType::None) {
+        aBreakBefore[i] = HyphenType::AutoWithoutManualInSameWord;
       }
     }
   }
 }
 
 void
 PropertyProvider::InitializeForDisplay(bool aTrimAfter)
 {