bug 911849 - Unicode characters U+FEFF (zero-width no-break space) and U+2060 (word joiner) should inhibit line-breaking. r=masayuki
author: Jonathan Kew <jkew@mozilla.com>
Thu, 05 Sep 2013 14:23:03 +0100
changeset 145658 2e7256075903ff62ae3f62f03cb95885db467f28
parent 145657 4880f320522131843c7b7fd302c47b75c32e0eeb
child 145659 73be965472ba42c09b737d09e610ec3040652f3c
push id: 33343
push user: jkew@mozilla.com
push date: Thu, 05 Sep 2013 13:25:43 +0000
treeherder: mozilla-inbound@73be965472ba [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: masayuki
bugs: 911849
milestone: 26.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
bug 911849 - Unicode characters U+FEFF (zero-width no-break space) and U+2060 (word joiner) should inhibit line-breaking. r=masayuki
intl/lwbrk/src/jisx4051class.h
intl/lwbrk/src/nsJISx4051LineBreaker.cpp
intl/lwbrk/tools/anzx4051.html
intl/lwbrk/tools/jisx4051class.txt
--- a/intl/lwbrk/src/jisx4051class.h
+++ b/intl/lwbrk/src/jisx4051class.h
@@ -49,17 +49,17 @@ 0x77A777A7, // U+2018 - U+201F
 0xAAAA7777, // U+2020 - U+2027
 0xB7777777, // U+2028 - U+202F
 0x77744444, // U+2030 - U+2037
 0x7A115107, // U+2038 - U+203F
 0x11017777, // U+2040 - U+2047
 0x77777711, // U+2048 - U+204F
 0x77777777, // U+2050 - U+2057
 0x57777777, // U+2058 - U+205F
-0x77777777, // U+2060 - U+2067
+0x7777777B, // U+2060 - U+2067
 0x77777777, // U+2068 - U+206F
 0x77777777, // U+2070 - U+2077
 0x77777777, // U+2078 - U+207F
 0x77777777, // U+2080 - U+2087
 0x77777777, // U+2088 - U+208F
 0x77777777, // U+2090 - U+2097
 0x77777777, // U+2098 - U+209F
 0x77777777, // U+20A0 - U+20A7
--- a/intl/lwbrk/src/nsJISx4051LineBreaker.cpp
+++ b/intl/lwbrk/src/nsJISx4051LineBreaker.cpp
@@ -492,16 +492,18 @@ GetClass(PRUnichar u)
      else
        c = CLASS_CHARACTER;
    } else if (0x1600 == h) {
      if (0x80 == l) { // U+1680 OGHAM SPACE MARK
        c = CLASS_BREAKABLE;
      } else {
        c = CLASS_CHARACTER;
      }
+   } else if (u == 0xfeff) {
+     c = CLASS_NON_BREAKABLE;
    } else {
      c = CLASS_CHARACTER; // others
    }
    return c;
 }
 
 static bool
 GetPair(int8_t c1, int8_t c2)
--- a/intl/lwbrk/tools/anzx4051.html
+++ b/intl/lwbrk/tools/anzx4051.html
@@ -321,26 +321,26 @@ Analysis of JIS X 4051 to Unicode Genera
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 </TR>
 <TR><TH>07_18<TH>
-<TD>19</TD>
+<TD>18</TD>
 <TD>157</TD>
 <TD></TD>
 <TD>33</TD>
 <TD>56</TD>
 <TD>125</TD>
 <TD>2</TD>
-<TD BGCOLOR=white>392</TD>
+<TD BGCOLOR=white>391</TD>
 <TD></TD>
-<TD>19</TD>
+<TD>18</TD>
 <TD></TD>
 <TD></TD>
 <TD>64</TD>
 <TD>7</TD>
 <TD>5</TD>
 <TD></TD>
 <TD>81</TD>
 <TD></TD>
@@ -477,26 +477,26 @@ Analysis of JIS X 4051 to Unicode Genera
 <TD>3</TD>
 <TD>7</TD>
 <TD>2</TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 </TR>
 <TR><TH>0B_[e]<TH>
-<TD></TD>
+<TD>1</TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD>1</TD>
 <TD>1</TD>
 <TD>3</TD>
-<TD BGCOLOR=white>5</TD>
+<TD BGCOLOR=white>6</TD>
 <TD></TD>
-<TD></TD>
+<TD>1</TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
@@ -624,21 +624,21 @@ Analysis of JIS X 4051 to Unicode Genera
 <TR><TH>20<TH>
 <TD>2</TD>
 <TD>8</TD>
 <TD>1</TD>
 <TD></TD>
 <TD>5</TD>
 <TD>13</TD>
 <TD></TD>
-<TD>101</TD>
+<TD>100</TD>
 <TD></TD>
 <TD></TD>
 <TD>7</TD>
-<TD>3</TD>
+<TD>4</TD>
 <TD></TD>
 </TR>
 <TR><TH>21<TH>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD>1</TD>
 <TD></TD>
--- a/intl/lwbrk/tools/jisx4051class.txt
+++ b/intl/lwbrk/tools/jisx4051class.txt
@@ -74,17 +74,18 @@ 203C;203D;3
 203E;;23
 203F;2043;18
 2044;;3
 2045;;1
 2046;;2
 2047;2049;3
 204A;205E;18
 205F;;17
-2060;2063;18
+2060;;24
+2061;2063;18
 206A;206F;18
 2070;2071;18
 2074;208E;18
 2090;2094;18
 2116;;8
 2160;217F;12
 2190;21EA;a12
 2126;;18