Bug 255990 Characters below U+0100 are not subject to line-breaking rules at all r+sr=roc
author masayuki@d-toybox.com
Thu, 12 Jul 2007 08:52:51 -0700
changeset 3397 c2de0be778519d45c13496426a101a38a47a1e0c
parent 3396 10f530628650b1755e3f7e800fa4e886a0431398
child 3398 57e9d581428352fc6bb3c0f08fcc0de7f631418e
push id unknown
push user unknown
push date unknown
bugs 255990
milestone 1.9a7pre
Bug 255990 Characters below U+0100 are not subject to line-breaking rules at all r+sr=roc
content/base/public/nsLineBreaker.h
content/base/src/nsLineBreaker.cpp
intl/lwbrk/public/nsILineBreaker.h
intl/lwbrk/src/jisx4501class.h
intl/lwbrk/src/nsJISx4501LineBreaker.cpp
intl/lwbrk/src/nsJISx4501LineBreaker.h
intl/lwbrk/tools/anzx4501.html
intl/lwbrk/tools/jisx4501class.txt
--- a/content/base/public/nsLineBreaker.h
+++ b/content/base/public/nsLineBreaker.h
@@ -81,19 +81,28 @@ public:
   nsLineBreaker();
   ~nsLineBreaker();
   
   static inline PRBool IsSpace(PRUnichar u)
   {
     return u == 0x0020 || u == 0x200b/*ZWSP*/ || u == '\n' || u == '\t';
   }
 
+  static inline PRBool IsComplexASCIIChar(PRUnichar u) // true unless u is an ASCII digit or ASCII letter
+  { // NOTE(review): there is no upper bound on u, so every code unit above 0x007A is reported as complex too
+    return !((0x0030 <= u && u <= 0x0039) || // '0'-'9'
+             (0x0041 <= u && u <= 0x005A) || // 'A'-'Z'
+             (0x0061 <= u && u <= 0x007A)); // 'a'-'z'
+  }
+
   static inline PRBool IsComplexChar(PRUnichar u)
   {
-    return (0x1100 <= u && u <= 0x11ff) ||
+    return IsComplexASCIIChar(u) || // NOTE(review): already true for every u > 0x007A, so the range tests below cannot change the result
+           (0x1100 <= u && u <= 0x11ff) || // Hangul Jamo
+           (0x2000 <= u && u <= 0x21ff) || // General Punctuation through Arrows
            (0x2e80 <= u && u <= 0xd7ff) ||
            (0xf900 <= u && u <= 0xfaff) ||
            (0xff00 <= u && u <= 0xffef);
   }
 
   // Normally, break opportunities exist at the end of each run of whitespace
   // (see IsSpace above). Break opportunities can also exist inside runs of
   // non-whitespace, as determined by nsILineBreaker. We pass a whitespace-
--- a/content/base/src/nsLineBreaker.cpp
+++ b/content/base/src/nsLineBreaker.cpp
@@ -209,16 +209,20 @@ nsLineBreaker::AppendText(nsIAtom* aLang
   PRUint32 offset = 0;
 
   // Continue the current word
   if (mCurrentWord.Length() > 0) {
     NS_ASSERTION(!mAfterSpace, "These should not be set");
 
     while (offset < aLength && !IsSpace(aText[offset])) {
       mCurrentWord.AppendElement(aText[offset]);
+      if (!mCurrentWordContainsComplexChar &&
+          IsComplexASCIIChar(aText[offset])) {
+        mCurrentWordContainsComplexChar = PR_TRUE;
+      }
       ++offset;
     }
 
     if (offset > 0) {
       mTextItems.AppendElement(TextItem(aSink, 0, offset, aFlags));
     }
 
     if (offset == aLength) {
@@ -244,54 +248,67 @@ nsLineBreaker::AppendText(nsIAtom* aLang
     offset = aLength;
     while (offset > start) {
       --offset;
       if (IsSpace(aText[offset]))
         break;
     }
   }
   PRUint32 wordStart = offset;
+  PRBool wordHasComplexChar = PR_FALSE;
 
   for (;;) {
     PRUint8 ch = aText[offset];
     PRBool isSpace = IsSpace(ch);
 
     if (aSink) {
       breakState[offset] = mAfterSpace && !isSpace &&
         (aFlags & (offset == 0 ? BREAK_ALLOW_INITIAL : BREAK_ALLOW_INSIDE));
     }
     mAfterSpace = isSpace;
 
     if (isSpace) {
-      // The current word can't have any complex characters inside it
-      // because this is 8-bit text, so just ignore it
+      if (offset > wordStart && wordHasComplexChar) {
+        if (aFlags & BREAK_ALLOW_INSIDE) {
+          // Save current start-of-word state because GetJISx4051Breaks will
+          // set it to false
+          PRPackedBool currentStart = breakState[wordStart];
+          nsContentUtils::LineBreaker()->
+            GetJISx4051Breaks(aText + wordStart, offset - wordStart,
+                              breakState.Elements() + wordStart);
+          breakState[wordStart] = currentStart;
+        }
+        wordHasComplexChar = PR_FALSE;
+      }
+
       ++offset;
       if (offset >= aLength)
         break;
       wordStart = offset;
     } else {
+      if (!wordHasComplexChar && IsComplexASCIIChar(ch)) {
+        wordHasComplexChar = PR_TRUE;
+      }
       ++offset;
       if (offset >= aLength) {
         // Save this word
-        mCurrentWordContainsComplexChar = PR_FALSE;
+        mCurrentWordContainsComplexChar = wordHasComplexChar;
         PRUint32 len = offset - wordStart;
         PRUnichar* elems = mCurrentWord.AppendElements(len);
         if (!elems)
           return NS_ERROR_OUT_OF_MEMORY;
         PRUint32 i;
         for (i = wordStart; i < offset; ++i) {
           elems[i - wordStart] = aText[i];
         }
         mTextItems.AppendElement(TextItem(aSink, wordStart, len, aFlags));
         // Ensure that the break-before for this word is written out
         offset = wordStart + 1;
         break;
       }
-      // We can't break inside words in 8-bit text (no complex characters), so
-      // there is no need to do anything else to handle words
     }
   }
 
   if (aSink) {
     aSink->SetBreaks(start, offset - start, breakState.Elements() + start);
   }
   return NS_OK;
 }
--- a/intl/lwbrk/public/nsILineBreaker.h
+++ b/intl/lwbrk/public/nsILineBreaker.h
@@ -38,20 +38,20 @@
 #define nsILineBreaker_h__
 
 #include "nsISupports.h"
 
 #include "nscore.h"
 
 #define NS_LINEBREAKER_NEED_MORE_TEXT -1
 
-// {c3d9f25f-7cea-4a76-a08f-05c431353448}
+// {C9C5938E-70EF-4db2-ADEE-E7B2CCFBBEE6}
 #define NS_ILINEBREAKER_IID \
-{ 0xc3d9f25f, 0x7cea, 0x4a76, \
-    { 0xa0, 0x8f, 0x05, 0xc4, 0x31, 0x35, 0x34, 0x48 } }
+{ 0xc9c5938e, 0x70ef, 0x4db2, \
+    { 0xad, 0xee, 0xe7, 0xb2, 0xcc, 0xfb, 0xbe, 0xe6 } }
 
 class nsILineBreaker : public nsISupports
 {
 public:
   NS_DECLARE_STATIC_IID_ACCESSOR(NS_ILINEBREAKER_IID)
   virtual PRBool BreakInBetween( const PRUnichar* aText1 , PRUint32 aTextLen1,
                                  const PRUnichar* aText2 , 
                                  PRUint32 aTextLen2) = 0;
@@ -65,13 +65,15 @@ public:
   // Call this on a word with whitespace at either end. We will apply JISx4501
   // rules to find breaks inside the word. aBreakBefore is set to the break-
   // before status of each character; aBreakBefore[0] will always be false
   // because we never return a break before the first character.
   // aLength is the length of the aText array and also the length of the aBreakBefore
   // output array.
   virtual void GetJISx4051Breaks(const PRUnichar* aText, PRUint32 aLength,
                                  PRPackedBool* aBreakBefore) = 0;
+  virtual void GetJISx4051Breaks(const PRUint8* aText, PRUint32 aLength,
+                                 PRPackedBool* aBreakBefore) = 0;
 };
 
 NS_DEFINE_STATIC_IID_ACCESSOR(nsILineBreaker, NS_ILINEBREAKER_IID)
 
 #endif  /* nsILineBreaker_h__ */
--- a/intl/lwbrk/src/jisx4501class.h
+++ b/intl/lwbrk/src/jisx4501class.h
@@ -17,18 +17,18 @@
  * The Initial Developer of the Original Code is
  * Netscape Communications Corporation.
  * Portions created by the Initial Developer are Copyright (C) 1999
  * the Initial Developer. All Rights Reserved.
  *
  * Contributor(s):
  *
  * Alternatively, the contents of this file may be used under the terms of
- * either of the GNU General Public License Version 2 or later (the "GPL"),
- * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  * in which case the provisions of the GPL or the LGPL are applicable instead
  * of those above. If you wish to allow use of your version of this file only
  * under the terms of either the GPL or the LGPL, and not to allow others to
  * use your version of this file under the terms of the MPL, indicate your
  * decision by deleting the provisions above and replace them with the notice
  * and other provisions required by the GPL or the LGPL. If you do not delete
  * the provisions above, a recipient may use your version of this file under
  * the terms of any one of the MPL, the GPL or the LGPL.
@@ -38,51 +38,51 @@
     DO NOT EDIT THIS DOCUMENT !!! THIS DOCUMENT IS GENERATED BY
     mozilla/intl/lwbrk/tools/anzx4501.pl
  */
 static const PRUint32 gLBClass00[32] = {
 0x55555555, // U+0000 - U+0007
 0x55555555, // U+0008 - U+000F
 0x55555555, // U+0010 - U+0017
 0x55555555, // U+0018 - U+001F
-0x88438815, // U+0020 - U+0027
-0x81515810, // U+0028 - U+002F
+0x88108815, // U+0020 - U+0027
+0x11118810, // U+0028 - U+002F
 0x66666666, // U+0030 - U+0037
-0x11501166, // U+0038 - U+003F
+0x11101866, // U+0038 - U+003F
 0x88888888, // U+0040 - U+0047
 0x88888888, // U+0048 - U+004F
 0x88888888, // U+0050 - U+0057
-0x88130888, // U+0058 - U+005F
+0x88100888, // U+0058 - U+005F
 0x88888888, // U+0060 - U+0067
 0x88888888, // U+0068 - U+006F
 0x88888888, // U+0070 - U+0077
 0x88180888, // U+0078 - U+007F
 0x88888888, // U+0080 - U+0087
 0x88888888, // U+0088 - U+008F
 0x88888888, // U+0090 - U+0097
 0x88888888, // U+0098 - U+009F
-0x58383488, // U+00A0 - U+00A7
-0x85888858, // U+00A8 - U+00AF
-0x85888854, // U+00B0 - U+00B7
+0x88383488, // U+00A0 - U+00A7
+0x88888888, // U+00A8 - U+00AF
+0x88888881, // U+00B0 - U+00B7
 0x88888888, // U+00B8 - U+00BF
 0x88888888, // U+00C0 - U+00C7
 0x88888888, // U+00C8 - U+00CF
-0x58888888, // U+00D0 - U+00D7
+0x88888888, // U+00D0 - U+00D7
 0x88888888, // U+00D8 - U+00DF
 0x88888888, // U+00E0 - U+00E7
 0x88888888, // U+00E8 - U+00EF
-0x58888888, // U+00F0 - U+00F7
+0x88888888, // U+00F0 - U+00F7
 0x88888888, // U+00F8 - U+00FF
 };
 
 static const PRUint32 gLBClass20[32] = {
 0x55555555, // U+2000 - U+2007
 0x88885555, // U+2008 - U+200F
 0x88828888, // U+2010 - U+2017
-0x01100110, // U+2018 - U+201F
+0x88888888, // U+2018 - U+201F
 0x81118888, // U+2020 - U+2027
 0x88888888, // U+2028 - U+202F
 0x88884444, // U+2030 - U+2037
 0x88815888, // U+2038 - U+203F
 0x88818888, // U+2040 - U+2047
 0x88888888, // U+2048 - U+204F
 0x88888888, // U+2050 - U+2057
 0x88888888, // U+2058 - U+205F
--- a/intl/lwbrk/src/nsJISx4501LineBreaker.cpp
+++ b/intl/lwbrk/src/nsJISx4501LineBreaker.cpp
@@ -344,52 +344,69 @@ nsJISx4051LineBreaker::nsJISx4051LineBre
 }
 
 nsJISx4051LineBreaker::~nsJISx4051LineBreaker()
 {
 }
 
 NS_IMPL_ISUPPORTS1(nsJISx4051LineBreaker, nsILineBreaker)
 
-#define U_PERIOD ((PRUnichar) '.')
-#define U_COMMA ((PRUnichar) ',')
-#define U_SPACE ((PRUnichar) ' ')
-#define U_RIGHT_SINGLE_QUOTATION_MARK ((PRUnichar) 0x2019)
+#define U_PERIOD    PRUnichar('.')
+#define U_COMMA     PRUnichar(',')
+#define U_SEMICOLON PRUnichar(';')
+#define U_SLASH     PRUnichar('/')
+#define U_SPACE     PRUnichar(' ')
+#define U_HYPHEN    PRUnichar('-')
+#define U_EQUAL     PRUnichar('=')
+#define U_NULL      PRUnichar(0x0000) // passed to ContextualAnalysis as prev/next when that neighbour does not exist
+#define U_RIGHT_SINGLE_QUOTATION_MARK PRUnichar(0x2019)
 #define NEED_CONTEXTUAL_ANALYSIS(c) ((c) == U_PERIOD || \
                                      (c) == U_COMMA || \
+                                     (c) == U_SEMICOLON || \
+                                     (c) == U_SLASH || \
+                                     (c) == U_HYPHEN || \
+                                     (c) == U_EQUAL || \
                                      (c) == U_RIGHT_SINGLE_QUOTATION_MARK)
 #define NUMERIC_CLASS  6 // JIS x4051 class 15 is now map to simplified class 6
 #define CHARACTER_CLASS  8 // JIS x4051 class 18 is now map to simplified class 8
 #define IS_ASCII_DIGIT(u) (0x0030 <= (u) && (u) <= 0x0039)
 
 static PRInt8 ContextualAnalysis(
   PRUnichar prev, PRUnichar cur, PRUnichar next)
 {
-   if(U_COMMA == cur)
+   if(U_COMMA == cur || U_SEMICOLON == cur) // ',' and ';' share the same rule
    {
-     if(IS_ASCII_DIGIT (prev) && IS_ASCII_DIGIT (next))
+     if((IS_ASCII_DIGIT(prev) || prev == U_NULL) && IS_ASCII_DIGIT(next)) // U_NULL prev: callers pass it when cur has no preceding character
        return NUMERIC_CLASS;
    }
    else if(U_PERIOD == cur)
    {
-     if((IS_ASCII_DIGIT (prev) || (0x0020 == prev)) && 
-         IS_ASCII_DIGIT (next))
+     if((IS_ASCII_DIGIT(prev) || prev == U_SPACE || prev == U_NULL) && // digit, space or nothing before the period...
+         IS_ASCII_DIGIT(next)) // ...and a digit after it: class the period as numeric
        return NUMERIC_CLASS;
- 
+
      // By assigning a full stop  character class only when it's followed by
      // class 6 (numeric), 7, and 8 (character). Note that class 9 (Thai) 
      // doesn't matter, either way, we prevent lines from breaking around 
      // full stop in those cases while  still allowing it to end a line when 
      // followed by CJK  characters. With an additional condition of it being 
      // preceded by  class 0 or class > 5, we make sure that it does not 
      // start a line  (see bug 164759). 
-     PRUint8 pc = GetClass(prev);
+     PRUint8 pc = prev != U_NULL ? GetClass(prev) : CHARACTER_CLASS; // with no previous character, act as if an ordinary character preceded the period
      if((pc > 5 || pc == 0)  && GetClass(next) > 5)
        return CHARACTER_CLASS;
    }
+   else if(U_SLASH == cur || U_HYPHEN == cur || U_EQUAL == cur)
+   {
+     // If the slash is the first character, don't break at this point (e.g., "/root").
+     if (U_SLASH == cur && prev == U_NULL)
+       return CHARACTER_CLASS;
+     if (IS_ASCII_DIGIT(next)) // '/', '-' or '=' directly before an ASCII digit is classed as numeric
+       return NUMERIC_CLASS;
+   } // otherwise fall through to the table class of cur
    else if(U_RIGHT_SINGLE_QUOTATION_MARK == cur)
    {
      // somehow people use this as ' in "it's" sometimes...
      if(U_SPACE != next)
        return CHARACTER_CLASS;
    }
    return GetClass(cur);
 }
@@ -428,26 +445,26 @@ PRBool nsJISx4051LineBreaker::BreakInBet
 
   //now apply western rule.
   return IS_SPACE(aText1[aTextLen1-1]) || IS_SPACE(aText2[0]);
 
 ROUTE_CJK_BETWEEN:
 
   PRInt8 c1, c2;
   if(NEED_CONTEXTUAL_ANALYSIS(aText1[aTextLen1-1]))
-    c1 = ContextualAnalysis((aTextLen1>1)?aText1[aTextLen1-2]:0,
+    c1 = ContextualAnalysis((aTextLen1>1)?aText1[aTextLen1-2]:U_NULL,
                                   aText1[aTextLen1-1],
                                   aText2[0]);
   else 
     c1 = GetClass(aText1[aTextLen1-1]);
 
   if(NEED_CONTEXTUAL_ANALYSIS(aText2[0]))
     c2 = ContextualAnalysis(aText1[aTextLen1-1],
                             aText2[0],
-                            (aTextLen2>1)?aText2[1]:0);
+                            (aTextLen2>1)?aText2[1]:U_NULL);
   else 
     c2 = GetClass(aText2[0]);
 
   /* Handle cases for THAI */
   if((CLASS_THAI == c1) && (CLASS_THAI == c2))
   {
      return (0 == TrbWordBreakPos(aText1, aTextLen1, aText2, aTextLen2));
   }
@@ -476,33 +493,33 @@ PRInt32 nsJISx4051LineBreaker::Next(
   }
   return NS_LINEBREAKER_NEED_MORE_TEXT; // Need more text
 
 ROUTE_CJK_NEXT:
   PRInt8 c1, c2;
   cur = aPos;
   if(NEED_CONTEXTUAL_ANALYSIS(aText[cur]))
   {
-    c1 = ContextualAnalysis((cur>0)?aText[cur-1]:0,
+    c1 = ContextualAnalysis((cur>0)?aText[cur-1]:U_NULL,
                             aText[cur],
-                            (cur<(aLen-1)) ?aText[cur+1]:0);
+                            (cur<(aLen-1)) ?aText[cur+1]:U_NULL);
   } else  {
     c1 = GetClass(aText[cur]);
   }
   
   if(CLASS_THAI == c1) 
      return PRUint32(TrbFollowing(aText, aLen, aPos));
 
   for(cur++; cur <aLen; cur++)
   {
      if(NEED_CONTEXTUAL_ANALYSIS(aText[cur]))
      {
-       c2 = ContextualAnalysis((cur>0)?aText[cur-1]:0,
+       c2 = ContextualAnalysis((cur>0)?aText[cur-1]:U_NULL,
                                aText[cur],
-                               (cur<(aLen-1)) ?aText[cur+1]:0);
+                               (cur<(aLen-1)) ?aText[cur+1]:U_NULL);
      } else {
        c2 = GetClass(aText[cur]);
      }
 
      if(GetPair(c1, c2)) {
        return cur;
      }
      c1 = c2;
@@ -532,33 +549,33 @@ PRInt32 nsJISx4051LineBreaker::Prev(
 
   return NS_LINEBREAKER_NEED_MORE_TEXT; // Need more text
 
 ROUTE_CJK_PREV:
   cur = aPos;
   PRInt8 c1, c2;
   if(NEED_CONTEXTUAL_ANALYSIS(aText[cur-1]))
   {
-    c2 = ContextualAnalysis(((cur-1)>0)?aText[cur-2]:0,
+    c2 = ContextualAnalysis(((cur-1)>0)?aText[cur-2]:U_NULL,
                             aText[cur-1],
-                            (cur<aLen) ?aText[cur]:0);
+                            (cur<aLen) ?aText[cur]:U_NULL);
   } else  {
     c2 = GetClass(aText[cur-1]);
   }
   // To Do: 
   //
   // Should handle CLASS_THAI here
   //
   for(cur--; cur > 0; cur--)
   {
      if(NEED_CONTEXTUAL_ANALYSIS(aText[cur-1]))
      {
-       c1 = ContextualAnalysis(((cur-1)>0)?aText[cur-2]:0,
+       c1 = ContextualAnalysis(((cur-1)>0)?aText[cur-2]:U_NULL,
                                aText[cur-1],
-                               (cur<aLen) ?aText[cur]:0);
+                               (cur<aLen) ?aText[cur]:U_NULL);
      } else {
        c1 = GetClass(aText[cur-1]);
      }
 
      if(GetPair(c1, c2)) {
        return cur;
      }
      c2 = c1;
@@ -573,19 +590,19 @@ nsJISx4051LineBreaker::GetJISx4051Breaks
   PRUint32 cur;
   PRInt8 lastClass = -1;
 
   for (cur = 0; cur < aLength; ++cur) {
     PRUnichar ch = aChars[cur];
     PRInt8 cl;
 
     if (NEED_CONTEXTUAL_ANALYSIS(ch)) {
-      cl = ContextualAnalysis(cur > 0 ? aChars[cur - 1] : 0,
+      cl = ContextualAnalysis(cur > 0 ? aChars[cur - 1] : U_NULL,
                               ch,
-                              cur + 1 < aLength ? aChars[cur + 1] : 0);
+                              cur + 1 < aLength ? aChars[cur + 1] : U_NULL);
     } else {
       cl = GetClass(ch);
     }
 
     PRBool allowBreak;
     if (cur > 0) {
       if (CLASS_THAI == lastClass && CLASS_THAI == cl) {
         allowBreak = 0 == TrbWordBreakPos(aChars, cur, aChars + cur, aLength - cur);
@@ -594,8 +611,38 @@ nsJISx4051LineBreaker::GetJISx4051Breaks
       }
     } else {
       allowBreak = PR_FALSE;
     }
     aBreakBefore[cur] = allowBreak;
     lastClass = cl;
   }
 }
+
+void
+nsJISx4051LineBreaker::GetJISx4051Breaks(const PRUint8* aChars, PRUint32 aLength,
+                                         PRPackedBool* aBreakBefore)
+{ // 8-bit text overload: writes one break-before flag per input character into aBreakBefore[0..aLength)
+  PRUint32 cur;
+  PRInt8 lastClass = -1; // class of the previous character; only read once cur > 0
+
+  for (cur = 0; cur < aLength; ++cur) {
+    PRUnichar ch = aChars[cur]; // widen the byte so the PRUnichar-based class helpers can be used
+    PRInt8 cl;
+
+    if (NEED_CONTEXTUAL_ANALYSIS(ch)) {
+      cl = ContextualAnalysis(cur > 0 ? aChars[cur - 1] : U_NULL, // U_NULL marks a missing neighbour at either end
+                              ch,
+                              cur + 1 < aLength ? aChars[cur + 1] : U_NULL);
+    } else {
+      cl = GetClass(ch);
+    }
+
+    PRBool allowBreak;
+    if (cur > 0) {
+      allowBreak = GetPair(lastClass, cl); // NOTE(review): no CLASS_THAI branch, unlike the PRUnichar overload; presumably 8-bit text cannot contain Thai - confirm
+    } else {
+      allowBreak = PR_FALSE; // never report a break before the first character
+    }
+    aBreakBefore[cur] = allowBreak;
+    lastClass = cl;
+  }
+}
--- a/intl/lwbrk/src/nsJISx4501LineBreaker.h
+++ b/intl/lwbrk/src/nsJISx4501LineBreaker.h
@@ -52,11 +52,13 @@ public:
                          const PRUnichar* aText2 , PRUint32 aTextLen2);
 
   PRInt32 Next( const PRUnichar* aText, PRUint32 aLen, PRUint32 aPos);
 
   PRInt32 Prev( const PRUnichar* aText, PRUint32 aLen, PRUint32 aPos);
 
   virtual void GetJISx4051Breaks(const PRUnichar* aText, PRUint32 aLength,
                                  PRPackedBool* aBreakBefore);
+  virtual void GetJISx4051Breaks(const PRUint8* aText, PRUint32 aLength,
+                                 PRPackedBool* aBreakBefore);
 };
 
 #endif  /* nsJISx4501LineBreaker_h__ */
--- a/intl/lwbrk/tools/anzx4501.html
+++ b/intl/lwbrk/tools/anzx4501.html
@@ -48,20 +48,21 @@ Analysis of JIS X 4051 to Unicode Genera
 <TD BGCOLOR=yellow>Zp</TD>
 <TD BGCOLOR=yellow>Zs</TD>
 </TR>
 <TR><TH>00_1<TH>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
-<TD>17</TD>
+<TD>14</TD>
+<TD>2</TD>
 <TD></TD>
+<TD BGCOLOR=white>16</TD>
 <TD></TD>
-<TD BGCOLOR=white>17</TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
@@ -71,75 +72,74 @@ Analysis of JIS X 4051 to Unicode Genera
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
-<TD>4</TD>
-<TD></TD>
+<TD>1</TD>
 <TD>13</TD>
-<TD></TD>
+<TD>1</TD>
 <TD></TD>
-<TD></TD>
+<TD>1</TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 </TR>
 <TR><TH>01_[a]<TH>
 <TD></TD>
 <TD>27</TD>
 <TD>2</TD>
 <TD></TD>
-<TD>29</TD>
-<TD>3</TD>
+<TD>30</TD>
+<TD>6</TD>
 <TD></TD>
-<TD BGCOLOR=white>61</TD>
+<TD BGCOLOR=white>65</TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD>5</TD>
 <TD>22</TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD>2</TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
-<TD>1</TD>
-<TD>1</TD>
-<TD>14</TD>
-<TD>2</TD>
-<TD></TD>
-<TD>9</TD>
-<TD>2</TD>
 <TD></TD>
 <TD>2</TD>
+<TD>14</TD>
+<TD></TD>
+<TD></TD>
+<TD>14</TD>
+<TD></TD>
+<TD></TD>
+<TD>2</TD>
+<TD>3</TD>
 <TD>1</TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
-<TD></TD>
 </TR>
 <TR><TH>02_7<TH>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
-<TD>4</TD>
+<TD>1</TD>
 <TD></TD>
 <TD></TD>
-<TD BGCOLOR=white>4</TD>
+<TD BGCOLOR=white>1</TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
@@ -150,35 +150,35 @@ Analysis of JIS X 4051 to Unicode Genera
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD>1</TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
-<TD>3</TD>
+<TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 </TR>
 <TR><TH>03_8<TH>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
-<TD>4</TD>
+<TD>3</TD>
 <TD></TD>
-<TD BGCOLOR=white>4</TD>
+<TD BGCOLOR=white>3</TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
@@ -191,33 +191,33 @@ Analysis of JIS X 4051 to Unicode Genera
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
-<TD>3</TD>
+<TD>2</TD>
 <TD></TD>
 <TD></TD>
 <TD>1</TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 </TR>
 <TR><TH>04_9<TH>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
-<TD>5</TD>
-<TD>2</TD>
+<TD>4</TD>
+<TD>1</TD>
 <TD></TD>
-<TD BGCOLOR=white>7</TD>
+<TD BGCOLOR=white>5</TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
@@ -228,64 +228,64 @@ Analysis of JIS X 4051 to Unicode Genera
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
-<TD>5</TD>
+<TD>4</TD>
 <TD></TD>
 <TD>1</TD>
 <TD></TD>
 <TD></TD>
-<TD>1</TD>
+<TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 </TR>
 <TR><TH>05_[b]<TH>
-<TD>32</TD>
+<TD>33</TD>
 <TD>154</TD>
 <TD></TD>
 <TD>53</TD>
-<TD>3</TD>
-<TD>316</TD>
 <TD>2</TD>
+<TD>305</TD>
+<TD>13</TD>
 <TD BGCOLOR=white>560</TD>
 <TD>32</TD>
-<TD></TD>
+<TD>1</TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD>154</TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD>33</TD>
 <TD>20</TD>
 <TD></TD>
-<TD>1</TD>
+<TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD>2</TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
-<TD>7</TD>
-<TD>309</TD>
+<TD></TD>
+<TD>305</TD>
 <TD></TD>
 <TD></TD>
-<TD>2</TD>
+<TD>13</TD>
 </TR>
 <TR><TH>06_15<TH>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD>10</TD>
 <TD></TD>
 <TD></TD>
@@ -356,53 +356,53 @@ Analysis of JIS X 4051 to Unicode Genera
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 </TR>
 <TR><TH>08_18<TH>
-<TD>9</TD>
-<TD>661</TD>
+<TD>10</TD>
+<TD>660</TD>
 <TD>4</TD>
 <TD>130</TD>
-<TD>51</TD>
-<TD>929</TD>
-<TD>14</TD>
-<TD BGCOLOR=white>1798</TD>
+<TD>55</TD>
+<TD>940</TD>
+<TD>2</TD>
+<TD BGCOLOR=white>1801</TD>
 <TD></TD>
-<TD>9</TD>
+<TD>10</TD>
 <TD></TD>
 <TD></TD>
-<TD>370</TD>
+<TD>367</TD>
 <TD>1</TD>
 <TD>5</TD>
 <TD></TD>
-<TD>285</TD>
+<TD>287</TD>
 <TD></TD>
 <TD></TD>
 <TD>4</TD>
 <TD></TD>
 <TD>3</TD>
 <TD>127</TD>
 <TD>3</TD>
-<TD>6</TD>
+<TD>5</TD>
 <TD>3</TD>
-<TD>2</TD>
-<TD>2</TD>
-<TD>32</TD>
-<TD>3</TD>
+<TD>4</TD>
+<TD>6</TD>
+<TD>29</TD>
+<TD>5</TD>
 <TD>12</TD>
-<TD>8</TD>
-<TD>258</TD>
-<TD>651</TD>
+<TD>10</TD>
+<TD>273</TD>
+<TD>645</TD>
 <TD>1</TD>
 <TD>1</TD>
-<TD>12</TD>
+<TD></TD>
 </TR>
 <TR><TH>09_nbsp<TH>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
@@ -488,25 +488,25 @@ Analysis of JIS X 4051 to Unicode Genera
 <TD BGCOLOR=red>05_[b]</TD>
 <TD BGCOLOR=red>06_15</TD>
 <TD BGCOLOR=red>07_16</TD>
 <TD BGCOLOR=red>08_18</TD>
 <TD BGCOLOR=red>09_nbsp</TD>
 <TD BGCOLOR=red>X</TD>
 </TR>
 <TR><TH>00<TH>
-<TD>3</TD>
-<TD>9</TD>
+<TD>6</TD>
+<TD>14</TD>
 <TD></TD>
-<TD>3</TD>
-<TD>3</TD>
-<TD>45</TD>
+<TD>2</TD>
+<TD>1</TD>
+<TD>33</TD>
 <TD>10</TD>
 <TD></TD>
-<TD>149</TD>
+<TD>156</TD>
 <TD></TD>
 <TD></TD>
 </TR>
 <TR><TH>01<TH>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD></TD>
@@ -553,25 +553,25 @@ Analysis of JIS X 4051 to Unicode Genera
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD>226</TD>
 <TD></TD>
 <TD></TD>
 </TR>
 <TR><TH>20<TH>
-<TD>4</TD>
-<TD>6</TD>
-<TD>4</TD>
+<TD></TD>
+<TD>5</TD>
+<TD>1</TD>
 <TD></TD>
 <TD>4</TD>
-<TD>1</TD>
+<TD>13</TD>
 <TD></TD>
 <TD></TD>
-<TD>90</TD>
+<TD>86</TD>
 <TD></TD>
 <TD></TD>
 </TR>
 <TR><TH>21<TH>
 <TD></TD>
 <TD></TD>
 <TD></TD>
 <TD>1</TD>
--- a/intl/lwbrk/tools/jisx4501class.txt
+++ b/intl/lwbrk/tools/jisx4501class.txt
@@ -1,33 +1,26 @@
 0028;;1
+002F;;2
 005B;;1
 007B;;1
-2018;;1
-201B;;1
-201C;;1
-201F;;1
 3008;;1
 300A;;1
 300C;;1
 300E;;1
 3010;;1
 3014;;1
 3016;;1
 3018;;1
 301A;;1
 301D;;1
 0029;;2
 002C;;2
 005D;;2
 007D;;2
-2019;;2
-201A;;2
-201D;;2
-201E;;2
 3001;;2
 3009;;2
 300B;;2
 300D;;2
 300F;;2
 3011;;2
 3015;;2
 3017;;2
@@ -62,52 +55,52 @@ 30E7;;3
 30EE;;3
 30F5;;3
 30F6;;3
 30FC;;3
 30FD;;3
 30FE;;3
 0021;;4
 003F;;4
-003A;;5
+003A;;18
 003B;;5
 30FB;;5
 002E;;6
 3002;;6
 2014;;7
 2024;;2
 2025;;2
 2026;;2
-0024;;8
-005C;;8
+0024;;1
+005C;;1
 00A3;;8
 00A5;;8
 2116;;8
-0025;;9
+0025;;2
 00A2;;9
-00B0;;9
+00B0;;2
 2030;;9
 2031;;9
 2032;;9
 2033;;9
 3000;;10
 3042;3094;11
 3099;309E;3
-002B;;12
-002D;;12
+002B;;18
+002D;;2
 003C;;1
-003D;;12
+003D;;2
 003E;;2
-00A7;;12
-00A9;;12
-00AE;;12
-00B1;;12
-00B6;;12
-00D7;;12
-00F7;;12
+00A7;;18
+00A9;;18
+00AE;;18
+00B1;;18
+00B6;;18
+00D7;;18
+00F7;;18
 203B;;12
 2160;217F;12
 2190;21EA;a12
 2460;24EA;a12
 2500;257F;a12
 2580;2595;a12
 25A0;25EF;a12
 2600;2613;a12