Bug 780979 - Compute length of char to freq order tables. r=smontagu
 author Andrew McCreight Wed, 29 Aug 2012 06:48:15 -0700 changeset 105811 5420af81b778aa2f97705d738fa16fb66bb0ab32 parent 105810 a9858d2ad58bc0e6b1f50c4f8648ba3890bd4b8e child 105812 7566291ca483b3ef7434355e0fe03bb7a40d16f6 push id 55 push user shu@rfrn.org push date Thu, 30 Aug 2012 01:33:09 +0000 reviewers smontagu bugs 780979 milestone 18.0a1
Bug 780979 - Compute length of char to freq order tables. r=smontagu
```--- a/extensions/universalchardet/src/base/Big5Freq.tab
+++ b/extensions/universalchardet/src/base/Big5Freq.tab
@@ -17,20 +17,17 @@
* Ideal Distribution Ratio = 0.74851/(1-0.74851) =2.98
* Random Distribution Ratio = 512/(5401-512)=0.105
*
* Typical Distribution Ratio about 25% of Ideal one, still much higher than RDR
*****************************************************************************/

#define BIG5_TYPICAL_DISTRIBUTION_RATIO (float)0.75

-
-//Char to FreqOrder table ,
-#define BIG5_TABLE_SIZE  5376
-
+// Char to FreqOrder table
static const int16_t Big5CharToFreqOrder[] =
{
1,1801,1506, 255,1431, 198,   9,  82,   6,5008, 177, 202,3681,1256,2821, 110, //   16
3814,  33,3274, 261,  76,  44,2114,  16,2946,2187,1176, 659,3971,  26,3451,2653, //   32
1198,3972,3350,4202, 410,2215, 302, 590, 361,1964,   8, 204,  58,4510,5009,1932, //   48
63,5010,5011, 317,1614,  75, 222, 159,4203,2417,1480,5012,3555,3091, 224,2822, //   64
3682,   3,  10,3973,1471,  29,2787,1135,2866,1940, 873, 130,3275,1123, 312,5013, //   80
4511,2052, 507, 252, 682,5014, 142,1915, 124, 206,2947,  34,3556,3204,  64, 604, //   96```
```--- a/extensions/universalchardet/src/base/CharDistribution.cpp
+++ b/extensions/universalchardet/src/base/CharDistribution.cpp
@@ -5,16 +5,17 @@

#include "CharDistribution.h"

#include "JISFreq.tab"
#include "Big5Freq.tab"
#include "EUCKRFreq.tab"
#include "EUCTWFreq.tab"
#include "GB2312Freq.tab"
+#include "nsMemory.h"

#define SURE_YES 0.99f
#define SURE_NO  0.01f

//return confidence based on received data
float CharDistributionAnalysis::GetConfidence(void)
{
//if we didn't receive any character in our consideration range, or the
@@ -31,47 +32,47 @@ float CharDistributionAnalysis::GetConfi
}
//normalize confidence, (we don't want to be 100% sure)
return SURE_YES;
}

EUCTWDistributionAnalysis::EUCTWDistributionAnalysis()
{
mCharToFreqOrder = EUCTWCharToFreqOrder;
-  mTableSize = EUCTW_TABLE_SIZE;
+  mTableSize = NS_ARRAY_LENGTH(EUCTWCharToFreqOrder);
mTypicalDistributionRatio = EUCTW_TYPICAL_DISTRIBUTION_RATIO;
}

EUCKRDistributionAnalysis::EUCKRDistributionAnalysis()
{
mCharToFreqOrder = EUCKRCharToFreqOrder;
-  mTableSize = EUCKR_TABLE_SIZE;
+  mTableSize = NS_ARRAY_LENGTH(EUCKRCharToFreqOrder);
mTypicalDistributionRatio = EUCKR_TYPICAL_DISTRIBUTION_RATIO;
}

GB2312DistributionAnalysis::GB2312DistributionAnalysis()
{
mCharToFreqOrder = GB2312CharToFreqOrder;
-  mTableSize = GB2312_TABLE_SIZE;
+  mTableSize = NS_ARRAY_LENGTH(GB2312CharToFreqOrder);
mTypicalDistributionRatio = GB2312_TYPICAL_DISTRIBUTION_RATIO;
}

Big5DistributionAnalysis::Big5DistributionAnalysis()
{
mCharToFreqOrder = Big5CharToFreqOrder;
-  mTableSize = BIG5_TABLE_SIZE;
+  mTableSize = NS_ARRAY_LENGTH(Big5CharToFreqOrder);
mTypicalDistributionRatio = BIG5_TYPICAL_DISTRIBUTION_RATIO;
}

SJISDistributionAnalysis::SJISDistributionAnalysis()
{
mCharToFreqOrder = JISCharToFreqOrder;
-  mTableSize = JIS_TABLE_SIZE;
+  mTableSize = NS_ARRAY_LENGTH(JISCharToFreqOrder);
mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO;
}

EUCJPDistributionAnalysis::EUCJPDistributionAnalysis()
{
mCharToFreqOrder = JISCharToFreqOrder;
-  mTableSize = JIS_TABLE_SIZE;
+  mTableSize = NS_ARRAY_LENGTH(JISCharToFreqOrder);
mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO;
}
```
```--- a/extensions/universalchardet/src/base/EUCKRFreq.tab
+++ b/extensions/universalchardet/src/base/EUCKRFreq.tab
@@ -15,19 +15,17 @@
* Ideal Distribution Ratio = 0.98653 / (1-0.98653) = 73.24
* Random Distribution Ratio = 512 / (2350-512) = 0.279.
*
* Typical Distribution Ratio
*****************************************************************************/

#define EUCKR_TYPICAL_DISTRIBUTION_RATIO (float) 6.0

-#define EUCKR_TABLE_SIZE  2352
-
-//Char to FreqOrder table ,
+// Char to FreqOrder table
static const int16_t EUCKRCharToFreqOrder[] =
{
13, 130, 120,1396, 481,1719,1720, 328, 609, 212,1721, 707, 400, 299,1722,  87,
1397,1723, 104, 536,1117,1203,1724,1267, 685,1268, 508,1725,1726,1727,1728,1398,
1399,1729,1730,1731, 141, 621, 326,1057, 368,1732, 267, 488,  20,1733,1269,1734,
945,1400,1735,  47, 904,1270,1736,1737, 773, 248,1738, 409, 313, 786, 429,1739,
116, 987, 813,1401, 683,  75,1204, 145,1740,1741,1742,1743,  16, 847, 667, 622,
708,1744,1745,1746, 966, 787, 304, 129,1747,  60, 820, 123, 676,1748,1749,1750,```
```--- a/extensions/universalchardet/src/base/EUCTWFreq.tab
+++ b/extensions/universalchardet/src/base/EUCTWFreq.tab
@@ -19,19 +19,17 @@
* Ideal Distribution Ratio = 0.74851/(1-0.74851) =2.98
* Random Distribution Ratio = 512/(5401-512)=0.105
*
* Typical Distribution Ratio about 25% of Ideal one, still much higher than RDR
*****************************************************************************/

#define EUCTW_TYPICAL_DISTRIBUTION_RATIO (float)0.75

-//Char to FreqOrder table ,
-#define EUCTW_TABLE_SIZE  8102
-
+// Char to FreqOrder table
static const int16_t EUCTWCharToFreqOrder[] =
{
1,1800,1506, 255,1431, 198,   9,  82,   6,7310, 177, 202,3615,1256,2808, 110, // 2742
3735,  33,3241, 261,  76,  44,2113,  16,2931,2184,1176, 659,3868,  26,3404,2643, // 2758
1198,3869,3313,4060, 410,2211, 302, 590, 361,1963,   8, 204,  58,4296,7311,1931, // 2774
63,7312,7313, 317,1614,  75, 222, 159,4061,2412,1480,7314,3500,3068, 224,2809, // 2790
3616,   3,  10,3870,1471,  29,2774,1135,2852,1939, 873, 130,3242,1123, 312,7315, // 2806
4297,2051, 507, 252, 682,7316, 142,1914, 124, 206,2932,  34,3501,3173,  64, 604, // 2822```
```--- a/extensions/universalchardet/src/base/GB2312Freq.tab
+++ b/extensions/universalchardet/src/base/GB2312Freq.tab
@@ -16,18 +16,16 @@
* Ideal Distribution Ratio = 0.79135/(1-0.79135) = 3.79
* Random Distribution Ratio = 512 / (3755 - 512) = 0.157
*
* Typical Distribution Ratio about 25% of Ideal one, still much higher than RDR
*****************************************************************************/

#define GB2312_TYPICAL_DISTRIBUTION_RATIO (float)0.9

-#define GB2312_TABLE_SIZE  3760
-
static const int16_t GB2312CharToFreqOrder[] =
{
1671, 749,1443,2364,3924,3807,2330,3921,1704,3463,2691,1511,1515, 572,3191,2205,
2361, 224,2558, 479,1711, 963,3162, 440,4060,1905,2966,2947,3580,2647,3961,3842,
2204, 869,4207, 970,2678,5626,2944,2956,1479,4048, 514,3595, 588,1346,2820,3409,
249,4088,1746,1873,2047,1774, 581,1813, 358,1174,3590,1014,1561,4844,2245, 670,
1636,3112, 889,1286, 953, 556,2327,3060,1290,3141, 613, 185,3477,1367, 850,3820,
1715,2428,2642,2303,2732,3041,2562,2648,3566,3946,1349, 388,3098,2091,1360,3585,```
```--- a/extensions/universalchardet/src/base/JISFreq.tab
+++ b/extensions/universalchardet/src/base/JISFreq.tab
@@ -18,20 +18,17 @@
* Ideal Distribution Ratio = 0.92635 / (1-0.92635) = 12.58
* Random Distribution Ratio = 512 / (2965+62+83+86-512) = 0.191
*
* Typical Distribution Ratio, 25% of IDR
*****************************************************************************/

#define JIS_TYPICAL_DISTRIBUTION_RATIO (float) 3.0

-
-//Char to FreqOrder table ,
-#define JIS_TABLE_SIZE  4368
-
+// Char to FreqOrder table
static const int16_t JISCharToFreqOrder[] =
{
40,   1,   6, 182, 152, 180, 295,2127, 285, 381,3295,4304,3068,4606,3165,3510, //   16
3511,1822,2785,4607,1193,2226,5070,4608, 171,2996,1247,  18, 179,5071, 856,1661, //   32
1262,5072, 619, 127,3431,3512,3230,1899,1700, 232, 228,1294,1298, 284, 283,2041, //   48
2042,1061,1062,  48,  49,  44,  45, 433, 434,1040,1041, 996, 787,2997,1255,4305, //   64
2108,4609,1684,1648,5073,5074,5075,5076,5077,5078,3687,5079,4610,5080,3927,3928, //   80
5081,3296,3432, 290,2285,1471,2187,5082,2580,2825,1303,2140,1739,1445,2691,3375, //   96```