author | Masatoshi Kimura <VYV03354@nifty.ne.jp> |
Sat, 27 Dec 2008 19:02:48 +0100 | |
changeset 23129 | c2384ded74947c4e76d9b6593e3a607495154400 |
parent 23128 | 9e31dcb57815b342febfb5a6018783ffb695ee24 |
child 23130 | 5fa76cb18a0a7ce1f4736e1f11a4031e43862e15 |
push id | 4350 |
push user | dgottwald@mozilla.com |
push date | Sat, 27 Dec 2008 18:03:44 +0000 |
treeherder | mozilla-central@c2384ded7494 [default view] [failures only] |
perfherder | [talos] [build metrics] [platform microbench] (compared to previous push) |
bugs | 470635 |
milestone | 1.9.2a1pre |
extensions/universalchardet/src/base/nsCodingStateMachine.h | file | annotate | diff | comparison | revisions | |
extensions/universalchardet/src/base/nsMBCSSM.cpp | file | annotate | diff | comparison | revisions | |
intl/chardet/tools/genucs2be.pl | file | annotate | diff | comparison | revisions | |
intl/chardet/tools/genucs2le.pl | file | annotate | diff | comparison | revisions |
--- a/extensions/universalchardet/src/base/nsCodingStateMachine.h +++ b/extensions/universalchardet/src/base/nsCodingStateMachine.h @@ -91,17 +91,16 @@ protected: extern SMModel UTF8SMModel; extern SMModel Big5SMModel; extern SMModel EUCJPSMModel; extern SMModel EUCKRSMModel; extern SMModel EUCTWSMModel; extern SMModel GB18030SMModel; extern SMModel SJISSMModel; -extern SMModel UCS2BESMModel; extern SMModel HZSMModel; extern SMModel ISO2022CNSMModel; extern SMModel ISO2022JPSMModel; extern SMModel ISO2022KRSMModel; #endif /* nsCodingStateMachine_h__ */
--- a/extensions/universalchardet/src/base/nsMBCSSM.cpp +++ b/extensions/universalchardet/src/base/nsMBCSSM.cpp @@ -429,129 +429,16 @@ SMModel SJISSMModel = { {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, SJIS_cls }, 6, {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, SJIS_st }, SJISCharLenTable, "Shift_JIS", }; -static PRUint32 UCS2BE_cls [ 256 / 8 ] = { -PCK4BITS(0,0,0,0,0,0,0,0), // 00 - 07 -PCK4BITS(0,0,1,0,0,2,0,0), // 08 - 0f -PCK4BITS(0,0,0,0,0,0,0,0), // 10 - 17 -PCK4BITS(0,0,0,3,0,0,0,0), // 18 - 1f -PCK4BITS(0,0,0,0,0,0,0,0), // 20 - 27 -PCK4BITS(0,3,3,3,3,3,0,0), // 28 - 2f -PCK4BITS(0,0,0,0,0,0,0,0), // 30 - 37 -PCK4BITS(0,0,0,0,0,0,0,0), // 38 - 3f -PCK4BITS(0,0,0,0,0,0,0,0), // 40 - 47 -PCK4BITS(0,0,0,0,0,0,0,0), // 48 - 4f -PCK4BITS(0,0,0,0,0,0,0,0), // 50 - 57 -PCK4BITS(0,0,0,0,0,0,0,0), // 58 - 5f -PCK4BITS(0,0,0,0,0,0,0,0), // 60 - 67 -PCK4BITS(0,0,0,0,0,0,0,0), // 68 - 6f -PCK4BITS(0,0,0,0,0,0,0,0), // 70 - 77 -PCK4BITS(0,0,0,0,0,0,0,0), // 78 - 7f -PCK4BITS(0,0,0,0,0,0,0,0), // 80 - 87 -PCK4BITS(0,0,0,0,0,0,0,0), // 88 - 8f -PCK4BITS(0,0,0,0,0,0,0,0), // 90 - 97 -PCK4BITS(0,0,0,0,0,0,0,0), // 98 - 9f -PCK4BITS(0,0,0,0,0,0,0,0), // a0 - a7 -PCK4BITS(0,0,0,0,0,0,0,0), // a8 - af -PCK4BITS(0,0,0,0,0,0,0,0), // b0 - b7 -PCK4BITS(0,0,0,0,0,0,0,0), // b8 - bf -PCK4BITS(0,0,0,0,0,0,0,0), // c0 - c7 -PCK4BITS(0,0,0,0,0,0,0,0), // c8 - cf -PCK4BITS(0,0,0,0,0,0,0,0), // d0 - d7 -PCK4BITS(0,0,0,0,0,0,0,0), // d8 - df -PCK4BITS(0,0,0,0,0,0,0,0), // e0 - e7 -PCK4BITS(0,0,0,0,0,0,0,0), // e8 - ef -PCK4BITS(0,0,0,0,0,0,0,0), // f0 - f7 -PCK4BITS(0,0,0,0,0,0,4,5) // f8 - ff -}; - - -static PRUint32 UCS2BE_st [ 7] = { -PCK4BITS( 5, 7, 7,eError, 4, 3,eError,eError),//00-07 -PCK4BITS(eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe),//08-0f -PCK4BITS(eItsMe,eItsMe, 6, 6, 6, 6,eError,eError),//10-17 -PCK4BITS( 6, 6, 6, 6, 6,eItsMe, 6, 6),//18-1f -PCK4BITS( 6, 6, 6, 6, 5, 7, 7,eError),//20-27 -PCK4BITS( 5, 8, 6, 6,eError, 6, 6, 6),//28-2f -PCK4BITS( 6, 6, 6, 6,eError,eError,eStart,eStart) //30-37 -}; - -static const PRUint32 UCS2BECharLenTable[] = {2, 2, 2, 0, 2, 2}; - -SMModel UCS2BESMModel = { - {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, UCS2BE_cls }, - 6, - {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, UCS2BE_st }, - UCS2BECharLenTable, - "UTF-16BE", -}; - -static PRUint32 UCS2LE_cls [ 256 / 8 ] = { -PCK4BITS(0,0,0,0,0,0,0,0), // 00 - 07 -PCK4BITS(0,0,1,0,0,2,0,0), // 08 - 0f -PCK4BITS(0,0,0,0,0,0,0,0), // 10 - 17 -PCK4BITS(0,0,0,3,0,0,0,0), // 18 - 1f -PCK4BITS(0,0,0,0,0,0,0,0), // 20 - 27 -PCK4BITS(0,3,3,3,3,3,0,0), // 28 - 2f -PCK4BITS(0,0,0,0,0,0,0,0), // 30 - 37 -PCK4BITS(0,0,0,0,0,0,0,0), // 38 - 3f -PCK4BITS(0,0,0,0,0,0,0,0), // 40 - 47 -PCK4BITS(0,0,0,0,0,0,0,0), // 48 - 4f -PCK4BITS(0,0,0,0,0,0,0,0), // 50 - 57 -PCK4BITS(0,0,0,0,0,0,0,0), // 58 - 5f -PCK4BITS(0,0,0,0,0,0,0,0), // 60 - 67 -PCK4BITS(0,0,0,0,0,0,0,0), // 68 - 6f -PCK4BITS(0,0,0,0,0,0,0,0), // 70 - 77 -PCK4BITS(0,0,0,0,0,0,0,0), // 78 - 7f -PCK4BITS(0,0,0,0,0,0,0,0), // 80 - 87 -PCK4BITS(0,0,0,0,0,0,0,0), // 88 - 8f -PCK4BITS(0,0,0,0,0,0,0,0), // 90 - 97 -PCK4BITS(0,0,0,0,0,0,0,0), // 98 - 9f -PCK4BITS(0,0,0,0,0,0,0,0), // a0 - a7 -PCK4BITS(0,0,0,0,0,0,0,0), // a8 - af -PCK4BITS(0,0,0,0,0,0,0,0), // b0 - b7 -PCK4BITS(0,0,0,0,0,0,0,0), // b8 - bf -PCK4BITS(0,0,0,0,0,0,0,0), // c0 - c7 -PCK4BITS(0,0,0,0,0,0,0,0), // c8 - cf -PCK4BITS(0,0,0,0,0,0,0,0), // d0 - d7 -PCK4BITS(0,0,0,0,0,0,0,0), // d8 - df -PCK4BITS(0,0,0,0,0,0,0,0), // e0 - e7 -PCK4BITS(0,0,0,0,0,0,0,0), // e8 - ef -PCK4BITS(0,0,0,0,0,0,0,0), // f0 - f7 -PCK4BITS(0,0,0,0,0,0,4,5) // f8 - ff -}; - - -static PRUint32 UCS2LE_st [ 7] = { -PCK4BITS( 6, 6, 7, 6, 4, 3,eError,eError),//00-07 -PCK4BITS(eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe),//08-0f -PCK4BITS(eItsMe,eItsMe, 5, 5, 5,eError,eItsMe,eError),//10-17 -PCK4BITS( 5, 5, 5,eError, 5,eError, 6, 6),//18-1f -PCK4BITS( 7, 6, 8, 8, 5, 5, 5,eError),//20-27 -PCK4BITS( 5, 5, 5,eError,eError,eError, 5, 5),//28-2f -PCK4BITS( 5, 5, 5,eError, 5,eError,eStart,eStart) //30-37 -}; - -static const PRUint32 UCS2LECharLenTable[] = {2, 2, 2, 2, 2, 2}; - -SMModel UCS2LESMModel = { - {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, UCS2LE_cls }, - 6, - {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, UCS2LE_st }, - UCS2LECharLenTable, - "UTF-16LE", -}; - - static PRUint32 UTF8_cls [ 256 / 8 ] = { //PCK4BITS(0,1,1,1,1,1,1,1), // 00 - 07 PCK4BITS(1,1,1,1,1,1,1,1), // 00 - 07 //allow 0x00 as a legal value PCK4BITS(1,1,1,1,1,1,0,0), // 08 - 0f PCK4BITS(1,1,1,1,1,1,1,1), // 10 - 17 PCK4BITS(1,1,1,0,1,1,1,1), // 18 - 1f PCK4BITS(1,1,1,1,1,1,1,1), // 20 - 27 PCK4BITS(1,1,1,1,1,1,1,1), // 28 - 2f
deleted file mode 100644 --- a/intl/chardet/tools/genucs2be.pl +++ /dev/null @@ -1,101 +0,0 @@ -#!/usr/local/bin/perl -use strict; -require "genverifier.pm"; -use genverifier; - - -my(@ucs2be_cls); -my(@ucs2be_st); -my($ucs2be_ver); - - -# We look at the following UCS2 char -# U+FEFF->ItsMe if it is the first Unicode -# U+FFFF->Error -# U+FFFE->Error -# U+0d0d->Error -# U+0a0d->Error -# U+1bxx->Error -# U+29xx->Error -# U+2axx->Error -# U+2bxx->Error -# U+2cxx->Error -# U+2dxx->Error -# -# In UCS2-Big Endian it will be -# -# Ev Od -# FE FF->ItsMe for the first two bytes -# FF FF->Error -# FF FE->Error -# 0d 0d->Error -# 0a 0d->Error -# 1b ->Error -# 29 ->Error -# 2a ->Error -# 2b ->Error -# 2c ->Error -# 2d ->Error -# -# Now we classified the char -# 0a:k1 -# 0d:k2 -# 1b:k3 -# 29-2d:k3 -# fe:k4 -# ff:k5 -# others:k0 -# -@ucs2be_cls = ( - [ 0x0a, 0x0a, 1 ], - [ 0x0d, 0x0d, 2 ], - [ 0x1b, 0x1b, 3 ], - [ 0x29, 0x2d, 3 ], - [ 0xfe, 0xfe, 4 ], - [ 0xff, 0xff, 5 ], - [ 0x00, 0xff, 0 ] -); - - -# For Big Endian -# -# 0:k5->3 -# 0:k4->4 -# 0:k1,k2->7 -# 0:k3->1 -# 0:k0->5 -# 3:k4,k5->1 -# 3:*->6 -# 4:k5->2 -# 4:*->6 -# 5:*->6 -# 6:k1,k2->7 -# 6:k3->1 -# 6:k5->8 -# 6:*->5 -# 7:k2->1 -# 7:*->6 -# 8:k4,k5->1 -# 8:*->6 - -package genverifier; -@ucs2be_st = ( -# 0 1 2 3 4 5 - 5, 7, 7, 1, 4, 3, # state 0 - Start - 1, 1, 1, 1, 1, 1, # state 1 - Error - 2, 2, 2, 2, 2, 2, # state 2 - ItsMe - 6, 6, 6, 6, 1, 1, # state 3 1st byte got FF - 6, 6, 6, 6, 6, 2, # state 4 1st byte got FE - 6, 6, 6, 6, 6, 6, # state 5 Odd byte - 5, 7, 7, 1, 5, 8, # state 6 Even Byte - 6, 6, 1, 6, 6, 6, # state 7 Got 0A or 0D - 6, 6, 6, 6, 1, 1, # state 8 Got FF -); - - -$ucs2be_ver = genverifier::GenVerifier("UCS2BE", "UTF-16BE", - \@ucs2be_cls, 6, \@ucs2be_st); -print $ucs2be_ver; - - -
deleted file mode 100644 --- a/intl/chardet/tools/genucs2le.pl +++ /dev/null @@ -1,103 +0,0 @@ -#!/usr/local/bin/perl -use strict; -require "genverifier.pm"; -use genverifier; - - -my(@ucs2le_cls); -my(@ucs2le_st); -my($ucs2le_ver); - -# We look at the following UCS2 -# U+FEFF->ItsMe if it is the first Unicode -# U+FFFF->Error -# U+FFFE->Error -# U+0d0d->Error -# U+0a0d->Error -# U+1bxx->Error -# U+29xx->Error -# U+2axx->Error -# U+2bxx->Error -# U+2cxx->Error -# U+2dxx->Error -# -# In UCS2-Little Endian it will be -# -# Ev Od -# FF FE->ItsMe for the first two bytes -# FF FF->Error -# FE FF->Error -# 0d 0d->Error -# 0d 0a->Error -# 1b->Error -# 29->Error -# 2a->Error -# 2b->Error -# 2c->Error -# 2d->Error -# -# Now we classified the char -# 0a:k1 -# 0d:k2 -# 1b:k3 -# 29-2d:k3 -# fe:k4 -# ff:k5 -# others:k0 - -@ucs2le_cls = ( - [ 0x0a, 0x0a, 1 ], - [ 0x0d, 0x0d, 2 ], - [ 0x1b, 0x1b, 3 ], - [ 0x29, 0x2d, 3 ], - [ 0xfe, 0xfe, 4 ], - [ 0xff, 0xff, 5 ], - [ 0x00, 0xff, 0 ] -); - - -# For Little Endian -# 0:k5->3 -# 0:k4->4 -# 0:k2->7 -# 0:*->6 -# 3:k4->2 -# 3:k5->1 -# 3:k3->1 -# 3:*->5 -# 4:k3,k5->1 -# 4:*->5 -# 5:k4,k5->8 -# 5:k2->7 -# 5:*->6 -# 6:k3->1 -# 6:*->5 -# 7:k1,k2->1 -# 7:k3->1 -# 7:*->5 -# 8:k5->1 -# 8:k3->1 -# 8:*->5 - - -package genverifier; -@ucs2le_st = ( -# 0 1 2 3 4 5 - 6, 6, 7, 6, 4, 3, # state 0 - Start - 1, 1, 1, 1, 1, 1, # state 1 - Error - 2, 2, 2, 2, 2, 2, # state 2 - ItsMe - 5, 5, 5, 1, 2, 1, # state 3 1st byte Got FF - 5, 5, 5, 1, 5, 1, # state 4 1st byte Got FE - 6, 6, 7, 6, 8, 8, # state 5 Even Byte - 5, 5, 5, 1, 5, 5, # state 6 Odd Byte - 5, 1, 1, 1, 5, 5, # state 7 Got 0d - 5, 5, 5, 1, 5, 1, # state 8 Got FF -); - - -$ucs2le_ver = genverifier::GenVerifier("UCS2LE", "UTF-16LE", \@ucs2le_cls, - 6, \@ucs2le_st); -print $ucs2le_ver; - - -