Bug 801330 - Assert if charLenTable is too small. r=smontagu
author Mats Palmgren <matspal@gmail.com>
Sun, 14 Oct 2012 19:10:04 +0200
changeset 110361 a53185bd1aed375e8444340e37e9c3c8f3e8b626
parent 110360 3591fc9c515350562639d2bf95aa911a6f94b6e8
child 110362 baaff43d6a16184ff1a2183bbc6248133b2cf20e
push id 93
push user nmatsakis@mozilla.com
push date Wed, 31 Oct 2012 21:26:57 +0000
reviewers smontagu
bugs 801330
milestone 19.0a1
Bug 801330 - Assert if charLenTable is too small. r=smontagu
extensions/universalchardet/src/base/nsCodingStateMachine.h
extensions/universalchardet/src/base/nsEscSM.cpp
extensions/universalchardet/src/base/nsMBCSSM.cpp
--- a/extensions/universalchardet/src/base/nsCodingStateMachine.h
+++ b/extensions/universalchardet/src/base/nsCodingStateMachine.h
@@ -1,44 +1,49 @@
 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 #ifndef nsCodingStateMachine_h__
 #define nsCodingStateMachine_h__
 
 #include "nsPkgInt.h"
+#include "mozilla/Util.h"
 
 typedef enum {
    eStart = 0,
    eError = 1,
    eItsMe = 2 
 } nsSMState;
 
 #define GETCLASS(c) GETFROMPCK(((unsigned char)(c)), mModel->classTable)
 
 //state machine model
 typedef struct 
 {
   nsPkgInt classTable;
   uint32_t classFactor;
   nsPkgInt stateTable;
   const uint32_t* charLenTable;
+#ifdef DEBUG
+  const size_t charLenTableLength;  // entry count of charLenTable; present only in DEBUG builds, used by the bounds assert in NextState()
+#endif
   const char* name;
 } SMModel;
 
 class nsCodingStateMachine {
 public:
   nsCodingStateMachine(const SMModel* sm) : mModel(sm) { mCurrentState = eStart; }
   nsSMState NextState(char c){
     //for each byte we get its class , if it is first byte, we also get byte length
     uint32_t byteCls = GETCLASS(c);
     if (mCurrentState == eStart)
     { 
       mCurrentBytePos = 0; 
+      MOZ_ASSERT(byteCls < mModel->charLenTableLength);  // bounds check for the charLenTable lookup on the next line; the length field exists only under DEBUG
       mCurrentCharLen = mModel->charLenTable[byteCls];
     }
     //from byte's class and stateTable, we get its next state
     mCurrentState=(nsSMState)GETFROMPCK(mCurrentState*(mModel->classFactor)+byteCls,
                                        mModel->stateTable);
     mCurrentBytePos++;
     return mCurrentState;
   }
@@ -63,10 +68,17 @@ extern const SMModel GB18030SMModel;
 extern const SMModel SJISSMModel;
 
 
 extern const SMModel HZSMModel;
 extern const SMModel ISO2022CNSMModel;
 extern const SMModel ISO2022JPSMModel;
 extern const SMModel ISO2022KRSMModel;
 
+#undef CHAR_LEN_TABLE
+#ifdef DEBUG  // expands to two SMModel initializers: charLenTable and charLenTableLength
+#define CHAR_LEN_TABLE(x) x, mozilla::ArrayLength(x)
+#else  // expands to the charLenTable initializer only
+#define CHAR_LEN_TABLE(x) x
+#endif
+
 #endif /* nsCodingStateMachine_h__ */
 
--- a/extensions/universalchardet/src/base/nsEscSM.cpp
+++ b/extensions/universalchardet/src/base/nsEscSM.cpp
@@ -50,17 +50,17 @@ PCK4BITS(     4,eItsMe,eStart,eStart,eSt
 };
 
 static const uint32_t HZCharLenTable[] = {0, 0, 0, 0, 0, 0};
 
 const SMModel HZSMModel = {
   {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, HZ_cls },
    6,
   {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, HZ_st },
-  HZCharLenTable,
+  CHAR_LEN_TABLE(HZCharLenTable),
   "HZ-GB-2312",
 };
 
 
 static const uint32_t ISO2022CN_cls [ 256 / 8 ] = {
 PCK4BITS(2,0,0,0,0,0,0,0),  // 00 - 07 
 PCK4BITS(0,0,0,0,0,0,0,0),  // 08 - 0f 
 PCK4BITS(0,0,0,0,0,0,0,0),  // 10 - 17 
@@ -108,17 +108,17 @@ PCK4BITS(eError,eError,eError,eError,eEr
 };
 
 static const uint32_t ISO2022CNCharLenTable[] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
 
 const SMModel ISO2022CNSMModel = {
   {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022CN_cls },
   9,
   {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022CN_st },
-  ISO2022CNCharLenTable,
+  CHAR_LEN_TABLE(ISO2022CNCharLenTable),
   "ISO-2022-CN",
 };
 
 static const uint32_t ISO2022JP_cls [ 256 / 8 ] = {
 PCK4BITS(2,0,0,0,0,0,0,0),  // 00 - 07 
 PCK4BITS(0,0,0,0,0,0,2,2),  // 08 - 0f 
 PCK4BITS(0,0,0,0,0,0,0,0),  // 10 - 17 
 PCK4BITS(0,0,0,1,0,0,0,0),  // 18 - 1f 
@@ -166,17 +166,17 @@ PCK4BITS(eError,eError,eError,eError,eIt
 };
 
 static const uint32_t ISO2022JPCharLenTable[] = {0, 0, 0, 0, 0, 0, 0, 0};
 
 const SMModel ISO2022JPSMModel = {
   {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022JP_cls },
   10,
   {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022JP_st },
-  ISO2022JPCharLenTable,
+  CHAR_LEN_TABLE(ISO2022JPCharLenTable),  // NOTE(review): ISO2022JPCharLenTable has 8 entries but classFactor is 10 -- confirm ISO2022JP_cls never yields a class >= 8, else the new DEBUG assert can fire
   "ISO-2022-JP",
 };
 
 static const uint32_t ISO2022KR_cls [ 256 / 8 ] = {
 PCK4BITS(2,0,0,0,0,0,0,0),  // 00 - 07 
 PCK4BITS(0,0,0,0,0,0,0,0),  // 08 - 0f 
 PCK4BITS(0,0,0,0,0,0,0,0),  // 10 - 17 
 PCK4BITS(0,0,0,1,0,0,0,0),  // 18 - 1f 
@@ -220,12 +220,12 @@ PCK4BITS(eError,eError,eError,eItsMe,eSt
 };
 
 static const uint32_t ISO2022KRCharLenTable[] = {0, 0, 0, 0, 0, 0};
 
 const SMModel ISO2022KRSMModel = {
   {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022KR_cls },
    6,
   {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022KR_st },
-  ISO2022KRCharLenTable,
+  CHAR_LEN_TABLE(ISO2022KRCharLenTable),
   "ISO-2022-KR",
 };
 
--- a/extensions/universalchardet/src/base/nsMBCSSM.cpp
+++ b/extensions/universalchardet/src/base/nsMBCSSM.cpp
@@ -56,17 +56,17 @@ PCK4BITS(eError,eStart,eStart,eStart,eSt
 };
 
 static const uint32_t Big5CharLenTable[] = {0, 1, 1, 2, 0};
 
 SMModel const Big5SMModel = {
   {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, BIG5_cls },
     5,
   {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, BIG5_st },
-  Big5CharLenTable,
+  CHAR_LEN_TABLE(Big5CharLenTable),
   "Big5",
 };
 
 static const uint32_t EUCJP_cls [ 256 / 8 ] = {
 //PCK4BITS(5,4,4,4,4,4,4,4),  // 00 - 07 
 PCK4BITS(4,4,4,4,4,4,4,4),  // 00 - 07 
 PCK4BITS(4,4,4,4,4,4,5,5),  // 08 - 0f 
 PCK4BITS(4,4,4,4,4,4,4,4),  // 10 - 17 
@@ -111,17 +111,17 @@ PCK4BITS(     3,eError,eError,eError,eSt
 };
 
 static const uint32_t EUCJPCharLenTable[] = {2, 2, 2, 3, 1, 0};
 
 const SMModel EUCJPSMModel = {
   {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCJP_cls },
    6,
   {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCJP_st },
-  EUCJPCharLenTable,
+  CHAR_LEN_TABLE(EUCJPCharLenTable),
   "EUC-JP",
 };
 
 static const uint32_t EUCKR_cls [ 256 / 8 ] = {
 //PCK4BITS(0,1,1,1,1,1,1,1),  // 00 - 07 
 PCK4BITS(1,1,1,1,1,1,1,1),  // 00 - 07 
 PCK4BITS(1,1,1,1,1,1,0,0),  // 08 - 0f 
 PCK4BITS(1,1,1,1,1,1,1,1),  // 10 - 17 
@@ -163,17 +163,17 @@ PCK4BITS(eItsMe,eItsMe,eItsMe,eItsMe,eEr
 };
 
 static const uint32_t EUCKRCharLenTable[] = {0, 1, 2, 0};
 
 const SMModel EUCKRSMModel = {
   {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCKR_cls },
   4,
   {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCKR_st },
-  EUCKRCharLenTable,
+  CHAR_LEN_TABLE(EUCKRCharLenTable),
   "EUC-KR",
 };
 
 static const uint32_t EUCTW_cls [ 256 / 8 ] = {
 //PCK4BITS(0,2,2,2,2,2,2,2),  // 00 - 07 
 PCK4BITS(2,2,2,2,2,2,2,2),  // 00 - 07 
 PCK4BITS(2,2,2,2,2,2,0,0),  // 08 - 0f 
 PCK4BITS(2,2,2,2,2,2,2,2),  // 10 - 17 
@@ -219,17 +219,17 @@ PCK4BITS(eStart,eError,eStart,eStart,eSt
 };
 
 static const uint32_t EUCTWCharLenTable[] = {0, 0, 1, 2, 2, 2, 3};
 
 const SMModel EUCTWSMModel = {
   {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCTW_cls },
    7,
   {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCTW_st },
-  EUCTWCharLenTable,
+  CHAR_LEN_TABLE(EUCTWCharLenTable),
   "x-euc-tw",
 };
 
 /* obsolete GB2312 by gb18030
 static uint32_t GB2312_cls [ 256 / 8 ] = {
 //PCK4BITS(0,1,1,1,1,1,1,1),  // 00 - 07 
 PCK4BITS(1,1,1,1,1,1,1,1),  // 00 - 07 
 PCK4BITS(1,1,1,1,1,1,0,0),  // 08 - 0f 
@@ -272,17 +272,17 @@ PCK4BITS(eItsMe,eItsMe,eItsMe,eItsMe,eEr
 };
 
 static const uint32_t GB2312CharLenTable[] = {0, 1, 2, 0};
 
 SMModel GB2312SMModel = {
   {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, GB2312_cls },
    4,
   {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, GB2312_st },
-  GB2312CharLenTable,
+  CHAR_LEN_TABLE(GB2312CharLenTable),
   "GB2312",
 };
 */
 
 // the following state machine data was created by perl script in 
 // intl/chardet/tools. It should be the same as in PSM detector.
 static const uint32_t GB18030_cls [ 256 / 8 ] = {
 PCK4BITS(1,1,1,1,1,1,1,1),  // 00 - 07 
@@ -335,17 +335,17 @@ PCK4BITS(eError,eError,eStart,eStart,eSt
 // each code range there as well. So it is safe to set it to be 
 // 2 here. 
 static const uint32_t GB18030CharLenTable[] = {0, 1, 1, 1, 1, 1, 2};
 
 const SMModel GB18030SMModel = {
   {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, GB18030_cls },
    7,
   {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, GB18030_st },
-  GB18030CharLenTable,
+  CHAR_LEN_TABLE(GB18030CharLenTable),
   "GB18030",
 };
 
 // sjis
 
 static const uint32_t SJIS_cls [ 256 / 8 ] = {
 //PCK4BITS(0,1,1,1,1,1,1,1),  // 00 - 07 
 PCK4BITS(1,1,1,1,1,1,1,1),  // 00 - 07 
@@ -392,17 +392,17 @@ PCK4BITS(eItsMe,eItsMe,eError,eError,eSt
 };
 
 static const uint32_t SJISCharLenTable[] = {0, 1, 1, 2, 0, 0};
 
 const SMModel SJISSMModel = {
   {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, SJIS_cls },
    6,
   {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, SJIS_st },
-  SJISCharLenTable,
+  CHAR_LEN_TABLE(SJISCharLenTable),
   "Shift_JIS",
 };
 
 
 static const uint32_t UTF8_cls [ 256 / 8 ] = {
 //PCK4BITS(0,1,1,1,1,1,1,1),  // 00 - 07 
 PCK4BITS(1,1,1,1,1,1,1,1),  // 00 - 07  //allow 0x00 as a legal value
 PCK4BITS(1,1,1,1,1,1,0,0),  // 08 - 0f 
@@ -470,12 +470,12 @@ PCK4BITS(eError,eError,eError,eError,eEr
 
 static const uint32_t UTF8CharLenTable[] = {0, 1, 0, 0, 0, 0, 2, 3, 
                             3, 3, 4, 4, 5, 5, 6, 6 };
 
 const SMModel UTF8SMModel = {
   {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, UTF8_cls },
    16,
   {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, UTF8_st },
-  UTF8CharLenTable,
+  CHAR_LEN_TABLE(UTF8CharLenTable),
   "UTF-8",
 };