intl/uconv/ucvcn/nsISO2022CNToUnicode.cpp
author Honza Bambas <honzab.moz@firemni.cz>
Wed, 18 Feb 2009 14:31:31 +0100
changeset 25137 9ad175f8b25fe32a5e9641ff59e6439a7ad71b04
parent 5761 9707947eb11b381a0084d2facb8d43b908f78744
child 28503 f24385b58d3a399afd2e6aaf365c72248bbae842
permissions -rw-r--r--
Bug 456705 - Firefox 2.0.0.17 crashes when opening a https-site or on shutdown with FoxyProxy 2.8.5 [@ nsSSLThread::Run], r=kaie

/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is mozilla.org code.
 *
 * The Initial Developer of the Original Code is
 * Netscape Communications Corporation.
 * Portions created by the Initial Developer are Copyright (C) 1998
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *   Ervin Yan <ervin.yan@sun.com>
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either of the GNU General Public License Version 2 or later (the "GPL"),
 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */
#include "nsISO2022CNToUnicode.h"
#include "nsUCSupport.h"
#include "nsICharsetConverterManager.h"
#include "nsIServiceManager.h"

static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID);

NS_IMETHODIMP nsISO2022CNToUnicode::GB2312_To_Unicode(unsigned char *aSrc, PRInt32 aSrcLength, PRUnichar * aDest, PRInt32 * aDestLength)
{
    nsresult rv;

    if(!mGB2312_Decoder) {
       // creating a delegate converter (GB2312)
       nsCOMPtr<nsICharsetConverterManager> ccm =
              do_GetService(kCharsetConverterManagerCID, &rv);
       if(NS_FAILED(rv))
          return NS_ERROR_UNEXPECTED;

       rv = ccm->GetUnicodeDecoderRaw("GB2312", getter_AddRefs(mGB2312_Decoder));
       if(NS_FAILED(rv))
          return NS_ERROR_UNEXPECTED;
    }

    if(!mGB2312_Decoder) // failed creating a delegate converter
       return NS_ERROR_UNEXPECTED;

    rv = mGB2312_Decoder->Convert((const char *)aSrc, &aSrcLength, aDest, aDestLength);
    return rv;
}

NS_IMETHODIMP nsISO2022CNToUnicode::EUCTW_To_Unicode(unsigned char *aSrc, PRInt32 aSrcLength, PRUnichar * aDest, PRInt32 * aDestLength)
{
    nsresult rv;

    if(!mEUCTW_Decoder) {
       // creating a delegate converter (x-euc-tw)
       nsCOMPtr<nsICharsetConverterManager> ccm =
              do_GetService(kCharsetConverterManagerCID, &rv);
       if(NS_FAILED(rv))
          return NS_ERROR_UNEXPECTED;

       rv = ccm->GetUnicodeDecoderRaw("x-euc-tw", getter_AddRefs(mEUCTW_Decoder));
       if(NS_FAILED(rv))
          return NS_ERROR_UNEXPECTED;
    }

    if(!mEUCTW_Decoder) // failed creating a delegate converter
       return NS_ERROR_UNEXPECTED;

    rv = mEUCTW_Decoder->Convert((const char *)aSrc, &aSrcLength, aDest, aDestLength);
    return(rv);
}

NS_IMETHODIMP nsISO2022CNToUnicode::Convert(const char * aSrc, PRInt32 * aSrcLen, PRUnichar * aDest, PRInt32 * aDestLen)
{
  const unsigned char * srcEnd = (unsigned char *)aSrc + *aSrcLen;
  const unsigned char * src = (unsigned char *) aSrc;
  PRUnichar* destEnd = aDest + *aDestLen;
  PRUnichar* dest = aDest;
  nsresult rv;
  PRInt32 aLen; 

  while ((src < srcEnd))
  {
    switch (mState)
    {
      case eState_ASCII:
        if(ESC == *src) {
           mState = eState_ESC;
        } else {
           if(dest+1 >= destEnd)
              goto error1;
           *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;

           mState = eState_ASCII;
        }
        break;

      case eState_ESC:    // ESC
        if('$' == *src) {
           mState = eState_ESC_24;
        } else {
           if(dest+2 >= destEnd)
              goto error1;
           *dest++ = (PRUnichar) ESC;
           *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;

           mState = eState_ASCII;
        }
        break;

      case eState_ESC_24: // ESC $
        if(')' == *src) {
           mState = eState_ESC_24_29;
        } else if('*' == *src) {
           mState = eState_ESC_24_2A;
        } else if('+' == *src) {
           mState = eState_ESC_24_2B;
        } else {
           if(dest+3 >= destEnd)
              goto error1;
           *dest++ = (PRUnichar) ESC;
           *dest++ = (PRUnichar) '$';
           *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;

           mState = eState_ASCII;
        }
        break;

      case eState_ESC_24_29: // ESC $ )
        if('A' == *src) {
           mState = eState_ESC_24_29_A;
        } else if('G' == *src) {
           mState = eState_ESC_24_29_G;
        } else {
           if(dest+4 >= destEnd)
              goto error1;
           *dest++ = (PRUnichar) ESC;
           *dest++ = (PRUnichar) '$';
           *dest++ = (PRUnichar) ')';
           *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;

           mState = eState_ASCII;
        }
        break;

      case eState_ESC_24_29_A:  // ESC $ ) A
        if(SO == *src) {
           mState = eState_GB2312_1980;
           mRunLength = 0;
        } else {
           if(dest+5 >= destEnd)
              goto error1;
           *dest++ = (PRUnichar) ESC;
           *dest++ = (PRUnichar) '$';
           *dest++ = (PRUnichar) ')';
           *dest++ = (PRUnichar) 'A';
           *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;

           mState = eState_ASCII;
        }
        break;

      case eState_GB2312_1980:   // ESC $ ) A SO
        if(SI == *src) { // Shift-In (SI)
           mState = eState_ESC_24_29_A_SO_SI;
           if (mRunLength == 0) {
              if(dest+1 >= destEnd)
                 goto error1;
              *dest++ = 0xFFFD;
           }
           mRunLength = 0;
        } else if(ESC == *src) {
           mState = eState_ESC;
        } else {
           if(0x20 < *src && *src < 0x7f) {
              mData = *src;
              mState = eState_GB2312_1980_2ndbyte;
           } else {
              if(dest+1 >= destEnd)
                 goto error1;
              *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
           }
        }
        break; 

      case eState_GB2312_1980_2ndbyte:  // ESC $ ) A SO
        if(0x20 < *src && *src < 0x7f) {
           unsigned char gb[2];
           PRInt32 gbLen = 2;

           gb[0] = mData | 0x80;
           gb[1] = *src | 0x80;

           aLen = destEnd - dest;
           rv = GB2312_To_Unicode(gb, gbLen, dest, &aLen);
           ++mRunLength;
           if(rv == NS_OK_UDEC_MOREOUTPUT) {
              goto error1;
           } else if(NS_FAILED(rv)) {
              goto error2;
           }

           dest += aLen;
        } else {
           if(dest+2 >= destEnd)
              goto error1;
           *dest++ = (PRUnichar) mData;
           *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
        }
        mState = eState_GB2312_1980;
        break;

      case eState_ESC_24_29_A_SO_SI:  // ESC $ ) A SO SI
        if(SO == *src) {
           mState = eState_GB2312_1980;
           mRunLength = 0;
        } else if(ESC == *src) {
           mState = eState_ESC;
        } else {
           if(dest+1 >= destEnd)
              goto error1;
           *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;

           mState = eState_ESC_24_29_A_SO_SI;
        }
        break;

      case eState_ESC_24_29_G:   // ESC $ ) G
        if(SO == *src) {
           mState = eState_CNS11643_1;
           mRunLength = 0;
        } else {
           if(dest+5 >= destEnd)
              goto error1;
           *dest++ = (PRUnichar) ESC;
           *dest++ = (PRUnichar) '$';
           *dest++ = (PRUnichar) ')';
           *dest++ = (PRUnichar) 'G';
           *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;

           mState = eState_ASCII;
        }
        break;

      case eState_CNS11643_1:   // ESC $ ) G SO
        if(SI == *src) { // Shift-In (SI)
           mState = eState_ESC_24_29_G_SO_SI;
           if (mRunLength == 0) {
              if(dest+1 >= destEnd)
                 goto error1;
              *dest++ = 0xFFFD;
           }
           mRunLength = 0;
        } else if(ESC == *src) {
           mState = eState_ESC;
        } else {
           if(0x20 < *src && *src < 0x7f) {
              mData = *src;
              mState = eState_CNS11643_1_2ndbyte;
           } else {
              if(dest+1 >= destEnd)
                 goto error1;
              *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
           }
        }
        break;

      case eState_CNS11643_1_2ndbyte:  // ESC $ ) G SO
        if(0x20 < *src && *src < 0x7f) {
           unsigned char cns[4];
           PRInt32 cnsLen = 2;

           cns[0] = mData | 0x80;
           cns[1] = *src | 0x80;

           aLen = destEnd - dest;
           rv = EUCTW_To_Unicode(cns, cnsLen, dest, &aLen);
           ++mRunLength;
           if(rv == NS_OK_UDEC_MOREOUTPUT) {
              goto error1;
           } else if(NS_FAILED(rv)) {
              goto error2;
           }

           dest += aLen;
        } else {
           if(dest+2 >= destEnd)
              goto error1;
           *dest++ = (PRUnichar) mData;
           *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
        }
        mState = eState_CNS11643_1;
        break;

      case eState_ESC_24_29_G_SO_SI: // ESC $ ) G SO SI
        if(SO == *src) {
           mState = eState_CNS11643_1;
           mRunLength = 0;
        } else if(ESC == *src) {
           mState = eState_ESC;
        } else {
           if(dest+1 >= destEnd)
              goto error1;
           *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;

           mState = eState_ESC_24_29_G_SO_SI;
        }
        break;

      case eState_ESC_24_2A: // ESC $ *
        if('H' == *src) {
           mState = eState_ESC_24_2A_H;
        } else {
           if(dest+4 >= destEnd)
              goto error1;
           *dest++ = (PRUnichar) ESC;
           *dest++ = (PRUnichar) '$';
           *dest++ = (PRUnichar) '*';
           *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;

           mState = eState_ASCII;
        }
        break;

      case eState_ESC_24_2A_H:  // ESC $ * H
        if(ESC == *src) {
           mState = eState_ESC_24_2A_H_ESC;
        } else {
           if(dest+5 >= destEnd)
              goto error1;
           *dest++ = (PRUnichar) ESC;
           *dest++ = (PRUnichar) '$';
           *dest++ = (PRUnichar) '*';
           *dest++ = (PRUnichar) 'H';
           *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;

           mState = eState_ASCII;
        } 
        break;

      case eState_ESC_24_2A_H_ESC:  // ESC $ * H ESC
        if(SS2 == *src) {
           mState = eState_CNS11643_2;
           mRunLength = 0;
        } else if('$' == *src) {
           mState = eState_ESC_24;
        } else {
           if(dest+6 >= destEnd)
              goto error1;
           *dest++ = (PRUnichar) ESC;
           *dest++ = (PRUnichar) '$';
           *dest++ = (PRUnichar) '*';
           *dest++ = (PRUnichar) 'H';
           *dest++ = (PRUnichar) ESC;
           *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;

           mState = eState_ASCII;
        }
        break;

      case eState_CNS11643_2:  // ESC $ * H ESC SS2
        if(SI == *src) { // Shift-In (SI)
           mState = eState_ESC_24_2A_H_ESC_SS2_SI;
           if (mRunLength == 0) {
              if(dest+1 >= destEnd)
                 goto error1;
              *dest++ = 0xFFFD;
           }
           mRunLength = 0;
        } else if(ESC == *src) {
           mState = eState_ESC_24_2A_H_ESC;
        } else {
           if(0x20 < *src && *src < 0x7f) {
              mData = *src;
              mState = eState_CNS11643_2_2ndbyte;
           } else {
              if(dest+1 >= destEnd)
                 goto error1;
              *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
           }
        }
        break;

      case eState_CNS11643_2_2ndbyte:   // ESC $ * H ESC SS2
        if(0x20 < *src && *src < 0x7f) {
           unsigned char cns[4];
           PRInt32 cnsLen = 4;
 
           cns[0] = (unsigned char) MBYTE;
           cns[1] = (unsigned char) (PMASK + 2);
           cns[2] = mData | 0x80;
           cns[3] = *src | 0x80;
 
           aLen = destEnd - dest;
           rv = EUCTW_To_Unicode(cns, cnsLen, dest, &aLen);
           ++mRunLength;
           if(rv == NS_OK_UDEC_MOREOUTPUT) {
              goto error1;
           } else if(NS_FAILED(rv)) {
              goto error2;
           }

           dest += aLen;
        } else {
           if(dest+2 >= destEnd)
              goto error1;
           *dest++ = (PRUnichar) mData;
           *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
        }
        mState = eState_CNS11643_2;
        break;

      case eState_ESC_24_2A_H_ESC_SS2_SI:  // ESC $ * H ESC SS2 SI
        if(ESC == *src) {
           mState = eState_ESC_24_2A_H_ESC_SS2_SI_ESC;
        } else {
           if(dest+1 >= destEnd)
              goto error1;
           *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;

           mState = eState_ESC_24_2A_H_ESC_SS2_SI;
        }
        break;

      case eState_ESC_24_2A_H_ESC_SS2_SI_ESC:  // ESC $ * H ESC SS2 SI ESC
        if(SS2 == *src) {
           mState = eState_CNS11643_2;
           mRunLength = 0;
        } else if('$' == *src) {
           mState = eState_ESC_24;
        } else {
           if(dest+1 >= destEnd)
              goto error1;
           *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;

           mState = eState_ESC_24_2A_H_ESC_SS2_SI;
        }
        break;

      case eState_ESC_24_2B: // ESC $ +
        if('I' <= *src && *src <= 'M') {
            mState = eState_ESC_24_2B_I;
            mPlaneID = *src - 'I' + 3;
        } else {
           if(dest+4 >= destEnd)
              goto error1;
           *dest++ = (PRUnichar) ESC;
           *dest++ = (PRUnichar) '$';
           *dest++ = (PRUnichar) '+';
           *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;

           mState = eState_ASCII;
        }
        break;

      case eState_ESC_24_2B_I:  // ESC $ + I
        if(ESC == *src) {
           mState = eState_ESC_24_2B_I_ESC;
        } else {
           if(dest+5 >= destEnd)
              goto error1;
           *dest++ = (PRUnichar) ESC;
           *dest++ = (PRUnichar) '$';
           *dest++ = (PRUnichar) '+';
           *dest++ = (PRUnichar) 'I' + mPlaneID - 3;
           *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;

           mState = eState_ASCII;
        }
        break;

      case eState_ESC_24_2B_I_ESC:  // ESC $ + I ESC
        if(SS3 == *src) {
           mState = eState_CNS11643_3;
           mRunLength = 0;
        } else if('$' == *src) {
           mState = eState_ESC_24;
        } else {
           if(dest+6 >= destEnd)
              goto error1;
           *dest++ = (PRUnichar) ESC;
           *dest++ = (PRUnichar) '$';
           *dest++ = (PRUnichar) '+';
           *dest++ = (PRUnichar) 'I' + mPlaneID - 3;
           *dest++ = (PRUnichar) ESC;
           *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;

           mState = eState_ASCII;
        }
        break;

      case eState_CNS11643_3:   // ESC $ + I ESC SS3
        if(SI == *src) { // Shift-In (SI)
           mState = eState_ESC_24_2B_I_ESC_SS3_SI;
           if (mRunLength == 0) {
              if(dest+1 >= destEnd)
                 goto error1;
              *dest++ = 0xFFFD;
           }
           mRunLength = 0;
        } else if(ESC == *src) {
           mState = eState_ESC_24_2B_I_ESC;
        } else {
           if(0x20 < *src && *src < 0x7f) {
              mData = *src;
              mState = eState_CNS11643_3_2ndbyte;
           } else {
              if(dest+1 >= destEnd)
                 goto error1;
              *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
           }
        }

        break;

      case eState_CNS11643_3_2ndbyte:  // ESC $ + I ESC SS3
        if(0x20 < *src && *src < 0x7f) {
           unsigned char cns[4];
           PRInt32 cnsLen = 4;

           cns[0] = (unsigned char) MBYTE;
           cns[1] = (unsigned char) (PMASK + mPlaneID);
           cns[2] = mData | 0x80;
           cns[3] = *src | 0x80;

           aLen = destEnd - dest;
           rv = EUCTW_To_Unicode(cns, cnsLen, dest, &aLen);
           ++mRunLength;
           if(rv == NS_OK_UDEC_MOREOUTPUT) {
              goto error1;
           } else if(NS_FAILED(rv)) {
              goto error2;
           }

           dest += aLen;
        } else {
           if(dest+2 >= destEnd)
              goto error1;
           *dest++ = (PRUnichar) mData;
           *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
        }
        mState = eState_CNS11643_3;
        break;

      case eState_ESC_24_2B_I_ESC_SS3_SI:  // ESC $ + I ESC SS3 SI
        if(ESC == *src) {
           mState = eState_ESC_24_2B_I_ESC_SS3_SI_ESC;
        } else {
           if(dest+1 >= destEnd)
              goto error1;
           *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;

           mState = eState_ESC_24_2B_I_ESC_SS3_SI;
        }
        break;

      case eState_ESC_24_2B_I_ESC_SS3_SI_ESC:  // ESC $ + I ESC SS3 SI ESC
        if(SS3 == *src) {
           mState = eState_CNS11643_3;
           mRunLength = 0;
        } else if('$' == *src) {
           mState = eState_ESC_24;
        } else {
           if(dest+1 >= destEnd)
              goto error1;
           *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;

           mState = eState_ESC_24_2B_I_ESC_SS3_SI;
        }
        break;

    } // switch
    src++;
  }

  *aDestLen = dest- aDest;
  return NS_OK;

error1:
  *aDestLen = dest-aDest;
  src++;
  if ((mState == eState_ASCII) && (src == srcEnd)) {
    return NS_OK;
  }
  *aSrcLen = src - (const unsigned char*)aSrc;
  return NS_OK_UDEC_MOREOUTPUT;

error2:
  *aSrcLen = src - (const unsigned char*)aSrc;
  *aDestLen = dest-aDest;
  mState = eState_ASCII;
  return NS_ERROR_UNEXPECTED;
}