intl/chardet/nsCyrillicDetector.cpp
author Hector Zhao <bzhao@mozilla.com>
Wed, 26 Nov 2014 16:37:37 +0800
changeset 234116 4ebb521ae031b2c6a52d7a8977036bbc0af750e2
parent 217660 bda1cceef8d22643b72ceefe2b95d31cd4c79663
permissions -rw-r--r--
Bug 1105189 - Look for the distribution/bundles directory under Contents/Resources due to v2 signing requirements. r=bsmedberg, a=lsblakk

/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "nscore.h"
#include "nsCyrillicProb.h"
#include <stdio.h>

#include "nsCOMPtr.h"
#include "nsISupports.h"
#include "nsICharsetDetector.h"
#include "nsICharsetDetectionObserver.h"
#include "nsIStringCharsetDetector.h"
#include "nsCyrillicDetector.h"

//----------------------------------------------------------------------
// Interface nsISupports [implementation]
NS_IMPL_ISUPPORTS(nsCyrXPCOMDetector, nsICharsetDetector)
NS_IMPL_ISUPPORTS(nsCyrXPCOMStringDetector, nsIStringCharsetDetector)

void nsCyrillicDetector::HandleData(const char* aBuf, uint32_t aLen)
{
   uint8_t cls;
   const char* b;
   uint32_t i;
   if(mDone) 
      return;
   for(i=0, b=aBuf;i<aLen;i++,b++)
   {
     for(unsigned j=0;j<mItems;j++)
     {
        if( 0x80 & *b)
           cls = mCyrillicClass[j][(*b) & 0x7F];
        else 
           cls = 0;
        NS_ASSERTION( cls <= 32 , "illegal character class");
        mProb[j] += gCyrillicProb[mLastCls[j]][cls];
        mLastCls[j] = cls;
     } 
   }
   // We now only based on the first block we receive
   DataEnd();
}

//---------------------------------------------------------------------
#define THRESHOLD_RATIO 1.5f
void nsCyrillicDetector::DataEnd()
{
   uint32_t max=0;
   uint8_t  maxIdx=0;
   uint8_t j;
   if(mDone) 
      return;
   for(j=0;j<mItems;j++) {
      if(mProb[j] > max)
      {
           max = mProb[j];
           maxIdx= j;
      }
   }

   if( 0 == max ) // if we didn't get any 8 bits data 
     return;

#ifdef DEBUG
   for(j=0;j<mItems;j++) 
      printf("Charset %s->\t%d\n", mCharsets[j], mProb[j]);
#endif
   this->Report(mCharsets[maxIdx]);
   mDone = true;
}

//---------------------------------------------------------------------
nsCyrXPCOMDetector:: nsCyrXPCOMDetector(uint8_t aItems, 
                      const uint8_t ** aCyrillicClass, 
                      const char **aCharsets)
	     : nsCyrillicDetector(aItems, aCyrillicClass, aCharsets)
{
    mObserver = nullptr;
}

//---------------------------------------------------------------------
nsCyrXPCOMDetector::~nsCyrXPCOMDetector() 
{
}

//---------------------------------------------------------------------
NS_IMETHODIMP nsCyrXPCOMDetector::Init(
  nsICharsetDetectionObserver* aObserver)
{
  NS_ASSERTION(mObserver == nullptr , "Init twice");
  if(nullptr == aObserver)
     return NS_ERROR_ILLEGAL_VALUE;

  mObserver = aObserver;
  return NS_OK;
}

//----------------------------------------------------------
NS_IMETHODIMP nsCyrXPCOMDetector::DoIt(
  const char* aBuf, uint32_t aLen, bool* oDontFeedMe)
{
  NS_ASSERTION(mObserver != nullptr , "have not init yet");

  if((nullptr == aBuf) || (nullptr == oDontFeedMe))
     return NS_ERROR_ILLEGAL_VALUE;

  this->HandleData(aBuf, aLen);
  *oDontFeedMe = false;
  return NS_OK;
}

//----------------------------------------------------------
NS_IMETHODIMP nsCyrXPCOMDetector::Done()
{
  NS_ASSERTION(mObserver != nullptr , "have not init yet");
  this->DataEnd();
  return NS_OK;
}

//----------------------------------------------------------
void nsCyrXPCOMDetector::Report(const char* aCharset)
{
  NS_ASSERTION(mObserver != nullptr , "have not init yet");
  mObserver->Notify(aCharset, eBestAnswer);
}

//---------------------------------------------------------------------
nsCyrXPCOMStringDetector:: nsCyrXPCOMStringDetector(uint8_t aItems, 
                      const uint8_t ** aCyrillicClass, 
                      const char **aCharsets)
	     : nsCyrillicDetector(aItems, aCyrillicClass, aCharsets)
{
}

//---------------------------------------------------------------------
nsCyrXPCOMStringDetector::~nsCyrXPCOMStringDetector() 
{
}

//---------------------------------------------------------------------
void nsCyrXPCOMStringDetector::Report(const char *aCharset) 
{
   mResult = aCharset;
}

//---------------------------------------------------------------------
NS_IMETHODIMP nsCyrXPCOMStringDetector::DoIt(const char* aBuf, uint32_t aLen, 
                     const char** oCharset, nsDetectionConfident &oConf)
{
   mResult = nullptr;
   mDone = false;
   this->HandleData(aBuf, aLen); 
   this->DataEnd();
   *oCharset=mResult;
   oConf = eBestAnswer;
   return NS_OK;
}