Bug 472764 - "gloda full-text search always uses SQLite's porter stemmer without regard to effective locale, etc." "patch v5" [r=asuth, sr=bienvenu, a=blocking-thunderbird3]
authorMakoto Kato <m_kato@ga2.so-net.ne.jp>
Tue, 20 Oct 2009 17:52:00 -0700
changeset 4207 4aab2fb7d9e1a218fe2d6bb666f54ffdcbff9135
parent 4206 839c5e5860263f87fd415092513a1b38947b3ff4
child 4208 1ef3fd69325ae769bc160626d0cd92257b619cc1
push idunknown
push userunknown
push dateunknown
reviewersasuth, bienvenu, blocking-thunderbird3
bugs472764
Bug 472764 - "gloda full-text search always uses SQLite's porter stemmer without regard to effective locale, etc." "patch v5" [r=asuth, sr=bienvenu, a=blocking-thunderbird3]
mail/installer/windows/packages-static
mailnews/build/Makefile.in
mailnews/build/nsMailModule.cpp
mailnews/db/gloda/modules/datastore.js
mailnews/db/gloda/modules/msg_search.js
mailnews/db/gloda/test/unit/test_intl.js
mailnews/extensions/Makefile.in
mailnews/extensions/fts3/Makefile.in
mailnews/extensions/fts3/build/Makefile.in
mailnews/extensions/fts3/build/nsFts3TokenizerFactory.cpp
mailnews/extensions/fts3/public/Makefile.in
mailnews/extensions/fts3/public/nsIFts3Tokenizer.idl
mailnews/extensions/fts3/src/Makefile.in
mailnews/extensions/fts3/src/README.mozilla
mailnews/extensions/fts3/src/fts3_porter.c
mailnews/extensions/fts3/src/fts3_tokenizer.h
mailnews/extensions/fts3/src/nsFts3Tokenizer.cpp
mailnews/extensions/fts3/src/nsFts3Tokenizer.h
mailnews/extensions/fts3/src/nsFts3TokenizerCID.h
mailnews/makefiles.sh
suite/installer/unix/packages
suite/installer/windows/packages
--- a/mail/installer/windows/packages-static
+++ b/mail/installer/windows/packages-static
@@ -77,16 +77,17 @@ bin\defaults\profile\localstore.rdf
 bin\defaults\profile\prefs.js
 bin\defaults\profile\mimeTypes.rdf
 
 bin\isp\*
 
 bin\components\aboutRights.js
 bin\components\activity.xpt
 bin\components\addrbook.xpt
+bin\components\fts3tok.xpt
 bin\components\mime.xpt
 bin\components\steel.xpt
 bin\components\msgbase.xpt
 bin\components\msgcompo.xpt
 bin\components\msgdb.xpt
 bin\components\msgimap.xpt
 bin\components\msglocal.xpt
 bin\components\msgnews.xpt
--- a/mailnews/build/Makefile.in
+++ b/mailnews/build/Makefile.in
@@ -64,16 +64,17 @@ MODULE_NAME	= nsMailModule
 REQUIRES	= xpcom \
 		  addrbook \
 		  appcomps \
 		  toolkitcomps \
 		  content \
 		  dom \
 		  docshell \
 		  editor \
+		  fts3tok \
 		  gfx \
 		  intl \
 	     	  locale \
 		  layout \
 		  lwbrk \
 	          mailview \
 	          msglocal \
 		  msgcompose \
@@ -84,16 +85,17 @@ REQUIRES	= xpcom \
 		  mime \
 		  mork \
 		  necko \
 		  nkcache \
 		  pref \
 		  rdf \
 		  rdfutil \
 		  string \
+		  sqlite3 \
 		  txmgr \
 		  widget \
 		  webbrwsr \
 		  uconv \
 		  uriloader \
 		  $(ZLIB_REQUIRES) \
 		  $(NULL)
 
@@ -122,16 +124,17 @@ SHARED_LIBRARY_LIBS = \
 	        ../compose/src/$(LIB_PREFIX)msgcompose_s.$(LIB_SUFFIX) \
 	        ../db/msgdb/src/$(LIB_PREFIX)msgdb_s.$(LIB_SUFFIX) \
 	        ../imap/src/$(LIB_PREFIX)msgimap_s.$(LIB_SUFFIX) \
 	        ../addrbook/src/$(LIB_PREFIX)addrbook_s.$(LIB_SUFFIX) \
 	        ../news/src/$(LIB_PREFIX)msgnews_s.$(LIB_SUFFIX) \
 	        ../mime/src/$(LIB_PREFIX)mime_s.$(LIB_SUFFIX) \
 	        ../mime/emitters/src/$(LIB_PREFIX)emitterutil_s.$(LIB_SUFFIX) \
 	        ../extensions/bayesian-spam-filter/src/$(LIB_PREFIX)bayesflt_s.$(LIB_SUFFIX) \
+	        ../extensions/fts3/src/$(LIB_PREFIX)fts3tok_s.$(LIB_SUFFIX) \
 	        ../extensions/mailviews/src/$(LIB_PREFIX)mailview_s.$(LIB_SUFFIX) \
                 ../extensions/mdn/src/$(LIB_PREFIX)msgmdn_s.$(LIB_SUFFIX) \
                 ../mime/cthandlers/vcard/$(LIB_PREFIX)vcard_s.$(LIB_SUFFIX) \
                 ../mime/cthandlers/glue/$(LIB_PREFIX)mimecthglue_s.$(LIB_SUFFIX) \
                 $(NULL)
 
 ifdef MOZILLA_INTERNAL_API
 EXTRA_DSO_LDOPTS = \
@@ -167,16 +170,17 @@ LOCAL_INCLUDES = -I$(srcdir) \
                  -I$(srcdir)/../addrbook/src \
                  -I$(srcdir)/../imap/src \
                  -I$(srcdir)/../compose/src \
                  -I$(srcdir)/../base/search/src \
                  -I$(srcdir)/../mime/src \
                  -I$(srcdir)/../mime/emitters/src \
                  -I$(srcdir)/../news/src \
                  -I$(srcdir)/../extensions/bayesian-spam-filter/src \
+                 -I$(srcdir)/../extensions/fts3/src \
                  -I$(srcdir)/../extensions/mailviews/src \
                  -I$(srcdir)/../extensions/mdn/src \
                  $(NULL)
 
 include $(topsrcdir)/config/rules.mk
 
 ifeq ($(OS_ARCH),WINNT)
 OS_LIBS	+= $(call EXPAND_LIBNAME,shell32)
--- a/mailnews/build/nsMailModule.cpp
+++ b/mailnews/build/nsMailModule.cpp
@@ -311,16 +311,22 @@
 #include "nsMsgMdnCID.h"
 #include "nsMsgMdnGenerator.h"
 
 ///////////////////////////////////////////////////////////////////////////////
 // vcard includes
 ///////////////////////////////////////////////////////////////////////////////
 #include "nsMimeContentTypeHandler.h"
 
+///////////////////////////////////////////////////////////////////////////////
+// FTS3 Tokenizer
+///////////////////////////////////////////////////////////////////////////////
+#include "nsFts3TokenizerCID.h"
+#include "nsFts3Tokenizer.h"
+
 ////////////////////////////////////////////////////////////////////////////////
 // mailnews base factories
 ////////////////////////////////////////////////////////////////////////////////
 NS_GENERIC_FACTORY_CONSTRUCTOR(nsMessengerBootstrap)
 NS_GENERIC_FACTORY_CONSTRUCTOR_INIT(nsMsgMailSession, Init)
 NS_GENERIC_FACTORY_CONSTRUCTOR(nsMessenger)
 NS_GENERIC_FACTORY_CONSTRUCTOR_INIT(nsMsgAccountManager, Init)
 NS_GENERIC_FACTORY_CONSTRUCTOR(nsMsgAccount)
@@ -514,16 +520,20 @@ NS_GENERIC_FACTORY_CONSTRUCTOR(nsMimeHea
 ////////////////////////////////////////////////////////////////////////////////
 // mime emitter factories
 ////////////////////////////////////////////////////////////////////////////////
 NS_GENERIC_FACTORY_CONSTRUCTOR(nsMimeRawEmitter)
 NS_GENERIC_FACTORY_CONSTRUCTOR(nsMimeXmlEmitter)
 NS_GENERIC_FACTORY_CONSTRUCTOR(nsMimePlainEmitter)
 NS_GENERIC_FACTORY_CONSTRUCTOR_INIT(nsMimeHtmlDisplayEmitter, Init)
 
+////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////
+NS_GENERIC_FACTORY_CONSTRUCTOR(nsFts3Tokenizer)
+
 static NS_METHOD RegisterMimeEmitter(nsIComponentManager *aCompMgr, nsIFile *aPath, const char *registryLocation,
                                      const char *componentType, const nsModuleComponentInfo *info)
 {
   nsresult rv;
   nsCOMPtr<nsICategoryManager> catman = do_GetService(NS_CATEGORYMANAGER_CONTRACTID, &rv);
   if (NS_FAILED(rv)) return rv;
   nsCString previous;
   return catman->AddCategoryEntry("mime-emitter", info->mContractID, info->mContractID,
@@ -1265,16 +1275,22 @@ static const nsModuleComponentInfo gComp
       NS_MSGMDNGENERATOR_CONTRACTID, nsMsgMdnGeneratorConstructor },
 
     ////////////////////////////////////////////////////////////////////////////////
     // mdn  components
     ////////////////////////////////////////////////////////////////////////////////
     { "MIME VCard Handler", NS_VCARD_CONTENT_TYPE_HANDLER_CID, "@mozilla.org/mimecth;1?type=text/x-vcard",
        nsVCardMimeContentTypeHandlerConstructor, },
 
+    ////////////////////////////////////////////////////////////////////////////////
+    // FTS3 tokenizer components
+    ////////////////////////////////////////////////////////////////////////////////
+    { "FTS3 Tokenizer", NS_FTS3TOKENIZER_CID,
+      NS_FTS3TOKENIZER_CONTRACTID, nsFts3TokenizerConstructor },
+
 #ifdef MOZ_SUITE
     ////////////////////////////////////////////////////////////////////////////////
     // suite general startup
     ////////////////////////////////////////////////////////////////////////////////
     { "Address Book Manager Startup Handler", NS_ABMANAGER_CID,
       NS_ABMANAGERSTARTUPHANDLER_CONTRACTID, nsAbManagerConstructor },
     { "Compose Service", NS_MSGCOMPOSESERVICE_CID,
       NS_MSGCOMPOSESTARTUPHANDLER_CONTRACTID, nsMsgComposeServiceConstructor },
--- a/mailnews/db/gloda/modules/datastore.js
+++ b/mailnews/db/gloda/modules/datastore.js
@@ -503,17 +503,17 @@ var GlodaDatastore = {
   kConstraintIn: 1,
   kConstraintRanges: 2,
   kConstraintEquals: 3,
   kConstraintStringLike: 4,
   kConstraintFulltext: 5,
 
   /* ******************* SCHEMA ******************* */
 
-  _schemaVersion: 13,
+  _schemaVersion: 14,
   _schema: {
     tables: {
 
       // ----- Messages
       folderLocations: {
         columns: [
           ["id", "INTEGER PRIMARY KEY"],
           ["folderURI", "TEXT NOT NULL"],
@@ -737,16 +737,21 @@ var GlodaDatastore = {
     // It does exist, but we (someday) might need to upgrade the schema
     else {
       // (Exceptions may be thrown if the database is corrupt)
       { // try {
         dbConnection = dbService.openUnsharedDatabase(dbFile);
         // see _createDB...
         dbConnection.executeSimpleSQL("PRAGMA cache_size = 8192");
 
+        // Register custom tokenizer to index all language text
+        var tokenizer = Cc["@mozilla.org/messenger/fts3tokenizer;1"].
+                          getService(Ci.nsIFts3Tokenizer);
+        tokenizer.registerTokenizer(dbConnection);
+
         if (dbConnection.schemaVersion != this._schemaVersion) {
           this._log.debug("Need to migrate database.  (DB version: " +
             dbConnection.schemaVersion + " desired version: " +
             this._schemaVersion);
           dbConnection = this._migrate(dbService, dbFile,
                                        dbConnection,
                                        dbConnection.schemaVersion,
                                        this._schemaVersion);
@@ -858,16 +863,20 @@ var GlodaDatastore = {
     //  windows).  Increasing the page size to 4096 increases the actual byte
     //  turnover significantly for rollback journals than a page size of 1024,
     //  and since the rollback journal has to be fsynced, that is undesirable.
     dbConnection.executeSimpleSQL("PRAGMA page_size = 1024");
     // This is a maximum number of pages to be used.  If the database does not
     //  get this large, then the memory does not get used.
     // Do not forget to update the code in _init if you change this value.
     dbConnection.executeSimpleSQL("PRAGMA cache_size = 8192");
+    // Register custom tokenizer to index all language text
+    var tokenizer = Cc["@mozilla.org/messenger/fts3tokenizer;1"].
+                      getService(Ci.nsIFts3Tokenizer);
+    tokenizer.registerTokenizer(dbConnection);
 
     dbConnection.beginTransaction();
     try {
       this._createSchema(dbConnection);
       dbConnection.commitTransaction();
     }
     catch(ex) {
       dbConnection.rollbackTransaction();
@@ -883,17 +892,17 @@ var GlodaDatastore = {
     this._log.info("Creating table: " + aTableName);
     aDBConnection.createTable(aTableName,
       [(coldef[0] + " " + coldef[1]) for each
        ([i, coldef] in Iterator(aTableDef.columns))].join(", "));
 
     // - Create the fulltext table if applicable
     if (aTableDef.fulltextColumns) {
       let createFulltextSQL = "CREATE VIRTUAL TABLE " + aTableName + "Text" +
-        " USING fts3(tokenize porter, " +
+        " USING fts3(tokenize mozporter, " +
         [(coldef[0] + " " + coldef[1]) for each
          ([i, coldef] in Iterator(aTableDef.fulltextColumns))].join(", ") +
         ")";
       this._log.info("Creating fulltext table: " + createFulltextSQL);
       aDBConnection.executeSimpleSQL(createFulltextSQL);
     }
 
     // - Create its indices
@@ -989,17 +998,19 @@ var GlodaDatastore = {
     // - note that I screwed up and failed to mark the schema change; apparently
     //   no database will claim to be version 13...
     // version 14:
     // - new attributes: forwarded, repliedTo, bcc, recipients
     // - altered fromMeTo and fromMeCc to fromMe
     // - altered toMe and ccMe to just be toMe
     // - exposes bcc to cc-related attributes
     // - MIME type DB schema overhaul
-    if (aCurVersion < 14) {
+    // version 15:
+    // - change tokenizer to mozporter to support CJK
+    if (aCurVersion < 15) {
       aDBConnection.close();
       aDBFile.remove(false);
       this._log.warn("Global database has been purged due to schema change.");
       return this._createDB(aDBService, aDBFile);
     }
 
     aDBConnection.schemaVersion = aNewVersion;
 
--- a/mailnews/db/gloda/modules/msg_search.js
+++ b/mailnews/db/gloda/modules/msg_search.js
@@ -249,21 +249,51 @@ GlodaMsgSearcher.prototype = {
       explicitSQL: FULLTEXT_QUERY_EXPLICIT_SQL,
       // osets is 0-based column number 14 (volatile to column changes)
       // dascore becomes 0-based column number 15
       outerWrapColumns: [DASCORE_SQL_SNIPPET + " AS dascore"],
       // save the offset column for extra analysis
       stashColumns: [14]
     });
 
+    // The mozporter tokenizer indexes CJK text as bi-grams, so each search
+    // term is split into overlapping two-character pieces at CJK characters.
+    let querywords = new Array();
+    this.fulltextTerms.forEach(function (term) {
+        let lastpos = 0;
+        let code;
+        for (var i = 1; i < term.length - 1; i++) {
+            code = term.charCodeAt(i);
+            // Not CJK (below U+2000, or in U+A000..U+ABFF): no bi-gram split.
+            if (code < 0x2000 || (code >= 0xa000 && code < 0xac00))
+                continue;
+            // Emit the piece ending at this character; the next piece starts here.
+            querywords.push(term.substring(lastpos, i+1));
+            lastpos = i;
+        }
+
+        if (term.length) {
+            let querylast = term.substring(lastpos);
+            if (querylast.length == 1) {
+               code = querylast.charCodeAt(0);
+               // The user searched for a single character. CJK has one-character
+               // words, so turn it into a prefix query by appending "*".
+               if (code >= 0x2000 && !(code >= 0xa000 && code < 0xac00))
+                 querylast += "*";
+            }
+            querywords.push(querylast);
+        }
+    });
+
     let fulltextQueryString;
+
     if (this.andTerms)
-      fulltextQueryString = '"' + this.fulltextTerms.join('" "') + '"';
+      fulltextQueryString = '"' + querywords.join('" "') + '"';
     else
-      fulltextQueryString = '"' + this.fulltextTerms.join('" OR "') + '"';
+      fulltextQueryString = '"' + querywords.join('" OR "') + '"';
 
     query.fulltextMatches(fulltextQueryString);
     query.orderBy(this.sortBy);
     query.limit(this.retrievalLimit);
 
     return query;
   },
 
--- a/mailnews/db/gloda/test/unit/test_intl.js
+++ b/mailnews/db/gloda/test/unit/test_intl.js
@@ -21,16 +21,18 @@ var intlPhrases = [
       'euc-jp': ['=?shift-jis?b?jqmTrppTid2LQA==?=',
                  '\xbc\xab\xc6\xb0\xd3\xb4\xb2\xdf\xb5\xa1'],
       'shift-jis': ['=?shift-jis?b?jqmTrppTid2LQA==?=',
                     '\x8e\xa9\x93\xae\x9aS\x89\xdd\x8b@']
     }
   }
 ];
 
+var resultList = [];
+
 /**
  * For each phrase in the intlPhrases array (we are parameterized over it using
  *  parameterizeTest in the 'tests' declaration), create a message where the
  *  subject, body, and attachment name are populated using the encodings in
  *  the phrase's "encodings" attribute, one encoding per message.  Make sure
  *  that the strings as exposed by the gloda representation are equal to the
  *  expected/actual value.
  */
@@ -46,16 +48,17 @@ function test_index(aPhrase) {
       attachments: [
         {filename: quoted, body: "gabba gabba hey"},
       ],
       // save off the actual value for checking
       callerData: [charset, aPhrase.actual]
     });
 
     messages.push(smsg);
+    resultList.push(smsg);
   }
 
   indexMessages(messages, verify_index, next_test);
 }
 
 /**
  * Does the per-message verification for test_index.  Knows what is right for
  *  each message because of the callerData attribute on the synthetic message.
@@ -71,19 +74,29 @@ function verify_index(smsg, gmsg) {
   LOG.debug("body: " + indexedBodyText +
       " (len: " + indexedBodyText.length + ")");
   do_check_eq(actual, indexedBodyText);
   LOG.debug("attachment name:" + attachmentName +
       " (len: " + attachmentName.length + ")");
   do_check_eq(actual, attachmentName);
 }
 
+function test_intl_fulltextsearch()
+{
+  var query = Gloda.newQuery(Gloda.NOUN_MESSAGE);
+  /* CJK text is bi-gram */
+  query.bodyMatches('\u81ea\u52d5');
+  queryExpect(query, resultList);
+}
+
+
 /* ===== Driver ===== */
 
 var tests = [
   parameterizeTest(test_index, intlPhrases),
+  test_intl_fulltextsearch,
 ];
 
 function run_test() {
   // use mbox injection because the fake server chokes sometimes right now
   injectMessagesUsing(INJECT_MBOX);
   glodaHelperRunTests(tests);
 }
--- a/mailnews/extensions/Makefile.in
+++ b/mailnews/extensions/Makefile.in
@@ -38,17 +38,17 @@
 DEPTH		= ../..
 topsrcdir	= @top_srcdir@
 srcdir		= @srcdir@
 VPATH		= @srcdir@
 
 include $(DEPTH)/config/autoconf.mk
 
 # these extensions are not optional
-PARALLEL_DIRS	= mdn mailviews bayesian-spam-filter offline-startup newsblog
+PARALLEL_DIRS	= mdn mailviews bayesian-spam-filter offline-startup newsblog fts3
 
 ifdef MOZ_PSM
 BUILD_SMIME=1
 endif
 
 ifdef BUILD_SMIME
 PARALLEL_DIRS	+= smime
 endif
new file mode 100644
--- /dev/null
+++ b/mailnews/extensions/fts3/Makefile.in
@@ -0,0 +1,55 @@
+#
+# ***** BEGIN LICENSE BLOCK *****
+# Version: MPL 1.1/GPL 2.0/LGPL 2.1
+#
+# The contents of this file are subject to the Mozilla Public License Version
+# 1.1 (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+# http://www.mozilla.org/MPL/
+#
+# Software distributed under the License is distributed on an "AS IS" basis,
+# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+# for the specific language governing rights and limitations under the
+# License.
+#
+# The Original Code is mozilla.org code.
+#
+# The Initial Developer of the Original Code is
+# Mozilla Japan.
+# Portions created by the Initial Developer are Copyright (C) 2009
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#   Makoto Kato <m_kato@ga2.so-net.ne.jp>
+#
+# Alternatively, the contents of this file may be used under the terms of
+# either of the GNU General Public License Version 2 or later (the "GPL"),
+# or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+# in which case the provisions of the GPL or the LGPL are applicable instead
+# of those above. If you wish to allow use of your version of this file only
+# under the terms of either the GPL or the LGPL, and not to allow others to
+# use your version of this file under the terms of the MPL, indicate your
+# decision by deleting the provisions above and replace them with the notice
+# and other provisions required by the GPL or the LGPL. If you do not delete
+# the provisions above, a recipient may use your version of this file under
+# the terms of any one of the MPL, the GPL or the LGPL.
+#
+# ***** END LICENSE BLOCK *****
+
+DEPTH		= ../../..
+topsrcdir	= @top_srcdir@
+
+srcdir		= @srcdir@
+VPATH		= @srcdir@
+
+include $(DEPTH)/config/autoconf.mk
+
+MODULE		= fts3tok
+
+PARALLEL_DIRS	= public src
+
+ifndef MOZ_STATIC_MAIL_BUILD
+DIRS	= build
+endif
+
+include $(topsrcdir)/config/rules.mk
new file mode 100644
--- /dev/null
+++ b/mailnews/extensions/fts3/build/Makefile.in
@@ -0,0 +1,92 @@
+#
+# ***** BEGIN LICENSE BLOCK *****
+# Version: MPL 1.1/GPL 2.0/LGPL 2.1
+#
+# The contents of this file are subject to the Mozilla Public License Version
+# 1.1 (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+# http://www.mozilla.org/MPL/
+#
+# Software distributed under the License is distributed on an "AS IS" basis,
+# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+# for the specific language governing rights and limitations under the
+# License.
+#
+# The Original Code is mozilla.org code.
+#
+# The Initial Developer of the Original Code is
+# Mozilla Japan.
+# Portions created by the Initial Developer are Copyright (C) 2009
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#   Makoto Kato <m_kato@ga2.so-net.ne.jp>
+#
+# Alternatively, the contents of this file may be used under the terms of
+# either of the GNU General Public License Version 2 or later (the "GPL"),
+# or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+# in which case the provisions of the GPL or the LGPL are applicable instead
+# of those above. If you wish to allow use of your version of this file only
+# under the terms of either the GPL or the LGPL, and not to allow others to
+# use your version of this file under the terms of the MPL, indicate your
+# decision by deleting the provisions above and replace them with the notice
+# and other provisions required by the GPL or the LGPL. If you do not delete
+# the provisions above, a recipient may use your version of this file under
+# the terms of any one of the MPL, the GPL or the LGPL.
+#
+# ***** END LICENSE BLOCK *****
+
+DEPTH		= ../../../..
+topsrcdir	= @top_srcdir@
+srcdir		= @srcdir@
+VPATH		= @srcdir@
+
+include $(DEPTH)/config/autoconf.mk
+
+MODULE		= fts3tok
+LIBRARY_NAME	= fts3tok
+META_COMPONENT = mail
+EXPORT_LIBRARY = 1
+SHORT_LIBNAME	= fts3tok
+IS_COMPONENT	= 1
+MODULE_NAME	= nsFts3TokenizerModule
+ifndef MOZ_INCOMPLETE_EXTERNAL_LINKAGE
+MOZILLA_INTERNAL_API = 1
+endif
+
+REQUIRES	= xpcom \
+		  string \
+		  sqlite3 \
+		  storage \
+		  $(NULL)
+
+ifeq ($(USE_SHORT_LIBNAME),1)
+EXTRA_DSO_LIBS = msgbsutl
+else
+EXTRA_DSO_LIBS = msgbaseutil
+endif
+
+CPPSRCS		= nsFts3TokenizerFactory.cpp
+
+LOCAL_INCLUDES += -I$(srcdir)/../src
+
+SHARED_LIBRARY_LIBS = \
+		../src/$(LIB_PREFIX)fts3tok_s.$(LIB_SUFFIX) \
+		$(NULL)
+
+ifdef MOZILLA_INTERNAL_API
+EXTRA_DSO_LDOPTS = \
+		$(LIBS_DIR) \
+		$(EXTRA_DSO_LIBS) \
+		$(MOZ_COMPONENT_LIBS) \
+		$(NULL)
+else
+EXTRA_DSO_LDOPTS = \
+	$(LIBS_DIR) \
+	$(EXTRA_DSO_LIBS) \
+	$(XPCOM_GLUE_LDOPTS) \
+	$(NSPR_LIBS) \
+	$(NULL)
+endif
+
+include $(topsrcdir)/config/rules.mk
new file mode 100644
--- /dev/null
+++ b/mailnews/extensions/fts3/build/nsFts3TokenizerFactory.cpp
@@ -0,0 +1,61 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is mozilla.org code.
+ *
+ * The Initial Developer of the Original Code is
+ * Mozilla Japan.
+ * Portions created by the Initial Developer are Copyright (C) 2009
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ *   Makoto Kato <m_kato@ga2.so-net.ne.jp>
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either of the GNU General Public License Version 2 or later (the "GPL"),
+ * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+#include "nsISupports.h"
+#include "nsCOMPtr.h"
+
+#include "nsIFactory.h"
+#include "nsIGenericFactory.h"
+#include "nsIServiceManager.h"
+#include "nsIModule.h"
+
+#include "nsFts3TokenizerCID.h"
+
+#include "nsFts3Tokenizer.h"
+
+NS_GENERIC_FACTORY_CONSTRUCTOR(nsFts3Tokenizer)
+
+static const nsModuleComponentInfo components[] =
+{
+    {"FTS3 Tokenizer",
+     NS_FTS3TOKENIZER_CID,
+     NS_FTS3TOKENIZER_CONTRACTID,
+     nsFts3TokenizerConstructor}
+};
+
+NS_IMPL_NSGETMODULE(nsFts3TokenizerModule, components)
new file mode 100644
--- /dev/null
+++ b/mailnews/extensions/fts3/public/Makefile.in
@@ -0,0 +1,52 @@
+#
+# ***** BEGIN LICENSE BLOCK *****
+# Version: MPL 1.1/GPL 2.0/LGPL 2.1
+#
+# The contents of this file are subject to the Mozilla Public License Version
+# 1.1 (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+# http://www.mozilla.org/MPL/
+#
+# Software distributed under the License is distributed on an "AS IS" basis,
+# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+# for the specific language governing rights and limitations under the
+# License.
+#
+# The Original Code is mozilla.org code.
+#
+# The Initial Developer of the Original Code is
+# Mozilla Japan.
+# Portions created by the Initial Developer are Copyright (C) 2009
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#   Makoto Kato <m_kato@ga2.so-net.ne.jp>
+#
+# Alternatively, the contents of this file may be used under the terms of
+# either of the GNU General Public License Version 2 or later (the "GPL"),
+# or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+# in which case the provisions of the GPL or the LGPL are applicable instead
+# of those above. If you wish to allow use of your version of this file only
+# under the terms of either the GPL or the LGPL, and not to allow others to
+# use your version of this file under the terms of the MPL, indicate your
+# decision by deleting the provisions above and replace them with the notice
+# and other provisions required by the GPL or the LGPL. If you do not delete
+# the provisions above, a recipient may use your version of this file under
+# the terms of any one of the MPL, the GPL or the LGPL.
+#
+# ***** END LICENSE BLOCK *****
+
+DEPTH		= ../../../..
+topsrcdir	= @top_srcdir@
+srcdir		= @srcdir@
+VPATH		= @srcdir@
+
+include $(DEPTH)/config/autoconf.mk
+
+MODULE		= fts3tok
+
+XPIDLSRCS	= \
+		nsIFts3Tokenizer.idl \
+		$(NULL)
+
+include $(topsrcdir)/config/rules.mk
new file mode 100644
--- /dev/null
+++ b/mailnews/extensions/fts3/public/nsIFts3Tokenizer.idl
@@ -0,0 +1,48 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is mozilla.org code.
+ *
+ * The Initial Developer of the Original Code is
+ * Mozilla Japan.
+ * Portions created by the Initial Developer are Copyright (C) 2009
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ *   Makoto Kato <m_kato@ga2.so-net.ne.jp>
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either of the GNU General Public License Version 2 or later (the "GPL"),
+ * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+#include "nsISupports.idl"
+
+interface mozIStorageConnection;
+
+[scriptable, uuid(136c88ea-7003-4fe8-8835-333fd18e598c)]
+interface nsIFts3Tokenizer : nsISupports {
+    // Registers the FTS3 tokenizer module for the "mozporter" tokenizer.
+    // mozporter is based on the porter tokenizer, with a bi-gram tokenizer for CJK.
+    void registerTokenizer(in mozIStorageConnection connection);
+};
new file mode 100644
--- /dev/null
+++ b/mailnews/extensions/fts3/src/Makefile.in
@@ -0,0 +1,74 @@
+#
+# ***** BEGIN LICENSE BLOCK *****
+# Version: MPL 1.1/GPL 2.0/LGPL 2.1
+#
+# The contents of this file are subject to the Mozilla Public License Version
+# 1.1 (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+# http://www.mozilla.org/MPL/
+#
+# Software distributed under the License is distributed on an "AS IS" basis,
+# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+# for the specific language governing rights and limitations under the
+# License.
+#
+# The Original Code is mozilla.org code.
+#
+# The Initial Developer of the Original Code is
+# Mozilla Japan.
+# Portions created by the Initial Developer are Copyright (C) 2009
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#   Makoto Kato <m_kato@ga2.so-net.ne.jp>
+#
+# Alternatively, the contents of this file may be used under the terms of
+# either of the GNU General Public License Version 2 or later (the "GPL"),
+# or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+# in which case the provisions of the GPL or the LGPL are applicable instead
+# of those above. If you wish to allow use of your version of this file only
+# under the terms of either the GPL or the LGPL, and not to allow others to
+# use your version of this file under the terms of the MPL, indicate your
+# decision by deleting the provisions above and replace them with the notice
+# and other provisions required by the GPL or the LGPL. If you do not delete
+# the provisions above, a recipient may use your version of this file under
+# the terms of any one of the MPL, the GPL or the LGPL.
+#
+# ***** END LICENSE BLOCK *****
+
+DEPTH		= ../../../..
+topsrcdir       = @top_srcdir@
+srcdir          = @srcdir@
+VPATH           = @srcdir@
+
+include $(DEPTH)/config/autoconf.mk
+
+MODULE		= fts3tok
+LIBRARY_NAME	= fts3tok_s
+META_COMPONENT = mail
+ifndef MOZ_INCOMPLETE_EXTERNAL_LINKAGE
+MOZILLA_INTERNAL_API = 1
+endif
+
+REQUIRES	= \
+		xpcom \
+		string \
+		sqlite3 \
+		storage \
+		$(NULL)
+
+CPPSRCS		= \
+		nsFts3Tokenizer.cpp \
+		$(NULL)
+
+CSRCS		= \
+		fts3_porter.c \
+		$(NULL)
+
+LOCAL_INCLUDES = \
+		$(SQLITE_CFLAGS)
+
+# we don't want the shared lib, but we want to force the creation of a static lib.
+FORCE_STATIC_LIB = 1
+
+include $(topsrcdir)/config/rules.mk
new file mode 100644
--- /dev/null
+++ b/mailnews/extensions/fts3/src/README.mozilla
@@ -0,0 +1,3 @@
+fts3_porter.c code is from SQLite3.
+
+This customized tokenizer "mozporter" by Mozilla supports CJK indexing using bi-grams. So you have to use a bi-gram search string if you want to search for CJK characters.
new file mode 100644
--- /dev/null
+++ b/mailnews/extensions/fts3/src/fts3_porter.c
@@ -0,0 +1,768 @@
+/*
+** 2006 September 30
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** Implementation of the full-text-search tokenizer that implements
+** a Porter stemmer.
+*/
+
+/*
+** The code in this file is only compiled if:
+**
+**     * The FTS3 module is being built as an extension
+**       (in which case SQLITE_CORE is not defined), or
+**
+**     * The FTS3 module is being built into the core of
+**       SQLite (in which case SQLITE_ENABLE_FTS3 is defined).
+*/
+#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
+
+
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "fts3_tokenizer.h"
+
+/*
+** Compatibility block: definitions needed to compile this file without
+** the rest of the sqlite3 code.
+**
+**   * sqlite3_malloc / sqlite3_free / sqlite3_realloc are mapped onto
+**     malloc / free / realloc.
+**
+**   * sqlite3Utf8Trans1[] is indexed by (lead byte - 0xc0) and yields the
+**     payload bits carried by a UTF-8 lead byte.
+**
+**   * READ_UTF8(zIn, zTerm, c) reads one byte from zIn into c and advances
+**     zIn.  If that byte is 0xc0 or greater it is treated as a lead byte
+**     and continuation bytes (10xxxxxx) are folded in for as long as
+**     zIn!=zTerm.  A decoded value that is below 0x80, lies in
+**     0xD800..0xDFFF, or equals 0xFFFE or 0xFFFF is replaced by 0xFFFD.
+**     Bytes below 0xc0 are returned unchanged.
+*/
+
+#define sqlite3_malloc malloc
+#define sqlite3_free free
+#define sqlite3_realloc realloc
+
+static const unsigned char sqlite3Utf8Trans1[] = {
+  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+  0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+  0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
+};
+
+#define READ_UTF8(zIn, zTerm, c)                           \
+  c = *(zIn++);                                            \
+  if( c>=0xc0 ){                                           \
+    c = sqlite3Utf8Trans1[c-0xc0];                         \
+    while( zIn!=zTerm && (*zIn & 0xc0)==0x80 ){            \
+      c = (c<<6) + (0x3f & *(zIn++));                      \
+    }                                                      \
+    if( c<0x80                                             \
+        || (c&0xFFFFF800)==0xD800                          \
+        || (c&0xFFFFFFFE)==0xFFFE ){  c = 0xFFFD; }        \
+  }
+
+/* end of the compatibility block */
+
+/*
+** Class derived from sqlite3_tokenizer.  It adds no fields of its own
+** to the base class.
+*/
+typedef struct porter_tokenizer {
+  sqlite3_tokenizer base;      /* Base class */
+} porter_tokenizer;
+
+/*
+** Class derived from sqlite3_tokenizer_cursor.  Holds the progress of
+** one tokenization pass over an input buffer.
+*/
+typedef struct porter_tokenizer_cursor {
+  sqlite3_tokenizer_cursor base;  /* Base class */
+  const char *zInput;          /* input we are tokenizing */
+  int nInput;                  /* size of the input */
+  int iOffset;                 /* current position in zInput */
+  int iToken;                  /* index of next token to be returned */
+  char *zToken;                /* storage for current token */
+  int nAllocated;              /* space allocated to zToken buffer */
+  int iPrevBigramOffset;       /* offset where the next bi-gram token starts; 0 if the previous token was not a bi-gram */
+} porter_tokenizer_cursor;
+
+
+/* Forward declaration */
+static const sqlite3_tokenizer_module porterTokenizerModule;
+
+
+/*
+** xCreate method of the tokenizer module.  Allocates a zero-filled
+** porter_tokenizer and returns its base object through *ppTokenizer.
+** The argc/argv arguments are not used.  Returns SQLITE_NOMEM when the
+** allocation fails and SQLITE_OK otherwise.
+*/
+static int porterCreate(
+  int argc, const char * const *argv,
+  sqlite3_tokenizer **ppTokenizer
+){
+  porter_tokenizer *pNew;
+
+  pNew = (porter_tokenizer *) sqlite3_malloc(sizeof(porter_tokenizer));
+  if( pNew==NULL ){
+    return SQLITE_NOMEM;
+  }
+  memset(pNew, 0, sizeof(porter_tokenizer));
+  *ppTokenizer = &pNew->base;
+  return SQLITE_OK;
+}
+
+/*
+** xDestroy method of the tokenizer module.  Releases the memory of a
+** tokenizer created by porterCreate() and returns SQLITE_OK.
+*/
+static int porterDestroy(sqlite3_tokenizer *pTokenizer){
+  porter_tokenizer *pOld = (porter_tokenizer *) pTokenizer;
+  sqlite3_free(pOld);
+  return SQLITE_OK;
+}
+
+/*
+** xOpen method of the tokenizer module.  Creates a cursor over the
+** string zInput[0..nInput-1] and returns it in *ppCursor.
+**
+** A NULL zInput is treated as empty input.  A negative nInput means
+** that zInput is nul-terminated and its length is taken with strlen().
+** Returns SQLITE_NOMEM if the cursor cannot be allocated, otherwise
+** SQLITE_OK.
+*/
+static int porterOpen(
+  sqlite3_tokenizer *pTokenizer,         /* The tokenizer */
+  const char *zInput, int nInput,        /* String to be tokenized */
+  sqlite3_tokenizer_cursor **ppCursor    /* OUT: Tokenization cursor */
+){
+  porter_tokenizer_cursor *pCsr;
+  int nByte;                             /* Number of input bytes to scan */
+
+  pCsr = (porter_tokenizer_cursor *) sqlite3_malloc(sizeof(porter_tokenizer_cursor));
+  if( pCsr==NULL ){
+    return SQLITE_NOMEM;
+  }
+
+  if( zInput==NULL ){
+    nByte = 0;
+  }else{
+    nByte = nInput<0 ? (int)strlen(zInput) : nInput;
+  }
+
+  pCsr->zInput = zInput;
+  pCsr->nInput = nByte;
+  pCsr->iOffset = 0;              /* tokenizing starts at the first byte */
+  pCsr->iToken = 0;
+  pCsr->zToken = NULL;            /* the token buffer is allocated on demand */
+  pCsr->nAllocated = 0;
+  pCsr->iPrevBigramOffset = 0;
+
+  *ppCursor = &pCsr->base;
+  return SQLITE_OK;
+}
+
+/*
+** xClose method of the tokenizer module.  Frees the token buffer and
+** then the cursor itself; the cursor must come from porterOpen().
+*/
+static int porterClose(sqlite3_tokenizer_cursor *pCursor){
+  porter_tokenizer_cursor *pCsr = (porter_tokenizer_cursor *) pCursor;
+  char *zBuf = pCsr->zToken;
+
+  sqlite3_free(zBuf);
+  sqlite3_free(pCsr);
+  return SQLITE_OK;
+}
+/*
+** Vowel or consonant.  The table is indexed by (letter - 'a'):
+** 0 marks a vowel, 1 marks a consonant and 2 marks 'y', whose class
+** depends on the neighbouring letter (see isConsonant() and isVowel()).
+*/
+static const char cType[] = {
+   0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0,
+   1, 1, 1, 2, 1
+};
+
+/*
+** isConsonant() and isVowel() report whether the first character of
+** the string they are given is a consonant or a vowel according to
+** the Porter rules.  Both return 0 for the terminating nul.
+**
+** A consonant is any letter other than 'a', 'e', 'i', 'o' or 'u'.
+** 'y' is a consonant unless it follows another consonant, in which
+** case it is a vowel.
+**
+** The letters handled by these routines are in reverse order.  So the
+** 'y' rule becomes: 'y' is a consonant unless it is followed by
+** another consonant.
+*/
+static int isVowel(const char*);
+static int isConsonant(const char *z){
+  const char ch = z[0];
+  int kind;
+  if( ch=='\0' ) return 0;
+  assert( ch>='a' && ch<='z' );
+  kind = cType[ch-'a'];
+  if( kind>=2 ){
+    /* 'y': a consonant at the end of the text or when a vowel follows */
+    return z[1]=='\0' || isVowel(&z[1]);
+  }
+  return kind;
+}
+static int isVowel(const char *z){
+  const char ch = z[0];
+  int kind;
+  if( ch=='\0' ) return 0;
+  assert( ch>='a' && ch<='z' );
+  kind = cType[ch-'a'];
+  if( kind>=2 ){
+    /* 'y': a vowel only when a consonant follows */
+    return isConsonant(&z[1]);
+  }
+  return 1-kind;
+}
+
+/*
+** Let V stand for a run of one or more vowels and C for a run of one
+** or more consonants.  Every word can then be written as:
+**
+**           [C] (VC){m} [V]
+**
+** That is: an optional consonant run, then m vowel-consonant pairs,
+** then an optional vowel run.
+**
+** Return true if the m-value of z is 1 or more, i.e. if the word
+** contains at least one vowel that is followed by a consonant.
+**
+** z[] is in reverse order here, so the scan really looks for a
+** consonant that is followed by a vowel.
+*/
+static int m_gt_0(const char *z){
+  const char *p = z;
+  for(; isVowel(p); p++){}
+  if( *p=='\0' ){
+    return 0;
+  }
+  for(; isConsonant(p); p++){}
+  return *p!='\0';
+}
+
+/* Like m_gt_0() above, except that the m-value of the (reversed) word
+** must be exactly 1.
+*/
+static int m_eq_1(const char *z){
+  const char *p = z;
+  for(; isVowel(p); p++){}
+  if( *p=='\0' ){
+    return 0;
+  }
+  for(; isConsonant(p); p++){}
+  if( *p=='\0' ){
+    return 0;
+  }
+  for(; isVowel(p); p++){}
+  if( *p=='\0' ){
+    return 1;
+  }
+  for(; isConsonant(p); p++){}
+  return *p=='\0';
+}
+
+/* Like m_gt_0() above, except that the m-value of the (reversed) word
+** must be greater than 1 instead of greater than 0.
+*/
+static int m_gt_1(const char *z){
+  const char *p = z;
+  for(; isVowel(p); p++){}
+  if( *p=='\0' ){
+    return 0;
+  }
+  for(; isConsonant(p); p++){}
+  if( *p=='\0' ){
+    return 0;
+  }
+  for(; isVowel(p); p++){}
+  if( *p=='\0' ){
+    return 0;
+  }
+  for(; isConsonant(p); p++){}
+  return *p!='\0';
+}
+
+/*
+** Return TRUE if there is a vowel anywhere in the nul-terminated
+** string z.
+*/
+static int hasVowel(const char *z){
+  const char *p;
+  for(p=z; isConsonant(p); p++){}
+  return *p!='\0';
+}
+
+/*
+** Return TRUE if the word ends in a double consonant.
+**
+** The text is reversed here, so the test is applied to the first two
+** characters of z[].
+*/
+static int doubleConsonant(const char *z){
+  if( !isConsonant(z) ){
+    return 0;
+  }
+  if( z[0]!=z[1] ){
+    return 0;
+  }
+  return isConsonant(z+1) ? 1 : 0;
+}
+
+/*
+** Return TRUE if the word ends with three letters that are
+** consonant-vowel-consonant, where the final consonant is not
+** 'w', 'x' or 'y'.
+**
+** The word is reversed here, so the first three letters are checked
+** and the first one must not be in [wxy].
+*/
+static int star_oh(const char *z){
+  if( z[0]=='\0' || !isConsonant(z) ){
+    return 0;
+  }
+  if( z[0]=='w' || z[0]=='x' || z[0]=='y' ){
+    return 0;
+  }
+  if( z[1]=='\0' || !isVowel(z+1) ){
+    return 0;
+  }
+  if( z[2]=='\0' ){
+    return 0;
+  }
+  return isConsonant(z+2) ? 1 : 0;
+}
+
+/*
+** If the word ends with zFrom and xCond() is true for the stem of the
+** word that precedes the zFrom ending, then change the ending to zTo.
+**
+** The input word *pz and zFrom are both in reverse order.  zTo is in
+** normal order.  The replacement is written into the bytes just before
+** the stem, and *pz is moved back to the new start of the word.
+**
+** Return TRUE if zFrom matches.  Return FALSE if zFrom does not
+** match.  Note that TRUE is returned even if xCond() fails and
+** no substitution occurs.
+*/
+static int stem(
+  char **pz,             /* The word being stemmed (Reversed) */
+  const char *zFrom,     /* If the ending matches this... (Reversed) */
+  const char *zTo,       /* ... change the ending to this (not reversed) */
+  int (*xCond)(const char*)   /* Condition that must be true */
+){
+  char *zWord = *pz;
+  const char *zSuffix;
+
+  /* Walk the word and the ending side by side; any mismatch is a miss */
+  for(zSuffix=zFrom; *zSuffix!='\0'; zSuffix++, zWord++){
+    if( *zSuffix!=*zWord ){
+      return 0;
+    }
+  }
+
+  /* The ending matched.  zWord now points at the stem. */
+  if( xCond!=0 && xCond(zWord)==0 ){
+    return 1;
+  }
+
+  /* Prepend zTo, one character at a time, in front of the stem */
+  for(; *zTo!='\0'; zTo++){
+    zWord--;
+    *zWord = *zTo;
+  }
+  *pz = zWord;
+  return 1;
+}
+
+/*
+** Fallback stemmer, used when the porter stemmer is inappropriate.
+** The input word zIn[0..nIn-1] is copied into zOut with US-ASCII case
+** folding, zOut is nul-terminated and the output length is stored in
+** *pnOut.
+**
+** A word that is too long (more than 20 bytes if it contains no
+** digits, more than 6 bytes if it does) is shortened to 20 or 6 bytes
+** by keeping 10 or 3 bytes from the beginning and from the end.
+*/
+static void copy_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){
+  int nOut = 0;          /* Number of bytes currently in zOut */
+  int sawDigit = 0;      /* True once a digit has been copied */
+  int nKeep;             /* Bytes kept from each end of a long word */
+  int iSrc;
+
+  /* Copy the whole word, folding A-Z to a-z and noting any digit */
+  while( nOut<nIn ){
+    int ch = zIn[nOut];
+    if( ch>='0' && ch<='9' ){
+      sawDigit = 1;
+    }else if( ch>='A' && ch<='Z' ){
+      ch += 'a' - 'A';
+    }
+    zOut[nOut++] = ch;
+  }
+
+  /* Shorten a long word by moving its tail up behind the kept head */
+  nKeep = sawDigit ? 3 : 10;
+  if( nIn>nKeep*2 ){
+    nOut = nKeep;
+    for(iSrc=nIn-nKeep; iSrc<nIn; iSrc++){
+      zOut[nOut++] = zOut[iSrc];
+    }
+  }
+
+  zOut[nOut] = 0;
+  *pnOut = nOut;
+}
+
+
+/*
+** Stem the input word zIn[0..nIn-1].  Store the output in zOut.
+** zOut is at least big enough to hold nIn bytes.  Write the actual
+** size of the output word (exclusive of the '\0' terminator) into *pnOut.
+**
+** Any upper-case characters in the US-ASCII character set ([A-Z])
+** are converted to lower case.  Upper-case UTF characters are
+** unchanged.
+**
+** Words that are longer than about 20 bytes are stemmed by retaining
+** a few bytes from the beginning and the end of the word.  If the
+** word contains digits, 3 bytes are taken from the beginning and
+** 3 bytes from the end.  For long words without digits, 10 bytes
+** are taken from each end.  US-ASCII case folding still applies.
+**
+** If the input word is shorter than 3 bytes, is 21 bytes or longer,
+** or contains any character that is not in [a-zA-Z] (a digit, for
+** example) then no stemming is attempted and this routine just hands
+** the word to copy_stemmer(), which copies the input into the output
+** with US-ASCII case folding.
+**
+** Internally the word is held in reverse order in zReverse[], so every
+** suffix pattern given to stem() below is spelled backwards.
+**
+** Stemming never increases the length of the word.  So there is
+** no chance of overflowing the zOut buffer.
+*/
+static void porter_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){
+  int i, j, c;
+  char zReverse[28];  /* reversed word, with spare bytes before and after it */
+  char *z, *z2;
+  if( nIn<3 || nIn>=sizeof(zReverse)-7 ){
+    /* The word is too big or too small for the porter stemmer.
+    ** Fallback to the copy stemmer */
+    copy_stemmer(zIn, nIn, zOut, pnOut);
+    return;
+  }
+  for(i=0, j=sizeof(zReverse)-6; i<nIn; i++, j--){
+    c = zIn[i];
+    if( c>='A' && c<='Z' ){
+      zReverse[j] = c + 'a' - 'A';
+    }else if( c>='a' && c<='z' ){
+      zReverse[j] = c;
+    }else{
+      /* The use of a character not in [a-zA-Z] means that we fallback
+      ** to the copy stemmer */
+      copy_stemmer(zIn, nIn, zOut, pnOut);
+      return;
+    }
+  }
+  memset(&zReverse[sizeof(zReverse)-5], 0, 5);  /* nul terminator plus padding after the word */
+  z = &zReverse[j+1];  /* start of the reversed word, i.e. its last letter */
+
+
+  /* Step 1a: "sses" -> "ss", "ies" -> "i", "ss" stays, else drop a final "s" */
+  if( z[0]=='s' ){
+    if(
+     !stem(&z, "sess", "ss", 0) &&
+     !stem(&z, "sei", "i", 0)  &&
+     !stem(&z, "ss", "ss", 0)
+    ){
+      z++;
+    }
+  }
+
+  /* Step 1b: "eed" -> "ee" when m>0; otherwise strip "ing" or "ed" when
+  ** the stem has a vowel and then tidy up what is left */
+  z2 = z;
+  if( stem(&z, "dee", "ee", m_gt_0) ){
+    /* Do nothing.  The work was all in the test */
+  }else if( 
+     (stem(&z, "gni", "", hasVowel) || stem(&z, "de", "", hasVowel))
+      && z!=z2
+  ){
+     if( stem(&z, "ta", "ate", 0) ||
+         stem(&z, "lb", "ble", 0) ||
+         stem(&z, "zi", "ize", 0) ){
+       /* Do nothing.  The work was all in the test */
+     }else if( doubleConsonant(z) && (*z!='l' && *z!='s' && *z!='z') ){
+       z++;
+     }else if( m_eq_1(z) && star_oh(z) ){
+       *(--z) = 'e';
+     }
+  }
+
+  /* Step 1c: a final "y" becomes "i" when the rest of the word has a vowel */
+  if( z[0]=='y' && hasVowel(z+1) ){
+    z[0] = 'i';
+  }
+
+  /* Step 2: suffix replacements that apply when m>0, selected by the
+  ** second-to-last letter of the word */
+  switch( z[1] ){
+   case 'a':
+     stem(&z, "lanoita", "ate", m_gt_0) ||
+     stem(&z, "lanoit", "tion", m_gt_0);
+     break;
+   case 'c':
+     stem(&z, "icne", "ence", m_gt_0) ||
+     stem(&z, "icna", "ance", m_gt_0);
+     break;
+   case 'e':
+     stem(&z, "rezi", "ize", m_gt_0);
+     break;
+   case 'g':
+     stem(&z, "igol", "log", m_gt_0);
+     break;
+   case 'l':
+     stem(&z, "ilb", "ble", m_gt_0) ||
+     stem(&z, "illa", "al", m_gt_0) ||
+     stem(&z, "iltne", "ent", m_gt_0) ||
+     stem(&z, "ile", "e", m_gt_0) ||
+     stem(&z, "ilsuo", "ous", m_gt_0);
+     break;
+   case 'o':
+     stem(&z, "noitazi", "ize", m_gt_0) ||
+     stem(&z, "noita", "ate", m_gt_0) ||
+     stem(&z, "rota", "ate", m_gt_0);
+     break;
+   case 's':
+     stem(&z, "msila", "al", m_gt_0) ||
+     stem(&z, "ssenevi", "ive", m_gt_0) ||
+     stem(&z, "ssenluf", "ful", m_gt_0) ||
+     stem(&z, "ssensuo", "ous", m_gt_0);
+     break;
+   case 't':
+     stem(&z, "itila", "al", m_gt_0) ||
+     stem(&z, "itivi", "ive", m_gt_0) ||
+     stem(&z, "itilib", "ble", m_gt_0);
+     break;
+  }
+
+  /* Step 3: further suffix replacements that apply when m>0, selected by
+  ** the last letter of the word */
+  switch( z[0] ){
+   case 'e':
+     stem(&z, "etaci", "ic", m_gt_0) ||
+     stem(&z, "evita", "", m_gt_0)   ||
+     stem(&z, "ezila", "al", m_gt_0);
+     break;
+   case 'i':
+     stem(&z, "itici", "ic", m_gt_0);
+     break;
+   case 'l':
+     stem(&z, "laci", "ic", m_gt_0) ||
+     stem(&z, "luf", "", m_gt_0);
+     break;
+   case 's':
+     stem(&z, "ssen", "", m_gt_0);
+     break;
+  }
+
+  /* Step 4: suffixes that are removed outright when m>1, selected by the
+  ** second-to-last letter of the word */
+  switch( z[1] ){
+   case 'a':
+     if( z[0]=='l' && m_gt_1(z+2) ){
+       z += 2;
+     }
+     break;
+   case 'c':
+     if( z[0]=='e' && z[2]=='n' && (z[3]=='a' || z[3]=='e')  && m_gt_1(z+4)  ){
+       z += 4;
+     }
+     break;
+   case 'e':
+     if( z[0]=='r' && m_gt_1(z+2) ){
+       z += 2;
+     }
+     break;
+   case 'i':
+     if( z[0]=='c' && m_gt_1(z+2) ){
+       z += 2;
+     }
+     break;
+   case 'l':
+     if( z[0]=='e' && z[2]=='b' && (z[3]=='a' || z[3]=='i') && m_gt_1(z+4) ){
+       z += 4;
+     }
+     break;
+   case 'n':
+     if( z[0]=='t' ){
+       if( z[2]=='a' ){
+         if( m_gt_1(z+3) ){
+           z += 3;
+         }
+       }else if( z[2]=='e' ){
+         stem(&z, "tneme", "", m_gt_1) ||
+         stem(&z, "tnem", "", m_gt_1) ||
+         stem(&z, "tne", "", m_gt_1);
+       }
+     }
+     break;
+   case 'o':
+     if( z[0]=='u' ){
+       if( m_gt_1(z+2) ){
+         z += 2;
+       }
+     }else if( z[3]=='s' || z[3]=='t' ){
+       stem(&z, "noi", "", m_gt_1);
+     }
+     break;
+   case 's':
+     if( z[0]=='m' && z[2]=='i' && m_gt_1(z+3) ){
+       z += 3;
+     }
+     break;
+   case 't':
+     stem(&z, "eta", "", m_gt_1) ||
+     stem(&z, "iti", "", m_gt_1);
+     break;
+   case 'u':
+     if( z[0]=='s' && z[2]=='o' && m_gt_1(z+3) ){
+       z += 3;
+     }
+     break;
+   case 'v':
+   case 'z':
+     if( z[0]=='e' && z[2]=='i' && m_gt_1(z+3) ){
+       z += 3;
+     }
+     break;
+  }
+
+  /* Step 5a: drop a final "e" when m>1, or when m==1 and the stem does
+  ** not satisfy star_oh() */
+  if( z[0]=='e' ){
+    if( m_gt_1(z+1) ){
+      z++;
+    }else if( m_eq_1(z+1) && !star_oh(z+1) ){
+      z++;
+    }
+  }
+
+  /* Step 5b: a final "ll" becomes "l" when m>1 */
+  if( m_gt_1(z) && z[0]=='l' && z[1]=='l' ){
+    z++;
+  }
+
+  /* z[] is now the stemmed word in reverse order.  Flip it back
+  ** around into forward order and return.
+  */
+  *pnOut = i = strlen(z);
+  zOut[i] = 0;
+  while( *z ){
+    zOut[--i] = *(z++);
+  }
+}
+
+/*
+** Characters that can be part of a token.  We assume any character
+** whose value is 0x80 or greater (any UTF character) can be
+** part of a token.  In other words, delimiters all must have
+** values of 0x7f or lower.
+**
+** The table covers 0x30..0x7f and is indexed by (character - 0x30).
+** An entry of 1 marks a token character: the digits, the letters
+** A-Z and a-z, and '_'.
+*/
+static const char porterIdChar[] = {
+/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,  /* 3x */
+    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 4x */
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,  /* 5x */
+    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 6x */
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,  /* 7x */
+};
+
+#define IS_SPACE(x)    (((x)>=0x2000&&(x)<=0x200a) || (x)==0x205f)  /* code point in U+2000..U+200A, or U+205F */
+#define IS_JA_DELIM(x) (((x)==0x3001)||((x)==0xFF64)||((x)==0xFF0E)||((x)==0x3002)||((x)==0xFF61)||((x)==0xFF0C))  /* ideographic, halfwidth and fullwidth commas and full stops */
+
+#define BIGRAM_RESET   0  /* no pending character: start of a scan, or just after a delimiter or a non-CJK multi-byte character */
+#define BIGRAM_UNKNOWN 1  /* one CJK-range character seen; not yet known whether a second one follows */
+#define BIGRAM_USE     2  /* two consecutive CJK-range characters seen; emit them as a bi-gram */
+#define BIGRAM_ASCII   3  /* the previous character was an ASCII token character */
+
+/*
+** Decide whether the character that starts at zCur ends the token that
+** is currently being scanned.  Returns 1 if the token must end in front
+** of (or at) this character and 0 if the character belongs to the token.
+** *len receives the number of bytes the character occupies.
+**
+** *state carries one of the BIGRAM_* values from call to call:
+**
+**   * An ASCII character is a delimiter if porterIdChar[] says so.  It
+**     also forces a break when the previous character was in the CJK
+**     range.
+**   * A multi-byte character below U+2000 or in U+A000..U+ABFF is
+**     treated as an ordinary token character.
+**   * IS_SPACE() and IS_JA_DELIM() characters are delimiters.
+**   * Any other multi-byte character is treated as CJK.  The first one
+**     after a reset is accepted (BIGRAM_UNKNOWN); the second one ends the
+**     token and sets BIGRAM_USE so the caller emits a bi-gram.  A CJK
+**     character directly after ASCII forces a break.
+**
+** The code point is decoded into a full-width unsigned int so that
+** characters above U+FFFF are classified by their real value.
+*/
+static int isDelim(
+  const unsigned char *zCur,    /* IN: current pointer of token */
+  const unsigned char *zTerm,   /* IN: where decoding must stop; READ_UTF8 does not read a continuation byte at this address */
+  int *len,                     /* OUT: analyzed bytes in this token */
+  int *state                    /* IN/OUT: analyze state */
+){
+  const unsigned char *zIn;
+  unsigned int c;               /* decoded code point; needs more than 16 bits */
+  int delim;
+
+  /* ASCII character range has rule */
+  if( !(*zCur & 0x80) ){
+    *len = 1;
+    delim = (*zCur<0x30 || !porterIdChar[*zCur-0x30]);
+    if( *state==BIGRAM_USE || *state==BIGRAM_UNKNOWN ){
+      /* previous maybe CJK and current is ascii */
+      *state = BIGRAM_ASCII; /*ascii*/
+      delim = 1; /* must break */
+    }else if( delim==1 ){
+      /* this is a delimiter character */
+      *state = BIGRAM_RESET; /*reset*/
+    }else{
+      *state = BIGRAM_ASCII; /*ascii*/
+    }
+    return delim;
+  }
+
+  /* decode the UTF-8 sequence into a code point to analyze the character */
+  zIn = zCur;
+  READ_UTF8(zIn, zTerm, c);
+  *len = (int)(zIn - zCur);
+
+  /* this isn't CJK range, so return as no delim */
+  if( c<0x2000 || (c>=0xa000 && c<0xac00) ){
+    *state = BIGRAM_RESET; /*reset*/
+    return 0;
+  }
+
+  /* this is space character or delim character */
+  if( IS_SPACE(c) || IS_JA_DELIM(c) ){
+    *state = BIGRAM_RESET; /* reset */
+    return 1;
+  }
+
+  if( *state==BIGRAM_ASCII ){
+    /* Previous is ascii and current maybe CJK */
+    *state = BIGRAM_UNKNOWN; /* mark as unknown */
+    return 1; /* must break */
+  }
+
+  /* We have no rule for CJK!. use bi-gram */
+  if( *state==BIGRAM_UNKNOWN || *state==BIGRAM_USE ){
+    /* previous state is unknown.  mark as bi-gram */
+    *state = BIGRAM_USE;
+    return 1;
+  }
+
+  *state = BIGRAM_UNKNOWN; /* mark as unknown */
+  return 0;
+}
+
+/*
+** Extract the next token from a tokenization cursor.  The cursor must
+** have been opened by a prior call to porterOpen().
+**
+** Returns SQLITE_OK and fills in the OUT parameters when a token was
+** found, SQLITE_DONE when the input is exhausted, and SQLITE_NOMEM when
+** the token buffer cannot be grown.
+**
+** Runs of CJK-range characters are returned as overlapping bi-grams:
+** when isDelim() reports BIGRAM_USE the token spans two characters and
+** the offset of the second one is saved in iPrevBigramOffset so that the
+** next call starts there.  Bi-gram tokens go through copy_stemmer();
+** every other token goes through porter_stemmer().
+*/
+static int porterNext(
+  sqlite3_tokenizer_cursor *pCursor,  /* Cursor returned by porterOpen */
+  const char **pzToken,               /* OUT: *pzToken is the token text */
+  int *pnBytes,                       /* OUT: Number of bytes in token */
+  int *piStartOffset,                 /* OUT: Starting offset of token */
+  int *piEndOffset,                   /* OUT: Ending offset of token */
+  int *piPosition                     /* OUT: Position integer of token */
+){
+  porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor;
+  const unsigned char *z = (const unsigned char *) c->zInput;
+  int len = 0;
+  int state;
+
+  while( c->iOffset<c->nInput ){
+    int iStartOffset;
+
+    /* isDelim() is given z + c->nInput, one past the last input byte,
+    ** as its stop pointer.  READ_UTF8 compares against that pointer
+    ** before reading each continuation byte, so this lets it decode a
+    ** multi-byte character that ends the input in full while never
+    ** reading a continuation byte beyond the input. */
+    if (c->iPrevBigramOffset == 0) {
+      /* Scan past delimiter characters */
+      state = BIGRAM_RESET; /* reset */
+      while( c->iOffset<c->nInput && isDelim(z + c->iOffset, z + c->nInput, &len, &state)){
+        c->iOffset += len;
+      }
+
+    } else {
+      /* for bigram indexing, use previous offset */
+      c->iOffset = c->iPrevBigramOffset;
+    }
+
+    /* Count non-delimiter characters. */
+    iStartOffset = c->iOffset;
+
+    state = BIGRAM_RESET; /* state is reset */
+    while( c->iOffset<c->nInput && !isDelim(z + c->iOffset, z + c->nInput, &len, &state)){
+      c->iOffset += len;
+    }
+
+    if( state==BIGRAM_USE ){
+      /* Split word by bigram: remember where the second character starts
+      ** and include it in this token */
+      c->iPrevBigramOffset = c->iOffset;
+      c->iOffset += len;
+    } else {
+      /* Reset bigram offset */
+      c->iPrevBigramOffset = 0;
+    }
+
+    if( c->iOffset>iStartOffset ){
+      int n = c->iOffset-iStartOffset;
+      if( n>c->nAllocated ){
+        /* Grow through a temporary so that a failed realloc leaves the
+        ** old buffer and its recorded size intact for porterClose() */
+        char *zNew = (char *) sqlite3_realloc(c->zToken, n+20);
+        if( zNew==NULL ) return SQLITE_NOMEM;
+        c->zToken = zNew;
+        c->nAllocated = n+20;
+      }
+      if( state==BIGRAM_USE ){
+        /* This is by bigram. So it is unnecessary to convert word */
+        copy_stemmer((const char *)&z[iStartOffset], n, c->zToken, pnBytes);
+      } else {
+        porter_stemmer((const char *)&z[iStartOffset], n, c->zToken, pnBytes);
+      }
+      *pzToken = c->zToken;
+      *piStartOffset = iStartOffset;
+      *piEndOffset = c->iOffset;
+      *piPosition = c->iToken++;
+      return SQLITE_OK;
+    }
+  }
+  return SQLITE_DONE;
+}
+
+/*
+** The set of routines that implement the porter-stemmer tokenizer,
+** listed in the field order of sqlite3_tokenizer_module.
+*/
+static const sqlite3_tokenizer_module porterTokenizerModule = {
+  0,                /* iVersion */
+  porterCreate,     /* xCreate */
+  porterDestroy,    /* xDestroy */
+  porterOpen,       /* xOpen */
+  porterClose,      /* xClose */
+  porterNext,       /* xNext */
+};
+
+/*
+** Hand out the porter tokenizer module.  On return *ppModule points at
+** the file-static porterTokenizerModule; nothing is allocated.
+*/
+void sqlite3Fts3PorterTokenizerModule(sqlite3_tokenizer_module const **ppModule){
+  *ppModule = &porterTokenizerModule;
+}
+
+#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
new file mode 100644
--- /dev/null
+++ b/mailnews/extensions/fts3/src/fts3_tokenizer.h
@@ -0,0 +1,148 @@
+/*
+** 2006 July 10
+**
+** The author disclaims copyright to this source code.
+**
+*************************************************************************
+** Defines the interface to tokenizers used by fulltext-search.  There
+** are three basic components:
+**
+** sqlite3_tokenizer_module is a singleton defining the tokenizer
+** interface functions.  This is essentially the class structure for
+** tokenizers.
+**
+** sqlite3_tokenizer is used to define a particular tokenizer, perhaps
+** including customization information defined at creation time.
+**
+** sqlite3_tokenizer_cursor is generated by a tokenizer to generate
+** tokens from a particular input.
+*/
+#ifndef _FTS3_TOKENIZER_H_
+#define _FTS3_TOKENIZER_H_
+
+/* TODO(shess) Only used for SQLITE_OK and SQLITE_DONE at this time.
+** If tokenizers are to be allowed to call sqlite3_*() functions, then
+** we will need a way to register the API consistently.
+*/
+#include "sqlite3.h"
+
+/*
+** Structures used by the tokenizer interface. When a new tokenizer
+** implementation is registered, the caller provides a pointer to
+** an sqlite3_tokenizer_module containing pointers to the callback
+** functions that make up an implementation.
+**
+** When an fts3 table is created, it passes any arguments passed to
+** the tokenizer clause of the CREATE VIRTUAL TABLE statement to the
+** sqlite3_tokenizer_module.xCreate() function of the requested tokenizer
+** implementation. The xCreate() function in turn returns an 
+** sqlite3_tokenizer structure representing the specific tokenizer to
+** be used for the fts3 table (customized by the tokenizer clause arguments).
+**
+** To tokenize an input buffer, the sqlite3_tokenizer_module.xOpen()
+** method is called. It returns an sqlite3_tokenizer_cursor object
+** that may be used to tokenize a specific input buffer based on
+** the tokenization rules supplied by a specific sqlite3_tokenizer
+** object.
+*/
+typedef struct sqlite3_tokenizer_module sqlite3_tokenizer_module;
+typedef struct sqlite3_tokenizer sqlite3_tokenizer;
+typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor;
+
+struct sqlite3_tokenizer_module {
+
+  /*
+  ** Structure version. Should always be set to 0.
+  */
+  int iVersion;
+
+  /*
+  ** Create a new tokenizer. The values in the argv[] array are the
+  ** arguments passed to the "tokenizer" clause of the CREATE VIRTUAL
+  ** TABLE statement that created the fts3 table. For example, if
+  ** the following SQL is executed:
+  **
+  **   CREATE .. USING fts3( ... , tokenizer <tokenizer-name> arg1 arg2)
+  **
+  ** then argc is set to 2, and the argv[] array contains pointers
+  ** to the strings "arg1" and "arg2".
+  **
+  ** This method should return either SQLITE_OK (0), or an SQLite error
+  ** code. If SQLITE_OK is returned, then *ppTokenizer should be set
+  ** to point at the newly created tokenizer structure. The generic
+  ** sqlite3_tokenizer.pModule variable should not be initialised by
+  ** this callback. The caller will do so.
+  */
+  int (*xCreate)(
+    int argc,                           /* Size of argv array */
+    const char *const*argv,             /* Tokenizer argument strings */
+    sqlite3_tokenizer **ppTokenizer     /* OUT: Created tokenizer */
+  );
+
+  /*
+  ** Destroy an existing tokenizer. The fts3 module calls this method
+  ** exactly once for each successful call to xCreate().
+  */
+  int (*xDestroy)(sqlite3_tokenizer *pTokenizer);
+
+  /*
+  ** Create a tokenizer cursor to tokenize an input buffer. The caller
+  ** is responsible for ensuring that the input buffer remains valid
+  ** until the cursor is closed (using the xClose() method).
+  */
+  int (*xOpen)(
+    sqlite3_tokenizer *pTokenizer,       /* Tokenizer object */
+    const char *pInput, int nBytes,      /* Input buffer */
+    sqlite3_tokenizer_cursor **ppCursor  /* OUT: Created tokenizer cursor */
+  );
+
+  /*
+  ** Destroy an existing tokenizer cursor. The fts3 module calls this
+  ** method exactly once for each successful call to xOpen().
+  */
+  int (*xClose)(sqlite3_tokenizer_cursor *pCursor);
+
+  /*
+  ** Retrieve the next token from the tokenizer cursor pCursor. This
+  ** method should either return SQLITE_OK and set the values of the
+  ** "OUT" variables identified below, or SQLITE_DONE to indicate that
+  ** the end of the buffer has been reached, or an SQLite error code.
+  **
+  ** *ppToken should be set to point at a buffer containing the
+  ** normalized version of the token (i.e. after any case-folding and/or
+  ** stemming has been performed). *pnBytes should be set to the length
+  ** of this buffer in bytes. The input text that generated the token is
+  ** identified by the byte offsets returned in *piStartOffset and
+  ** *piEndOffset. *piStartOffset should be set to the index of the first
+  ** byte of the token in the input buffer. *piEndOffset should be set
+  ** to the index of the first byte just past the end of the token in
+  ** the input buffer. *piPosition should be set to the number of tokens
+  ** returned before this one.
+  **
+  ** The buffer *ppToken is set to point at is managed by the tokenizer
+  ** implementation. It is only required to be valid until the next call
+  ** to xNext() or xClose().
+  */
+  /* TODO(shess) current implementation requires pInput to be
+  ** nul-terminated.  This should either be fixed, or pInput/nBytes
+  ** should be converted to zInput.
+  */
+  int (*xNext)(
+    sqlite3_tokenizer_cursor *pCursor,   /* Tokenizer cursor */
+    const char **ppToken, int *pnBytes,  /* OUT: Normalized text for token */
+    int *piStartOffset,  /* OUT: Byte offset of token in input buffer */
+    int *piEndOffset,    /* OUT: Byte offset of end of token in input buffer */
+    int *piPosition      /* OUT: Number of tokens returned before this one */
+  );
+};
+
+struct sqlite3_tokenizer {
+  const sqlite3_tokenizer_module *pModule;  /* The module for this tokenizer; filled in by the caller of xCreate(), not by xCreate() itself */
+  /* Tokenizer implementations will typically add additional fields */
+};
+
+struct sqlite3_tokenizer_cursor {
+  sqlite3_tokenizer *pTokenizer;       /* Tokenizer for this cursor. */
+  /* Tokenizer implementations will typically add additional fields, e.g. by embedding this struct as the first member of their own cursor type */
+};
+
+#endif /* _FTS3_TOKENIZER_H_ */
new file mode 100644
--- /dev/null
+++ b/mailnews/extensions/fts3/src/nsFts3Tokenizer.cpp
@@ -0,0 +1,83 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is mozilla.org code.
+ *
+ * The Initial Developer of the Original Code is
+ * Mozilla Japan.
+ * Portions created by the Initial Developer are Copyright (C) 2009
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ *   Makoto Kato <m_kato@ga2.so-net.ne.jp>
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either of the GNU General Public License Version 2 or later (the "GPL"),
+ * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+#include "nsFts3Tokenizer.h"
+
+#include "nsIFts3Tokenizer.h"
+#include "mozIStorageConnection.h"
+#include "mozIStorageStatement.h"
+#include "nsString.h"
+
+extern "C" void sqlite3Fts3PorterTokenizerModule(
+  sqlite3_tokenizer_module const**ppModule);
+
+NS_IMPL_ISUPPORTS1(nsFts3Tokenizer,nsIFts3Tokenizer)
+
+nsFts3Tokenizer::nsFts3Tokenizer()
+{   // empty body: no work is done at construction
+}
+
+nsFts3Tokenizer::~nsFts3Tokenizer()
+{   // empty body: no work is done at destruction
+}
+
+/**
+ * Registers the "mozporter" tokenizer on the given storage connection.
+ *
+ * Executes "SELECT fts3_tokenizer(?1, ?2)" with the tokenizer name bound to
+ * the first parameter and, bound as a blob to the second, the bytes of the
+ * pointer to the module returned by sqlite3Fts3PorterTokenizerModule().
+ *
+ * @param connection  the connection to register the tokenizer on; must not
+ *                    be null.
+ * @return NS_ERROR_FAILURE if no module is returned, the failing nsresult of
+ *         any statement creation, bind or execution step, or the result of
+ *         executing the statement.
+ */
+NS_IMETHODIMP
+nsFts3Tokenizer::RegisterTokenizer(mozIStorageConnection *connection)
+{
+    NS_ENSURE_ARG_POINTER(connection);
+
+    nsresult rv;
+    nsCOMPtr <mozIStorageStatement> selectStatement;
+
+    rv = connection->CreateStatement(NS_LITERAL_CSTRING(
+      "SELECT fts3_tokenizer(?1, ?2)"),
+      getter_AddRefs(selectStatement));
+    NS_ENSURE_SUCCESS(rv, rv);
+
+    const sqlite3_tokenizer_module* module = nsnull;
+    sqlite3Fts3PorterTokenizerModule(&module);
+    if (!module)
+        return NS_ERROR_FAILURE;
+
+    // Check each bind on its own; previously the result of the first bind
+    // was overwritten by the second without being looked at.
+    rv = selectStatement->BindStringParameter(0, NS_LITERAL_STRING("mozporter"));
+    NS_ENSURE_SUCCESS(rv, rv);
+
+    // The blob holds the value of the module pointer itself, not the module.
+    rv = selectStatement->BindBlobParameter(1, (PRUint8*)&module, sizeof(module));
+    NS_ENSURE_SUCCESS(rv, rv);
+
+    PRBool hasMore;
+    rv = selectStatement->ExecuteStep(&hasMore);
+
+    return rv;
+}
new file mode 100644
--- /dev/null
+++ b/mailnews/extensions/fts3/src/nsFts3Tokenizer.h
@@ -0,0 +1,59 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is mozilla.org code.
+ *
+ * The Initial Developer of the Original Code is
+ * Mozilla Japan.
+ * Portions created by the Initial Developer are Copyright (C) 2009
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ *   Makoto Kato <m_kato@ga2.so-net.ne.jp>
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either of the GNU General Public License Version 2 or later (the "GPL"),
+ * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+#ifndef nsFts3Tokenizer_h__
+#define nsFts3Tokenizer_h__
+
+#include "nsCOMPtr.h"
+#include "nsIFts3Tokenizer.h"
+#include "fts3_tokenizer.h"
+
+// NOTE(review): no definition or caller of getWindowsTokenizer() is visible
+// in this part of the patch -- confirm it is still needed, otherwise this
+// declaration is dead.
+extern const sqlite3_tokenizer_module* getWindowsTokenizer();
+
+/**
+ * XPCOM object implementing nsIFts3Tokenizer. It declares the nsISupports
+ * methods and the nsIFts3Tokenizer interface methods via the standard
+ * NS_DECL_* macros and holds no data members.
+ */
+class nsFts3Tokenizer : public nsIFts3Tokenizer {
+public:
+    NS_DECL_ISUPPORTS
+    NS_DECL_NSIFTS3TOKENIZER
+
+    nsFts3Tokenizer();
+
+private:
+  // Private destructor: the object cannot be deleted directly from outside
+  // the class (presumably so destruction only happens through Release()).
+  ~nsFts3Tokenizer();
+};
+
+#endif
new file mode 100644
--- /dev/null
+++ b/mailnews/extensions/fts3/src/nsFts3TokenizerCID.h
@@ -0,0 +1,49 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is mozilla.org code.
+ *
+ * The Initial Developer of the Original Code is
+ * Mozilla Japan.
+ * Portions created by the Initial Developer are Copyright (C) 2009
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ *   Makoto Kato <m_kato@ga2.so-net.ne.jp>
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either of the GNU General Public License Version 2 or later (the "GPL"),
+ * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+ 
+#ifndef nsFts3TokenizerCID_h__
+#define nsFts3TokenizerCID_h__
+
+/* Contract ID string identifying the FTS3 tokenizer component. */
+#define NS_FTS3TOKENIZER_CONTRACTID \
+  "@mozilla.org/messenger/fts3tokenizer;1"
+/* Class ID of the FTS3 tokenizer component, written as a struct initializer;
+ * the fields spell out the UUID quoted in the inline comment. */
+#define NS_FTS3TOKENIZER_CID			\
+{ /* a67d724d-0015-4e2e-8cad-b84775330924 */	\
+  0xa67d724d, 0x0015, 0x4e2e,			\
+  { 0x8c, 0xad, 0xb8, 0x47, 0x75, 0x33, 0x09, 0x24 }}
+
+#endif /* nsFts3TokenizerCID_h__ */
--- a/mailnews/makefiles.sh
+++ b/mailnews/makefiles.sh
@@ -68,16 +68,20 @@ mailnews/db/msgdb/build/Makefile
 mailnews/db/msgdb/public/Makefile
 mailnews/db/msgdb/src/Makefile
 mailnews/db/msgdb/test/Makefile
 mailnews/extensions/Makefile
 mailnews/extensions/bayesian-spam-filter/Makefile
 mailnews/extensions/bayesian-spam-filter/build/Makefile
 mailnews/extensions/bayesian-spam-filter/src/Makefile
 mailnews/extensions/bayesian-spam-filter/test/Makefile
+mailnews/extensions/fts3/Makefile
+mailnews/extensions/fts3/build/Makefile
+mailnews/extensions/fts3/public/Makefile
+mailnews/extensions/fts3/src/Makefile
 mailnews/extensions/mailviews/Makefile
 mailnews/extensions/mailviews/build/Makefile
 mailnews/extensions/mailviews/public/Makefile
 mailnews/extensions/mailviews/content/Makefile
 mailnews/extensions/mailviews/src/Makefile
 mailnews/extensions/mdn/Makefile
 mailnews/extensions/mdn/build/Makefile
 mailnews/extensions/mdn/src/Makefile
--- a/suite/installer/unix/packages
+++ b/suite/installer/unix/packages
@@ -517,16 +517,17 @@ bin/updater
 ; component binaries
 bin/components/libimport.so
 bin/components/libmail.so
 bin/components/libmsgsmime.so
 #endif
 
 ; component xpts
 bin/components/addrbook.xpt
+bin/components/fts3tok.xpt
 bin/components/import.xpt
 bin/components/impComm4xMail.xpt
 bin/components/mailview.xpt
 bin/components/mime.xpt
 bin/components/msgbase.xpt
 bin/components/msgcompose.xpt
 bin/components/msgdb.xpt
 bin/components/msgimap.xpt
--- a/suite/installer/windows/packages
+++ b/suite/installer/windows/packages
@@ -527,16 +527,17 @@ bin\updater.exe
 bin\components\import.dll
 bin\components\mail.dll
 bin\components\msgMapi.dll
 bin\components\msgsmime.dll
 #endif
 
 ; component xpts
 bin\components\addrbook.xpt
+bin\components\fts3tok.xpt
 bin\components\import.xpt
 bin\components\impComm4xMail.xpt
 bin\components\mailview.xpt
 bin\components\mapihook.xpt
 bin\components\mime.xpt
 bin\components\msgbase.xpt
 bin\components\msgcompo.xpt
 bin\components\msgdb.xpt