--- a/mail/installer/windows/packages-static
+++ b/mail/installer/windows/packages-static
@@ -77,16 +77,17 @@ bin\defaults\profile\localstore.rdf
bin\defaults\profile\prefs.js
bin\defaults\profile\mimeTypes.rdf
bin\isp\*
bin\components\aboutRights.js
bin\components\activity.xpt
bin\components\addrbook.xpt
+bin\components\fts3tok.xpt
bin\components\mime.xpt
bin\components\steel.xpt
bin\components\msgbase.xpt
bin\components\msgcompo.xpt
bin\components\msgdb.xpt
bin\components\msgimap.xpt
bin\components\msglocal.xpt
bin\components\msgnews.xpt
--- a/mailnews/build/Makefile.in
+++ b/mailnews/build/Makefile.in
@@ -64,16 +64,17 @@ MODULE_NAME = nsMailModule
REQUIRES = xpcom \
addrbook \
appcomps \
toolkitcomps \
content \
dom \
docshell \
editor \
+ fts3tok \
gfx \
intl \
locale \
layout \
lwbrk \
mailview \
msglocal \
msgcompose \
@@ -84,16 +85,17 @@ REQUIRES = xpcom \
mime \
mork \
necko \
nkcache \
pref \
rdf \
rdfutil \
string \
+ sqlite3 \
txmgr \
widget \
webbrwsr \
uconv \
uriloader \
$(ZLIB_REQUIRES) \
$(NULL)
@@ -122,16 +124,17 @@ SHARED_LIBRARY_LIBS = \
../compose/src/$(LIB_PREFIX)msgcompose_s.$(LIB_SUFFIX) \
../db/msgdb/src/$(LIB_PREFIX)msgdb_s.$(LIB_SUFFIX) \
../imap/src/$(LIB_PREFIX)msgimap_s.$(LIB_SUFFIX) \
../addrbook/src/$(LIB_PREFIX)addrbook_s.$(LIB_SUFFIX) \
../news/src/$(LIB_PREFIX)msgnews_s.$(LIB_SUFFIX) \
../mime/src/$(LIB_PREFIX)mime_s.$(LIB_SUFFIX) \
../mime/emitters/src/$(LIB_PREFIX)emitterutil_s.$(LIB_SUFFIX) \
../extensions/bayesian-spam-filter/src/$(LIB_PREFIX)bayesflt_s.$(LIB_SUFFIX) \
+ ../extensions/fts3/src/$(LIB_PREFIX)fts3tok_s.$(LIB_SUFFIX) \
../extensions/mailviews/src/$(LIB_PREFIX)mailview_s.$(LIB_SUFFIX) \
../extensions/mdn/src/$(LIB_PREFIX)msgmdn_s.$(LIB_SUFFIX) \
../mime/cthandlers/vcard/$(LIB_PREFIX)vcard_s.$(LIB_SUFFIX) \
../mime/cthandlers/glue/$(LIB_PREFIX)mimecthglue_s.$(LIB_SUFFIX) \
$(NULL)
ifdef MOZILLA_INTERNAL_API
EXTRA_DSO_LDOPTS = \
@@ -167,16 +170,17 @@ LOCAL_INCLUDES = -I$(srcdir) \
-I$(srcdir)/../addrbook/src \
-I$(srcdir)/../imap/src \
-I$(srcdir)/../compose/src \
-I$(srcdir)/../base/search/src \
-I$(srcdir)/../mime/src \
-I$(srcdir)/../mime/emitters/src \
-I$(srcdir)/../news/src \
-I$(srcdir)/../extensions/bayesian-spam-filter/src \
+ -I$(srcdir)/../extensions/fts3/src \
-I$(srcdir)/../extensions/mailviews/src \
-I$(srcdir)/../extensions/mdn/src \
$(NULL)
include $(topsrcdir)/config/rules.mk
ifeq ($(OS_ARCH),WINNT)
OS_LIBS += $(call EXPAND_LIBNAME,shell32)
--- a/mailnews/build/nsMailModule.cpp
+++ b/mailnews/build/nsMailModule.cpp
@@ -311,16 +311,22 @@
#include "nsMsgMdnCID.h"
#include "nsMsgMdnGenerator.h"
///////////////////////////////////////////////////////////////////////////////
// vcard includes
///////////////////////////////////////////////////////////////////////////////
#include "nsMimeContentTypeHandler.h"
+///////////////////////////////////////////////////////////////////////////////
+// FTS3 Tokenizer
+///////////////////////////////////////////////////////////////////////////////
+#include "nsFts3TokenizerCID.h"
+#include "nsFts3Tokenizer.h"
+
////////////////////////////////////////////////////////////////////////////////
// mailnews base factories
////////////////////////////////////////////////////////////////////////////////
NS_GENERIC_FACTORY_CONSTRUCTOR(nsMessengerBootstrap)
NS_GENERIC_FACTORY_CONSTRUCTOR_INIT(nsMsgMailSession, Init)
NS_GENERIC_FACTORY_CONSTRUCTOR(nsMessenger)
NS_GENERIC_FACTORY_CONSTRUCTOR_INIT(nsMsgAccountManager, Init)
NS_GENERIC_FACTORY_CONSTRUCTOR(nsMsgAccount)
@@ -514,16 +520,20 @@ NS_GENERIC_FACTORY_CONSTRUCTOR(nsMimeHea
////////////////////////////////////////////////////////////////////////////////
// mime emitter factories
////////////////////////////////////////////////////////////////////////////////
NS_GENERIC_FACTORY_CONSTRUCTOR(nsMimeRawEmitter)
NS_GENERIC_FACTORY_CONSTRUCTOR(nsMimeXmlEmitter)
NS_GENERIC_FACTORY_CONSTRUCTOR(nsMimePlainEmitter)
NS_GENERIC_FACTORY_CONSTRUCTOR_INIT(nsMimeHtmlDisplayEmitter, Init)
+////////////////////////////////////////////////////////////////////////////////
+// FTS3 tokenizer factories
+////////////////////////////////////////////////////////////////////////////////
+NS_GENERIC_FACTORY_CONSTRUCTOR(nsFts3Tokenizer)
+
static NS_METHOD RegisterMimeEmitter(nsIComponentManager *aCompMgr, nsIFile *aPath, const char *registryLocation,
const char *componentType, const nsModuleComponentInfo *info)
{
nsresult rv;
nsCOMPtr<nsICategoryManager> catman = do_GetService(NS_CATEGORYMANAGER_CONTRACTID, &rv);
if (NS_FAILED(rv)) return rv;
nsCString previous;
return catman->AddCategoryEntry("mime-emitter", info->mContractID, info->mContractID,
@@ -1265,16 +1275,22 @@ static const nsModuleComponentInfo gComp
NS_MSGMDNGENERATOR_CONTRACTID, nsMsgMdnGeneratorConstructor },
////////////////////////////////////////////////////////////////////////////////
// mdn components
////////////////////////////////////////////////////////////////////////////////
{ "MIME VCard Handler", NS_VCARD_CONTENT_TYPE_HANDLER_CID, "@mozilla.org/mimecth;1?type=text/x-vcard",
nsVCardMimeContentTypeHandlerConstructor, },
+  ////////////////////////////////////////////////////////////////////////////////
+  // FTS3 tokenizer components
+  // (description string kept identical to the one used by the standalone
+  // fts3tok module: "FTS3 Tokenizer")
+  ////////////////////////////////////////////////////////////////////////////////
+  { "FTS3 Tokenizer", NS_FTS3TOKENIZER_CID,
+    NS_FTS3TOKENIZER_CONTRACTID, nsFts3TokenizerConstructor },
+
#ifdef MOZ_SUITE
////////////////////////////////////////////////////////////////////////////////
// suite general startup
////////////////////////////////////////////////////////////////////////////////
{ "Address Book Manager Startup Handler", NS_ABMANAGER_CID,
NS_ABMANAGERSTARTUPHANDLER_CONTRACTID, nsAbManagerConstructor },
{ "Compose Service", NS_MSGCOMPOSESERVICE_CID,
NS_MSGCOMPOSESTARTUPHANDLER_CONTRACTID, nsMsgComposeServiceConstructor },
--- a/mailnews/db/gloda/modules/datastore.js
+++ b/mailnews/db/gloda/modules/datastore.js
@@ -503,17 +503,17 @@ var GlodaDatastore = {
kConstraintIn: 1,
kConstraintRanges: 2,
kConstraintEquals: 3,
kConstraintStringLike: 4,
kConstraintFulltext: 5,
/* ******************* SCHEMA ******************* */
- _schemaVersion: 13,
+ _schemaVersion: 14,
_schema: {
tables: {
// ----- Messages
folderLocations: {
columns: [
["id", "INTEGER PRIMARY KEY"],
["folderURI", "TEXT NOT NULL"],
@@ -737,16 +737,21 @@ var GlodaDatastore = {
// It does exist, but we (someday) might need to upgrade the schema
else {
// (Exceptions may be thrown if the database is corrupt)
{ // try {
dbConnection = dbService.openUnsharedDatabase(dbFile);
// see _createDB...
dbConnection.executeSimpleSQL("PRAGMA cache_size = 8192");
+ // Register custom tokenizer to index all language text
+ var tokenizer = Cc["@mozilla.org/messenger/fts3tokenizer;1"].
+ getService(Ci.nsIFts3Tokenizer);
+ tokenizer.registerTokenizer(dbConnection);
+
if (dbConnection.schemaVersion != this._schemaVersion) {
this._log.debug("Need to migrate database. (DB version: " +
dbConnection.schemaVersion + " desired version: " +
this._schemaVersion);
dbConnection = this._migrate(dbService, dbFile,
dbConnection,
dbConnection.schemaVersion,
this._schemaVersion);
@@ -858,16 +863,20 @@ var GlodaDatastore = {
// windows). Increasing the page size to 4096 increases the actual byte
// turnover significantly for rollback journals than a page size of 1024,
// and since the rollback journal has to be fsynced, that is undesirable.
dbConnection.executeSimpleSQL("PRAGMA page_size = 1024");
// This is a maximum number of pages to be used. If the database does not
// get this large, then the memory does not get used.
// Do not forget to update the code in _init if you change this value.
dbConnection.executeSimpleSQL("PRAGMA cache_size = 8192");
+ // Register custom tokenizer to index all language text
+ var tokenizer = Cc["@mozilla.org/messenger/fts3tokenizer;1"].
+ getService(Ci.nsIFts3Tokenizer);
+ tokenizer.registerTokenizer(dbConnection);
dbConnection.beginTransaction();
try {
this._createSchema(dbConnection);
dbConnection.commitTransaction();
}
catch(ex) {
dbConnection.rollbackTransaction();
@@ -883,17 +892,17 @@ var GlodaDatastore = {
this._log.info("Creating table: " + aTableName);
aDBConnection.createTable(aTableName,
[(coldef[0] + " " + coldef[1]) for each
([i, coldef] in Iterator(aTableDef.columns))].join(", "));
// - Create the fulltext table if applicable
if (aTableDef.fulltextColumns) {
let createFulltextSQL = "CREATE VIRTUAL TABLE " + aTableName + "Text" +
- " USING fts3(tokenize porter, " +
+ " USING fts3(tokenize mozporter, " +
[(coldef[0] + " " + coldef[1]) for each
([i, coldef] in Iterator(aTableDef.fulltextColumns))].join(", ") +
")";
this._log.info("Creating fulltext table: " + createFulltextSQL);
aDBConnection.executeSimpleSQL(createFulltextSQL);
}
// - Create its indices
@@ -989,17 +998,19 @@ var GlodaDatastore = {
// - note that I screwed up and failed to mark the schema change; apparently
// no database will claim to be version 13...
// version 14:
// - new attributes: forwarded, repliedTo, bcc, recipients
// - altered fromMeTo and fromMeCc to fromMe
// - altered toMe and ccMe to just be toMe
// - exposes bcc to cc-related attributes
// - MIME type DB schema overhaul
- if (aCurVersion < 14) {
+ // version 15:
+ // - change tokenizer to mozporter to support CJK
+ if (aCurVersion < 15) {
aDBConnection.close();
aDBFile.remove(false);
this._log.warn("Global database has been purged due to schema change.");
return this._createDB(aDBService, aDBFile);
}
aDBConnection.schemaVersion = aNewVersion;
--- a/mailnews/db/gloda/modules/msg_search.js
+++ b/mailnews/db/gloda/modules/msg_search.js
@@ -249,21 +249,51 @@ GlodaMsgSearcher.prototype = {
explicitSQL: FULLTEXT_QUERY_EXPLICIT_SQL,
// osets is 0-based column number 14 (volatile to column changes)
// dascore becomes 0-based column number 15
outerWrapColumns: [DASCORE_SQL_SNIPPET + " AS dascore"],
// save the offset column for extra analysis
stashColumns: [14]
});
+ // CJK character is indexed by bi-gram, so we need split it if CJK
+ let querywords = new Array();
+ this.fulltextTerms.forEach(function (term) {
+ let lastpos = 0;
+ let code;
+ for (var i = 1; i < term.length - 1; i++) {
+ code = term.charCodeAt(i);
+ // not CJK. Don't use bi-gram
+ if (code < 0x2000 || (code >= 0xa000 && c < 0xac00))
+ continue;
+
+ // bi-gram search text
+ querywords.push(term.substring(lastpos, i+1));
+ lastpos = i;
+ }
+
+ if (term.length) {
+ let querylast = term.substring(lastpos);
+ if (querylast.length == 1) {
+ code = querylast.charCodeAt(0);
+ if (code >= 0x2000 && !(code >= 0xa000 && code < 0xac00))
+ // Users uses just 1 character as search string.
+ // We have to consider it for CJK (there is 1 character word in CJK)
+ querylast += "*";
+ }
+ querywords.push(querylast);
+ }
+ });
+
let fulltextQueryString;
+
if (this.andTerms)
- fulltextQueryString = '"' + this.fulltextTerms.join('" "') + '"';
+ fulltextQueryString = '"' + querywords.join('" "') + '"';
else
- fulltextQueryString = '"' + this.fulltextTerms.join('" OR "') + '"';
+ fulltextQueryString = '"' + querywords.join('" OR "') + '"';
query.fulltextMatches(fulltextQueryString);
query.orderBy(this.sortBy);
query.limit(this.retrievalLimit);
return query;
},
--- a/mailnews/db/gloda/test/unit/test_intl.js
+++ b/mailnews/db/gloda/test/unit/test_intl.js
@@ -21,16 +21,18 @@ var intlPhrases = [
'euc-jp': ['=?shift-jis?b?jqmTrppTid2LQA==?=',
'\xbc\xab\xc6\xb0\xd3\xb4\xb2\xdf\xb5\xa1'],
'shift-jis': ['=?shift-jis?b?jqmTrppTid2LQA==?=',
'\x8e\xa9\x93\xae\x9aS\x89\xdd\x8b@']
}
}
];
+var resultList = [];
+
/**
* For each phrase in the intlPhrases array (we are parameterized over it using
* parameterizeTest in the 'tests' declaration), create a message where the
* subject, body, and attachment name are populated using the encodings in
* the phrase's "encodings" attribute, one encoding per message. Make sure
* that the strings as exposed by the gloda representation are equal to the
* expected/actual value.
*/
@@ -46,16 +48,17 @@ function test_index(aPhrase) {
attachments: [
{filename: quoted, body: "gabba gabba hey"},
],
// save off the actual value for checking
callerData: [charset, aPhrase.actual]
});
messages.push(smsg);
+ resultList.push(smsg);
}
indexMessages(messages, verify_index, next_test);
}
/**
* Does the per-message verification for test_index. Knows what is right for
* each message because of the callerData attribute on the synthetic message.
@@ -71,19 +74,29 @@ function verify_index(smsg, gmsg) {
LOG.debug("body: " + indexedBodyText +
" (len: " + indexedBodyText.length + ")");
do_check_eq(actual, indexedBodyText);
LOG.debug("attachment name:" + attachmentName +
" (len: " + attachmentName.length + ")");
do_check_eq(actual, attachmentName);
}
+function test_intl_fulltextsearch()
+{
+ var query = Gloda.newQuery(Gloda.NOUN_MESSAGE);
+ /* CJK text is bi-gram */
+ query.bodyMatches('\u81ea\u52d5');
+ queryExpect(query, resultList);
+}
+
+
/* ===== Driver ===== */
var tests = [
parameterizeTest(test_index, intlPhrases),
+ test_intl_fulltextsearch,
];
function run_test() {
// use mbox injection because the fake server chokes sometimes right now
injectMessagesUsing(INJECT_MBOX);
glodaHelperRunTests(tests);
}
--- a/mailnews/extensions/Makefile.in
+++ b/mailnews/extensions/Makefile.in
@@ -38,17 +38,17 @@
DEPTH = ../..
topsrcdir = @top_srcdir@
srcdir = @srcdir@
VPATH = @srcdir@
include $(DEPTH)/config/autoconf.mk
# these extensions are not optional
-PARALLEL_DIRS = mdn mailviews bayesian-spam-filter offline-startup newsblog
+PARALLEL_DIRS = mdn mailviews bayesian-spam-filter offline-startup newsblog fts3
ifdef MOZ_PSM
BUILD_SMIME=1
endif
ifdef BUILD_SMIME
PARALLEL_DIRS += smime
endif
new file mode 100644
--- /dev/null
+++ b/mailnews/extensions/fts3/Makefile.in
@@ -0,0 +1,55 @@
+#
+# ***** BEGIN LICENSE BLOCK *****
+# Version: MPL 1.1/GPL 2.0/LGPL 2.1
+#
+# The contents of this file are subject to the Mozilla Public License Version
+# 1.1 (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+# http://www.mozilla.org/MPL/
+#
+# Software distributed under the License is distributed on an "AS IS" basis,
+# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+# for the specific language governing rights and limitations under the
+# License.
+#
+# The Original Code is mozilla.org code.
+#
+# The Initial Developer of the Original Code is
+# Mozilla Japan.
+# Portions created by the Initial Developer are Copyright (C) 2009
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+# Makoto Kato <m_kato@ga2.so-net.ne.jp>
+#
+# Alternatively, the contents of this file may be used under the terms of
+# either of the GNU General Public License Version 2 or later (the "GPL"),
+# or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+# in which case the provisions of the GPL or the LGPL are applicable instead
+# of those above. If you wish to allow use of your version of this file only
+# under the terms of either the GPL or the LGPL, and not to allow others to
+# use your version of this file under the terms of the MPL, indicate your
+# decision by deleting the provisions above and replace them with the notice
+# and other provisions required by the GPL or the LGPL. If you do not delete
+# the provisions above, a recipient may use your version of this file under
+# the terms of any one of the MPL, the GPL or the LGPL.
+#
+# ***** END LICENSE BLOCK *****
+
+DEPTH = ../../..
+topsrcdir = @top_srcdir@
+
+srcdir = @srcdir@
+VPATH = @srcdir@
+
+include $(DEPTH)/config/autoconf.mk
+
+MODULE = fts3tok
+
+# public/ holds the IDL interface; src/ builds the static library fts3tok_s.
+PARALLEL_DIRS = public src
+
+# The standalone component in build/ is only entered when
+# MOZ_STATIC_MAIL_BUILD is not defined.
+ifndef MOZ_STATIC_MAIL_BUILD
+DIRS = build
+endif
+
+include $(topsrcdir)/config/rules.mk
new file mode 100644
--- /dev/null
+++ b/mailnews/extensions/fts3/build/Makefile.in
@@ -0,0 +1,92 @@
+#
+# ***** BEGIN LICENSE BLOCK *****
+# Version: MPL 1.1/GPL 2.0/LGPL 2.1
+#
+# The contents of this file are subject to the Mozilla Public License Version
+# 1.1 (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+# http://www.mozilla.org/MPL/
+#
+# Software distributed under the License is distributed on an "AS IS" basis,
+# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+# for the specific language governing rights and limitations under the
+# License.
+#
+# The Original Code is mozilla.org code.
+#
+# The Initial Developer of the Original Code is
+# Mozilla Japan.
+# Portions created by the Initial Developer are Copyright (C) 2009
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+# Makoto Kato <m_kato@ga2.so-net.ne.jp>
+#
+# Alternatively, the contents of this file may be used under the terms of
+# either of the GNU General Public License Version 2 or later (the "GPL"),
+# or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+# in which case the provisions of the GPL or the LGPL are applicable instead
+# of those above. If you wish to allow use of your version of this file only
+# under the terms of either the GPL or the LGPL, and not to allow others to
+# use your version of this file under the terms of the MPL, indicate your
+# decision by deleting the provisions above and replace them with the notice
+# and other provisions required by the GPL or the LGPL. If you do not delete
+# the provisions above, a recipient may use your version of this file under
+# the terms of any one of the MPL, the GPL or the LGPL.
+#
+# ***** END LICENSE BLOCK *****
+
+DEPTH = ../../../..
+topsrcdir = @top_srcdir@
+srcdir = @srcdir@
+VPATH = @srcdir@
+
+include $(DEPTH)/config/autoconf.mk
+
+# Builds the fts3tok component library from the factory source in this
+# directory plus the static library produced in ../src.
+MODULE = fts3tok
+LIBRARY_NAME = fts3tok
+META_COMPONENT = mail
+EXPORT_LIBRARY = 1
+SHORT_LIBNAME = fts3tok
+IS_COMPONENT = 1
+# Same name as the one passed to NS_IMPL_NSGETMODULE in
+# nsFts3TokenizerFactory.cpp.
+MODULE_NAME = nsFts3TokenizerModule
+ifndef MOZ_INCOMPLETE_EXTERNAL_LINKAGE
+MOZILLA_INTERNAL_API = 1
+endif
+
+REQUIRES = xpcom \
+ string \
+ sqlite3 \
+ storage \
+ $(NULL)
+
+# Extra library to link against; which of the two names is used depends on
+# USE_SHORT_LIBNAME (presumably the same library under a short and a long
+# name -- confirm).
+ifeq ($(USE_SHORT_LIBNAME),1)
+EXTRA_DSO_LIBS = msgbsutl
+else
+EXTRA_DSO_LIBS = msgbaseutil
+endif
+
+CPPSRCS = nsFts3TokenizerFactory.cpp
+
+# nsFts3Tokenizer.h and nsFts3TokenizerCID.h are included from ../src.
+LOCAL_INCLUDES += -I$(srcdir)/../src
+
+SHARED_LIBRARY_LIBS = \
+ ../src/$(LIB_PREFIX)fts3tok_s.$(LIB_SUFFIX) \
+ $(NULL)
+
+# Linker options differ depending on whether MOZILLA_INTERNAL_API is set.
+ifdef MOZILLA_INTERNAL_API
+EXTRA_DSO_LDOPTS = \
+ $(LIBS_DIR) \
+ $(EXTRA_DSO_LIBS) \
+ $(MOZ_COMPONENT_LIBS) \
+ $(NULL)
+else
+EXTRA_DSO_LDOPTS = \
+ $(LIBS_DIR) \
+ $(EXTRA_DSO_LIBS) \
+ $(XPCOM_GLUE_LDOPTS) \
+ $(NSPR_LIBS) \
+ $(NULL)
+endif
+
+include $(topsrcdir)/config/rules.mk
new file mode 100644
--- /dev/null
+++ b/mailnews/extensions/fts3/build/nsFts3TokenizerFactory.cpp
@@ -0,0 +1,61 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is mozilla.org code.
+ *
+ * The Initial Developer of the Original Code is
+ * Mozilla Japan.
+ * Portions created by the Initial Developer are Copyright (C) 2009
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ * Makoto Kato <m_kato@ga2.so-net.ne.jp>
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either of the GNU General Public License Version 2 or later (the "GPL"),
+ * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+#include "nsISupports.h"
+#include "nsCOMPtr.h"
+
+#include "nsIFactory.h"
+#include "nsIGenericFactory.h"
+#include "nsIServiceManager.h"
+#include "nsIModule.h"
+
+#include "nsFts3TokenizerCID.h"
+
+#include "nsFts3Tokenizer.h"
+
+// Generic factory constructor for nsFts3Tokenizer. The
+// nsFts3TokenizerConstructor referenced in the table below is presumably the
+// function this macro generates.
+NS_GENERIC_FACTORY_CONSTRUCTOR(nsFts3Tokenizer)
+
+// Components provided by this module: only the FTS3 tokenizer, listed with
+// its class ID, contract ID and constructor.
+static const nsModuleComponentInfo components[] =
+{
+ {"FTS3 Tokenizer",
+ NS_FTS3TOKENIZER_CID,
+ NS_FTS3TOKENIZER_CONTRACTID,
+ nsFts3TokenizerConstructor}
+};
+
+// Declares the module nsFts3TokenizerModule using the component table above.
+NS_IMPL_NSGETMODULE(nsFts3TokenizerModule, components)
new file mode 100644
--- /dev/null
+++ b/mailnews/extensions/fts3/public/Makefile.in
@@ -0,0 +1,52 @@
+#
+# ***** BEGIN LICENSE BLOCK *****
+# Version: MPL 1.1/GPL 2.0/LGPL 2.1
+#
+# The contents of this file are subject to the Mozilla Public License Version
+# 1.1 (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+# http://www.mozilla.org/MPL/
+#
+# Software distributed under the License is distributed on an "AS IS" basis,
+# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+# for the specific language governing rights and limitations under the
+# License.
+#
+# The Original Code is mozilla.org code.
+#
+# The Initial Developer of the Original Code is
+# Mozilla Japan.
+# Portions created by the Initial Developer are Copyright (C) 2009
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+# Makoto Kato <m_kato@ga2.so-net.ne.jp>
+#
+# Alternatively, the contents of this file may be used under the terms of
+# either of the GNU General Public License Version 2 or later (the "GPL"),
+# or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+# in which case the provisions of the GPL or the LGPL are applicable instead
+# of those above. If you wish to allow use of your version of this file only
+# under the terms of either the GPL or the LGPL, and not to allow others to
+# use your version of this file under the terms of the MPL, indicate your
+# decision by deleting the provisions above and replace them with the notice
+# and other provisions required by the GPL or the LGPL. If you do not delete
+# the provisions above, a recipient may use your version of this file under
+# the terms of any one of the MPL, the GPL or the LGPL.
+#
+# ***** END LICENSE BLOCK *****
+
+DEPTH = ../../../..
+topsrcdir = @top_srcdir@
+srcdir = @srcdir@
+VPATH = @srcdir@
+
+include $(DEPTH)/config/autoconf.mk
+
+MODULE = fts3tok
+
+# IDL interfaces belonging to the fts3tok module.
+XPIDLSRCS = \
+ nsIFts3Tokenizer.idl \
+ $(NULL)
+
+include $(topsrcdir)/config/rules.mk
new file mode 100644
--- /dev/null
+++ b/mailnews/extensions/fts3/public/nsIFts3Tokenizer.idl
@@ -0,0 +1,48 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is mozilla.org code.
+ *
+ * The Initial Developer of the Original Code is
+ * Mozilla Japan.
+ * Portions created by the Initial Developer are Copyright (C) 2009
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ * Makoto Kato <m_kato@ga2.so-net.ne.jp>
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either of the GNU General Public License Version 2 or later (the "GPL"),
+ * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+#include "nsISupports.idl"
+
+interface mozIStorageConnection;
+
+[scriptable, uuid(136c88ea-7003-4fe8-8835-333fd18e598c)]
+interface nsIFts3Tokenizer : nsISupports {
+ /**
+  * Register the FTS3 tokenizer module for the "mozporter" tokenizer.
+  * mozporter is based on the porter tokenizer, with an added bi-gram
+  * tokenizer for CJK text.
+  *
+  * @param connection the storage connection passed to the registration
+  */
+ void registerTokenizer(in mozIStorageConnection connection);
+};
new file mode 100644
--- /dev/null
+++ b/mailnews/extensions/fts3/src/Makefile.in
@@ -0,0 +1,74 @@
+#
+# ***** BEGIN LICENSE BLOCK *****
+# Version: MPL 1.1/GPL 2.0/LGPL 2.1
+#
+# The contents of this file are subject to the Mozilla Public License Version
+# 1.1 (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+# http://www.mozilla.org/MPL/
+#
+# Software distributed under the License is distributed on an "AS IS" basis,
+# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+# for the specific language governing rights and limitations under the
+# License.
+#
+# The Original Code is mozilla.org code.
+#
+# The Initial Developer of the Original Code is
+# Mozilla Japan.
+# Portions created by the Initial Developer are Copyright (C) 2009
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+# Makoto Kato <m_kato@ga2.so-net.ne.jp>
+#
+# Alternatively, the contents of this file may be used under the terms of
+# either of the GNU General Public License Version 2 or later (the "GPL"),
+# or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+# in which case the provisions of the GPL or the LGPL are applicable instead
+# of those above. If you wish to allow use of your version of this file only
+# under the terms of either the GPL or the LGPL, and not to allow others to
+# use your version of this file under the terms of the MPL, indicate your
+# decision by deleting the provisions above and replace them with the notice
+# and other provisions required by the GPL or the LGPL. If you do not delete
+# the provisions above, a recipient may use your version of this file under
+# the terms of any one of the MPL, the GPL or the LGPL.
+#
+# ***** END LICENSE BLOCK *****
+
+DEPTH = ../../../..
+topsrcdir = @top_srcdir@
+srcdir = @srcdir@
+VPATH = @srcdir@
+
+include $(DEPTH)/config/autoconf.mk
+
+# Builds the static library fts3tok_s, which holds the tokenizer
+# implementation.
+MODULE = fts3tok
+LIBRARY_NAME = fts3tok_s
+META_COMPONENT = mail
+ifndef MOZ_INCOMPLETE_EXTERNAL_LINKAGE
+MOZILLA_INTERNAL_API = 1
+endif
+
+REQUIRES = \
+ xpcom \
+ string \
+ sqlite3 \
+ storage \
+ $(NULL)
+
+# XPCOM wrapper implementing the tokenizer component.
+CPPSRCS = \
+ nsFts3Tokenizer.cpp \
+ $(NULL)
+
+# Porter tokenizer source taken from SQLite (see README.mozilla).
+CSRCS = \
+ fts3_porter.c \
+ $(NULL)
+
+# Compiler flags for SQLite, passed along with the local include flags.
+LOCAL_INCLUDES = \
+ $(SQLITE_CFLAGS)
+
+# we don't want the shared lib, but we want to force the creation of a static lib.
+FORCE_STATIC_LIB = 1
+
+include $(topsrcdir)/config/rules.mk
new file mode 100644
--- /dev/null
+++ b/mailnews/extensions/fts3/src/README.mozilla
@@ -0,0 +1,3 @@
+fts3_porter.c code is from SQLite3.
+
+The customized tokenizer "mozporter" by Mozilla supports CJK indexing using bi-grams, so you have to use a bi-gram search string if you want to search for CJK characters.
new file mode 100644
--- /dev/null
+++ b/mailnews/extensions/fts3/src/fts3_porter.c
@@ -0,0 +1,768 @@
+/*
+** 2006 September 30
+**
+** The author disclaims copyright to this source code. In place of
+** a legal notice, here is a blessing:
+**
+** May you do good and not evil.
+** May you find forgiveness for yourself and forgive others.
+** May you share freely, never taking more than you give.
+**
+*************************************************************************
+** Implementation of the full-text-search tokenizer that implements
+** a Porter stemmer.
+*/
+
+/*
+** The code in this file is only compiled if:
+**
+** * The FTS3 module is being built as an extension
+** (in which case SQLITE_CORE is not defined), or
+**
+** * The FTS3 module is being built into the core of
+** SQLite (in which case SQLITE_ENABLE_FTS3 is defined).
+*/
+#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
+
+
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "fts3_tokenizer.h"
+
+/* Definitions needed to compile this file without the rest of the sqlite3 code */
+
+#define sqlite3_malloc malloc
+#define sqlite3_free free
+#define sqlite3_realloc realloc
+
+static const unsigned char sqlite3Utf8Trans1[] = { /* indexed by (lead byte - 0xc0) in READ_UTF8 */
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
+};
+
+#define READ_UTF8(zIn, zTerm, c) \
+ c = *(zIn++); \
+ if( c>=0xc0 ){ \
+ c = sqlite3Utf8Trans1[c-0xc0]; \
+ while( zIn!=zTerm && (*zIn & 0xc0)==0x80 ){ \
+ c = (c<<6) + (0x3f & *(zIn++)); \
+ } \
+ if( c<0x80 \
+ || (c&0xFFFFF800)==0xD800 \
+ || (c&0xFFFFFFFE)==0xFFFE ){ c = 0xFFFD; } \
+ }
+
+/* End of the compatibility block needed to compile this code */
+
+/*
+** Class derived from sqlite3_tokenizer; it adds no fields of its own.
+*/
+typedef struct porter_tokenizer {
+ sqlite3_tokenizer base; /* Base class */
+} porter_tokenizer;
+
+/*
+** Class derived from sqlite3_tokenizer_cursor
+*/
+typedef struct porter_tokenizer_cursor {
+ sqlite3_tokenizer_cursor base;
+ const char *zInput; /* input we are tokenizing */
+ int nInput; /* size of the input */
+ int iOffset; /* current position in zInput */
+ int iToken; /* index of next token to be returned */
+ char *zToken; /* storage for current token */
+ int nAllocated; /* space allocated to zToken buffer */
+ int iPrevBigramOffset; /* offset porterNext() resumes from after a bi-gram; 0 if none */
+} porter_tokenizer_cursor;
+
+
+/* Forward declaration */
+static const sqlite3_tokenizer_module porterTokenizerModule;
+
+
+/*
+** Create a new, zero-filled tokenizer instance. argc/argv are ignored.
+*/
+static int porterCreate(
+  int argc, const char * const *argv,
+  sqlite3_tokenizer **ppTokenizer
+){
+  porter_tokenizer *pNew = (porter_tokenizer *) sqlite3_malloc(sizeof(*pNew));
+  if( pNew!=NULL ){
+    memset(pNew, 0, sizeof(*pNew));
+    *ppTokenizer = &pNew->base;
+  }
+  return pNew!=NULL ? SQLITE_OK : SQLITE_NOMEM;
+}
+
+/*
+** Destroy a tokenizer: free the object and return SQLITE_OK.
+*/
+static int porterDestroy(sqlite3_tokenizer *pTokenizer){
+ sqlite3_free(pTokenizer);
+ return SQLITE_OK;
+}
+
+/*
+** Open a cursor over the string zInput[0..nInput-1].  A NULL zInput is
+** treated as empty, and a negative nInput means "use strlen(zInput)".
+** The new cursor starts at offset 0 with no token buffer and no pending
+** bi-gram, and is returned in *ppCursor.
+*/
+static int porterOpen(
+  sqlite3_tokenizer *pTokenizer,         /* The tokenizer */
+  const char *zInput, int nInput,        /* String to be tokenized */
+  sqlite3_tokenizer_cursor **ppCursor    /* OUT: Tokenization cursor */
+){
+  porter_tokenizer_cursor *pCsr;
+  int nBytes;                            /* effective length of the input */
+
+  pCsr = (porter_tokenizer_cursor *) sqlite3_malloc(sizeof(*pCsr));
+  if( pCsr==NULL ){
+    return SQLITE_NOMEM;
+  }
+
+  if( zInput==0 ){
+    nBytes = 0;
+  }else{
+    nBytes = nInput<0 ? (int)strlen(zInput) : nInput;
+  }
+  pCsr->zInput = zInput;
+  pCsr->nInput = nBytes;
+  pCsr->iOffset = pCsr->iToken = 0;      /* start at the beginning */
+  pCsr->zToken = NULL;                   /* no space allocated, yet */
+  pCsr->nAllocated = 0;
+  pCsr->iPrevBigramOffset = 0;
+  *ppCursor = &pCsr->base;
+  return SQLITE_OK;
+}
+
+/*
+** Close a tokenization cursor previously opened by a call to
+** porterOpen() above.
+*/
+static int porterClose(sqlite3_tokenizer_cursor *pCursor){
+ porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor;
+ sqlite3_free(c->zToken); /* token buffer; still NULL if porterNext() never grew it */
+ sqlite3_free(c);
+ return SQLITE_OK;
+}
+/*
+** Letter classes indexed by (letter - 'a'): 0 vowel, 1 consonant, 2 for 'y'
+*/
+static const char cType[] = {
+ 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0,
+ 1, 1, 1, 2, 1
+};
+
+/*
+** isConsonant() and isVowel() determine if the first character in
+** the string they point to is a consonant or a vowel, according
+** to Porter rules.
+**
+** A consonant is any letter other than 'a', 'e', 'i', 'o', or 'u'.
+** 'Y' is a consonant unless it follows another consonant,
+** in which case it is a vowel.
+**
+** In these routines, the letters are in reverse order. So the 'y' rule
+** is that 'y' is a consonant unless it is followed by another
+** consonant.
+*/
+static int isVowel(const char*);
+static int isConsonant(const char *z){
+ int j;
+ char x = *z;
+ if( x==0 ) return 0; /* end of string is neither vowel nor consonant */
+ assert( x>='a' && x<='z' );
+ j = cType[x-'a'];
+ if( j<2 ) return j; /* plain vowel (0) or plain consonant (1) */
+ return z[1]==0 || isVowel(z + 1); /* 'y': decided by the next (reversed) letter */
+}
+static int isVowel(const char *z){ /* counterpart of isConsonant(); z[] is reversed */
+ int j;
+ char x = *z;
+ if( x==0 ) return 0; /* end of string is neither vowel nor consonant */
+ assert( x>='a' && x<='z' );
+ j = cType[x-'a'];
+ if( j<2 ) return 1-j; /* not 'y': invert the consonant flag */
+ return isConsonant(z + 1); /* 'y' is a vowel when the next (reversed) letter is a consonant */
+}
+
+/*
+** Let any sequence of one or more vowels be represented by V and let
+** C be a sequence of one or more consonants.  Then every word can be
+** represented as:
+**
+**           [C] (VC){m} [V]
+**
+** In prose: A word is an optional consonant sequence followed by zero or
+** more vowel-consonant pairs followed by an optional vowel sequence.
+** "m" is the number of vowel-consonant pairs.
+**
+** Return true if the m-value for z is 1 or more.  In other words,
+** return true if z contains at least one vowel that is followed
+** by a consonant.
+**
+** In this routine z[] is in reverse order.  So we are really looking
+** for an instance of a consonant followed by a vowel.
+*/
+static int m_gt_0(const char *z){
+  const char *p;
+  for(p=z; isVowel(p); p++){}
+  if( *p=='\0' ) return 0;
+  for(; isConsonant(p); p++){}
+  return *p!='\0';
+}
+
+/* Like m_gt_0 above except we are looking for a value of m which is
+** exactly 1.  z[] is in reverse order.
+*/
+static int m_eq_1(const char *z){
+  for(; isVowel(z); z++){}
+  if( !*z ) return 0;
+  for(; isConsonant(z); z++){}
+  if( !*z ) return 0;
+  for(; isVowel(z); z++){}
+  if( !*z ) return 1;
+  for(; isConsonant(z); z++){}
+  return !*z;
+}
+
+/* Like m_gt_0 above except we are looking for a value of m>1 instead
+** of m>0.  z[] is in reverse order.
+*/
+static int m_gt_1(const char *z){
+  for(; isVowel(z); z++){}
+  if( !*z ) return 0;
+  for(; isConsonant(z); z++){}
+  if( !*z ) return 0;
+  for(; isVowel(z); z++){}
+  if( !*z ) return 0;
+  for(; isConsonant(z); z++){}
+  return *z!=0;
+}
+
+/*
+** Return TRUE if there is a vowel anywhere within the string z.
+*/
+static int hasVowel(const char *z){
+  for(; isConsonant(z); z++){}
+  return *z!='\0';
+}
+
+/*
+** Return TRUE if the word ends in a double consonant.  The text is
+** reversed here, so we are really looking at the first two
+** characters of z[].
+*/
+static int doubleConsonant(const char *z){
+  if( !isConsonant(z) || z[0]!=z[1] ) return 0;
+  return isConsonant(z+1);
+}
+
+/*
+** Return TRUE if the word ends with three letters which
+** are consonant-vowel-consonant and where the final consonant
+** is not 'w', 'x', or 'y'.
+**
+** The word is reversed here.  So we are really checking the
+** first three letters and the first one cannot be in [wxy].
+*/
+static int star_oh(const char *z){
+  if( z[0]==0 || !isConsonant(z) ) return 0;
+  if( z[0]=='w' || z[0]=='x' || z[0]=='y' ) return 0;
+  if( z[1]==0 || !isVowel(z+1) ) return 0;
+  if( z[2]==0 ) return 0;
+  return isConsonant(z+2);
+}
+
+/*
+** If the word ends with zFrom and xCond() is true for the stem
+** of the word that precedes the zFrom ending, then change the
+** ending to zTo.
+**
+** The input word *pz and zFrom are both in reverse order. zTo
+** is in normal order.
+**
+** Return TRUE if zFrom matches. Return FALSE if zFrom does not
+** match. Note that TRUE is returned even if xCond() fails and
+** no substitution occurs.
+*/
+static int stem(
+ char **pz, /* The word being stemmed (Reversed) */
+ const char *zFrom, /* If the ending matches this... (Reversed) */
+ const char *zTo, /* ... change the ending to this (not reversed) */
+ int (*xCond)(const char*) /* Condition that must be true, or 0 for no condition */
+){
+ char *z = *pz;
+ while( *zFrom && *zFrom==*z ){ z++; zFrom++; }
+ if( *zFrom!=0 ) return 0;
+ if( xCond && !xCond(z) ) return 1;
+ while( *zTo ){
+ *(--z) = *(zTo++); /* storing the forward text backwards keeps z[] reversed */
+ }
+ *pz = z;
+ return 1;
+}
+
+/*
+** Fallback "stemmer" for words the Porter algorithm cannot handle.
+** Copies zIn[0..nIn-1] to zOut, folding US-ASCII upper case to lower
+** case.  Over-long words are shortened to a head and a tail: 3+3 bytes
+** when the word contains a digit (longer than 6 bytes), otherwise 10+10
+** bytes (longer than 20 bytes).  zOut is nul-terminated and the final
+** length, excluding the terminator, is stored in *pnOut.
+*/
+static void copy_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){
+  int nOut = 0;      /* bytes written so far / final length */
+  int sawDigit = 0;  /* true once an ASCII digit has been copied */
+  int nKeep;         /* bytes kept at each end of an over-long word */
+  int k;
+  /* Copy with case folding, noting whether any digit is present */
+  while( nOut<nIn ){
+    int ch = zIn[nOut];
+    if( ch>='0' && ch<='9' ) sawDigit = 1;
+    zOut[nOut++] = (ch>='A' && ch<='Z') ? ch - 'A' + 'a' : ch;
+  }
+  nKeep = sawDigit ? 3 : 10;
+  if( nIn>nKeep*2 ){
+    /* Slide the last nKeep bytes down so they follow the first nKeep */
+    for(k=0; k<nKeep; k++){
+      zOut[nKeep+k] = zOut[nIn-nKeep+k];
+    }
+    nOut = nKeep*2;
+  }
+  zOut[nOut] = 0;
+  *pnOut = nOut;
+}
+
+
+/*
+** Stem the input word zIn[0..nIn-1]. Store the output in zOut.
+** zOut is at least big enough to hold nIn bytes. Write the actual
+** size of the output word (exclusive of the '\0' terminator) into *pnOut.
+**
+** Any upper-case characters in the US-ASCII character set ([A-Z])
+** are converted to lower case. Upper-case UTF characters are
+** unchanged.
+**
+** Words that are longer than about 20 bytes are stemmed by retaining
+** a few bytes from the beginning and the end of the word. If the
+** word contains digits, 3 bytes are taken from the beginning and
+** 3 bytes from the end. For long words without digits, 10 bytes
+** are taken from each end. US-ASCII case folding still applies.
+**
+** If the input word contains any character that is not in [a-zA-Z]
+** (digits included) then no stemming is attempted and this routine
+** just copies the input into the output with US-ASCII case folding,
+** using copy_stemmer().
+**
+** Stemming never increases the length of the word. So there is
+** no chance of overflowing the zOut buffer.
+*/
+static void porter_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){
+ int i, j, c;
+ char zReverse[28]; /* the word, lower-cased and stored in reverse order */
+ char *z, *z2;
+ if( nIn<3 || nIn>=sizeof(zReverse)-7 ){
+ /* The word is too big or too small for the porter stemmer.
+ ** Fallback to the copy stemmer */
+ copy_stemmer(zIn, nIn, zOut, pnOut);
+ return;
+ }
+ for(i=0, j=sizeof(zReverse)-6; i<nIn; i++, j--){
+ c = zIn[i];
+ if( c>='A' && c<='Z' ){
+ zReverse[j] = c + 'a' - 'A';
+ }else if( c>='a' && c<='z' ){
+ zReverse[j] = c;
+ }else{
+ /* The use of a character not in [a-zA-Z] means that we fallback
+ ** to the copy stemmer */
+ copy_stemmer(zIn, nIn, zOut, pnOut);
+ return;
+ }
+ }
+ memset(&zReverse[sizeof(zReverse)-5], 0, 5); /* nul bytes after the reversed word */
+ z = &zReverse[j+1];
+
+
+ /* Step 1a */
+ if( z[0]=='s' ){
+ if(
+ !stem(&z, "sess", "ss", 0) &&
+ !stem(&z, "sei", "i", 0) &&
+ !stem(&z, "ss", "ss", 0)
+ ){
+ z++;
+ }
+ }
+
+ /* Step 1b */
+ z2 = z;
+ if( stem(&z, "dee", "ee", m_gt_0) ){
+ /* Do nothing. The work was all in the test */
+ }else if(
+ (stem(&z, "gni", "", hasVowel) || stem(&z, "de", "", hasVowel))
+ && z!=z2
+ ){
+ if( stem(&z, "ta", "ate", 0) ||
+ stem(&z, "lb", "ble", 0) ||
+ stem(&z, "zi", "ize", 0) ){
+ /* Do nothing. The work was all in the test */
+ }else if( doubleConsonant(z) && (*z!='l' && *z!='s' && *z!='z') ){
+ z++;
+ }else if( m_eq_1(z) && star_oh(z) ){
+ *(--z) = 'e';
+ }
+ }
+
+ /* Step 1c */
+ if( z[0]=='y' && hasVowel(z+1) ){
+ z[0] = 'i';
+ }
+
+ /* Step 2 */
+ switch( z[1] ){
+ case 'a':
+ stem(&z, "lanoita", "ate", m_gt_0) ||
+ stem(&z, "lanoit", "tion", m_gt_0);
+ break;
+ case 'c':
+ stem(&z, "icne", "ence", m_gt_0) ||
+ stem(&z, "icna", "ance", m_gt_0);
+ break;
+ case 'e':
+ stem(&z, "rezi", "ize", m_gt_0);
+ break;
+ case 'g':
+ stem(&z, "igol", "log", m_gt_0);
+ break;
+ case 'l':
+ stem(&z, "ilb", "ble", m_gt_0) ||
+ stem(&z, "illa", "al", m_gt_0) ||
+ stem(&z, "iltne", "ent", m_gt_0) ||
+ stem(&z, "ile", "e", m_gt_0) ||
+ stem(&z, "ilsuo", "ous", m_gt_0);
+ break;
+ case 'o':
+ stem(&z, "noitazi", "ize", m_gt_0) ||
+ stem(&z, "noita", "ate", m_gt_0) ||
+ stem(&z, "rota", "ate", m_gt_0);
+ break;
+ case 's':
+ stem(&z, "msila", "al", m_gt_0) ||
+ stem(&z, "ssenevi", "ive", m_gt_0) ||
+ stem(&z, "ssenluf", "ful", m_gt_0) ||
+ stem(&z, "ssensuo", "ous", m_gt_0);
+ break;
+ case 't':
+ stem(&z, "itila", "al", m_gt_0) ||
+ stem(&z, "itivi", "ive", m_gt_0) ||
+ stem(&z, "itilib", "ble", m_gt_0);
+ break;
+ }
+
+ /* Step 3 */
+ switch( z[0] ){
+ case 'e':
+ stem(&z, "etaci", "ic", m_gt_0) ||
+ stem(&z, "evita", "", m_gt_0) ||
+ stem(&z, "ezila", "al", m_gt_0);
+ break;
+ case 'i':
+ stem(&z, "itici", "ic", m_gt_0);
+ break;
+ case 'l':
+ stem(&z, "laci", "ic", m_gt_0) ||
+ stem(&z, "luf", "", m_gt_0);
+ break;
+ case 's':
+ stem(&z, "ssen", "", m_gt_0);
+ break;
+ }
+
+ /* Step 4 */
+ switch( z[1] ){
+ case 'a':
+ if( z[0]=='l' && m_gt_1(z+2) ){
+ z += 2;
+ }
+ break;
+ case 'c':
+ if( z[0]=='e' && z[2]=='n' && (z[3]=='a' || z[3]=='e') && m_gt_1(z+4) ){
+ z += 4;
+ }
+ break;
+ case 'e':
+ if( z[0]=='r' && m_gt_1(z+2) ){
+ z += 2;
+ }
+ break;
+ case 'i':
+ if( z[0]=='c' && m_gt_1(z+2) ){
+ z += 2;
+ }
+ break;
+ case 'l':
+ if( z[0]=='e' && z[2]=='b' && (z[3]=='a' || z[3]=='i') && m_gt_1(z+4) ){
+ z += 4;
+ }
+ break;
+ case 'n':
+ if( z[0]=='t' ){
+ if( z[2]=='a' ){
+ if( m_gt_1(z+3) ){
+ z += 3;
+ }
+ }else if( z[2]=='e' ){
+ stem(&z, "tneme", "", m_gt_1) ||
+ stem(&z, "tnem", "", m_gt_1) ||
+ stem(&z, "tne", "", m_gt_1);
+ }
+ }
+ break;
+ case 'o':
+ if( z[0]=='u' ){
+ if( m_gt_1(z+2) ){
+ z += 2;
+ }
+ }else if( z[3]=='s' || z[3]=='t' ){
+ stem(&z, "noi", "", m_gt_1);
+ }
+ break;
+ case 's':
+ if( z[0]=='m' && z[2]=='i' && m_gt_1(z+3) ){
+ z += 3;
+ }
+ break;
+ case 't':
+ stem(&z, "eta", "", m_gt_1) ||
+ stem(&z, "iti", "", m_gt_1);
+ break;
+ case 'u':
+ if( z[0]=='s' && z[2]=='o' && m_gt_1(z+3) ){
+ z += 3;
+ }
+ break;
+ case 'v':
+ case 'z':
+ if( z[0]=='e' && z[2]=='i' && m_gt_1(z+3) ){
+ z += 3;
+ }
+ break;
+ }
+
+ /* Step 5a */
+ if( z[0]=='e' ){
+ if( m_gt_1(z+1) ){
+ z++;
+ }else if( m_eq_1(z+1) && !star_oh(z+1) ){
+ z++;
+ }
+ }
+
+ /* Step 5b */
+ if( m_gt_1(z) && z[0]=='l' && z[1]=='l' ){
+ z++;
+ }
+
+ /* z[] is now the stemmed word in reverse order. Flip it back
+ ** around into forward order and return.
+ */
+ *pnOut = i = strlen(z);
+ zOut[i] = 0;
+ while( *z ){
+ zOut[--i] = *(z++);
+ }
+}
+
+/*
+** ASCII characters that can be part of a token, indexed by (ch - 0x30)
+** for 0x30..0x7f; isDelim() treats ASCII below 0x30 as a delimiter.
+** Characters with the high bit set are not looked up here: isDelim()
+** decodes them as UTF-8 and applies its own rules.
+*/
+static const char porterIdChar[] = {
+/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */
+};
+
+#define IS_SPACE(x) (((x)>=0x2000&&(x)<=0x200a) || (x)==0x205f) /* U+2000..U+200A and U+205F */
+#define IS_JA_DELIM(x) (((x)==0x3001)||((x)==0xFF64)||((x)==0xFF0E)||((x)==0x3002)||((x)==0xFF61)||((x)==0xFF0C)) /* code points isDelim() always treats as delimiters */
+
+#define BIGRAM_RESET 0 /* start of a scan; also set after a delimiter or a non-CJK-range character */
+#define BIGRAM_UNKNOWN 1 /* one CJK-range character seen; bi-gram not decided yet */
+#define BIGRAM_USE 2 /* two consecutive CJK-range characters: the token is a bi-gram */
+#define BIGRAM_ASCII 3 /* last character was ASCII (a token character, or any ASCII right after a CJK-range one) */
+
+/*
+** Classify the character that starts at zCur and update the bi-gram
+** *state.  zTerm points at the last byte of the input; *len receives the
+** number of bytes the character occupies.  Returns non-zero when the
+** character must end the token being scanned, zero when it extends it.
+*/
+static int isDelim(
+  const unsigned char *zCur,    /* IN: current pointer of token */
+  const unsigned char *zTerm,   /* IN: last pointer of token */
+  int *len,                     /* OUT: analyzed bytes in this token */
+  int *state                    /* IN/OUT: analyze state */
+){
+  const unsigned char *zIn = zCur;
+  /* READ_UTF8 wants one past the end, or the final character is cut short */
+  const unsigned char *zEnd = zTerm + 1;
+  /* Wide enough for any code point: 16 bits would truncate U+10000 and up */
+  unsigned int c;
+
+  /* ASCII character range has rule */
+  if( !(*zCur & 0x80) ){
+    int delim = (*zCur<0x30 || !porterIdChar[*zCur-0x30]);
+    *len = 1;
+    if( *state==BIGRAM_USE || *state==BIGRAM_UNKNOWN ){
+      /* previous maybe CJK and current is ascii: must break */
+      *state = BIGRAM_ASCII;
+      delim = 1;
+    }else{
+      /* a delimiter resets the state, any other ASCII marks it as ascii */
+      *state = delim ? BIGRAM_RESET : BIGRAM_ASCII;
+    }
+    return delim;
+  }
+
+  /* decode the UTF-8 sequence into a code point to analyze the character */
+  READ_UTF8(zIn, zEnd, c);
+  *len = zIn - zCur;
+
+  /* this isn't CJK range, so return as no delim */
+  if( c<0x2000 || (c>=0xa000 && c<0xac00) ){
+    *state = BIGRAM_RESET;
+    return 0;
+  }
+  /* this is space character or delim character */
+  if( IS_SPACE(c) || IS_JA_DELIM(c) ){
+    *state = BIGRAM_RESET;
+    return 1;
+  }
+  /* Previous is ascii and current maybe CJK: must break, mark as unknown */
+  if( *state==BIGRAM_ASCII ){
+    *state = BIGRAM_UNKNOWN;
+    return 1;
+  }
+  /* We have no rule for CJK: a second consecutive character makes a bi-gram */
+  if( *state==BIGRAM_UNKNOWN || *state==BIGRAM_USE ){
+    *state = BIGRAM_USE;
+    return 1;
+  }
+  *state = BIGRAM_UNKNOWN;      /* first such character: mark as unknown */
+  return 0;
+}
+
+/*
+** Extract the next token from a cursor opened by porterOpen().  Returns
+** SQLITE_OK with the OUT parameters set, SQLITE_DONE at the end of the
+** input, or SQLITE_NOMEM if the token buffer cannot be grown.
+*/
+static int porterNext(
+  sqlite3_tokenizer_cursor *pCursor,  /* Cursor returned by porterOpen */
+  const char **pzToken,               /* OUT: *pzToken is the token text */
+  int *pnBytes,                       /* OUT: Number of bytes in token */
+  int *piStartOffset,                 /* OUT: Starting offset of token */
+  int *piEndOffset,                   /* OUT: Ending offset of token */
+  int *piPosition                     /* OUT: Position integer of token */
+){
+  porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor;
+  const unsigned char *z = (const unsigned char *) c->zInput;
+  int len = 0;
+  int state;
+
+  while( c->iOffset<c->nInput ){
+    int iStartOffset;
+
+    if( c->iPrevBigramOffset==0 ){
+      /* Scan past delimiter characters */
+      state = BIGRAM_RESET;
+      while( c->iOffset<c->nInput && isDelim(z + c->iOffset, z + c->nInput - 1, &len, &state) ){
+        c->iOffset += len;
+      }
+    }else{
+      /* The previous token was a bi-gram: restart at its second character */
+      c->iOffset = c->iPrevBigramOffset;
+    }
+
+    /* Count non-delimiter characters. */
+    iStartOffset = c->iOffset;
+    state = BIGRAM_RESET;
+    while( c->iOffset<c->nInput && !isDelim(z + c->iOffset, z + c->nInput - 1, &len, &state) ){
+      c->iOffset += len;
+    }
+
+    if( state==BIGRAM_USE ){
+      /* Split word by bigram: take the 2nd character and remember its start */
+      c->iPrevBigramOffset = c->iOffset;
+      c->iOffset += len;
+    }else{
+      c->iPrevBigramOffset = 0;
+    }
+
+    if( c->iOffset>iStartOffset ){
+      int n = c->iOffset-iStartOffset;
+      if( n>c->nAllocated ){
+        /* Grow via a temporary so the old buffer is not lost on failure */
+        char *zNew = (char *) sqlite3_realloc(c->zToken, n+20);
+        if( zNew==NULL ) return SQLITE_NOMEM;
+        c->zToken = zNew;
+        c->nAllocated = n+20;
+      }
+      if( state==BIGRAM_USE ){
+        /* This is by bigram. So it is unnecessary to convert word */
+        copy_stemmer((const char *) &z[iStartOffset], n, c->zToken, pnBytes);
+      }else{
+        porter_stemmer((const char *) &z[iStartOffset], n, c->zToken, pnBytes);
+      }
+      *pzToken = c->zToken;
+      *piStartOffset = iStartOffset;
+      *piEndOffset = c->iOffset;
+      *piPosition = c->iToken++;
+      return SQLITE_OK;
+    }
+  }
+  return SQLITE_DONE;
+}
+
+/*
+** The set of routines that implement the porter-stemmer tokenizer
+*/
+static const sqlite3_tokenizer_module porterTokenizerModule = {
+ 0, /* iVersion */
+ porterCreate, /* xCreate */
+ porterDestroy, /* xDestroy */
+ porterOpen, /* xOpen */
+ porterClose, /* xClose */
+ porterNext, /* xNext */
+};
+
+/*
+** Return a pointer to the static porterTokenizerModule structure
+** in *ppModule. Nothing is allocated here.
+*/
+void sqlite3Fts3PorterTokenizerModule(
+ sqlite3_tokenizer_module const**ppModule
+){
+ *ppModule = &porterTokenizerModule;
+}
+
+#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
new file mode 100644
--- /dev/null
+++ b/mailnews/extensions/fts3/src/fts3_tokenizer.h
@@ -0,0 +1,148 @@
+/*
+** 2006 July 10
+**
+** The author disclaims copyright to this source code.
+**
+*************************************************************************
+** Defines the interface to tokenizers used by fulltext-search. There
+** are three basic components:
+**
+** sqlite3_tokenizer_module is a singleton defining the tokenizer
+** interface functions. This is essentially the class structure for
+** tokenizers.
+**
+** sqlite3_tokenizer is used to define a particular tokenizer, perhaps
+** including customization information defined at creation time.
+**
+** sqlite3_tokenizer_cursor is generated by a tokenizer to generate
+** tokens from a particular input.
+*/
+#ifndef _FTS3_TOKENIZER_H_
+#define _FTS3_TOKENIZER_H_
+
+/* TODO(shess) Only used for SQLITE_OK and SQLITE_DONE at this time.
+** If tokenizers are to be allowed to call sqlite3_*() functions, then
+** we will need a way to register the API consistently.
+*/
+#include "sqlite3.h"
+
+/*
+** Structures used by the tokenizer interface. When a new tokenizer
+** implementation is registered, the caller provides a pointer to
+** an sqlite3_tokenizer_module containing pointers to the callback
+** functions that make up an implementation.
+**
+** When an fts3 table is created, it passes any arguments passed to
+** the tokenizer clause of the CREATE VIRTUAL TABLE statement to the
+** sqlite3_tokenizer_module.xCreate() function of the requested tokenizer
+** implementation. The xCreate() function in turn returns an
+** sqlite3_tokenizer structure representing the specific tokenizer to
+** be used for the fts3 table (customized by the tokenizer clause arguments).
+**
+** To tokenize an input buffer, the sqlite3_tokenizer_module.xOpen()
+** method is called. It returns an sqlite3_tokenizer_cursor object
+** that may be used to tokenize a specific input buffer based on
+** the tokenization rules supplied by a specific sqlite3_tokenizer
+** object.
+*/
+typedef struct sqlite3_tokenizer_module sqlite3_tokenizer_module;
+typedef struct sqlite3_tokenizer sqlite3_tokenizer;
+typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor;
+
+struct sqlite3_tokenizer_module {
+
+ /*
+ ** Structure version. Should always be set to 0.
+ */
+ int iVersion;
+
+ /*
+ ** Create a new tokenizer. The values in the argv[] array are the
+ ** arguments passed to the "tokenizer" clause of the CREATE VIRTUAL
+ ** TABLE statement that created the fts3 table. For example, if
+ ** the following SQL is executed:
+ **
+ ** CREATE .. USING fts3( ... , tokenizer <tokenizer-name> arg1 arg2)
+ **
+ ** then argc is set to 2, and the argv[] array contains pointers
+ ** to the strings "arg1" and "arg2".
+ **
+ ** This method should return either SQLITE_OK (0), or an SQLite error
+ ** code. If SQLITE_OK is returned, then *ppTokenizer should be set
+ ** to point at the newly created tokenizer structure. The generic
+ ** sqlite3_tokenizer.pModule variable should not be initialised by
+ ** this callback. The caller will do so.
+ */
+ int (*xCreate)(
+ int argc, /* Size of argv array */
+ const char *const*argv, /* Tokenizer argument strings */
+ sqlite3_tokenizer **ppTokenizer /* OUT: Created tokenizer */
+ );
+
+ /*
+ ** Destroy an existing tokenizer. The fts3 module calls this method
+ ** exactly once for each successful call to xCreate().
+ */
+ int (*xDestroy)(sqlite3_tokenizer *pTokenizer);
+
+ /*
+ ** Create a tokenizer cursor to tokenize an input buffer. The caller
+ ** is responsible for ensuring that the input buffer remains valid
+ ** until the cursor is closed (using the xClose() method).
+ */
+ int (*xOpen)(
+ sqlite3_tokenizer *pTokenizer, /* Tokenizer object */
+ const char *pInput, int nBytes, /* Input buffer */
+ sqlite3_tokenizer_cursor **ppCursor /* OUT: Created tokenizer cursor */
+ );
+
+ /*
+ ** Destroy an existing tokenizer cursor. The fts3 module calls this
+ ** method exactly once for each successful call to xOpen().
+ */
+ int (*xClose)(sqlite3_tokenizer_cursor *pCursor);
+
+ /*
+ ** Retrieve the next token from the tokenizer cursor pCursor. This
+ ** method should either return SQLITE_OK and set the values of the
+ ** "OUT" variables identified below, or SQLITE_DONE to indicate that
+ ** the end of the buffer has been reached, or an SQLite error code.
+ **
+ ** *ppToken should be set to point at a buffer containing the
+ ** normalized version of the token (i.e. after any case-folding and/or
+ ** stemming has been performed). *pnBytes should be set to the length
+ ** of this buffer in bytes. The input text that generated the token is
+ ** identified by the byte offsets returned in *piStartOffset and
+ ** *piEndOffset. *piStartOffset should be set to the index of the first
+ ** byte of the token in the input buffer. *piEndOffset should be set
+ ** to the index of the first byte just past the end of the token in
+ ** the input buffer.
+ **
+ ** The buffer *ppToken is set to point at is managed by the tokenizer
+ ** implementation. It is only required to be valid until the next call
+ ** to xNext() or xClose().
+ */
+ /* TODO(shess) current implementation requires pInput to be
+ ** nul-terminated. This should either be fixed, or pInput/nBytes
+ ** should be converted to zInput.
+ */
+ int (*xNext)(
+ sqlite3_tokenizer_cursor *pCursor, /* Tokenizer cursor */
+ const char **ppToken, int *pnBytes, /* OUT: Normalized text for token */
+ int *piStartOffset, /* OUT: Byte offset of token in input buffer */
+ int *piEndOffset, /* OUT: Byte offset of end of token in input buffer */
+ int *piPosition /* OUT: Number of tokens returned before this one */
+ );
+};
+
+struct sqlite3_tokenizer {
+ const sqlite3_tokenizer_module *pModule; /* The module for this tokenizer */
+ /* Tokenizer implementations will typically add additional fields */
+};
+
+struct sqlite3_tokenizer_cursor {
+ sqlite3_tokenizer *pTokenizer; /* Tokenizer for this cursor. */
+ /* Tokenizer implementations will typically add additional fields */
+};
+
+#endif /* _FTS3_TOKENIZER_H_ */
new file mode 100644
--- /dev/null
+++ b/mailnews/extensions/fts3/src/nsFts3Tokenizer.cpp
@@ -0,0 +1,83 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is mozilla.org code.
+ *
+ * The Initial Developer of the Original Code is
+ * Mozilla Japan.
+ * Portions created by the Initial Developer are Copyright (C) 2009
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ * Makoto Kato <m_kato@ga2.so-net.ne.jp>
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either of the GNU General Public License Version 2 or later (the "GPL"),
+ * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+#include "nsFts3Tokenizer.h"
+
+#include "nsIFts3Tokenizer.h"
+#include "mozIStorageConnection.h"
+#include "mozIStorageStatement.h"
+#include "nsString.h"
+
+extern "C" void sqlite3Fts3PorterTokenizerModule(
+ sqlite3_tokenizer_module const**ppModule);
+
+NS_IMPL_ISUPPORTS1(nsFts3Tokenizer,nsIFts3Tokenizer)
+
+nsFts3Tokenizer::nsFts3Tokenizer()
+{
+}
+
+nsFts3Tokenizer::~nsFts3Tokenizer()
+{
+}
+
+NS_IMETHODIMP
+nsFts3Tokenizer::RegisterTokenizer(mozIStorageConnection *connection)
+{
+    // Registers the "mozporter" tokenizer on |connection| by executing
+    // SELECT fts3_tokenizer(name, blob holding the module pointer).
+    nsCOMPtr <mozIStorageStatement> selectStatement;
+    nsresult rv = connection->CreateStatement(NS_LITERAL_CSTRING(
+        "SELECT fts3_tokenizer(?1, ?2)"),
+        getter_AddRefs(selectStatement));
+    NS_ENSURE_SUCCESS(rv, rv);
+
+    const sqlite3_tokenizer_module* module = nsnull;
+    sqlite3Fts3PorterTokenizerModule(&module);
+    if (!module)
+        return NS_ERROR_FAILURE;
+
+    // Check each bind on its own so a failed name bind is not masked.
+    rv = selectStatement->BindStringParameter(0, NS_LITERAL_STRING("mozporter"));
+    NS_ENSURE_SUCCESS(rv, rv);
+    rv = selectStatement->BindBlobParameter(1, (PRUint8*)&module, sizeof(module));
+    NS_ENSURE_SUCCESS(rv, rv);
+
+    PRBool hasMore;
+    return selectStatement->ExecuteStep(&hasMore);
+}
new file mode 100644
--- /dev/null
+++ b/mailnews/extensions/fts3/src/nsFts3Tokenizer.h
@@ -0,0 +1,59 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is mozilla.org code.
+ *
+ * The Initial Developer of the Original Code is
+ * Mozilla Japan.
+ * Portions created by the Initial Developer are Copyright (C) 2009
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ * Makoto Kato <m_kato@ga2.so-net.ne.jp>
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either of the GNU General Public License Version 2 or later (the "GPL"),
+ * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+#ifndef nsFts3Tokenizer_h__
+#define nsFts3Tokenizer_h__
+
+#include "nsCOMPtr.h"
+#include "nsIFts3Tokenizer.h"
+#include "fts3_tokenizer.h"
+
+extern const sqlite3_tokenizer_module* getWindowsTokenizer(); // NOTE(review): no definition or caller is visible in the lines reviewed here - confirm this declaration is still needed
+
+class nsFts3Tokenizer : public nsIFts3Tokenizer { // implements nsIFts3Tokenizer (RegisterTokenizer in nsFts3Tokenizer.cpp)
+public:
+ NS_DECL_ISUPPORTS
+ NS_DECL_NSIFTS3TOKENIZER
+
+ nsFts3Tokenizer();
+
+private:
+ ~nsFts3Tokenizer();
+};
+
+#endif
new file mode 100644
--- /dev/null
+++ b/mailnews/extensions/fts3/src/nsFts3TokenizerCID.h
@@ -0,0 +1,49 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is mozilla.org code.
+ *
+ * The Initial Developer of the Original Code is
+ * Mozilla Japan.
+ * Portions created by the Initial Developer are Copyright (C) 2009
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ * Makoto Kato <m_kato@ga2.so-net.ne.jp>
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either of the GNU General Public License Version 2 or later (the "GPL"),
+ * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+#ifndef nsFts3TokenizerCID_h__
+#define nsFts3TokenizerCID_h__
+
+#define NS_FTS3TOKENIZER_CONTRACTID \
+ "@mozilla.org/messenger/fts3tokenizer;1" /* contract ID string for the FTS3 tokenizer */
+#define NS_FTS3TOKENIZER_CID \
+{ /* a67d724d-0015-4e2e-8cad-b84775330924 */ \
+ 0xa67d724d, 0x0015, 0x4e2e, \
+ { 0x8c, 0xad, 0xb8, 0x47, 0x75, 0x33, 0x09, 0x24 }} /* same UUID as in the comment above, written as a brace initializer */
+
+#endif /* nsFts3TokenizerCID_h__ */
--- a/mailnews/makefiles.sh
+++ b/mailnews/makefiles.sh
@@ -68,16 +68,20 @@ mailnews/db/msgdb/build/Makefile
mailnews/db/msgdb/public/Makefile
mailnews/db/msgdb/src/Makefile
mailnews/db/msgdb/test/Makefile
mailnews/extensions/Makefile
mailnews/extensions/bayesian-spam-filter/Makefile
mailnews/extensions/bayesian-spam-filter/build/Makefile
mailnews/extensions/bayesian-spam-filter/src/Makefile
mailnews/extensions/bayesian-spam-filter/test/Makefile
+mailnews/extensions/fts3/Makefile
+mailnews/extensions/fts3/build/Makefile
+mailnews/extensions/fts3/public/Makefile
+mailnews/extensions/fts3/src/Makefile
mailnews/extensions/mailviews/Makefile
mailnews/extensions/mailviews/build/Makefile
mailnews/extensions/mailviews/public/Makefile
mailnews/extensions/mailviews/content/Makefile
mailnews/extensions/mailviews/src/Makefile
mailnews/extensions/mdn/Makefile
mailnews/extensions/mdn/build/Makefile
mailnews/extensions/mdn/src/Makefile
--- a/suite/installer/unix/packages
+++ b/suite/installer/unix/packages
@@ -517,16 +517,17 @@ bin/updater
; component binaries
bin/components/libimport.so
bin/components/libmail.so
bin/components/libmsgsmime.so
#endif
; component xpts
bin/components/addrbook.xpt
+bin/components/fts3tok.xpt
bin/components/import.xpt
bin/components/impComm4xMail.xpt
bin/components/mailview.xpt
bin/components/mime.xpt
bin/components/msgbase.xpt
bin/components/msgcompose.xpt
bin/components/msgdb.xpt
bin/components/msgimap.xpt
--- a/suite/installer/windows/packages
+++ b/suite/installer/windows/packages
@@ -527,16 +527,17 @@ bin\updater.exe
bin\components\import.dll
bin\components\mail.dll
bin\components\msgMapi.dll
bin\components\msgsmime.dll
#endif
; component xpts
bin\components\addrbook.xpt
+bin\components\fts3tok.xpt
bin\components\import.xpt
bin\components\impComm4xMail.xpt
bin\components\mailview.xpt
bin\components\mapihook.xpt
bin\components\mime.xpt
bin\components\msgbase.xpt
bin\components\msgcompo.xpt
bin\components\msgdb.xpt