Bug 496453 - "Add corpus information calls to nsIJunkMailPlugin" [r+sr=bienvenu]
author: Kent James <kent@caspia.com>
Tue, 09 Jun 2009 09:08:41 +0100
changeset 2807 119303f10845456473ab867388b1f0415bcf3a6d
parent 2806 22248bfae2b78bd25a93ad4e180404c42759442e
child 2808 61d5f8fd0fe2d3bca890b4b7e69af89c270df8f0
push id: unknown
push user: unknown
push date: unknown
bugs: 496453
Bug 496453 - "Add corpus information calls to nsIJunkMailPlugin" [r+sr=bienvenu]
mailnews/base/search/public/nsIMsgFilterPlugin.idl
mailnews/extensions/bayesian-spam-filter/src/nsBayesianFilter.cpp
mailnews/extensions/bayesian-spam-filter/test/unit/test_junkAsTraits.js
--- a/mailnews/base/search/public/nsIMsgFilterPlugin.idl
+++ b/mailnews/base/search/public/nsIMsgFilterPlugin.idl
@@ -140,17 +140,17 @@ interface nsIMsgTraitDetailListener : ns
   void onMessageTraitDetails(in string aMsgUri,
     in unsigned long aProTrait,
     in unsigned long tokenCount,
     [array, size_is(tokenCount)] in wstring tokenStrings,
     [array, size_is(tokenCount)] in unsigned long tokenPercents,
     [array, size_is(tokenCount)] in unsigned long runningPercents);
 };
 
-[scriptable, uuid(59BA2A0D-2C87-44a0-8ADD-83266A9EA474)]
+[scriptable, uuid(EDB05079-3F8A-46a6-A596-E7FD8E12216B)]
 interface nsIJunkMailPlugin : nsIMsgFilterPlugin
 {
     /**
      * Message classifications.
      */
     const nsMsgJunkStatus UNCLASSIFIED = 0;
     const nsMsgJunkStatus GOOD = 1;
     const nsMsgJunkStatus JUNK = 2;
@@ -301,9 +301,21 @@ interface nsIJunkMailPlugin : nsIMsgFilt
      * @param aMsgWindow       current message window (may be null)
      */
     void detailMessage(
         in string aMsgURI,
         in unsigned long aProTrait,
         in unsigned long aAntiTrait,
         in nsIMsgTraitDetailListener aListener,
         [optional] in nsIMsgWindow aMsgWindow);
+
+    /**
+     * Reports the token count and, for a given trait, the message count
+     * of the training data corpus.
+     *
+     * @param aTrait           trait id (may be 0, or null from script, for no trait)
+     * @param aMessageCount    messages trained with aTrait (meaningful only for nonzero aTrait)
+     *
+     * @return                 token count for all traits
+     */
+
+    unsigned long corpusCounts(in unsigned long aTrait, out unsigned long aMessageCount);
 };
--- a/mailnews/extensions/bayesian-spam-filter/src/nsBayesianFilter.cpp
+++ b/mailnews/extensions/bayesian-spam-filter/src/nsBayesianFilter.cpp
@@ -2117,16 +2117,31 @@ NS_IMETHODIMP nsBayesianFilter::DetailMe
   TokenStreamListener *tokenListener = new TokenStreamListener(analyzer);
   if (!tokenListener)
     return NS_ERROR_OUT_OF_MEMORY;
 
   analyzer->setTokenListener(tokenListener);
   return tokenizeMessage(aMsgURI, aMsgWindow, analyzer);
 }
 
+// Stores the corpus token count in *aTokenCount and, when aMessageCount is
+// given, aTrait's message count (0 if aTrait is 0); NS_ERROR_FAILURE if !mCorpus.
+NS_IMETHODIMP nsBayesianFilter::CorpusCounts(PRUint32 aTrait,
+                                             PRUint32 *aMessageCount,
+                                             PRUint32 *aTokenCount)
+{
+  NS_ENSURE_ARG_POINTER(aTokenCount);
+  if (!mCorpus)
+    return NS_ERROR_FAILURE;
+  *aTokenCount = mCorpus.countTokens();
+  if (aMessageCount) // always write the out-param so callers never read garbage
+    *aMessageCount = aTrait ? mCorpus.getMessageCount(aTrait) : 0;
+  return NS_OK;
+}
+
 /* Corpus Store */
 
 /*
     Format of the training file for version 1:
     [0xFEEDFACE]
     [number good messages][number bad messages]
     [number good tokens]
     [count][length of word]word
--- a/mailnews/extensions/bayesian-spam-filter/test/unit/test_junkAsTraits.js
+++ b/mailnews/extensions/bayesian-spam-filter/test/unit/test_junkAsTraits.js
@@ -55,22 +55,23 @@ const kIsSpamScore = nsIJunkMailPlugin.I
 
 // command functions for test data
 const kTrainJ = 0;  // train using junk method
 const kTrainT = 1;  // train using trait method
 const kClassJ = 2;  // classify using junk method
 const kClassT = 3;  // classify using trait method
 const kForgetJ = 4; // forget training using junk method
 const kForgetT = 5; // forget training using trait method
+const kCounts = 6;  // test token and message counts
 
 var gProArray = [], gAntiArray = []; // traits arrays, pro is junk, anti is good
 var gTest; // currently active test
 
 // The tests array defines the tests to attempt. Format of
-// an element "test" of this array:
+// an element "test" of this array (except for kCounts):
 //
 //   test.command: function to perform, see definitions above
 //   test.fileName: file containing message to test
 //   test.junkPercent: sets the classification (for Class or Forget commands)
 //                     tests the classification (for Class commands)
 //                     As a special case for the no-training tests, if
 //                     junkPercent is negative, test its absolute value
 //                     for percents, but reverse the junk/good classification
@@ -168,16 +169,20 @@ var tests =
    junkPercent: 100,
    traitListener: true,
    junkListener: true},
   {command: kClassT,
    fileName: "spam4.eml",
    junkPercent: 100,
    traitListener: true,
    junkListener: true},
+  {command: kCounts,
+   tokenCount: 66,  // count of tokens in the corpus
+   junkCount: 2,    // count of junk messages in the corpus
+   goodCount: 1},   // count of good messages in the corpus
   {command: kForgetT,
    fileName: "spam4.eml",
    junkPercent: 100,
    traitListener: true,
    junkListener: false},
   {command: kClassT,
    fileName: "spam4.eml",
    junkPercent: 81,
@@ -506,10 +511,25 @@ function startCommand()
         0,           // length of aNewTraits array
         null,        // in array aNewTraits
         tListener ? traitListener :
           null,      // in nsIMsgTraitClassificationListener aTraitListener
         null,        // in nsIMsgWindow aMsgWindow
         jListener ? junkListener :
           null);     // in nsIJunkMailClassificationListener aJunkListener
       break;
+
+    case kCounts:
+      // test counts
+      let msgCount = {};
+      let tokenCount = nsIJunkMailPlugin.corpusCounts(null, {});
+      nsIJunkMailPlugin.corpusCounts(kJunkTrait, msgCount);
+      let junkCount = msgCount.value;
+      nsIJunkMailPlugin.corpusCounts(kGoodTrait, msgCount);
+      let goodCount = msgCount.value;
+      print("tokenCount, junkCount, goodCount is " + tokenCount, junkCount, goodCount);
+      do_check_eq(tokenCount, gTest.tokenCount);
+      do_check_eq(junkCount, gTest.junkCount);
+      do_check_eq(goodCount, gTest.goodCount);
+      do_timeout(0, "startCommand();");
+      break;
   }
 }