Bug 505307 - gloda should use nsIMsgDatabase::getFilterEnumerator to avoid creating a lot of js garbage nsMsgHdrs. r=asuth
author: Kent James <kent@caspia.com>
Tue, 01 Sep 2009 17:33:49 -0700
changeset 3489 75644ade5a9a533d44e7dca16de13b21f3f70ef6
parent 3488 b83f71869a2506c6f8d48f7a680c645bf25d2dee
child 3490 856938cc9dbee82f8dfaf817f528289c805c2616
child 3624 96b37b501ea5c25ad20e12397bde8bce017f26f5
push id: unknown
push user: unknown
push date: unknown
reviewers: asuth
bugs: 505307
Bug 505307 - gloda should use nsIMsgDatabase::getFilterEnumerator to avoid creating a lot of js garbage nsMsgHdrs. r=asuth
mailnews/db/gloda/modules/indexer.js
--- a/mailnews/db/gloda/modules/indexer.js
+++ b/mailnews/db/gloda/modules/indexer.js
@@ -939,31 +939,28 @@ var GlodaIndexer = {
   /**
    * The iterator we are using to iterate over the headers in
    *  this._indexingDatabase.
    */
   _indexingIterator: null,
 
   /** folder whose entry we are pending on */
   _pendingFolderEntry: null,
-  /** if we are pending on a folder, do we want an iterator too? */
-  _pendingFolderWantsIterator: false,
 
   /**
    * Common logic that we want to deal with the given folder ID.  Besides
    *  cutting down on duplicate code, this ensures that we are listening on
    *  the folder in case it tries to go away when we are using it.
    *
    * @return true when the folder was successfully entered, false when we need
    *     to pend on notification of updating of the folder (due to re-parsing
    *     or what have you).  In the event of an actual problem, an exception
    *     will escape.
    */
-  _indexerEnterFolder: function gloda_index_indexerEnterFolder(aFolderID,
-                                                               aNeedIterator) {
+  _indexerEnterFolder: function gloda_index_indexerEnterFolder(aFolderID) {
     // leave the folder if we haven't explicitly left it.
     if (this._indexingFolder !== null) {
       this._indexerLeaveFolder();
     }
 
     this._indexingGlodaFolder = GlodaDatastore._mapFolderID(aFolderID);
     this._indexingFolder = this._indexingGlodaFolder.getXPCOMFolder(
                              this._indexingGlodaFolder.kActivityIndexing);
@@ -989,39 +986,36 @@ var GlodaIndexer = {
       //  might get flung around, it won't make it out to us, and will instead
       //  be permuted into an NS_ERROR_NOT_INITIALIZED.)
       catch (e if ((e.result == Cr.NS_ERROR_NOT_INITIALIZED) ||
                    (e.result == NS_MSG_ERROR_FOLDER_SUMMARY_OUT_OF_DATE))) {
         // this means that we need to pend on the update; the listener for
         //  FolderLoaded events will call _indexerCompletePendingFolderEntry.
         this._log.debug("Pending on folder load...");
         this._pendingFolderEntry = this._indexingFolder;
-        this._pendingFolderWantsIterator = aNeedIterator;
         return this.kWorkAsync;
       }
       // we get an nsIMsgDatabase out of this (unsurprisingly) which
       //  explicitly inherits from nsIDBChangeAnnouncer, which has the
       //  AddListener call we want.
       if (this._indexingDatabase == null)
         this._indexingDatabase = this._indexingFolder.msgDatabase;
-      if (aNeedIterator)
-        this._indexerGetIterator();
       this._indexingDatabase.AddListener(this._databaseAnnouncerListener);
     }
     catch (ex) {
       this._log.error("Problem entering folder: " +
                       (this._indexingFolder ?
                          this._indexingFolder.prettiestName : "unknown") +
                       ", skipping. Error was: " + ex.fileName + ":" +
                       ex.lineNumber + ": " + ex);
       this._indexingGlodaFolder.indexing = false;
       this._indexingFolder = null;
       this._indexingGlodaFolder = null;
       this._indexingDatabase = null;
-      this._indexingIterator = null;
+      this._indexingEnumerator = null;
 
       // re-throw, we just wanted to make sure this junk is cleaned up and
       //  get localized error logging...
       throw ex;
     }
 
     return this.kWorkSync;
   },
@@ -1030,48 +1024,125 @@ var GlodaIndexer = {
    * If the folder was still parsing/updating when we tried to enter, then this
    *  handler will get called by the listener who got the FolderLoaded message.
    * All we need to do is get the database reference, register a listener on
    *  the db, and retrieve an iterator if desired.
    */
   _indexerCompletePendingFolderEntry:
       function gloda_indexer_indexerCompletePendingFolderEntry() {
     this._indexingDatabase = this._indexingFolder.msgDatabase;
-    if (this._pendingFolderWantsIterator)
-      this._indexerGetIterator();
     this._indexingDatabase.AddListener(this._databaseAnnouncerListener);
     this._log.debug("...Folder Loaded!");
 
     // the load is no longer pending; we certainly don't want more notifications
     this._pendingFolderEntry = null;
     // indexerEnterFolder returned kWorkAsync, which means we need to notify
     //  the callback driver to get things going again.
     this.callbackDriver();
   },
 
-  _indexerGetIterator: function gloda_indexer_indexerGetIterator() {
-    this._indexingIterator = fixIterator(
-                               this._indexingDatabase.EnumerateMessages(),
-                               Ci.nsIMsgDBHdr);
+  /**
+   * Set this._indexingEnumerator from this._indexingDatabase.
+   * @param aGetAll true: every message; false: only those needing indexing.
+   */
+  _indexerGetEnumerator: function gloda_indexer_indexerGetEnumerator(aGetAll) {
+    if (aGetAll) {
+      this._indexingEnumerator = this._indexingDatabase.EnumerateMessages();
+    }
+
+    else {
+      // We need to create search terms for messages to index. Messages should
+      //  be indexed if they're indexable (local or offline and not expunged)
+      //  and either haven't been indexed or are dirty.
+      // The basic search expression is:
+      //  ((GLODA_MESSAGE_ID_PROPERTY Is 0) || (GLODA_DIRTY_PROPERTY Isnt 0))
+      // If the folder !isLocal we add the terms:
+      //  && (Status Is nsMsgMessageFlags.Offline)
+      //  && (Status Isnt nsMsgMessageFlags.Expunged)
+
+      let searchSession = Cc["@mozilla.org/messenger/searchSession;1"]
+                            .createInstance(Ci.nsIMsgSearchSession);
+      let searchTerms = Cc["@mozilla.org/array;1"]
+                         .createInstance(Ci.nsIMutableArray);
+      let isLocal = this._indexingFolder instanceof Ci.nsIMsgLocalMailFolder;
+
+      searchSession.addScopeTerm(Ci.nsMsgSearchScope.offlineMail,
+                                 this._indexingFolder);
+      let nsMsgSearchAttrib = Ci.nsMsgSearchAttrib;
+      let nsMsgSearchOp = Ci.nsMsgSearchOp;
+
+      // first term: (GLODA_MESSAGE_ID_PROPERTY Is 0
+      let searchTerm = searchSession.createTerm();
+      searchTerm.booleanAnd = false; // actually don't care here
+      searchTerm.beginsGrouping = true;
+      searchTerm.attrib = nsMsgSearchAttrib.Uint32HdrProperty;
+      searchTerm.op = nsMsgSearchOp.Is;
+      let value = searchTerm.value;
+      value.attrib = searchTerm.attrib;
+      value.status = 0;
+      searchTerm.value = value;
+      searchTerm.hdrProperty = GLODA_MESSAGE_ID_PROPERTY;
+      searchTerms.appendElement(searchTerm, false);
+
+      //  second term: || GLODA_DIRTY_PROPERTY Isnt 0 )
+      searchTerm = searchSession.createTerm();
+      searchTerm.booleanAnd = false;
+      searchTerm.endsGrouping = true;
+      searchTerm.attrib = nsMsgSearchAttrib.Uint32HdrProperty;
+      searchTerm.op = nsMsgSearchOp.Isnt;
+      value = searchTerm.value;
+      value.attrib = searchTerm.attrib;
+      value.status = 0;
+      searchTerm.value = value;
+      searchTerm.hdrProperty = GLODA_DIRTY_PROPERTY;
+      searchTerms.appendElement(searchTerm, false);
+
+      if (!isLocal)
+      {
+        //  third term: && Status Is nsMsgMessageFlags.Offline
+        searchTerm = searchSession.createTerm();
+        searchTerm.booleanAnd = true;
+        searchTerm.attrib = nsMsgSearchAttrib.MsgStatus;
+        searchTerm.op = nsMsgSearchOp.Is;
+        value = searchTerm.value;
+        value.attrib = searchTerm.attrib;
+        value.status = Ci.nsMsgMessageFlags.Offline;
+        searchTerm.value = value;
+        searchTerms.appendElement(searchTerm, false);
+
+        // fourth term: && Status Isnt nsMsgMessageFlags.Expunged
+        searchTerm = searchSession.createTerm();
+        searchTerm.booleanAnd = true;
+        searchTerm.attrib = nsMsgSearchAttrib.MsgStatus;
+        searchTerm.op = nsMsgSearchOp.Isnt;
+        value = searchTerm.value;
+        value.attrib = searchTerm.attrib;
+        value.status = Ci.nsMsgMessageFlags.Expunged;
+        searchTerm.value = value;
+        searchTerms.appendElement(searchTerm, false);
+      }
+
+      this._indexingEnumerator = this._indexingDatabase.getFilterEnumerator(searchTerms);
+    }
   },
 
   _indexerLeaveFolder: function gloda_index_indexerLeaveFolder(aExpected) {
     if (this._indexingFolder !== null) {
       if (this._indexingDatabase) {
         this._indexingDatabase.Commit(Ci.nsMsgDBCommitType.kLargeCommit);
         // remove our listener!
         this._indexingDatabase.RemoveListener(this._databaseAnnouncerListener);
       }
       // let the gloda folder know we are done indexing
       this._indexingGlodaFolder.indexing = false;
       // null everyone out
       this._indexingFolder = null;
       this._indexingGlodaFolder = null;
       this._indexingDatabase = null;
-      this._indexingIterator = null;
+      this._indexingEnumerator = null;
     }
   },
 
   /**
    * Event fed to us by our nsIFolderListener when a folder is loaded.  We use
    *  this event to two ends:
    *
    * - Know when a folder we were trying to open to index is actually ready to
@@ -1640,17 +1711,17 @@ var GlodaIndexer = {
     yield this.kWorkDone;
   },
 
   /**
    * Index the contents of a folder.
    */
   _worker_folderIndex: function gloda_worker_folderIndex(aJob) {
     let logDebug = this._log.level <= Log4Moz.Level.Debug;
-    yield this._indexerEnterFolder(aJob.id, true);
+    yield this._indexerEnterFolder(aJob.id);
 
     if (!this.shouldIndexFolder(this._indexingFolder))
       yield this.kWorkDone;
 
     // Make sure listeners get notified about this job.
     this._notifyListeners();
 
     // there is of course a cost to all this header investigation even if we
@@ -1669,98 +1740,81 @@ var GlodaIndexer = {
     //  dirty property.  Once we have done this, we can downgrade the folder's
     //  dirty status to plain dirty.  We do this rather than trying to process
     //  everyone in one go in a filthy context because if we have to terminate
     //  indexing before we quit, we don't want to have to re-index messages next
     //  time.  (This could even lead to never completing indexing in a
     //  pathological situation.)
     let glodaFolder = GlodaDatastore._mapFolder(this._indexingFolder);
     if (glodaFolder.dirtyStatus == glodaFolder.kFolderFilthy) {
+      this._indexerGetEnumerator(true);
       let count = 0;
-      for (let msgHdr in this._indexingIterator) {
+      for (let msgHdr in fixIterator(this._indexingEnumerator,
+                                     Ci.nsIMsgDBHdr)) {
         // we still need to avoid locking up the UI, pause periodically...
         if (++count % HEADER_CHECK_BLOCK_SIZE == 0)
           yield this.kWorkSync;
 
         let glodaMessageId = msgHdr.getUint32Property(
           GLODA_MESSAGE_ID_PROPERTY);
         // if it has a gloda message id, we need to mark it filthy
         if (glodaMessageId != 0)
           msgHdr.setUint32Property(GLODA_DIRTY_PROPERTY, this.kMessageFilthy);
         // if it doesn't have a gloda message id, we will definitely index it,
         //  so no action is required.
       }
       // this will automatically persist to the database
       glodaFolder.dirtyStatus = glodaFolder.kFolderDirty;
-
-      // We used up the iterator, get a new one.
-      this._indexerGetIterator();
     }
 
-    // Whether or not the given message should be indexed.  Messages should
-    // be indexed if they're indexable (local or offline and not expunged)
-    // and either haven't been indexed or are dirty.
-    let shouldIndexMessage = function(msgHdr) {
-      if ((!isLocal &&
-           !(msgHdr.flags & Components.interfaces.nsMsgMessageFlags.Offline)) ||
-          (msgHdr.flags & Components.interfaces.nsMsgMessageFlags.Expunged))
-        return false;
-
-      // returns 0 when missing, which means this message hasn't been indexed
-      if (msgHdr.getUint32Property(GLODA_MESSAGE_ID_PROPERTY) == 0)
-        return true;
-
-      // returns 0 when missing, which means this message is clean
-      return (msgHdr.getUint32Property(GLODA_DIRTY_PROPERTY) != 0);
-    };
-
     // Pass 1: count the number of messages to index.
     //  We do this in order to be able to report to the user what we're doing.
-    // To avoid traversing the entire folder again in the second pass, we could
-    //  cache headers that need indexing here, which would work fine for sparse
-    //  indexing but might eat too much memory for dense indexing.  Perhaps we
-    //  could employ a hybrid approach where we cache up to a certain number
-    //  of headers before falling back to full traversal in the second pass.
     // TODO: give up after reaching a certain number of messages in folders
     //  with ridiculous numbers of messages and make the interface just say
     //  something like "over N messages to go."
-    let count = 0;
+
+    this._indexerGetEnumerator(false);
     let numMessagesToIndex = 0;
-    for (let msgHdr in this._indexingIterator) {
-      // we still need to avoid locking up the UI, pause periodically...
-      if (++count % HEADER_CHECK_BLOCK_SIZE == 0)
-        yield this.kWorkSync;
-
-      if (shouldIndexMessage(msgHdr))
-        ++numMessagesToIndex;
+    let numMessagesOut = {};
+    // Keep going until we run out of headers.
+    while (this._indexingFolder.msgDatabase.nextMatchingHdrs(
+             this._indexingEnumerator,
+             HEADER_CHECK_BLOCK_SIZE * 8, // this way is much faster, do more
+             0, // moot, we don't return headers
+             null, // don't return headers, we just want the count
+             numMessagesOut)) {
+      numMessagesToIndex += numMessagesOut.value;
+      yield this.kWorkSync;
     }
+    numMessagesToIndex += numMessagesOut.value;
 
     aJob.goal = numMessagesToIndex;
 
     if (numMessagesToIndex > 0) {
       // We used up the iterator, get a new one.
-      this._indexerGetIterator();
+      this._indexerGetEnumerator(false);
 
       // Pass 2: index the messages.
       let count = 0;
-      for (let msgHdr in this._indexingIterator) {
+      for (let msgHdr in fixIterator(this._indexingEnumerator,
+                                     Ci.nsIMsgDBHdr)) {
         // per above, we want to periodically release control while doing all
         // this header traversal/investigation.
+        // XXX not clear that this is really needed, since search has its own
+        // method to yield to UI periodically.
         if (++count % HEADER_CHECK_BLOCK_SIZE == 0)
           yield this.kWorkSync;
 
-        if (shouldIndexMessage(msgHdr)) {
-          ++aJob.offset;
-          if (logDebug)
-            this._log.debug(">>>  _indexMessage");
-          yield this._callbackHandle.pushAndGo(this._indexMessage(msgHdr,
-              this._callbackHandle));
-          if (logDebug)
-            this._log.debug("<<<  _indexMessage");
-        }
+        ++aJob.offset;
+        if (logDebug)
+          this._log.debug(">>>  _indexMessage");
+        yield this._callbackHandle.pushAndGo(this._indexMessage(msgHdr,
+            this._callbackHandle));
+        if (logDebug)
+          this._log.debug("<<<  _indexMessage");
       }
     }
 
     glodaFolder.dirtyStatus = glodaFolder.kFolderClean;
 
     // by definition, it's not likely we'll visit this folder again anytime soon
     this._indexerLeaveFolder();
 
@@ -1780,17 +1834,17 @@ var GlodaIndexer = {
     for (; aJob.offset < aJob.items.length; aJob.offset++) {
       let item = aJob.items[aJob.offset];
       // item is either [folder ID, message key] or
       //                [folder ID, message ID]
 
       // get in the folder
       if (!this._indexingGlodaFolder ||
           this._indexingGlodaFolder.id != item[0]) {
-        yield this._indexerEnterFolder(item[0], false);
+        yield this._indexerEnterFolder(item[0]);
 
         // stay out of folders we should not be in!
         if (!this.shouldIndexFolder(this._indexingFolder))
           continue;
 
         folderIsLocal =
           this._indexingFolder instanceof Ci.nsIMsgLocalMailFolder;
       }
@@ -2338,16 +2392,18 @@ var GlodaIndexer = {
         this.indexer._indexingJobGoal++;
       }
       // only queue the message if we haven't overflowed our event-driven budget
       if (this.indexer._pendingAddJob.items.length <
           this.indexer._indexMaxEventQueueMessages)
         this.indexer._pendingAddJob.items.push(
           [GlodaDatastore._mapFolder(msgFolder).id,
            aMsgHdr.messageKey]);
+      else
+        this.indexer.indexingSweepNeeded = true;
       this.indexer.indexing = true;
     },
 
     OnItemAdded: function gloda_indexer_OnItemAdded(aParentItem, aItem) {
     },
     OnItemRemoved: function gloda_indexer_OnItemRemoved(aParentItem, aItem) {
     },
     OnItemPropertyChanged: function gloda_indexer_OnItemPropertyChanged(