Bug 734507 - Provide gloda soft schema bump to recover DB badness from bug 723372 which manifested as a.contact is undefined in gloda.js. r=bienvenu,a=Standard8,f=protz
author Andrew Sutherland <asutherland@asutherland.org>
Sun, 11 Mar 2012 22:11:49 -0700
changeset 10670 39fafab28941bfa7c50824aa40a003dfe3f290c1
parent 10669 151697a4635b391c74dd4744231be9bcc07afe9e
child 10671 37248c37f75712375cfe5da24d49876075f7d032
push id 416
push user bugzilla@standard8.plus.com
push date Tue, 20 Mar 2012 14:54:27 +0000
treeherder comm-beta@39fafab28941 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers bienvenu, Standard8
bugs 734507, 723372
Bug 734507 - Provide gloda soft schema bump to recover DB badness from bug 723372 which manifested as a.contact is undefined in gloda.js. r=bienvenu,a=Standard8,f=protz
mailnews/db/gloda/modules/datastore.js
mailnews/db/gloda/modules/gloda.js
mailnews/db/gloda/modules/index_msg.js
mailnews/db/gloda/modules/indexer.js
mailnews/db/gloda/modules/public.js
mailnews/db/gloda/test/unit/resources/glodaTestHelper.js
mailnews/db/gloda/test/unit/test_migration.js
mailnews/db/gloda/test/unit/xpcshell.ini
mailnews/test/resources/asyncTestUtils.js
--- a/mailnews/db/gloda/modules/datastore.js
+++ b/mailnews/db/gloda/modules/datastore.js
@@ -604,16 +604,23 @@ ExplainedStatementProcessor.prototype = 
     this._ostream.close();
 
     let observerService = Cc["@mozilla.org/observer-service;1"]
                             .getService(Ci.nsIObserverService);
     observerService.removeObserver(this, "quit-application");
   }
 };
 
+// See the documentation on GlodaDatastore._schemaVersion to understand these:
+// - schema versions in [31, 34] written by a newer Thunderbird are accepted
+//    and left intact;
+// - schema versions in [35, 39] are accepted but downgraded by 5 (back into
+//    [30, 34]) so that the newer build re-runs its upgrade logic later;
+// - anything beyond that range causes the database to be nuked and rebuilt.
+const DB_SCHEMA_ACCEPT_LEAVE_LOW = 31,
+      DB_SCHEMA_ACCEPT_LEAVE_HIGH = 34,
+      DB_SCHEMA_ACCEPT_DOWNGRADE_LOW = 35,
+      DB_SCHEMA_ACCEPT_DOWNGRADE_HIGH = 39,
+      DB_SCHEMA_DOWNGRADE_DELTA = 5;
+
 /**
  * Database abstraction layer.  Contains explicit SQL schemas for our
  *  fundamental representations (core 'nouns', if you will) as well as
  *  specialized functions for then dealing with each type of object.  At the
  *  same time, we are beginning to support extension-provided tables, which
  *  call into question whether we really need our hand-rolled code, or could
  *  simply improve the extension-provided table case to work for most of our
  *  hand-rolled cases.
@@ -715,17 +722,97 @@ var GlodaDatastore = {
   kConstraintIn: 1,
   kConstraintRanges: 2,
   kConstraintEquals: 3,
   kConstraintStringLike: 4,
   kConstraintFulltext: 5,
 
   /* ******************* SCHEMA ******************* */
 
-  _schemaVersion: 26,
+  /**
+   * Schema version policy. IMPORTANT!  We expect the following potential things
+   *  to happen in the life of gloda that can impact our schema and the ability
+   *  to move between different versions of Thunderbird:
+   *
+   * - Fundamental changes to the schema so that two versions of Thunderbird
+   *    cannot use the same global database.  To wit, Thunderbird N+1 needs to
+   *    blow away the database of Thunderbird N and reindex from scratch. 
+   *    Likewise, Thunderbird N will need to blow away Thunderbird N+1's
+   *    database because it can't understand it.  And we can't simply use a
+   *    different file because there would be fatal bookkeeping losses.
+   *
+   * - Bidirectional minor schema changes (rare).
+   *    Thunderbird N+1 does something that does not affect Thunderbird N's use
+   *    of the database, and a user switching back to Thunderbird N will not be
+   *    negatively impacted.  It will also be fine when they go back to N+1 and
+   *    N+1 will not be missing any vital data.  The historic example of this is
+   *    when we added a missing index that was important for performance.  In
+   *    that case, Thunderbird N could have potentially left the schema revision
+   *    intact (if there was a safe revision), rather than swapping it on the
+   *    downgrade, compelling N+1 to redo the transform on upgrade.
+   *
+   * - Backwards compatible, upgrade-transition minor schema changes.
+   *    Thunderbird N+1 does something that does not require nuking the
+   *    database / a full re-index, but does require processing on upgrade from
+   *    a version of the database previously used by Thunderbird.  These changes
+   *    do not impact N's ability to use the database.  For example, adding a
+   *    new indexed attribute that affects a small number of messages could be
+   *    handled by issuing a query on upgrade to dirty/index those messages.
+   *    However, if the user goes back to N from N+1, when they upgrade to N+1
+   *    again, we need to re-index.  In this case N would need to have
+   *    downgraded the schema revision.
+   *
+   * - Backwards incompatible, minor schema changes.
+   *    Thunderbird N+1 does something that does not require nuking the database
+   *    but will break Thunderbird N's ability to use the database.
+   *
+   * - Regression fixes.  Sometimes we may land something that screws up
+   *    databases, or the platform changes in a way that breaks our code and we
+   *    had insufficient unit test coverage and so don't detect it until some
+   *    databases have gotten messed up.
+   *
+   * Accordingly, every version of Thunderbird has a concept of potential schema
+   *  versions with associated semantics to prepare for the minor schema upgrade
+   *  cases where inter-op is possible.  These ranges and their semantics are:
+   * - accepts and leaves intact.  Covers:
+   *    - regression fixes that no longer exist with the landing of the upgrade
+   *       code as long as users never go back a build in the given channel.
+   *    - bidirectional minor schema changes.
+   * - accepts but downgrades version to self.  Covers:
+   *    - backwards compatible, upgrade-transition minor schema changes.
+   * - nuke range (anything beyond a specific revision needs to be nuked):
+   *    - backwards incompatible, minor schema changes
+   *    - fundamental changes
+   *
+   *
+   * SO, YOU WANT TO CHANGE THE SCHEMA?
+   *
+   * Use the ranges below for Thunderbird 11 as a guide, bumping things as little
+   *  as possible.  If we start to use up the "accepts and leaves intact" range
+   *  without majorly changing things up, re-do the numbering acceptance range
+   *  to give us additional runway.
+   * 
+   * Also, if we keep needing non-nuking upgrades, consider adding an additional
+   *  table to the database that can tell older versions of Thunderbird what to
+   *  do when confronted with a newer database and where it can set flags to tell
+   *  the newer Thunderbird what the older Thunderbird got up to.  For example,
+   *  it would be much easier if we just tell Thunderbird N what to do when it's
+   *  confronted with the database.
+   *
+   *
+   * CURRENT STATE OF THE MIGRATION LOGIC:
+   *
+   * Thunderbird 11: uses 30 (regression fix from 26)
+   * - accepts and leaves intact: 31-34
+   * - accepts and downgrades by 5: 35-39
+   * - nukes: 40+
+   */
+  _schemaVersion: 30,
+  // what is the schema in the database right now?
+  _actualSchemaVersion: 0,
   _schema: {
     tables: {
 
       // ----- Messages
       folderLocations: {
         columns: [
           ["id", "INTEGER PRIMARY KEY"],
           ["folderURI", "TEXT NOT NULL"],
@@ -1006,26 +1093,51 @@ var GlodaDatastore = {
         dbConnection.executeSimpleSQL("PRAGMA cache_size = "+cacheSize);
         dbConnection.executeSimpleSQL("PRAGMA synchronous = FULL");
 
         // Register custom tokenizer to index all language text
         var tokenizer = Cc["@mozilla.org/messenger/fts3tokenizer;1"].
                           getService(Ci.nsIFts3Tokenizer);
         tokenizer.registerTokenizer(dbConnection);
 
-        if (dbConnection.schemaVersion != this._schemaVersion) {
+        // -- database schema changes
+        let dbSchemaVersion = this._actualSchemaVersion =
+          dbConnection.schemaVersion;
+        // - database from the future!
+        if (dbSchemaVersion > this._schemaVersion) {
+          if (dbSchemaVersion >= DB_SCHEMA_ACCEPT_LEAVE_LOW &&
+              dbSchemaVersion <= DB_SCHEMA_ACCEPT_LEAVE_HIGH) {
+            this._log.debug("db from the future in acceptable range; leaving " +
+                            "version at: " + dbSchemaVersion);
+          }
+          else if (dbSchemaVersion >= DB_SCHEMA_ACCEPT_DOWNGRADE_LOW &&
+                   dbSchemaVersion <= DB_SCHEMA_ACCEPT_DOWNGRADE_HIGH) {
+            let newVersion = dbSchemaVersion - DB_SCHEMA_DOWNGRADE_DELTA;
+            this._log.debug("db from the future in downgrade range; setting " +
+                            "version to " + newVersion + " down from " +
+                            dbSchemaVersion);
+            dbConnection.schemaVersion = this._actualSchemaVersion = newVersion;
+          }
+          // too far from the future, nuke it.
+          else {
+            dbConnection = this._nukeMigration(dbConnection);
+          }
+        }
+        // - database from the past!  migrate it, possibly.
+        else if (dbSchemaVersion < this._schemaVersion) {
           this._log.debug("Need to migrate database.  (DB version: " +
-            dbConnection.schemaVersion + " desired version: " +
+            this._actualSchemaVersion + " desired version: " +
             this._schemaVersion);
           dbConnection = this._migrate(dbService, dbFile,
                                        dbConnection,
-                                       dbConnection.schemaVersion,
+                                       this._actualSchemaVersion,
                                        this._schemaVersion);
-          this._log.debug("Migration completed.");
+          this._log.debug("Migration call completed.");
         }
+        // else: this database is juuust right.
       }
       // Handle corrupt databases, other oddities
       catch (ex) {
         if (ex.result == Cr.NS_ERROR_FILE_CORRUPTED) {
           this._log.warn("Database was corrupt, removing the old one.");
           dbFile.remove(false);
           this._log.warn("Removed old database, creating a new one.");
           dbConnection = this._createDB(dbService, dbFile);
@@ -1291,17 +1403,18 @@ var GlodaDatastore = {
    *  applicable), and their indices.
    */
   _createSchema: function gloda_ds_createSchema(aDBConnection) {
     // -- For each table...
     for each (let [tableName, tableDef] in Iterator(this._schema.tables)) {
       this._createTableSchema(aDBConnection, tableName, tableDef);
     }
 
-    aDBConnection.schemaVersion = this._schemaVersion;
+    // Stamp the freshly-created database with the current schema version and
+    //  mirror it into _actualSchemaVersion so migration checks and the async
+    //  schema-bump logic see the true on-disk state.
+    aDBConnection.schemaVersion = this._actualSchemaVersion =
+      this._schemaVersion;
   },
 
   /**
    * Create a table for a noun, replete with data binding.
    */
   createNounTable: function gloda_ds_createTableIfNotExists(aNounDef) {
     // give it a _jsonText attribute if appropriate...
     if (aNounDef.allowsArbitraryAttrs)
@@ -1328,21 +1441,30 @@ var GlodaDatastore = {
     aNounDef.dbAttribAdjuster = aNounDef._dataBinder.adjustAttributes;
 
     if (aNounDef.schema.genericAttributes) {
       aNounDef.attrTableName = aNounDef.tableName + "Attributes";
       aNounDef.attrIDColumnName = "nounID";
     }
   },
 
+  /**
+   * Blow away the database file backing aDBConnection and create a fresh,
+   *  empty database with the current schema.  Used when the on-disk schema is
+   *  too old (or too far in the future) to be handled in place.
+   *
+   * BUG FIX: the previous revision of this method referenced aDBFile and
+   *  aDBService, neither of which is in scope here (the only parameter is the
+   *  connection, which is also all that both call sites pass), so the nuke
+   *  path would throw a ReferenceError.  Derive the file from the connection
+   *  itself and re-acquire the storage service instead.
+   *
+   * @param aDBConnection An open connection to the doomed database.
+   * @returns a connection to the newly created replacement database.
+   */
+  _nukeMigration: function gloda_ds_nukeMigration(aDBConnection) {
+    // Grab the backing file before closing; the connection knows its file.
+    let dbFile = aDBConnection.databaseFile;
+    aDBConnection.close();
+    dbFile.remove(false);
+    this._log.warn("Global database has been purged due to schema change.  " +
+                   "old version was " + this._actualSchemaVersion +
+                   ", new version is: " + this._schemaVersion);
+    let dbService = Cc["@mozilla.org/storage/service;1"]
+                      .getService(Ci.mozIStorageService);
+    return this._createDB(dbService, dbFile);
+  },
+
   /**
-   * Migrate the database _to the latest version_.  We only keep enough logic
-   *  around to get us to the recent version.  This code is not a time machine!
-   *  If we need to blow away the database to get to the most recent version,
-   *  then that's the sum total of the migration!
+   * Migrate the database _to the latest version_ from an older version.  We
+   *  only keep enough logic around to get us to the recent version.  This code
+   *  is not a time machine!  If we need to blow away the database to get to the
+   *  most recent version, then that's the sum total of the migration!
    */
   _migrate: function gloda_ds_migrate(aDBService, aDBFile, aDBConnection,
                                       aCurVersion, aNewVersion) {
 
     // version 12:
     // - notability column added
     // version 13:
     // - we are adding a new fulltext index column. blow away!
@@ -1369,26 +1491,53 @@ var GlodaDatastore = {
     // version 19
     // - there was a typo that was resulting in deleted getting set to the
     //  numeric value of the javascript undefined value.  (migrate-able)
     // version 20
     // - tokenizer changes to provide for case/accent-folding. (blow away)
     // version 21
     // - add the messagesAttribFastDeletion index we thought was already covered
     //  by an index we removed a while ago (migrate-able)
-    // (version 22-25 GAP being left for incremental, non-explodey change. jump
-    //  the schema to 26 if 22 is the next number but you have an explodey
-    //  change!)
     // version 26
-    // - bump page size and also cache size
-
-    aDBConnection.close();
-    aDBFile.remove(false);
-    this._log.warn("Global database has been purged due to schema change.");
-    return this._createDB(aDBService, aDBFile);
+    // - bump page size and also cache size (blow away)
+    // version 30
+    // - recover from bug 723372 that affected TB 11 beta / TB 12 alpha / TB 13
+    //    trunk.  The fix is bug 734507.  The revision bump happens
+    //    asynchronously. (migrate-able)
+    
+    // nuke if prior to 26
+    if (aCurVersion < 26)
+      return this._nukeMigration(aDBConnection);
+
+    // They must be desiring our "a.contact is undefined" fix!
+    // This fix runs asynchronously as the first indexing job the indexer ever
+    //  performs.  It is scheduled by the enabling of the message indexer and
+    //  it is the one that updates the schema version when done.
+    
+    // return the same DB connection since we didn't create a new one or do
+    //  anything.
+    return aDBConnection;
+  },
+
+  /**
+   * Asynchronously update the schema version; only for use by in-tree callers
+   *  who asynchronously perform migration work triggered by their initial
+   *  indexing sweep and who have properly updated the schema version in all
+   *  the appropriate locations in this file.
+   *
+   * This is done without doing anything about the current transaction state,
+   *  which is desired.
+   *
+   * @param newSchemaVersion The version to persist into the database via
+   *    "PRAGMA user_version".  _actualSchemaVersion is updated synchronously
+   *    so in-process checks see the new value immediately, while the database
+   *    write completes asynchronously.
+   */
+  _updateSchemaVersion: function(newSchemaVersion) {
+    this._actualSchemaVersion = newSchemaVersion;
+    let stmt = this._createAsyncStatement(
+      // we need to concat; pragmas don't like "?1" binds
+      "PRAGMA user_version = " + newSchemaVersion, true);
+    stmt.executeAsync(this.trackAsync());
+    stmt.finalize();
   },
 
   _outstandingAsyncStatements: [],
 
   /**
    * Unless debugging, this is just _realCreateAsyncStatement, but in some
    *  debugging modes this is instead the helpful wrapper
    *  _createExplainedAsyncStatement.
--- a/mailnews/db/gloda/modules/gloda.js
+++ b/mailnews/db/gloda/modules/gloda.js
@@ -246,30 +246,43 @@ var Gloda = {
    */
   kIndexerIdle: 0,
   /**
    * The indexer is doing something.  We used to have other specific states, but
    *  they have been rendered irrelevant and wiped from existence.
    */
   kIndexerIndexing: 1,
 
-  /** Synchronous activities performed, you can drive us more. */
+  /**
+   * Synchronous activities performed that can be thought of as one processing
+   *  token.  Potentially yield the event-loop and re-schedule for later based
+   *  on how long we've actually taken/etc.  The goal here is that code that
+   *  is doing stuff synchronously yields with kWorkSync periodically to make
+   *  sure that it doesn't dominate the event-loop.  Unless the processing
+   *  in question is particularly intensive, it should be reasonable to apply
+   *  some decimation factor (ex: 32 or 64) with the general goal of yielding
+   *  every 3-10 milliseconds.
+   */
   kWorkSync: 0,
   /**
    * Asynchronous activity performed, you need to relinquish flow control and
    *  trust us to call callbackDriver later.
    */
   kWorkAsync: 1,
   /**
    * We are all done with our task, close us and figure out something else to do.
    */
   kWorkDone: 2,
   /**
    * We are not done with our task, but we think it's a good idea to take a
-   *  breather.
+   *  breather because we believe we have tied up the event loop for a
+   *  non-trivial amount of time.  So please re-schedule us in the future.
+   *
+   * This is currently only used internally by the indexer's batching logic;
+   *  minor changes may be required if used by actual indexers.
    */
   kWorkPause: 3,
   /**
    * We are done with our task, and have a result that we are returning.  This
    *  should only be used by your callback handler's doneWithResult method.
    *  Ex: you are passed aCallbackHandle, and you do
    *  "yield aCallbackHandle.doneWithResult(myResult);".
    */
--- a/mailnews/db/gloda/modules/index_msg.js
+++ b/mailnews/db/gloda/modules/index_msg.js
@@ -82,18 +82,33 @@ const GLODA_MESSAGE_ID_PROPERTY = "gloda
  *  |GlodaIndexer.kMessageFilthy|.
  */
 const GLODA_DIRTY_PROPERTY = "gloda-dirty";
 
 /**
  * The sentinel GLODA_MESSAGE_ID_PROPERTY value indicating that a message fails
  *  to index and we should not bother trying again, at least not until a new
  *  release is made.
+ *
+ * This should ideally just flip between 1 and 2, with GLODA_OLD_BAD_MESSAGE_ID
+ *  flipping in the other direction.  If we start having more trailing badness,
+ *  _indexerGetEnumerator and GLODA_OLD_BAD_MESSAGE_ID will need to be altered.
+ *
+ * When flipping this, be sure to update glodaTestHelper.js's copy.
  */
-const GLODA_BAD_MESSAGE_ID = 1;
+const GLODA_BAD_MESSAGE_ID = 2;
+/**
+ * The gloda id we used to use to mark messages as bad, but now should be
+ *  treated as eligible for indexing.  This is only ever used for consideration
+ *  when creating msg header enumerators with `_indexerGetEnumerator` which
+ *  means we only will re-index such messages in an indexing sweep.  Accordingly
+ *  event-driven indexing will still treat such messages as unindexed (and
+ *  unindexable) until an indexing sweep picks them up.
+ */
+const GLODA_OLD_BAD_MESSAGE_ID = 1;
 const GLODA_FIRST_VALID_MESSAGE_ID = 32;
 
 const JUNK_SCORE_PROPERTY = "junkscore";
 const JUNK_SPAM_SCORE_STR = Ci.nsIJunkMailPlugin.IS_SPAM_SCORE.toString();
 const JUNK_HAM_SCORE_STR = Ci.nsIJunkMailPlugin.IS_HAM_SCORE.toString();
 
 const nsIArray = Ci.nsIArray;
 const nsIMsgFolder = Ci.nsIMsgFolder;
@@ -495,16 +510,18 @@ var GlodaMsgIndexer = {
         Ci.nsIMsgFolderNotificationService.msgKeyChanged |
         Ci.nsIMsgFolderNotificationService.folderDeleted |
         Ci.nsIMsgFolderNotificationService.folderMoveCopyCompleted |
         Ci.nsIMsgFolderNotificationService.folderRenamed |
         Ci.nsIMsgFolderNotificationService.itemEvent);
 
     this._enabled = true;
 
+    this._considerSchemaMigration();
+
     this._log.info("Event-Driven Indexing is now " + this._enabled);
   },
   disable: function msg_indexer_disable() {
     // remove FolderLoaded notification listener
     let mailSession = Cc["@mozilla.org/messenger/services/session;1"].
       getService(Ci.nsIMsgMailSession);
     mailSession.RemoveFolderListener(this._folderListener);
 
@@ -724,19 +741,25 @@ var GlodaMsgIndexer = {
    *     that we should treat message with any gloda-id as dirty, not just
    *     messages that have non-bad message id's.
    */
   _indexerGetEnumerator: function gloda_indexer_indexerGetEnumerator(
       aEnumKind, aAllowPreBadIds) {
     if (aEnumKind == this.kEnumMsgsToIndex) {
       // We need to create search terms for messages to index. Messages should
       //  be indexed if they're indexable (local or offline and not expunged)
-      //  and either haven't been indexed or are dirty.
+      //  and either: haven't been indexed, are dirty, or are marked with with
+      //  a former GLODA_BAD_MESSAGE_ID that is no longer our bad marker.  (Our
+      //  bad marker can change on minor schema revs so that we can try and
+      //  reindex those messages exactly once and without needing to go through
+      //  a pass to mark them as needing one more try.)
       // The basic search expression is:
-      //  ((GLODA_MESSAGE_ID_PROPERTY Is 0) || (GLODA_DIRTY_PROPERTY Isnt 0)) &&
+      //  ((GLODA_MESSAGE_ID_PROPERTY Is 0) ||
+      //   (GLODA_MESSAGE_ID_PROPERTY Is GLODA_OLD_BAD_MESSAGE_ID) ||
+      //   (GLODA_DIRTY_PROPERTY Isnt 0)) &&
       //  (JUNK_SCORE_PROPERTY Isnt 100)
       // If the folder !isLocal we add the terms:
       //  - if the folder is offline -- && (Status Is nsMsgMessageFlags.Offline)
       //  - && (Status Isnt nsMsgMessageFlags.Expunged)
 
       let searchSession = Cc["@mozilla.org/messenger/searchSession;1"]
                             .createInstance(Ci.nsIMsgSearchSession);
       let searchTerms = Cc["@mozilla.org/array;1"]
@@ -756,17 +779,29 @@ var GlodaMsgIndexer = {
       searchTerm.op = nsMsgSearchOp.Is;
       let value = searchTerm.value;
       value.attrib = searchTerm.attrib;
       value.status = 0;
       searchTerm.value = value;
       searchTerm.hdrProperty = GLODA_MESSAGE_ID_PROPERTY;
       searchTerms.appendElement(searchTerm, false);
 
-      //  second term: || GLODA_DIRTY_PROPERTY Isnt 0 )
+      // second term: || GLODA_MESSAGE_ID_PROPERTY Is GLODA_OLD_BAD_MESSAGE_ID
+      searchTerm = searchSession.createTerm();
+      searchTerm.booleanAnd = false; // OR
+      searchTerm.attrib = nsMsgSearchAttrib.Uint32HdrProperty;
+      searchTerm.op = nsMsgSearchOp.Is;
+      value = searchTerm.value;
+      value.attrib = searchTerm.attrib;
+      value.status = GLODA_OLD_BAD_MESSAGE_ID;
+      searchTerm.value = value;
+      searchTerm.hdrProperty = GLODA_MESSAGE_ID_PROPERTY;
+      searchTerms.appendElement(searchTerm, false);
+
+      //  third term: || GLODA_DIRTY_PROPERTY Isnt 0 )
       searchTerm = searchSession.createTerm();
       searchTerm.booleanAnd = false;
       searchTerm.endsGrouping = true;
       searchTerm.attrib = nsMsgSearchAttrib.Uint32HdrProperty;
       searchTerm.op = nsMsgSearchOp.Isnt;
       value = searchTerm.value;
       value.attrib = searchTerm.attrib;
       value.status = 0;
@@ -935,19 +970,33 @@ var GlodaMsgIndexer = {
          onSchedule: this._schedule_messageIndex,
          jobCanceled: this._canceled_messageIndex,
          recover: this._recover_indexMessage,
          cleanup: this._cleanup_indexing,
        }],
       ["delete", {
          worker: this._worker_processDeletes,
        }],
+
+      ["fixMissingContacts", {
+        worker: this._worker_fixMissingContacts,
+       }],
     ];
   },
 
+  // Guard so the one-off migration job is scheduled at most once per session.
+  _schemaMigrationInitiated: false,
+  /**
+   * Schedule the asynchronous "fixMissingContacts" job if the database is
+   *  still at schema version 26 (the version affected by the missing-contacts
+   *  regression).  Called when the message indexer is enabled; the job itself
+   *  is what bumps the stored schema version once it completes.
+   */
+  _considerSchemaMigration: function() {
+    if (!this._schemaMigrationInitiated &&
+        GlodaDatastore._actualSchemaVersion === 26) {
+      let job = new IndexingJob("fixMissingContacts", null);
+      GlodaIndexer.indexJob(job);
+      this._schemaMigrationInitiated = true;
+    }
+  },
+
   initialSweep: function() {
     this.indexingSweepNeeded = true;
   },
 
   _indexingSweepActive: false,
   /**
    * Indicate that an indexing sweep is desired.  We kick-off an indexing
    *  sweep at start-up and whenever we receive an event-based notification
@@ -1683,16 +1732,108 @@ var GlodaMsgIndexer = {
       deletedCollection = query.getCollection(aCallbackHandle);
       yield this.kWorkAsync;
     }
     this.pendingDeletions = false;
 
     yield this.kWorkDone;
   },
 
+  /**
+   * One-off migration worker for databases still at schema version 26:
+   *  recreate contact rows that identities reference but that are missing
+   *  from the contacts table (the "a.contact is undefined" breakage), then
+   *  dirty all known folders and request an indexing sweep so affected
+   *  messages get re-indexed.  Finishes by bumping the stored schema version
+   *  to the current one via GlodaDatastore._updateSchemaVersion.
+   */
+  _worker_fixMissingContacts: function(aJob, aCallbackHandle) {
+    let identityContactInfos = [];
+
+    // -- asynchronously get a list of all identities without contacts
+    // The upper bound on the number of messed up contacts is the number of
+    //  contacts in the user's address book.  This should be small enough
+    //  (and the data size small enough) that this won't explode thunderbird.
+    let queryStmt = GlodaDatastore._createAsyncStatement(
+      "SELECT identities.id, identities.contactID, identities.value " +
+        "FROM identities " +
+        "LEFT JOIN contacts ON identities.contactID = contacts.id " +
+        "WHERE identities.kind = 'email' AND contacts.id IS NULL",
+      true);
+    queryStmt.executeAsync({
+      handleResult: function(aResultSet) {
+        let row;
+        while ((row = aResultSet.getNextRow())) {
+          identityContactInfos.push({
+            identityId: row.getInt64(0),
+            contactId: row.getInt64(1),
+            email: row.getString(2)
+          });
+        }
+      },
+      handleError: function(aError) {
+        // Don't swallow SQL errors silently; handleCompletion still fires
+        //  afterwards, so the worker resumes (with an empty result list)
+        //  either way.
+        GlodaMsgIndexer._log.error("Error finding missing contacts: " +
+                                   aError.message);
+      },
+      handleCompletion: function(aReason) {
+        GlodaDatastore._asyncCompleted();
+        aCallbackHandle.wrappedCallback();
+      },
+    });
+    queryStmt.finalize();
+    GlodaDatastore._pendingAsyncStatements++;
+    yield this.kWorkAsync;
+
+    // -- perform fixes only if there were missing contacts
+    if (identityContactInfos.length) {
+      const yieldEvery = 64;
+      // - create the missing contacts
+      for (let i = 0; i < identityContactInfos.length; i++) {
+        if ((i % yieldEvery) === 0)
+          yield this.kWorkSync;
+
+        let info = identityContactInfos[i],
+            card = GlodaUtils.getCardForEmail(info.email),
+            contact = new GlodaContact(
+              GlodaDatastore, info.contactId,
+              null, null,
+              card ? (card.displayName || info.email) : info.email,
+              0, 0);
+        GlodaDatastore.insertContact(contact);
+
+        // update the in-memory rep of the identity to know about the contact
+        //  if there is one.
+        let identity = GlodaCollectionManager.cacheLookupOne(
+                         Gloda.NOUN_IDENTITY, info.identityId, false);
+        if (identity) {
+          // Unfortunately, although this fixes the (reachable) Identity and
+          //  exposes the Contact, it does not make the Contact reachable from
+          //  the collection manager.  This will make explicit queries that look
+          //  up the contact potentially see the case where
+          //  contact.identities[0].contact !== contact.  Alternately, that
+          //  may not happen and instead the "contact" object we created above
+          //  may become unlinked.  (I'd have to trace some logic I don't feel
+          //  like tracing.)  Either way, The potential fallout is minimal
+          //  since the object identity invariant will just lapse and popularity
+          //  on the contact may become stale, and neither of those meaningfully
+          //  affect the operation of anything in Thunderbird.
+          // If we really cared, we could find all the dominant collections
+          //  that reference the identity and update their corresponding
+          //  contact collection to make it reachable.  That use-case does not
+          //  exist outside of here, which is why we're punting.
+          identity._contact = contact;
+          contact._identities = [identity];
+        }
+
+        // NOTE: If the addressbook indexer did anything useful other than
+        //  adapting to name changes, we could schedule indexing of the cards at
+        //  this time.  However, as of this writing, it doesn't, and this task
+        //  is a one-off relevant only to the time of this writing.
+      }
+
+      // - mark all folders as dirty, initiate indexing sweep
+      this.dirtyAllKnownFolders();
+      this.indexingSweepNeeded = true;
+    }
+
+    // -- mark the schema upgrade, be done
+    GlodaDatastore._updateSchemaVersion(GlodaDatastore._schemaVersion);
+    yield this.kWorkDone;
+  },
+
   /**
    * Determine whether a folder is suitable for indexing.
    *
    * @param aMsgFolder An nsIMsgFolder you want to see if we should index.
    *
    * @returns true if we want to index messages in this type of folder, false if
    *     we do not.
    */
@@ -1876,16 +2017,36 @@ var GlodaMsgIndexer = {
   indexMessages: function gloda_index_indexMessages(aFoldersAndMessages) {
     let job = new IndexingJob("message", null);
     job.items = [[GlodaDatastore._mapFolder(fm[0]).id, fm[1]] for each
                  ([i, fm] in Iterator(aFoldersAndMessages))];
     GlodaIndexer.indexJob(job);
   },
 
   /**
+   * Mark all known folders as dirty so that the next indexing sweep goes
+   *  into all folders and checks their contents to see if they need to be
+   *  indexed.
+   *
+   * This is being added for the migration case where we want to try and
+   *  reindex all of the messages that had been marked with the old bad-message
+   *  sentinel (now exposed as GLODA_OLD_BAD_MESSAGE_ID) so that the sweep
+   *  picks them up one more time.
+   */
+  dirtyAllKnownFolders: function gloda_index_msg_dirtyAllKnownFolders() {
+    // Just iterate over the datastore's folder map and tell each folder to
+    //  be dirty if its priority is not disabled.
+    // NOTE(review): only folders already present in the in-memory
+    //  GlodaDatastore._folderByID map are touched here; presumably that map
+    //  covers every folder gloda knows about -- confirm if folders can be
+    //  known but not yet mapped.
+    for each (let [folderID, glodaFolder] in
+              Iterator(GlodaDatastore._folderByID)) {
+      if (glodaFolder.indexingPriority !== glodaFolder.kIndexingNeverPriority)
+        glodaFolder._ensureFolderDirty();
+    }
+  },
+
+  /**
    * Given a message header, return whether this message is likely to have
    * been indexed or not.
    *
    * This means the message must:
    * - Be in a folder eligible for gloda indexing. (Not News, etc.)
    * - Be in a non-filthy folder.
    * - Be gloda-indexed and non-filthy.
    *
--- a/mailnews/db/gloda/modules/indexer.js
+++ b/mailnews/db/gloda/modules/indexer.js
@@ -548,19 +548,20 @@ var GlodaIndexer = {
 
       // if we have an accumulated desire to index things, kick it off again.
       if (this._indexingDesired) {
         this._indexingDesired = false; // it's edge-triggered for now
         this.indexing = true;
       }
 
       // if we have not done an initial sweep, schedule scheduling one.
-      if (!this._initialSweepPerformed)
+      if (!this._initialSweepPerformed) {
         this._longTimer.initWithCallback(this._scheduleInitialSweep,
           this._INITIAL_SWEEP_DELAY, Ci.nsITimer.TYPE_ONE_SHOT);
+      }
     }
     else if (this._enabled && !aEnable) {
       for each (let [iIndexer, indexer] in Iterator(this._indexers)) {
         try {
           indexer.disable();
         } catch (ex) {
           this._log.warn("Helper indexer threw exception on disable: " + ex);
         }
@@ -1448,9 +1449,11 @@ var GlodaIndexer = {
     // shutdown fallback
     else if (aTopic == "quit-application") {
       this._shutdown();
     }
   },
 
 
 };
-GlodaIndexer._init();
+// we used to initialize here; now we have public.js do it for us after the
+//  indexers register themselves so we know about all our built-in indexers
+//  at init-time.
--- a/mailnews/db/gloda/modules/public.js
+++ b/mailnews/db/gloda/modules/public.js
@@ -40,16 +40,20 @@ const EXPORTED_SYMBOLS = ["Gloda"];
 const Cc = Components.classes;
 const Ci = Components.interfaces;
 const Cr = Components.results;
 const Cu = Components.utils;
 
 Cu.import("resource:///modules/gloda/gloda.js");
 Cu.import("resource:///modules/gloda/everybody.js");
 Cu.import("resource:///modules/gloda/indexer.js");
+// initialize the indexer! (who was actually imported as a nested dep by the
+//  things everybody.js imported.)  We waited until now so it could know about
+//  its indexers.
+GlodaIndexer._init();
 Cu.import("resource:///modules/gloda/index_msg.js");
 
 /**
  * Expose some junk
  */
 function proxy(aSourceObj, aSourceAttr, aDestObj, aDestAttr) {
   aDestObj[aDestAttr] = function() {
     return aSourceObj[aSourceAttr].apply(aSourceObj, arguments);
--- a/mailnews/db/gloda/test/unit/resources/glodaTestHelper.js
+++ b/mailnews/db/gloda/test/unit/resources/glodaTestHelper.js
@@ -142,17 +142,18 @@ Components.utils.import("resource:///mod
 Components.utils.import("resource:///modules/gloda/log4moz.js");
 let throwingAppender = new Log4Moz.ThrowingAppender(do_throw);
 throwingAppender.level = Log4Moz.Level.Warn;
 Log4Moz.repository.rootLogger.addAppender(throwingAppender);
 
 var LOG = Log4Moz.repository.getLogger("gloda.test");
 
 // index_msg does not export this, so we need to provide it.
-const GLODA_BAD_MESSAGE_ID = 1;
+const GLODA_BAD_MESSAGE_ID = 2,
+      GLODA_OLD_BAD_MESSAGE_ID = 1;
 
 // -- Add a hook that makes folders not filthy when we first see them.
 register_message_injection_listener({
   /**
    * By default all folders start out filthy.  This is great in the real world
    *  but I went and wrote all the unit tests without entirely thinking about
    *  how this affected said unit tests.  So we add a listener so that we can
    *  force the folders to be clean.
@@ -1054,16 +1055,47 @@ function _SqlExpectationListener(aExpect
                     "does not match expected count of", this.expectedCount,
                     "from caller", this.callerStackFrame,
                     "SQL", this.sqlDesc]);
     async_driver();
   },
 };
 
 /**
+ * Asynchronously run a SQL statement against the gloda database.  This can grow
+ *  binding logic and data returning as needed.
+ *
+ * We run the statement asynchronously to get a consistent view of the database.
+ */
+function sqlRun(sql) {
+  let conn = GlodaDatastore.asyncConnection;
+  let stmt = conn.createAsyncStatement(sql), rows = null;
+
+  mark_action("glodaTestHelper", "running SQL", [sql]);
+  stmt.executeAsync({
+    handleResult: function(aResultSet) {
+      if (!rows)
+        rows = [];
+      let row;
+      while ((row = aResultSet.getNextRow())) {
+        rows.push(row);
+      }
+    },
+    handleError: function(aError) {
+      mark_failure(["SQL error! result:", aError, "SQL: ", sql]);
+    },
+    handleCompletion: function() {
+      async_driver(rows);
+    }
+  });
+  stmt.finalize();
+  return false;
+}
+
+/**
  * Resume execution when the db has run all the async statements whose execution
  *  was queued prior to this call.  We trigger a commit to accomplish this,
  *  although this could also be accomplished without a commit.  (Though we would
  *  have to reach into datastore.js and get at the raw connection or extend
  *  datastore to provide a way to accomplish this.)
  */
 function wait_for_gloda_db_flush() {
   // we already have a mechanism to do this by forcing a commit.  arguably,
@@ -1280,16 +1312,21 @@ function nukeGlodaCachesAndCollections()
 
 /**
  * Add a name-and-address pair as generated by `makeNameAndAddress` to the
  *  personal address book.
  */
 function makeABCardForAddressPair(nameAndAddress) {
   let abManager = Components.classes["@mozilla.org/abmanager;1"]
                             .getService(Components.interfaces.nsIAbManager);
+  // XXX bug 314448 demands that we trigger creation of the ABs...  If we don't
+  //  do this, then the call to addCard will fail if someone else hasn't tickled
+  //  this.
+  abManager.directories;
+
   // kPABData is from abSetup.js
   let addressBook = abManager.getDirectory(kPABData.URI);
 
   let card = Components.classes["@mozilla.org/addressbook/cardproperty;1"]
                        .createInstance(Components.interfaces.nsIAbCard);
   card.displayName = nameAndAddress[0];
   card.primaryEmail = nameAndAddress[1];
 
new file mode 100644
--- /dev/null
+++ b/mailnews/db/gloda/test/unit/test_migration.js
@@ -0,0 +1,115 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/**
+ * Test migration logic by artificially inducing or simulating the problem, then
+ *  trigger the migration logic, then verify things ended up correct, including
+ *  the schema version so a second pass of the logic doesn't happen.  (As
+ *  opposed to checking in an example of a broken database and running against
+ *  that.)
+ **/
+
+load("resources/glodaTestHelper.js");
+
+/**
+ * Fix the fallout from bug 723372 (with this patch for bug 734507) which left
+ *  identities whose e-mails were in the address book without contacts and then
+ *  broke messages involving them.
+ */
+function test_fix_missing_contacts_and_fallout() {
+  // -- setup
+
+  // - Create 4 e-mail addresses, 2 of which are in the address book.  (We want
+  //    to make sure we have to iterate, hence >1).
+  let abPeeps = gMessageGenerator.makeNamesAndAddresses(2),
+      nonAbPeeps = gMessageGenerator.makeNamesAndAddresses(2);
+  makeABCardForAddressPair(abPeeps[0]);
+  makeABCardForAddressPair(abPeeps[1]);
+
+  // - Create messages of the genres [from, to]: [inAB, inAB], [inAB, !inAB],
+  //    [!inAB, inAB], [!inAB, !inAB].  The permutations are black box overkill.
+  // smear the messages over multiple folders for realism
+  let [synFolders, yesyesMsgSet, yesnoMsgSet, noyesMsgSet, nonoMsgSet] =
+    make_folders_with_sets(3, [
+      { count: 2, from: abPeeps[0], to: [abPeeps[1]] },
+      { count: 2, from: abPeeps[1], to: nonAbPeeps },
+      { count: 2, from: nonAbPeeps[0], to: abPeeps },
+      { count: 2, from: nonAbPeeps[1], to: [nonAbPeeps[0]] },
+      ]);
+
+  yield wait_for_message_injection();
+  // union the yeses together; we don't care about their composition
+  let yesMsgSet = yesyesMsgSet.union(yesnoMsgSet).union(noyesMsgSet),
+      noMsgSet = nonoMsgSet;
+
+  // - Let gloda index the messages so the identities get created.
+  yield wait_for_gloda_indexer([yesMsgSet, noMsgSet], { augment: true });
+  // the messages are now indexed and the contacts created
+
+  // - Compel an indexing sweep so the folder's dirty statuses get cleared
+  GlodaMsgIndexer.initialSweep();
+  yield wait_for_gloda_indexer(); // (no new messages to index)
+
+  // - Force a DB commit so the pending commit tracker gets emptied out
+  // (otherwise we need to worry about its state overriding our clobbering)
+  yield wait_for_gloda_db_flush();
+
+  // - delete the contact records for the people in the address book.
+  yield sqlRun("DELETE FROM contacts WHERE id IN (" +
+               yesMsgSet.glodaMessages[0].from.contact.id + ", " +
+               yesMsgSet.glodaMessages[0].to[0].contact.id + ")");
+
+  // - Nuke the gloda caches so we totally forget those contact records.
+  nukeGlodaCachesAndCollections();
+
+  // - Manually mark the messages involving the inAB people with the _old_ bad
+  //    id marker so that our scan will see them.
+  for each (let msgHdr in yesMsgSet.msgHdrs) {
+    msgHdr.setUint32Property("gloda-id", GLODA_OLD_BAD_MESSAGE_ID);
+  }
+
+  // - Set the db schema version to the version with the bug (26).
+  // sanity check that gloda actually populates the value with the current
+  //  version correctly...
+  do_check_eq(GlodaDatastore._actualSchemaVersion,
+              GlodaDatastore._schemaVersion);
+  GlodaDatastore._actualSchemaVersion = 26;
+  yield sqlRun("PRAGMA user_version = 26");
+  // make sure that took, since we check it below as a success indicator.
+  let verRows = yield sqlRun("PRAGMA user_version");
+  do_check_eq(verRows[0].getInt64(0), 26);
+
+
+  // -- test
+  // - trigger the migration logic and request an indexing sweep
+  GlodaMsgIndexer.disable();
+  GlodaMsgIndexer.enable();
+  GlodaMsgIndexer.initialSweep();
+
+  // - wait for the indexer to complete, expecting that the messages that we
+  //    marked bad will get indexed but not the good messages.
+  yield wait_for_gloda_indexer(yesMsgSet, { augment: true });
+
+  // - verify that the identities have contacts again
+  // must have the contact object
+  do_check_neq(yesMsgSet.glodaMessages[0].from.contact, undefined);
+  // the contact's name should come from the address book card
+  do_check_eq(yesMsgSet.glodaMessages[0].from.contact.name, abPeeps[0][0]);
+
+  // - verify that the schema version changed from gloda's perspective and from
+  //    the db's perspective
+  verRows = yield sqlRun("PRAGMA user_version");
+  do_check_eq(verRows[0].getInt64(0), GlodaDatastore._schemaVersion);
+  do_check_eq(GlodaDatastore._actualSchemaVersion,
+              GlodaDatastore._schemaVersion);
+}
+
+var tests = [
+  test_fix_missing_contacts_and_fallout,
+];
+
+function run_test() {
+  configure_message_injection({mode: "local"});
+  glodaHelperRunTests(tests);
+}
--- a/mailnews/db/gloda/test/unit/xpcshell.ini
+++ b/mailnews/db/gloda/test/unit/xpcshell.ini
@@ -15,16 +15,17 @@ tail = tail_gloda.js
 [test_index_junk_imap_online.js]
 [test_index_junk_local.js]
 [test_index_messages_imap_offline.js]
 [test_index_messages_imap_online.js]
 [test_index_messages_imap_online_to_offline.js]
 [test_index_messages_local.js]
 [test_index_sweep_folder.js]
 [test_intl.js]
+[test_migration.js]
 [test_mime_attachments_size.js]
 [test_mime_emitter.js]
 [test_msg_search.js]
 [test_noun_mimetype.js]
 [test_query_core.js]
 [test_query_messages_imap_offline.js]
 [test_query_messages_imap_online.js]
 [test_query_messages_imap_online_to_offline.js]
--- a/mailnews/test/resources/asyncTestUtils.js
+++ b/mailnews/test/resources/asyncTestUtils.js
@@ -76,17 +76,17 @@ var asyncCopyListener = {
   OnProgress: function(aProgress, aProgressMax) {},
   SetMessageKey: function(aMsgKey) {},
   GetMessageId: function() {},
   OnStopCopy: function(aStatus) {
     async_driver();
   }
 };
 
-var asyncGeneratorStack = [];
+var asyncGeneratorStack = [], asyncGeneratorSendValue = undefined;
 
 /**
  * Run a function that may or may not be a generator.  All functions, generator
  *  or not, must return false if they do not want the async_driver to run the
  *  next logical work step.  Returning no value (undefined) or true indicates to
  *  the async_driver that it should execute the next work step.  You would
  *  return false if your function initiates an asynchronous operation and will
  *  ensure that async_driver is called when the async operation completes. (This
@@ -127,17 +127,18 @@ function async_run(aArgs) {
  *  async_driver() is called again once the async operation completes.
  *
  * Note: This function actually schedules the real driver to run after a
  *  timeout. This is to ensure that if you call us from a notification event
  *  that all the other things getting notified get a chance to do their work
  *  before we actually continue execution.  It also keeps our stack traces
  *  cleaner.
  */
-function async_driver() {
+function async_driver(val) {
+  asyncGeneratorSendValue = val;
   do_execute_soon(_async_driver);
   return false;
 }
 
 /**
  * Sentinel object that test helpers can throw to cause a generator stack to
  *  abort without triggering a test failure.  You would use this when you wrap
  *  a utility function to inject faults into the calling function so that you
@@ -147,18 +148,20 @@ function async_driver() {
 var asyncExpectedEarlyAbort = "REAP!";
 
 // the real driver!
 function _async_driver() {
   let curGenerator;
   while (asyncGeneratorStack.length) {
     curGenerator = asyncGeneratorStack[asyncGeneratorStack.length-1][0];
     try {
-      while (curGenerator.next()) {
+      while (curGenerator.send(asyncGeneratorSendValue)) {
+        asyncGeneratorSendValue = undefined;
       }
+      asyncGeneratorSendValue = undefined;
       return false;
     }
     catch (ex) {
       if (ex != StopIteration && ex != asyncExpectedEarlyAbort) {
         let asyncStack = [];
         dump("*******************************************\n");
         dump("Generator explosion!\n");
         dump("Unhappiness at: " + ex.fileName + ":" + ex.lineNumber + "\n");