Bug 1356812 - Use telemetry to report unfixable corrupt Places databases. r=bsmedberg, r=past, a=gchang
☠☠ backed out by f2661f7663e0 ☠ ☠
authorMarco Bonardo <mbonardo@mozilla.com>
Fri, 23 Jun 2017 18:15:46 +0200
changeset 356124 4e4f867b0c22ea5fc3694f03ccea54334a1c87e3
parent 356123 b6c9427fffe975a87b76554964faded29f97f810
child 356125 f2661f7663e05cc8b9ba8fdca70acc6d7c770623
push id7217
push userryanvm@gmail.com
push dateWed, 19 Jul 2017 21:21:43 +0000
treeherdermozilla-esr52@4e4f867b0c22 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersbsmedberg, past, gchang
bugs1356812
milestone52.2.1
Bug 1356812 - Use telemetry to report unfixable corrupt Places databases. r=bsmedberg, r=past, a=gchang MozReview-Commit-ID: EZKfMlnCKCf
toolkit/components/places/Database.cpp
toolkit/components/places/Database.h
toolkit/components/places/tests/unit/corruptDB.sqlite
toolkit/components/places/tests/unit/test_corrupt_telemetry.js
toolkit/components/places/tests/unit/xpcshell.ini
toolkit/components/telemetry/Histograms.json
--- a/toolkit/components/places/Database.cpp
+++ b/toolkit/components/places/Database.cpp
@@ -591,57 +591,66 @@ Database::BackupAndReplaceDatabaseFile(n
     nsCOMPtr<nsIFile> backup;
     (void)aStorage->BackupDatabaseFile(databaseFile, DATABASE_CORRUPT_FILENAME,
                                        profDir, getter_AddRefs(backup));
   }
 
   // If anything fails from this point on, we have a stale connection or
   // database file, and there's not much more we can do.
   // The only thing we can try to do is to replace the database on the next
-  // start, and enforce a crash, so it gets reported to us.
+  // startup, and report the problem through telemetry.
+  {
+    enum eCorruptDBReplaceStage : int8_t {
+      stage_closing = 0,
+      stage_removing,
+      stage_reopening,
+      stage_replaced
+    };
+    eCorruptDBReplaceStage stage = stage_closing;
+    auto guard = MakeScopeExit([&]() {
+      if (stage != stage_replaced) {
+        // Reaching this point means the database is corrupt and we failed to
+        // replace it.  For this session part of the application related to
+        // bookmarks and history will misbehave.  The frontend may show a
+        // "locked" notification to the user though.
+        // Set up a pref to try replacing the database at the next startup.
+        Preferences::SetBool(PREF_FORCE_DATABASE_REPLACEMENT, true);
+      }
+      // Report the corruption through telemetry.
+      Telemetry::Accumulate(Telemetry::PLACES_DATABASE_CORRUPTION_HANDLING_STAGE,
+                            static_cast<int8_t>(stage));
+    });
 
-  // Close database connection if open.
-  if (mMainConn) {
-    rv = mMainConn->Close();
-    NS_ENSURE_SUCCESS(rv, ForceCrashAndReplaceDatabase(
-      NS_LITERAL_CSTRING("Unable to close the corrupt database.")));
-  }
+    // Close database connection if open.
+    if (mMainConn) {
+      rv = mMainConn->Close();
+      NS_ENSURE_SUCCESS(rv, rv);
+    }
 
-  // Remove the broken database.
-  rv = databaseFile->Remove(false);
-  if (NS_FAILED(rv) && rv != NS_ERROR_FILE_TARGET_DOES_NOT_EXIST) {
-    return ForceCrashAndReplaceDatabase(
-      NS_LITERAL_CSTRING("Unable to remove the corrupt database file."));
+    // Remove the broken database.
+    stage = stage_removing;
+    rv = databaseFile->Remove(false);
+    if (NS_FAILED(rv) && rv != NS_ERROR_FILE_TARGET_DOES_NOT_EXIST) {
+      return rv;
+    }
+
+    // Create a new database file.
+    // Use an unshared connection, it will consume more memory but avoid shared
+    // cache contentions across threads.
+    stage = stage_reopening;
+    rv = aStorage->OpenUnsharedDatabase(databaseFile, getter_AddRefs(mMainConn));
+    NS_ENSURE_SUCCESS(rv, rv);
+
+    stage = stage_replaced;
   }
 
-  // Create a new database file.
-  // Use an unshared connection, it will consume more memory but avoid shared
-  // cache contentions across threads.
-  rv = aStorage->OpenUnsharedDatabase(databaseFile, getter_AddRefs(mMainConn));
-  NS_ENSURE_SUCCESS(rv, ForceCrashAndReplaceDatabase(
-    NS_LITERAL_CSTRING("Unable to open a new database connection.")));
-
   return NS_OK;
 }
 
 nsresult
-Database::ForceCrashAndReplaceDatabase(const nsCString& aReason)
-{
-  Preferences::SetBool(PREF_FORCE_DATABASE_REPLACEMENT, true);
-  // Ensure that prefs get saved, or we could crash before storing them.
-  nsIPrefService* prefService = Preferences::GetService();
-  if (prefService && NS_SUCCEEDED(prefService->SavePrefFile(nullptr))) {
-    // We could force an application restart here, but we'd like to get these
-    // cases reported to us, so let's force a crash instead.
-    MOZ_CRASH_UNSAFE_OOL(aReason.get());
-  }
-  return NS_ERROR_FAILURE;
-}
-
-nsresult
 Database::InitSchema(bool* aDatabaseMigrated)
 {
   MOZ_ASSERT(NS_IsMainThread());
   *aDatabaseMigrated = false;
 
   // WARNING: any statement executed before setting the journal mode must be
   // finalized, since SQLite doesn't allow changing the journal mode if there
   // is any outstanding statement.
--- a/toolkit/components/places/Database.h
+++ b/toolkit/components/places/Database.h
@@ -220,22 +220,16 @@ protected:
    * one.
    *
    * @param aStorage
    *        mozStorage service instance.
    */
   nsresult BackupAndReplaceDatabaseFile(nsCOMPtr<mozIStorageService>& aStorage);
 
   /**
-   * This should be used as a last resort in case the database is corrupt and
-   * there's no way to fix it in-place.
-   */
-  nsresult ForceCrashAndReplaceDatabase(const nsCString& aReason);
-
-  /**
    * Initializes the database.  This performs any necessary migrations for the
    * database.  All migration is done inside a transaction that is rolled back
    * if any error occurs.
    * @param aDatabaseMigrated
    *        Whether a schema upgrade happened.
    */
   nsresult InitSchema(bool* aDatabaseMigrated);
 
new file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..b234246cac90b17d43d3955156d94276b7e3b503
GIT binary patch
literal 32772
zc%1Fm+i%)d90zdYq?C4JZF<Y@J1<k0312Y85TvRl#fh?#EMtz=R*|X``v8_Ow(Nr-
z+g_qc`$x97N&92^x~D!(+CQ+@{RlX1wkfTmX;Q22NBKg|g>x}F2EP8P9Z12tQ4(^=
zDx7m=QOV_$dyFXxvy_=d$|scK{QpdjQczw&aXyN=_bR>N4Uqd~`S$JHk5?o=7>F?X
z_K6>jhl7aw>8XNxzvDDrhq=x5w!=;}vCjgZZSA<urqf}&ovrO==Mj70JSwpv4+V3b
zhi-|b(J1k<g5*h0$gG7*(haERe2fwkL;rLI^^q?W6VVk(B9i$CVkiT7xYf!=7h$~1
zWq)4uMoF@MC`Ed*#+@%+wzJo6vzD{b+-tkcEU}PJUeZY`Q!iSTdl+Z?i02FOn{hk{
zJT8N1_>U&<xc02M`A9|`k&d44_+PrMOwXp8`(9C3=mE~*k8{8B==1!Nvf|y%=P#wj
zqN)~)g#w*{U@+iX+1BG>uP<esHVorn)H|-$=goSQ^o-M8yTL$EPh+F;w+3bL-qY`X
zd;0kJ`PmXXbIi0Kjh|49?)8GZfq$oJ+SRqH+EQCTxEF3HTSsA<&*zSokBZ*H!kz6x
z;hSJcM>c)9t36m>vrN0bs!i&)=Ire0Vj`w<(Nx)?*2BO{qBQDC-HXDLYQr`v1~+W_
z0IJn}Pb+unp~^)?<NkrInO3d4+U@#fuWT7-&zD^Ur(v|R8_7#vLYIaA^k@0^>GRK<
zSAV@YU-Ni8)~72S^F+j4di~=ir-BYX-4={VJf01!jp9_qrmba7dS8ZvPc5rftyF5f
z>iO2%s%MuQb+b~VYNK2+t$Nio-2SM<EQ@_H8Zyf?ZD!URW~E`)+2*$EH1<z6uz&Uj
zu0N~OAAf$YFd|P1siiUzJiOo7-%X-k!b7$}7bo4{p1s>tqI*d+`40~Q0KA?)rhG)X
zLb*<PoAQn_`|5s;@)qSX<pSjr<*G9K?thVTgL0E{i?U34m+~IveaZ)v4=GC&rp$gv
zOy|l|7AWffur~kz00000000000000000000000000000000000000000000000000
W000000000000000004k<yZje$IAS{h
new file mode 100644
--- /dev/null
+++ b/toolkit/components/places/tests/unit/test_corrupt_telemetry.js
@@ -0,0 +1,31 @@
+/* Any copyright is dedicated to the Public Domain.
+   http://creativecommons.org/publicdomain/zero/1.0/ */
+
+// Tests that history initialization correctly handles a request to forcibly
+// replace the current database.
+
+add_task(async function() {
+  let profileDBPath = await OS.Path.join(OS.Constants.Path.profileDir, "places.sqlite");
+  await OS.File.remove(profileDBPath, {ignoreAbsent: true});
+  // Ensure that our database doesn't already exist.
+  Assert.ok(!(await OS.File.exists(profileDBPath)), "places.sqlite shouldn't exist");
+  let dir = await OS.File.getCurrentDirectory();
+  let src = OS.Path.join(dir, "corruptDB.sqlite");
+  await OS.File.copy(src, profileDBPath);
+  Assert.ok(await OS.File.exists(profileDBPath), "places.sqlite should exist");
+
+  let count = Services.telemetry
+                      .getHistogramById("PLACES_DATABASE_CORRUPTION_HANDLING_STAGE")
+                      .snapshot()
+                      .counts[3];
+  Assert.equal(count, 0, "There should be no telemetry");
+
+  do_check_eq(PlacesUtils.history.databaseStatus,
+              PlacesUtils.history.DATABASE_STATUS_CORRUPT);
+
+  count = Services.telemetry
+                  .getHistogramById("PLACES_DATABASE_CORRUPTION_HANDLING_STAGE")
+                  .snapshot()
+                  .counts[3];
+  Assert.equal(count, 1, "Telemetry should have been added");
+});
--- a/toolkit/components/places/tests/unit/xpcshell.ini
+++ b/toolkit/components/places/tests/unit/xpcshell.ini
@@ -4,16 +4,17 @@ tail =
 firefox-appdir = browser
 skip-if = toolkit == 'android'
 support-files =
   bookmarks.corrupt.html
   bookmarks.json
   bookmarks.preplaces.html
   bookmarks_html_singleframe.html
   bug476292.sqlite
+  corruptDB.sqlite
   default.sqlite
   livemark.xml
   mobile_bookmarks_folder_import.json
   mobile_bookmarks_folder_merge.json
   mobile_bookmarks_multiple_folders.json
   mobile_bookmarks_root_import.json
   mobile_bookmarks_root_merge.json
   nsDummyObserver.js
@@ -82,16 +83,17 @@ skip-if = (os == "win" && os_version == 
 [test_bookmarks_html_import_tags.js]
 [test_bookmarks_html_singleframe.js]
 [test_bookmarks_restore_notification.js]
 [test_bookmarks_setNullTitle.js]
 [test_broken_folderShortcut_result.js]
 [test_browserhistory.js]
 [test_bug636917_isLivemark.js]
 [test_childlessTags.js]
+[test_corrupt_telemetry.js]
 [test_crash_476292.js]
 [test_database_replaceOnStartup.js]
 [test_download_history.js]
 # Bug 676989: test fails consistently on Android
 fail-if = os == "android"
 [test_frecency.js]
 [test_frecency_zero_updated.js]
 # Bug 676989: test hangs consistently on Android
--- a/toolkit/components/telemetry/Histograms.json
+++ b/toolkit/components/telemetry/Histograms.json
@@ -4259,16 +4259,25 @@
   },
   "CSP_UNSAFE_EVAL_DOCUMENTS_COUNT": {
     "alert_emails": ["seceng@mozilla.com"],
     "bug_numbers": [1252829],
     "expires_in_version": "55",
     "kind": "count",
     "description": "Number of unique pages that contain an unsafe-eval CSP directive"
   },
+  "PLACES_DATABASE_CORRUPTION_HANDLING_STAGE": {
+    "alert_emails": ["firefox-dev@mozilla.org"],
+    "bug_numbers": [1356812],
+    "expires_in_version": "never",
+    "kind": "enumerated",
+    "n_values": 6,
+    "releaseChannelCollection": "opt-out",
+    "description": "PLACES: stage reached when trying to fix a database corruption , see Places::Database::eCorruptDBReplaceStatus"
+  },
   "PLACES_PAGES_COUNT": {
     "expires_in_version": "never",
     "kind": "exponential",
     "low": 1000,
     "high": 150000,
     "n_buckets": 20,
     "releaseChannelCollection": "opt-out",
     "description": "PLACES: Number of unique pages"