Bug 947745 - Clean up the seer database when it gets too big. r=honzab
author Nicholas Hurley <hurley@todesschaf.org>
Fri, 17 Jan 2014 17:45:48 -0800
changeset 164112 5bf92bd8d7ed4ee2ef372c421279dba54e166c77
parent 164111 a61079647dc5fcd265a3b7d2f4c8b7cf6ba45af8
child 164113 c5684d09fa1b7a75fe3b96718147742479eab48d
push id 26026
push user philringnalda@gmail.com
push date Sat, 18 Jan 2014 23:17:27 +0000
treeherder mozilla-central@61fd0f987cf2 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers honzab
bugs 947745
milestone 29.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 947745 - Clean up the seer database when it gets too big. r=honzab
b2g/app/b2g.js
mobile/android/app/mobile.js
modules/libpref/src/init/all.js
netwerk/base/src/Seer.cpp
netwerk/base/src/Seer.h
toolkit/components/telemetry/Histograms.json
--- a/b2g/app/b2g.js
+++ b/b2g/app/b2g.js
@@ -63,16 +63,20 @@ pref("network.http.max-persistent-connec
 
 // spdy
 pref("network.http.spdy.push-allowance", 32768);
 
 // See bug 545869 for details on why these are set the way they are
 pref("network.buffer.cache.count", 24);
 pref("network.buffer.cache.size",  16384);
 
+// predictive actions
+pref("network.seer.max-db-size", 2097152); // bytes
+pref("network.seer.preserve", 50); // percentage of seer data to keep when cleaning up
+
 /* session history */
 pref("browser.sessionhistory.max_total_viewers", 1);
 pref("browser.sessionhistory.max_entries", 50);
 
 /* session store */
 pref("browser.sessionstore.resume_session_once", false);
 pref("browser.sessionstore.resume_from_crash", true);
 pref("browser.sessionstore.resume_from_crash_timeout", 60); // minutes
--- a/mobile/android/app/mobile.js
+++ b/mobile/android/app/mobile.js
@@ -97,16 +97,20 @@ pref("network.http.max-persistent-connec
 
 // spdy
 pref("network.http.spdy.push-allowance", 32768);
 
 // See bug 545869 for details on why these are set the way they are
 pref("network.buffer.cache.count", 24);
 pref("network.buffer.cache.size",  16384);
 
+// predictive actions
+pref("network.seer.max-db-size", 2097152); // bytes
+pref("network.seer.preserve", 50); // percentage of seer data to keep when cleaning up
+
 /* history max results display */
 pref("browser.display.history.maxresults", 100);
 
 /* How many times should have passed before the remote tabs list is refreshed */
 pref("browser.display.remotetabs.timeout", 10);
 
 /* session history */
 pref("browser.sessionhistory.max_total_viewers", 1);
--- a/modules/libpref/src/init/all.js
+++ b/modules/libpref/src/init/all.js
@@ -1268,16 +1268,18 @@ pref("network.seer.subresource-degradati
 pref("network.seer.subresource-degradation.week", 10);
 pref("network.seer.subresource-degradation.month", 25);
 pref("network.seer.subresource-degradation.year", 50);
 pref("network.seer.subresource-degradation.max", 100);
 pref("network.seer.preconnect-min-confidence", 90);
 pref("network.seer.preresolve-min-confidence", 60);
 pref("network.seer.redirect-likely-confidence", 75);
 pref("network.seer.max-queue-size", 50);
+pref("network.seer.max-db-size", 157286400); // bytes
+pref("network.seer.preserve", 80); // percentage of seer data to keep when cleaning up
 
 
 // The following prefs pertain to the negotiate-auth extension (see bug 17578),
 // which provides transparent Kerberos or NTLM authentication using the SPNEGO
 // protocol.  Each pref is a comma-separated list of keys, where each key has
 // the format:
 //   [scheme "://"] [host [":" port]]
 // For example, "foo.com" would match "http://www.foo.com/bar", etc.
--- a/netwerk/base/src/Seer.cpp
+++ b/netwerk/base/src/Seer.cpp
@@ -95,16 +95,21 @@ const char SEER_PRERESOLVE_MIN_PREF[] =
 const int PRERESOLVE_MIN_DEFAULT = 60;
 const char SEER_REDIRECT_LIKELY_PREF[] =
   "network.seer.redirect-likely-confidence";
 const int REDIRECT_LIKELY_DEFAULT = 75;
 
 const char SEER_MAX_QUEUE_SIZE_PREF[] = "network.seer.max-queue-size";
 const uint32_t SEER_MAX_QUEUE_SIZE_DEFAULT = 50;
 
+const char SEER_MAX_DB_SIZE_PREF[] = "network.seer.max-db-size";
+const int32_t SEER_MAX_DB_SIZE_DEFAULT_BYTES = 150 * 1024 * 1024;
+const char SEER_PRESERVE_PERCENTAGE_PREF[] = "network.seer.preserve";
+const int32_t SEER_PRESERVE_PERCENTAGE_DEFAULT = 80;
+
 // All these time values are in usec
 const long long ONE_DAY = 86400LL * 1000000LL;
 const long long ONE_WEEK = 7LL * ONE_DAY;
 const long long ONE_MONTH = 30LL * ONE_DAY;
 const long long ONE_YEAR = 365LL * ONE_DAY;
 
 const long STARTUP_WINDOW = 5L * 60L * 1000000L; // 5min
 
@@ -187,16 +192,20 @@ Seer::Seer()
   ,mPreresolveMinConfidence(PRERESOLVE_MIN_DEFAULT)
   ,mRedirectLikelyConfidence(REDIRECT_LIKELY_DEFAULT)
   ,mMaxQueueSize(SEER_MAX_QUEUE_SIZE_DEFAULT)
   ,mStatements(mDB)
   ,mLastStartupTime(0)
   ,mStartupCount(0)
   ,mQueueSize(0)
   ,mQueueSizeLock("Seer.mQueueSizeLock")
+  ,mCleanupScheduled(false)
+  ,mMaxDBSize(SEER_MAX_DB_SIZE_DEFAULT_BYTES)
+  ,mPreservePercentage(SEER_PRESERVE_PERCENTAGE_DEFAULT)
+  ,mLastCleanupTime(0)
 {
 #if defined(PR_LOGGING)
   gSeerLog = PR_NewLogModule("NetworkSeer");
 #endif
 
   MOZ_ASSERT(!gSeer, "multiple Seer instances!");
   gSeer = this;
 }
@@ -271,16 +280,22 @@ Seer::InstallObserver()
                               PRERESOLVE_MIN_DEFAULT);
   Preferences::AddIntVarCache(&mRedirectLikelyConfidence,
                               SEER_REDIRECT_LIKELY_PREF,
                               REDIRECT_LIKELY_DEFAULT);
 
   Preferences::AddIntVarCache(&mMaxQueueSize, SEER_MAX_QUEUE_SIZE_PREF,
                               SEER_MAX_QUEUE_SIZE_DEFAULT);
 
+  Preferences::AddIntVarCache(&mMaxDBSize, SEER_MAX_DB_SIZE_PREF,
+                              SEER_MAX_DB_SIZE_DEFAULT_BYTES);
+  Preferences::AddIntVarCache(&mPreservePercentage,
+                              SEER_PRESERVE_PERCENTAGE_PREF,
+                              SEER_PRESERVE_PERCENTAGE_DEFAULT);
+
   return rv;
 }
 
 void
 Seer::RemoveObserver()
 {
   MOZ_ASSERT(NS_IsMainThread(), "Removing observer off main thread");
 
@@ -315,16 +330,17 @@ public:
 };
 
 class SeerNewTransactionEvent : public nsRunnable
 {
   NS_IMETHODIMP Run() MOZ_OVERRIDE
   {
     gSeer->CommitTransaction();
     gSeer->BeginTransaction();
+    gSeer->MaybeScheduleCleanup();
     nsRefPtr<SeerCommitTimerInitEvent> event = new SeerCommitTimerInitEvent();
     NS_DispatchToMainThread(event);
     return NS_OK;
   }
 };
 
 NS_IMETHODIMP
 Seer::Observe(nsISupports *subject, const char *topic,
@@ -569,16 +585,21 @@ Seer::EnsureInitStorage()
                          "ON moz_hosts (id, origin);"));
   NS_ENSURE_SUCCESS(rv, rv);
 
   rv = mDB->ExecuteSimpleSQL(
       NS_LITERAL_CSTRING("CREATE INDEX IF NOT EXISTS host_origin_index "
                          "ON moz_hosts (origin);"));
   NS_ENSURE_SUCCESS(rv, rv);
 
+  rv = mDB->ExecuteSimpleSQL(
+      NS_LITERAL_CSTRING("CREATE INDEX IF NOT EXISTS host_load_index "
+                         "ON moz_hosts (last_load);"));
+  NS_ENSURE_SUCCESS(rv, rv);
+
   // And this is the table that keeps track of the hosts for subresources of a
   // pageload.
   rv = mDB->ExecuteSimpleSQL(
       NS_LITERAL_CSTRING("CREATE TABLE IF NOT EXISTS moz_subhosts (\n"
                          "  id INTEGER PRIMARY KEY AUTOINCREMENT,\n"
                          "  hid INTEGER NOT NULL,\n"
                          "  origin TEXT NOT NULL,\n"
                          "  hits INTEGER DEFAULT 0,\n"
@@ -670,16 +691,21 @@ Seer::EnsureInitStorage()
                          ");\n"));
   NS_ENSURE_SUCCESS(rv, rv);
 
   rv = mDB->ExecuteSimpleSQL(
       NS_LITERAL_CSTRING("CREATE INDEX IF NOT EXISTS startup_page_uri_index "
                          "ON moz_startup_pages (uri);"));
   NS_ENSURE_SUCCESS(rv, rv);
 
+  rv = mDB->ExecuteSimpleSQL(
+      NS_LITERAL_CSTRING("CREATE INDEX IF NOT EXISTS startup_page_hit_index "
+                         "ON moz_startup_pages (last_hit);"));
+  NS_ENSURE_SUCCESS(rv, rv);
+
   // This table is similar to moz_hosts above, but uses full URIs instead of
   // hosts so that we can get more specific predictions for URIs that people
   // visit often (such as their email or social network home pages).
   rv = mDB->ExecuteSimpleSQL(
       NS_LITERAL_CSTRING("CREATE TABLE IF NOT EXISTS moz_pages (\n"
                          "  id integer PRIMARY KEY AUTOINCREMENT,\n"
                          "  uri TEXT NOT NULL,\n"
                          "  loads INTEGER DEFAULT 0,\n"
@@ -939,16 +965,18 @@ public:
         rv = NS_ERROR_UNEXPECTED;
     }
 
     gSeer->FreeSpaceInQueue();
 
     Telemetry::AccumulateTimeDelta(Telemetry::SEER_PREDICT_WORK_TIME,
                                    startTime);
 
+    gSeer->MaybeScheduleCleanup();
+
     return rv;
   }
 
 private:
   Seer::UriInfo mTargetURI;
   Seer::UriInfo mSourceURI;
   SeerPredictReason mReason;
   SeerVerifierHandle mVerifier;
@@ -1720,16 +1748,18 @@ public:
       MOZ_ASSERT(false, "Got unexpected value for learn reason");
       rv = NS_ERROR_UNEXPECTED;
     }
 
     gSeer->FreeSpaceInQueue();
 
     Telemetry::AccumulateTimeDelta(Telemetry::SEER_LEARN_WORK_TIME, startTime);
 
+    gSeer->MaybeScheduleCleanup();
+
     return rv;
   }
 private:
   Seer::UriInfo mTargetURI;
   Seer::UriInfo mSourceURI;
   SeerLearnReason mReason;
   TimeStamp mEnqueueTime;
 };
@@ -2229,16 +2259,357 @@ Seer::Reset()
   if (!mInitialized) {
     return NS_ERROR_NOT_AVAILABLE;
   }
 
   nsRefPtr<SeerResetEvent> event = new SeerResetEvent();
   return mIOThread->Dispatch(event, NS_DISPATCH_NORMAL);
 }
 
+class SeerCleanupEvent : public nsRunnable // queued by Seer::MaybeScheduleCleanup()
+{
+public:
+  NS_IMETHOD Run() MOZ_OVERRIDE
+  {
+    gSeer->Cleanup();
+    gSeer->mCleanupScheduled = false; // allows the next cleanup to be scheduled
+    return NS_OK;
+  }
+};
+
+// Steps |stmt|, which must be a query yielding a single integer row (the
+// PRAGMAs used by the two size getters below), and stores column 0 of that
+// row in |*result|. As with the other cached statements in this file, |stmt|
+// is held in a mozStorageStatementScoper while it is in use.
+//
+// Returns true on success. Returns false if |stmt| is null (i.e. the
+// statement could not be created), if stepping it fails, if it yields no
+// row, or if the column can't be read; in that case |*result| should be
+// ignored.
+static bool
+SeerReadInt64Pragma(mozIStorageStatement *stmt, int64_t *result)
+{
+  if (!stmt) {
+    return false;
+  }
+  mozStorageStatementScoper scoper(stmt);
+
+  bool hasRows;
+  nsresult rv = stmt->ExecuteStep(&hasRows);
+  if (NS_FAILED(rv) || !hasRows) {
+    return false;
+  }
+
+  rv = stmt->GetInt64(0, result);
+  return NS_SUCCEEDED(rv);
+}
+
+// Returns the current size (in bytes) of the db file on disk, computed as
+// page_count * page_size. Returns 0 if either value can't be read.
+//
+// The pending transaction is committed before the db is queried, and a new
+// one is begun again on EVERY path out of this function, the failure paths
+// included, so the commit/begin pairing is never left unbalanced.
+int64_t
+Seer::GetDBFileSize()
+{
+  MOZ_ASSERT(!NS_IsMainThread(), "GetDBFileSize called on main thread!");
+
+  CommitTransaction();
+
+  // Number of pages in the db file
+  nsCOMPtr<mozIStorageStatement> countStmt = mStatements.GetCachedStatement(
+      NS_LITERAL_CSTRING("PRAGMA page_count;"));
+
+  // Size (in bytes) of each of those pages
+  nsCOMPtr<mozIStorageStatement> sizeStmt = mStatements.GetCachedStatement(
+      NS_LITERAL_CSTRING("PRAGMA page_size;"));
+
+  int64_t pageCount = 0;
+  int64_t pageSize = 0;
+  int64_t fileSize = 0;
+
+  // && stops at the first pragma that fails, leaving fileSize at 0
+  if (SeerReadInt64Pragma(countStmt, &pageCount) &&
+      SeerReadInt64Pragma(sizeStmt, &pageSize)) {
+    fileSize = pageCount * pageSize;
+  }
+
+  // Not skipped on failure: see the comment at the top of the function
+  BeginTransaction();
+
+  return fileSize;
+}
+
+// Returns the size (in bytes) that the db file will consume on disk AFTER we
+// vacuum the db, computed as (page_count - freelist_count) * page_size.
+// Returns 0 if any of the three values can't be read.
+//
+// As in GetDBFileSize(), the pending transaction is committed before the db
+// is queried, and a new one is begun again on EVERY path out of this
+// function, the failure paths included.
+int64_t
+Seer::GetDBFileSizeAfterVacuum()
+{
+  MOZ_ASSERT(!NS_IsMainThread(), "GetDBFileSizeAfterVacuum called on main thread!");
+
+  CommitTransaction();
+
+  // Number of pages in the db file
+  nsCOMPtr<mozIStorageStatement> countStmt = mStatements.GetCachedStatement(
+      NS_LITERAL_CSTRING("PRAGMA page_count;"));
+
+  // Size (in bytes) of each of those pages
+  nsCOMPtr<mozIStorageStatement> sizeStmt = mStatements.GetCachedStatement(
+      NS_LITERAL_CSTRING("PRAGMA page_size;"));
+
+  // Number of those pages that are unused; these are not counted towards
+  // the post-vacuum size
+  nsCOMPtr<mozIStorageStatement> freeStmt = mStatements.GetCachedStatement(
+      NS_LITERAL_CSTRING("PRAGMA freelist_count;"));
+
+  int64_t pageCount = 0;
+  int64_t pageSize = 0;
+  int64_t freelistCount = 0;
+  int64_t fileSize = 0;
+
+  // && stops at the first pragma that fails, leaving fileSize at 0
+  if (SeerReadInt64Pragma(countStmt, &pageCount) &&
+      SeerReadInt64Pragma(sizeStmt, &pageSize) &&
+      SeerReadInt64Pragma(freeStmt, &freelistCount)) {
+    fileSize = (pageCount - freelistCount) * pageSize;
+  }
+
+  // Not skipped on failure: see the comment at the top of the function
+  BeginTransaction();
+
+  return fileSize;
+}
+
+// Queues a SeerCleanupEvent on mIOThread once the db file has reached
+// mMaxDBSize bytes. Nothing is queued while a previously queued cleanup has
+// yet to run (mCleanupScheduled), in which case the db size isn't even
+// looked at. Each call records exactly one SEER_CLEANUP_SCHEDULED sample:
+// true if this call queued an event, false in every other case.
+void
+Seer::MaybeScheduleCleanup()
+{
+  MOZ_ASSERT(!NS_IsMainThread(), "MaybeScheduleCleanup called on main thread!");
+
+  bool queued = false;
+
+  if (!mCleanupScheduled && GetDBFileSize() >= mMaxDBSize) {
+    // The flag goes up before the dispatch; SeerCleanupEvent takes it back
+    // down after Cleanup() has run.
+    mCleanupScheduled = true;
+    nsRefPtr<SeerCleanupEvent> event = new SeerCleanupEvent();
+    nsresult rv = mIOThread->Dispatch(event, NS_DISPATCH_NORMAL);
+    queued = NS_SUCCEEDED(rv);
+    if (!queued) {
+      // A failed dispatch must not block future cleanups
+      mCleanupScheduled = false;
+    }
+  }
+
+  Telemetry::Accumulate(Telemetry::SEER_CLEANUP_SCHEDULED, queued);
+}
+
+#ifndef ANDROID
+static const long long CLEANUP_CUTOFF = ONE_MONTH;
+#else
+static const long long CLEANUP_CUTOFF = ONE_WEEK;
+#endif
+
+// Deletes the moz_hosts rows whose last_load is no later than
+// |now| - CLEANUP_CUTOFF (a month, or a week on Android). Errors are ignored.
+void
+Seer::CleanupOrigins(PRTime now)
+{
+  PRTime cutoff = now - CLEANUP_CUTOFF;
+  nsCOMPtr<mozIStorageStatement> deleteOrigins = mStatements.GetCachedStatement(
+      NS_LITERAL_CSTRING("DELETE FROM moz_hosts WHERE last_load <= :cutoff"));
+  if (!deleteOrigins) {
+    return;
+  }
+  mozStorageStatementScoper scopedOrigins(deleteOrigins);
+  // |cutoff| is a 64-bit usec timestamp; an Int32 bind would truncate it
+  nsresult rv = deleteOrigins->BindInt64ByName(NS_LITERAL_CSTRING("cutoff"),
+                                               cutoff);
+  RETURN_IF_FAILED(rv);
+  deleteOrigins->Execute();
+}
+
+// Deletes the moz_startup_pages rows whose last_hit is no later than
+// |now| - ONE_WEEK. Errors are ignored.
+void
+Seer::CleanupStartupPages(PRTime now)
+{
+  PRTime cutoff = now - ONE_WEEK;
+  nsCOMPtr<mozIStorageStatement> deletePages = mStatements.GetCachedStatement(
+      NS_LITERAL_CSTRING("DELETE FROM moz_startup_pages WHERE "
+                         "last_hit <= :cutoff"));
+  if (!deletePages) {
+    return;
+  }
+  mozStorageStatementScoper scopedPages(deletePages);
+  // |cutoff| is a 64-bit usec timestamp; an Int32 bind would truncate it
+  nsresult rv = deletePages->BindInt64ByName(NS_LITERAL_CSTRING("cutoff"),
+                                             cutoff);
+  RETURN_IF_FAILED(rv);
+  deletePages->Execute();
+}
+
+// Returns the number of rows in moz_subresources, or 0 if the counting
+// query can't be prepared or run.
+int32_t
+Seer::GetSubresourceCount()
+{
+  int32_t rowCount = 0;
+
+  nsCOMPtr<mozIStorageStatement> stmt = mStatements.GetCachedStatement(
+      NS_LITERAL_CSTRING("SELECT COUNT(id) FROM moz_subresources"));
+  if (stmt) {
+    mozStorageStatementScoper scoper(stmt);
+    bool hasRows;
+    nsresult rv = stmt->ExecuteStep(&hasRows);
+    if (NS_SUCCEEDED(rv) && hasRows) {
+      // The result of the read itself is not checked
+      stmt->GetInt32(0, &rowCount);
+    }
+  }
+
+  return rowCount;
+}
+
+void
+Seer::Cleanup()
+{
+  MOZ_ASSERT(!NS_IsMainThread(), "Seer::Cleanup called on main thread!");
+  // Shrinks the db until it would fit in mPreservePercentage% of mMaxDBSize
+  nsresult rv = EnsureInitStorage();
+  if (NS_FAILED(rv)) {
+    return;
+  }
+  // evictionCutoff is the size (in bytes) we want to get the db back under
+  int64_t dbFileSize = GetDBFileSize();
+  float preservePercentage = static_cast<float>(mPreservePercentage) / 100.0;
+  int64_t evictionCutoff = static_cast<int64_t>(mMaxDBSize) * preservePercentage;
+  if (dbFileSize < evictionCutoff) {
+    return;
+  }
+
+  CommitTransaction();
+  BeginTransaction();
+
+  PRTime now = PR_Now();
+  if (mLastCleanupTime) {
+    Telemetry::Accumulate(Telemetry::SEER_CLEANUP_DELTA,
+                          (now - mLastCleanupTime) / 1000); // usec -> ms
+  }
+  mLastCleanupTime = now; // also the start time used for SEER_CLEANUP_TIME
+  // First pass: drop hosts and startup pages that have not been used lately
+  CleanupOrigins(now);
+  CleanupStartupPages(now);
+
+  dbFileSize = GetDBFileSizeAfterVacuum();
+  if (dbFileSize < evictionCutoff) {
+    // We've deleted enough stuff, time to free up the disk space and be on
+    // our way.
+    VacuumDatabase();
+    Telemetry::Accumulate(Telemetry::SEER_CLEANUP_SUCCEEDED, true);
+    Telemetry::Accumulate(Telemetry::SEER_CLEANUP_TIME,
+                          (PR_Now() - mLastCleanupTime) / 1000);
+    return;
+  }
+  // Second pass: delete subresources, oldest last_hit first, until we fit
+  bool canDelete = true;
+  while (canDelete && (dbFileSize >= evictionCutoff)) {
+    int32_t subresourceCount = GetSubresourceCount();
+    if (!subresourceCount) {
+      canDelete = false;
+      break;
+    }
+
+    // DB size scales pretty much linearly with the number of rows in
+    // moz_subresources, so we can guess how many rows we need to delete pretty
+    // accurately.
+    float percentNeeded = static_cast<float>(dbFileSize - evictionCutoff) /
+      static_cast<float>(dbFileSize);
+
+    int32_t subresourcesToDelete = static_cast<int32_t>(percentNeeded * subresourceCount);
+    if (!subresourcesToDelete) {
+      // We're getting pretty close to nothing here, anyway, so we may as well
+      // just trash it all. This delete cascades to moz_subresources, as well.
+      rv = mDB->ExecuteSimpleSQL(NS_LITERAL_CSTRING("DELETE FROM moz_pages;"));
+      if (NS_FAILED(rv)) {
+        canDelete = false;
+        break;
+      }
+    } else {
+      nsCOMPtr<mozIStorageStatement> deleteStatement = mStatements.GetCachedStatement(
+          NS_LITERAL_CSTRING("DELETE FROM moz_subresources WHERE id IN "
+                            "(SELECT id FROM moz_subresources ORDER BY "
+                            "last_hit ASC LIMIT :limit);"));
+      if (!deleteStatement) {
+        canDelete = false;
+        break;
+      }
+      mozStorageStatementScoper scopedDelete(deleteStatement);
+
+      rv = deleteStatement->BindInt32ByName(NS_LITERAL_CSTRING("limit"),
+                                            subresourcesToDelete);
+      if (NS_FAILED(rv)) {
+        canDelete = false;
+        break;
+      }
+
+      rv = deleteStatement->Execute();
+      if (NS_FAILED(rv)) {
+        canDelete = false;
+        break;
+      }
+
+      // Now we clean up pages that no longer reference any subresources
+      rv = mDB->ExecuteSimpleSQL(
+          NS_LITERAL_CSTRING("DELETE FROM moz_pages WHERE id NOT IN "
+                             "(SELECT DISTINCT(pid) FROM moz_subresources);"));
+      if (NS_FAILED(rv)) {
+        canDelete = false;
+        break;
+      }
+    }
+
+    if (canDelete) {
+      dbFileSize = GetDBFileSizeAfterVacuum(); // re-measure before looping again
+    }
+  }
+
+  if (!canDelete || (dbFileSize >= evictionCutoff)) {
+    // Last-ditch effort to free up space
+    ResetInternal();
+    Telemetry::Accumulate(Telemetry::SEER_CLEANUP_SUCCEEDED, false);
+  } else {
+    // We do this to actually free up the space on disk
+    VacuumDatabase();
+    Telemetry::Accumulate(Telemetry::SEER_CLEANUP_SUCCEEDED, true);
+  }
+  Telemetry::Accumulate(Telemetry::SEER_CLEANUP_TIME,
+                        (PR_Now() - mLastCleanupTime) / 1000);
+}
+
+void
+Seer::VacuumDatabase()
+{
+  MOZ_ASSERT(!NS_IsMainThread(), "VacuumDatabase called on main thread!");
+  // Commit, VACUUM, then open a new transaction; the VACUUM result is ignored
+  CommitTransaction();
+  mDB->ExecuteSimpleSQL(NS_LITERAL_CSTRING("VACUUM;"));
+  BeginTransaction();
+}
+
 #ifdef SEER_TESTS
 class SeerPrepareForDnsTestEvent : public nsRunnable
 {
 public:
   SeerPrepareForDnsTestEvent(int64_t timestamp, const char *uri)
     :mTimestamp(timestamp)
     ,mUri(uri)
   { }
--- a/netwerk/base/src/Seer.h
+++ b/netwerk/base/src/Seer.h
@@ -59,16 +59,17 @@ public:
 private:
   friend class SeerPredictionEvent;
   friend class SeerLearnEvent;
   friend class SeerResetEvent;
   friend class SeerPredictionRunner;
   friend class SeerDBShutdownRunner;
   friend class SeerCommitTimerInitEvent;
   friend class SeerNewTransactionEvent;
+  friend class SeerCleanupEvent;
 
   void CheckForAndDeleteOldDBFile();
   nsresult EnsureInitStorage();
 
   // This is a proxy for the information we need from an nsIURI
   struct UriInfo {
     nsAutoCString spec;
     nsAutoCString origin;
@@ -160,16 +161,26 @@ private:
     mDB->BeginTransaction();
   }
 
   void CommitTransaction()
   {
     mDB->CommitTransaction();
   }
 
+  int64_t GetDBFileSize();
+  int64_t GetDBFileSizeAfterVacuum();
+  void MaybeScheduleCleanup();
+  void Cleanup();
+  void CleanupOrigins(PRTime now);
+  void CleanupStartupPages(PRTime now);
+  int32_t GetSubresourceCount();
+
+  void VacuumDatabase();
+
   // Observer-related stuff
   nsresult InstallObserver();
   void RemoveObserver();
 
   bool mInitialized;
 
   bool mEnabled;
   bool mEnableHoverOnSSL;
@@ -215,14 +226,19 @@ private:
   nsRefPtr<SeerDNSListener> mDNSListener;
 
   nsCOMPtr<nsITimer> mCommitTimer;
 
 #ifdef SEER_TESTS
   friend class SeerPrepareForDnsTestEvent;
   void PrepareForDnsTestInternal(int64_t timestamp, const nsACString &uri);
 #endif
+
+  bool mCleanupScheduled;
+  int32_t mMaxDBSize;
+  int32_t mPreservePercentage;
+  PRTime mLastCleanupTime;
 };
 
 } // ::mozilla::net
 } // ::mozilla
 
 #endif // mozilla_net_Seer_h
--- a/toolkit/components/telemetry/Histograms.json
+++ b/toolkit/components/telemetry/Histograms.json
@@ -2226,16 +2226,40 @@
   },
   "SEER_PREDICT_TIME_TO_INACTION": {
     "expires_in_version": "never",
     "kind": "exponential",
     "high": "3000",
     "n_buckets": 10,
     "description": "How long it takes from the time Predict() is called to the time we figure out there's nothing to do"
   },
+  "SEER_CLEANUP_DELTA": {
+      "expires_in_version": "never",
+      "kind": "exponential",
+      "high": "60000",
+      "n_buckets": 50,
+      "description": "How long between seer db cleanups, in ms"
+  },
+  "SEER_CLEANUP_SUCCEEDED": {
+      "expires_in_version": "never",
+      "kind": "boolean",
+      "description": "Whether or not the seer cleanup succeeded"
+  },
+  "SEER_CLEANUP_TIME": {
+      "expires_in_version": "never",
+      "kind": "exponential",
+      "high": "5000",
+      "n_buckets": 10,
+      "description": "How long it takes to run the seer cleanup, in ms"
+  },
+  "SEER_CLEANUP_SCHEDULED": {
+      "expires_in_version": "never",
+      "kind": "boolean",
+      "description": "Whether or not we actually try the cleanup method when we think about it"
+  },
   "FIND_PLUGINS": {
     "expires_in_version": "never",
     "kind": "exponential",
     "high": "3000",
     "n_buckets": 10,
     "extended_statistics_ok": true,
     "description": "Time spent scanning filesystem for plugins (ms)"
   },