Bug 1265525 - Part 2: Frecency calculation and top sites query updates r=sebastian draft
authorGrigory Kruglov <gkruglov@mozilla.com>
Mon, 02 May 2016 14:10:23 -0700
changeset 362653 d10261276c8cb9905aa61121c5bf711b2b23ca4a
parent 362652 74fbcd8c80710dd5ffc7e4d258941ee16bac4cb1
child 519854 2bcad0abcef24f46d9c08bd833a00d5dd6855c8a
push id17007
push usergkruglov@mozilla.com
push dateMon, 02 May 2016 21:38:07 +0000
reviewerssebastian
bugs1265525
milestone49.0a1
Bug 1265525 - Part 2: Frecency calculation and top sites query updates r=sebastian MozReview-Commit-ID: 7tqr4IT9635
mobile/android/base/java/org/mozilla/gecko/db/BrowserContract.java
mobile/android/base/java/org/mozilla/gecko/db/BrowserProvider.java
mobile/android/base/java/org/mozilla/gecko/db/LocalBrowserDB.java
mobile/android/tests/background/junit4/src/org/mozilla/gecko/db/BrowserContractTest.java
--- a/mobile/android/base/java/org/mozilla/gecko/db/BrowserContract.java
+++ b/mobile/android/base/java/org/mozilla/gecko/db/BrowserContract.java
@@ -3,16 +3,18 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 package org.mozilla.gecko.db;
 
 import org.mozilla.gecko.AppConstants;
 
 import android.net.Uri;
+import android.support.annotation.NonNull;
+
 import org.mozilla.gecko.annotation.RobocopTarget;
 
 @RobocopTarget
 public class BrowserContract {
     public static final String AUTHORITY = AppConstants.ANDROID_PACKAGE_NAME + ".db.browser";
     public static final Uri AUTHORITY_URI = Uri.parse("content://" + AUTHORITY);
 
     public static final String PASSWORDS_AUTHORITY = AppConstants.ANDROID_PACKAGE_NAME + ".db.passwords";
@@ -52,29 +54,85 @@ public class BrowserContract {
     public static final String PARAM_DATASET_ID = "dataset_id";
     public static final String PARAM_GROUP_BY = "group_by";
 
     static public enum ExpirePriority {
         NORMAL,
         AGGRESSIVE
     }
 
-    static public String getFrecencySortOrder(boolean includesBookmarks, boolean asc) {
-        final String age = "(" + Combined.DATE_LAST_VISITED + " - " + System.currentTimeMillis() + ") / 86400000";
-
-        StringBuilder order = new StringBuilder(Combined.VISITS + " * MAX(1, 100 * 225 / (" + age + "*" + age + " + 225)) ");
+    /**
+     * Produces a SQL expression used for sorting results of the "combined" view by frecency.
+     * Combines remote and local frecency calculations, weighting local visits much heavier.
+     *
+     * @param includesBookmarks When URL is bookmarked, should we give it bonus frecency points?
+     * @param ascending Indicates if sorting order ascending
+     * @return Combined frecency sorting expression
+     */
+    static public String getCombinedFrecencySortOrder(boolean includesBookmarks, boolean ascending) {
+        final long now = System.currentTimeMillis();
+        StringBuilder order = new StringBuilder(getRemoteFrecencySQL(now) + " + " + getLocalFrecencySQL(now));
 
         if (includesBookmarks) {
             order.insert(0, "(CASE WHEN " + Combined.BOOKMARK_ID + " > -1 THEN 100 ELSE 0 END) + ");
         }
 
-        order.append(asc ? " ASC" : " DESC");
+        order.append(ascending ? " ASC" : " DESC");
         return order.toString();
     }
 
+    /**
+     * See Bug 1265525 for details (explanation + graphs) on how Remote frecency compares to Local frecency for different
+     * combinations of visits count and age.
+     *
+     * @param now Base time in milliseconds for age calculation
+     * @return remote frecency SQL calculation
+     */
+    static public String getRemoteFrecencySQL(final long now) {
+        return getFrecencyCalculation(now, 1, 110, Combined.REMOTE_VISITS_COUNT, Combined.REMOTE_DATE_LAST_VISITED);
+    }
+
+    /**
+     * Local frecency SQL calculation. Note higher scale factor and squared visit count which achieve
+     * visits generated locally being much preferred over remote visits.
+     * See Bug 1265525 for details (explanation + comparison graphs).
+     *
+     * @param now Base time in milliseconds for age calculation
+     * @return local frecency SQL calculation
+     */
+    static public String getLocalFrecencySQL(final long now) {
+        String visitCountExpr = "(" + Combined.LOCAL_VISITS_COUNT + " + 2)";
+        visitCountExpr = visitCountExpr + " * " + visitCountExpr;
+
+        return getFrecencyCalculation(now, 2, 225, visitCountExpr, Combined.LOCAL_DATE_LAST_VISITED);
+    }
+
+    /**
+     * Our version of frecency is computed by scaling the number of visits by a multiplier
+     * that approximates Gaussian decay, based on how long ago the entry was last visited.
+     * Since we're limited by the math we can do with sqlite, we're calculating this
+     * approximation using the Cauchy distribution: multiplier = scale_const / (age^2 + scale_const).
+     * For example, with 15 as our scale parameter, we get a scale constant 15^2 = 225. Then:
+     * frecencyScore = numVisits * max(1, 100 * 225 / (age*age + 225)). (See bug 704977)
+     *
+     * @param now Base time in milliseconds for age calculation
+     * @param minFrecency Minimum allowed frecency value
+     * @param multiplier Scale constant
+     * @param visitCountExpr Expression which will produce a visit count
+     * @param lastVisitExpr Expression which will produce "last-visited" timestamp
+     * @return Frecency SQL calculation
+     */
+    static public String getFrecencyCalculation(final long now, final int minFrecency, final int multiplier, @NonNull  final String visitCountExpr, @NonNull final String lastVisitExpr) {
+        final long nowInMicroseconds = now * 1000;
+        final long microsecondsPerDay = 86400000000L;
+        final String ageExpr = "(" + nowInMicroseconds + " - " + lastVisitExpr + ") / " + microsecondsPerDay;
+
+        return visitCountExpr + " * MAX(" + minFrecency + ", 100 * " + multiplier + " / (" + ageExpr + " * " + ageExpr + " + " + multiplier + "))";
+    }
+
     @RobocopTarget
     public interface CommonColumns {
         public static final String _ID = "_id";
     }
 
     @RobocopTarget
     public interface DateSyncColumns {
         public static final String DATE_CREATED = "created";
@@ -240,16 +298,22 @@ public class BrowserContract {
         public static final String VIEW_NAME = "combined";
 
         public static final String VIEW_WITH_FAVICONS = "combined_with_favicons";
 
         public static final Uri CONTENT_URI = Uri.withAppendedPath(AUTHORITY_URI, "combined");
 
         public static final String BOOKMARK_ID = "bookmark_id";
         public static final String HISTORY_ID = "history_id";
+
+        public static final String REMOTE_VISITS_COUNT = "remoteVisitCount";
+        public static final String REMOTE_DATE_LAST_VISITED = "remoteDateLastVisited";
+
+        public static final String LOCAL_VISITS_COUNT = "localVisitCount";
+        public static final String LOCAL_DATE_LAST_VISITED = "localDateLastVisited";
     }
 
     public static final class Schema {
         private Schema() {}
         public static final Uri CONTENT_URI = Uri.withAppendedPath(AUTHORITY_URI, "schema");
 
         public static final String VERSION = "version";
     }
--- a/mobile/android/base/java/org/mozilla/gecko/db/BrowserProvider.java
+++ b/mobile/android/base/java/org/mozilla/gecko/db/BrowserProvider.java
@@ -309,59 +309,58 @@ public class BrowserProvider extends Sha
     }
 
     /**
      * Remove enough history items to bring the database count below <code>retain</code>,
      * removing no items with a modified time after <code>keepAfter</code>.
      *
      * Provide <code>keepAfter</code> less than or equal to zero to skip that check.
      *
-     * Items will be removed according to an approximate frecency calculation.
+     * Items will be removed according to last visited date.
      */
     private void expireHistory(final SQLiteDatabase db, final int retain, final long keepAfter) {
         Log.d(LOGTAG, "Expiring history.");
         final long rows = DatabaseUtils.queryNumEntries(db, TABLE_HISTORY);
 
         if (retain >= rows) {
             debug("Not expiring history: only have " + rows + " rows.");
             return;
         }
 
-        final String sortOrder = BrowserContract.getFrecencySortOrder(false, true);
         final long toRemove = rows - retain;
         debug("Expiring at most " + toRemove + " rows earlier than " + keepAfter + ".");
 
         final String sql;
         if (keepAfter > 0) {
             sql = "DELETE FROM " + TABLE_HISTORY + " " +
                   "WHERE MAX(" + History.DATE_LAST_VISITED + ", " + History.DATE_MODIFIED + ") < " + keepAfter + " " +
                   " AND " + History._ID + " IN ( SELECT " +
                     History._ID + " FROM " + TABLE_HISTORY + " " +
-                    "ORDER BY " + sortOrder + " LIMIT " + toRemove +
+                    "ORDER BY " + History.DATE_LAST_VISITED + " ASC LIMIT " + toRemove +
                   ")";
         } else {
             sql = "DELETE FROM " + TABLE_HISTORY + " WHERE " + History._ID + " " +
                   "IN ( SELECT " + History._ID + " FROM " + TABLE_HISTORY + " " +
-                  "ORDER BY " + sortOrder + " LIMIT " + toRemove + ")";
+                  "ORDER BY " + History.DATE_LAST_VISITED + " ASC LIMIT " + toRemove + ")";
         }
         trace("Deleting using query: " + sql);
 
         beginWrite(db);
         db.execSQL(sql);
     }
 
     /**
      * Remove any thumbnails that for sites that aren't likely to be ever shown.
      * Items will be removed according to a frecency calculation and only if they are not pinned
      *
      * Call this method within a transaction.
      */
     private void expireThumbnails(final SQLiteDatabase db) {
         Log.d(LOGTAG, "Expiring thumbnails.");
-        final String sortOrder = BrowserContract.getFrecencySortOrder(true, false);
+        final String sortOrder = BrowserContract.getCombinedFrecencySortOrder(true, false);
         final String sql = "DELETE FROM " + TABLE_THUMBNAILS +
                            " WHERE " + Thumbnails.URL + " NOT IN ( " +
                              " SELECT " + Combined.URL +
                              " FROM " + Combined.VIEW_NAME +
                              " ORDER BY " + sortOrder +
                              " LIMIT " + DEFAULT_EXPIRY_THUMBNAIL_COUNT +
                            ") AND " + Thumbnails.URL + " NOT IN ( " +
                              " SELECT " + Bookmarks.URL +
@@ -897,17 +896,17 @@ public class BrowserProvider extends Sha
                        Combined.BOOKMARK_ID + ", " +
                        Combined.HISTORY_ID + ", " +
                        Bookmarks.URL + ", " +
                        Bookmarks.TITLE + ", " +
                        Combined.HISTORY_ID + ", " +
                        TopSites.TYPE_TOP + " AS " + TopSites.TYPE +
                        " FROM " + Combined.VIEW_NAME +
                        " WHERE " + ignoreForTopSitesWhereClause +
-                       " ORDER BY " + BrowserContract.getFrecencySortOrder(true, false) +
+                       " ORDER BY " + BrowserContract.getCombinedFrecencySortOrder(true, false) +
                        " LIMIT " + totalLimit,
 
                        ignoreForTopSitesArgs);
 
             if (hasProcessedAnySuggestedSites) {
                 db.execSQL("INSERT INTO " + TABLE_TOPSITES +
                            // We need to LIMIT _after_ selecting the relevant suggested sites, which requires us to
                            // use an additional internal subquery, since we cannot LIMIT a subquery that is part of UNION ALL.
--- a/mobile/android/base/java/org/mozilla/gecko/db/LocalBrowserDB.java
+++ b/mobile/android/base/java/org/mozilla/gecko/db/LocalBrowserDB.java
@@ -564,24 +564,20 @@ public class LocalBrowserDB implements B
             }
         }
 
         if (urlFilter != null) {
             selection = DBUtils.concatenateWhere(selection, "(" + Combined.URL + " NOT LIKE ?)");
             selectionArgs = DBUtils.appendSelectionArgs(selectionArgs, new String[] { urlFilter.toString() });
         }
 
-        // Our version of frecency is computed by scaling the number of visits by a multiplier
-        // that approximates Gaussian decay, based on how long ago the entry was last visited.
-        // Since we're limited by the math we can do with sqlite, we're calculating this
-        // approximation using the Cauchy distribution: multiplier = 15^2 / (age^2 + 15^2).
-        // Using 15 as our scale parameter, we get a constant 15^2 = 225. Following this math,
-        // frecencyScore = numVisits * max(1, 100 * 225 / (age*age + 225)). (See bug 704977)
-        // We also give bookmarks an extra bonus boost by adding 100 points to their frecency score.
-        final String sortOrder = BrowserContract.getFrecencySortOrder(true, false);
+        // Order by combined remote+local frecency score.
+        // Local visits are preferred, so they will by far outweigh remote visits.
+        // Bookmarked history items get extra frecency points.
+        final String sortOrder = BrowserContract.getCombinedFrecencySortOrder(true, false);
 
         return cr.query(combinedUriWithLimit(limit),
                         projection,
                         selection,
                         selectionArgs,
                         sortOrder);
     }
 
new file mode 100644
--- /dev/null
+++ b/mobile/android/tests/background/junit4/src/org/mozilla/gecko/db/BrowserContractTest.java
@@ -0,0 +1,67 @@
+/* Any copyright is dedicated to the Public Domain.
+   http://creativecommons.org/publicdomain/zero/1.0/ */
+
+package org.mozilla.gecko.db;
+
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.mozilla.gecko.background.testhelpers.TestRunner;
+
+import static org.junit.Assert.*;
+
+@RunWith(TestRunner.class)
+public class BrowserContractTest {
+    @Test
+    /**
+     * Test that bookmark and sorting order clauses are set correctly
+     */
+    public void testGetCombinedFrecencySortOrder() throws Exception {
+        String sqlNoBookmarksDesc = BrowserContract.getCombinedFrecencySortOrder(false, false);
+        String sqlNoBookmarksAsc = BrowserContract.getCombinedFrecencySortOrder(false, true);
+        String sqlBookmarksDesc = BrowserContract.getCombinedFrecencySortOrder(true, false);
+        String sqlBookmarksAsc = BrowserContract.getCombinedFrecencySortOrder(true, true);
+
+        assertTrue(sqlBookmarksAsc.endsWith(" ASC"));
+        assertTrue(sqlBookmarksDesc.endsWith(" DESC"));
+        assertTrue(sqlNoBookmarksAsc.endsWith(" ASC"));
+        assertTrue(sqlNoBookmarksDesc.endsWith(" DESC"));
+
+        assertTrue(sqlBookmarksAsc.startsWith("(CASE WHEN bookmark_id > -1 THEN 100 ELSE 0 END) + "));
+        assertTrue(sqlBookmarksDesc.startsWith("(CASE WHEN bookmark_id > -1 THEN 100 ELSE 0 END) + "));
+    }
+
+    @Test
+    /**
+     * Test that calculation string is correct for remote visits
+     * maxFrecency=1, scaleConst=110, correct sql params for visit count and last date
+     * and that time is converted to microseconds.
+     */
+    public void testGetRemoteFrecencySQL() throws Exception {
+        long now = 1;
+        String sql = BrowserContract.getRemoteFrecencySQL(now);
+        String ageExpr = "(" + now * 1000 + " - remoteDateLastVisited) / 86400000000";
+
+        assertEquals(
+                "remoteVisitCount * MAX(1, 100 * 110 / (" + ageExpr + " * " + ageExpr + " + 110))",
+                sql
+        );
+    }
+
+    @Test
+    /**
+     * Test that calculation string is correct for remote visits
+     * maxFrecency=2, scaleConst=225, correct sql params for visit count and last date
+     * and that time is converted to microseconds.
+     */
+    public void testGetLocalFrecencySQL() throws Exception {
+        long now = 1;
+        String sql = BrowserContract.getLocalFrecencySQL(now);
+        String ageExpr = "(" + now * 1000 + " - localDateLastVisited) / 86400000000";
+        String visitCountExpr = "(localVisitCount + 2) * (localVisitCount + 2)";
+
+        assertEquals(
+                visitCountExpr + " * MAX(2, 100 * 225 / (" + ageExpr + " * " + ageExpr + " + 225))",
+                sql
+        );
+    }
+}
\ No newline at end of file