Bug 1378967 - Ignore highlights items with opaque URIs or invalid hosts r=sebastian
authorGrigory Kruglov <gkruglov@mozilla.com>
Thu, 13 Jul 2017 18:07:04 -0400
changeset 368961 87dc187ee1a9
parent 368960 5bd83e02ab47
child 368962 307eb68974d8
push id32178
push userkwierso@gmail.com
push date2017-07-15 00:10 +0000
treeherdermozilla-central@9ce83907a2bc [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewerssebastian
bugs1378967
milestone56.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1378967 - Ignore highlights items with opaque URIs or invalid hosts r=sebastian MozReview-Commit-ID: DMkeqGH79dj
mobile/android/base/java/org/mozilla/gecko/activitystream/ranking/HighlightCandidate.java
mobile/android/base/java/org/mozilla/gecko/activitystream/ranking/HighlightsRanking.java
--- a/mobile/android/base/java/org/mozilla/gecko/activitystream/ranking/HighlightCandidate.java
+++ b/mobile/android/base/java/org/mozilla/gecko/activitystream/ranking/HighlightCandidate.java
@@ -39,17 +39,17 @@ import java.util.Map;
     public @interface Feature {}
 
     @VisibleForTesting final Map<String, Double> features;
     private Highlight highlight;
     private @Nullable String imageUrl;
     private String host;
     private double score;
 
-    public static HighlightCandidate fromCursor(Cursor cursor) {
+    public static HighlightCandidate fromCursor(Cursor cursor) throws InvalidHighlightCandidateException {
         final HighlightCandidate candidate = new HighlightCandidate();
 
         extractHighlight(candidate, cursor);
         extractFeatures(candidate, cursor);
 
         return candidate;
     }
 
@@ -58,17 +58,17 @@ import java.util.Map;
      */
     private static void extractHighlight(HighlightCandidate candidate, Cursor cursor) {
         candidate.highlight = Highlight.fromCursor(cursor);
     }
 
     /**
      * Extract and assign features that will be used for ranking.
      */
-    private static void extractFeatures(HighlightCandidate candidate, Cursor cursor) {
+    private static void extractFeatures(HighlightCandidate candidate, Cursor cursor) throws InvalidHighlightCandidateException {
         candidate.features.put(
                 FEATURE_AGE_IN_DAYS,
                 (System.currentTimeMillis() - cursor.getDouble(cursor.getColumnIndexOrThrow(BrowserContract.History.DATE_LAST_VISITED)))
                         / (1000 * 3600 * 24));
 
         candidate.features.put(
                 FEATURE_VISITS_COUNT,
                 cursor.getDouble(cursor.getColumnIndexOrThrow(BrowserContract.History.VISITS)));
@@ -120,31 +120,33 @@ import java.util.Map;
         }
 
         candidate.features.put(
                 FEATURE_DESCRIPTION_LENGTH,
                 (double) candidate.highlight.getMetadata().getDescriptionLength());
 
         final Uri uri = Uri.parse(candidate.highlight.getUrl());
 
+        // We don't support opaque URIs (such as mailto:...), or URIs which do not have a valid host.
+        // The latter might simply be URIs with invalid characters in them (such as underscore...).
+        // See Bug 1363521 and Bug 1378967.
+        if (!uri.isHierarchical() || uri.getHost() == null) {
+            throw new InvalidHighlightCandidateException();
+        }
+
         candidate.host = uri.getHost();
 
         candidate.features.put(
                 FEATURE_PATH_LENGTH,
                 (double) uri.getPathSegments().size());
 
-        if (uri.isHierarchical()) {
-            candidate.features.put(
-                    FEATURE_QUERY_LENGTH,
-                    (double) uri.getQueryParameterNames().size());
-
-        // Opaque URIs do not support getQueryParameterNames.
-        } else {
-            candidate.features.put(FEATURE_QUERY_LENGTH, 0d);
-        }
+        // Only hierarchical URIs support getQueryParameterNames.
+        candidate.features.put(
+                FEATURE_QUERY_LENGTH,
+                (double) uri.getQueryParameterNames().size());
     }
 
     @VisibleForTesting HighlightCandidate() {
         features = new HashMap<>();
     }
 
     /* package-private */ double getScore() {
         return score;
@@ -189,9 +191,13 @@ import java.util.Map;
         for (Map.Entry<String, Double> entry : features.entrySet()) {
             if (filter.call(entry.getKey())) {
                 filteredFeatures.put(entry.getKey(), entry.getValue());
             }
         }
 
         return filteredFeatures;
     }
+
+    /* package-private */ static class InvalidHighlightCandidateException extends Exception {
+        private static final long serialVersionUID = 949263104621445850L;
+    }
 }
--- a/mobile/android/base/java/org/mozilla/gecko/activitystream/ranking/HighlightsRanking.java
+++ b/mobile/android/base/java/org/mozilla/gecko/activitystream/ranking/HighlightsRanking.java
@@ -2,16 +2,17 @@
  * This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 package org.mozilla.gecko.activitystream.ranking;
 
 import android.database.Cursor;
 import android.support.annotation.VisibleForTesting;
+import android.util.Log;
 
 import org.mozilla.gecko.activitystream.homepanel.model.Highlight;
 
 import java.util.Arrays;
 import java.util.Comparator;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
@@ -36,16 +37,18 @@ import static org.mozilla.gecko.activity
  * of good highlights. The result set is likely smaller than the cursor size.
  *
  * - First we calculate an initial score based on how frequent we visit the URL and domain.
  * - Then we multiply some (normalized) feature values with weights to calculate:
  *      initialScore * e ^ -(sum of weighted features)
  * - Finally we adjust the score with some custom rules.
  */
 public class HighlightsRanking {
+    private static final String LOG_TAG = "HighlightsRanking";
+
     private static final Map<String, Double> HIGHLIGHT_WEIGHTS = new HashMap<>();
     static {
         // TODO: Needs confirmation from the desktop team that this is the correct weight mapping (Bug 1336037)
         HIGHLIGHT_WEIGHTS.put(HighlightCandidate.FEATURE_VISITS_COUNT, -0.1);
         HIGHLIGHT_WEIGHTS.put(HighlightCandidate.FEATURE_DESCRIPTION_LENGTH, -0.1);
         HIGHLIGHT_WEIGHTS.put(HighlightCandidate.FEATURE_PATH_LENGTH, -0.1);
 
         HIGHLIGHT_WEIGHTS.put(HighlightCandidate.FEATURE_QUERY_LENGTH, 0.4);
@@ -92,17 +95,22 @@ public class HighlightsRanking {
     /**
      * Extract features for every candidate. The heavy lifting is done in
      * HighlightCandidate.fromCursor().
      */
     @VisibleForTesting static List<HighlightCandidate> extractFeatures(Cursor cursor) {
         return looselyMapCursor(cursor, new Func1<Cursor, HighlightCandidate>() {
             @Override
             public HighlightCandidate call(Cursor cursor) {
-                return HighlightCandidate.fromCursor(cursor);
+                try {
+                    return HighlightCandidate.fromCursor(cursor);
+                } catch (HighlightCandidate.InvalidHighlightCandidateException e) {
+                    Log.w(LOG_TAG, "Skipping invalid highlight item", e);
+                    return null;
+                }
             }
         });
     }
 
     /**
      * Normalize the values of all features listed in NORMALIZATION_FEATURES. Normalization will map
      * the values into the interval of [0,1] based on the min/max values for the features.
      */