browser/extensions/activity-stream/lib/UserDomainAffinityProvider.jsm
author Kris Maglione <maglione.k@gmail.com>
Wed, 24 Jan 2018 15:48:47 -0800
changeset 455769 d4a7c018420e408fbe0a13ffddd2861623fda5a7
parent 440942 d1f0c44b2d7935ffacef0cdde5d4078259377613
child 455772 cc87ad81ff86109c7ea0187424fa9a8ed3b4af6c
permissions -rw-r--r--
Bug 1431533: Part 5a - Auto-rewrite code to use ChromeUtils import methods. r=florian MozReview-Commit-ID: 8V1ZT53ReiP

/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
"use strict";

const {classes: Cc, interfaces: Ci, utils: Cu} = Components;
ChromeUtils.import("resource://gre/modules/Services.jsm");

const history = Cc["@mozilla.org/browser/nav-history-service;1"].getService(Ci.nsINavHistoryService);

const DEFAULT_TIME_SEGMENTS = [
  {"id": "hour", "startTime": 3600, "endTime": 0, "weightPosition": 1},
  {"id": "day", "startTime": 86400, "endTime": 3600, "weightPosition": 0.75},
  {"id": "week", "startTime": 604800, "endTime": 86400, "weightPosition": 0.5},
  {"id": "weekPlus", "startTime": 0, "endTime": 604800, "weightPosition": 0.25},
  {"id": "alltime", "startTime": 0, "endTime": 0, "weightPosition": 0.25}
];

const DEFAULT_PARAMETER_SETS = {
  "linear-frequency": {
    "recencyFactor": 0.4,
    "frequencyFactor": 0.5,
    "combinedDomainFactor": 0.5,
    "perfectFrequencyVisits": 10,
    "perfectCombinedDomainScore": 2,
    "multiDomainBoost": 0.1,
    "itemScoreFactor": 0
  }
};

const DEFAULT_MAX_HISTORY_QUERY_RESULTS = 1000;

function merge(...args) {
  return Object.assign.apply(this, args);
}

/**
 * Provides functionality to personalize content recommendations by calculating
 * user domain affinity scores. These scores are used to calculate relevance
 * scores for items/recs/stories that have domain affinities.
 *
 * The algorithm works as follows:
 *
 * - The recommendation endpoint returns a settings object containing
 * timeSegments and parametersets.
 *
 * - For every time segment we calculate the corresponding domain visit counts,
 * yielding result objects of the following structure: {"mozilla.org": 12,
 * "mozilla.com": 34} (see UserDomainAffinityProvider#queryVisits)
 *
 * - These visit counts are transformed to domain affinity scores for all
 * provided parameter sets: {"mozilla.org": {"paramSet1": 0.8,
 * "paramSet2": 0.9}, "mozilla.org": {"paramSet1": 1, "paramSet2": 0.9}}
 * (see UserDomainAffinityProvider#calculateScoresForParameterSets)
 *
 * - The parameter sets provide factors for weighting which allows for
 * flexible targeting. The functionality to calculate final scores can
 * be seen in UserDomainAffinityProvider#calculateScores
 *
 * - The user domain affinity scores are summed up across all time segments
 * see UserDomainAffinityProvider#calculateAllUserDomainAffinityScores
 *
 * - An item's domain affinities are matched to the user's domain affinity
 * scores by calculating an item relevance score
 * (see UserDomainAffinityProvider#calculateItemRelevanceScore)
 *
 * - The item relevance scores are used to sort items (see TopStoriesFeed for
 * more details)
 *
 * - The data structure was chosen to allow for fast cache lookups during
 * relevance score calculation. While user domain affinities are calculated
 * infrequently (i.e. only once a day), the item relevance score (potentially)
 * needs to be calculated every time the feed updates. Therefore allowing cache
 * lookups of scores[domain][parameterSet] is beneficial
 */
this.UserDomainAffinityProvider = class UserDomainAffinityProvider {
  constructor(
    timeSegments = DEFAULT_TIME_SEGMENTS,
    parameterSets = DEFAULT_PARAMETER_SETS,
    maxHistoryQueryResults = DEFAULT_MAX_HISTORY_QUERY_RESULTS,
    version,
    scores) {
    this.timeSegments = timeSegments;
    this.maxHistoryQueryResults = maxHistoryQueryResults;
    this.version = version;
    if (scores) {
      this.parameterSets = parameterSets;
      this.scores = scores;
    } else {
      this.parameterSets = this.prepareParameterSets(parameterSets);
      this.scores = this.calculateAllUserDomainAffinityScores();
    }
  }

  /**
   * Adds dynamic parameters to the given parameter sets that need to be
   * computed based on time segments.
   *
   * @param ps The parameter sets
   * @return Updated parameter sets with additional fields (i.e. timeSegmentWeights)
   */
  prepareParameterSets(ps) {
    return Object
      .keys(ps)
      // Add timeSegmentWeight fields to param sets e.g. timeSegmentWeights: {"hour": 1, "day": 0.8915, ...}
      .map(k => ({[k]: merge(ps[k], {timeSegmentWeights: this.calculateTimeSegmentWeights(ps[k].recencyFactor)})}))
      .reduce((acc, cur) => merge(acc, cur));
  }

  /**
   * Calculates a time segment weight based on the provided recencyFactor.
   *
   * @param recencyFactor The recency factor indicating how to weigh recency
   * @return An object containing time segment weights: {"hour": 0.987, "day": 1}
   */
  calculateTimeSegmentWeights(recencyFactor) {
    return this.timeSegments
      .reduce((acc, cur) => merge(acc, ({[cur.id]: this.calculateScore(cur.weightPosition, 1, recencyFactor)})), {});
  }

  /**
   * Calculates user domain affinity scores based on browsing history and the
   * available times segments and parameter sets.
   */
  calculateAllUserDomainAffinityScores() {
    return this.timeSegments
      // Calculate parameter set specific domain scores for each time segment
      // => [{"a.com": {"ps1": 12, "ps2": 34}, "b.com": {"ps1": 56, "ps2": 78}}, ...]
      .map(ts => this.calculateUserDomainAffinityScores(ts))
      // Keep format, but reduce to single object, with combined scores across all time segments
      // => "{a.com":{"ps1":2,"ps2":2}, "b.com":{"ps1":3,"ps2":3}}""
      .reduce((acc, cur) => this._combineScores(acc, cur));
  }

  /**
   * Calculates the user domain affinity scores for the given time segment.
   *
   * @param ts The time segment
   * @return The parameter specific scores for all domains with visits in
   * this time segment: {"a.com": {"ps1": 12, "ps2": 34}, "b.com" ...}
   */
  calculateUserDomainAffinityScores(ts) {
    // Returns domains and visit counts for this time segment: {"a.com": 1, "b.com": 2}
    let visits = this.queryVisits(ts);

    return Object
      .keys(visits)
      .reduce((acc, d) => merge(acc, {[d]: this.calculateScoresForParameterSets(ts, visits[d])}), {});
  }

  /**
   * Calculates the scores for all parameter sets for the given time segment
   * and domain visit count.
   *
   * @param ts The time segment
   * @param vc The domain visit count in the given time segment
   * @return The parameter specific scores for the visit count in
   * this time segment: {"ps1": 12, "ps2": 34}
   */
  calculateScoresForParameterSets(ts, vc) {
    return Object
      .keys(this.parameterSets)
      .reduce((acc, ps) => merge(acc, {[ps]: this.calculateScoreForParameterSet(ts, vc, this.parameterSets[ps])}), {});
  }

  /**
   * Calculates the final affinity score in the given time segment for the given parameter set
   *
   * @param timeSegment The time segment
   * @param visitCount The domain visit count in the given time segment
   * @param parameterSet The parameter set to use for scoring
   * @return The final score
   */
  calculateScoreForParameterSet(timeSegment, visitCount, parameterSet) {
    return this.calculateScore(
      visitCount * parameterSet.timeSegmentWeights[timeSegment.id],
      parameterSet.perfectFrequencyVisits,
      parameterSet.frequencyFactor);
  }

  /**
   * Keeps the same format, but reduces the two objects to a single object, with
   * combined scores across all time segments  => {a.com":{"ps1":2,"ps2":2},
   * "b.com":{"ps1":3,"ps2":3}}
   */
  _combineScores(a, b) {
    // Merge both score objects so we get a combined object holding all domains.
    // This is so we can combine them without missing domains that are in a and not in b and vice versa.
    const c = merge({}, a, b);
    return Object.keys(c).reduce((acc, d) => merge(acc, this._combine(a, b, c, d)), {});
  }

  _combine(a, b, c, d) {
    return Object
      .keys(c[d])
      // Summing up the parameter set specific scores of each domain
      .map(ps => ({[d]: {[ps]: Math.min(1, ((a[d] && a[d][ps]) || 0) + ((b[d] && b[d][ps]) || 0))}}))
      // Reducing from an array of objects with a single parameter set to a single object
      // [{"a.com":{"ps1":11}}, {"a.com: {"ps2":12}}] => {"a.com":{"ps1":11,"ps2":12}}
      .reduce((acc, cur) => ({[d]: merge(acc[d], cur[d])}));
  }

  /**
   * Calculates a value on the curve described by the provided parameters. The curve we're using is
   * (a^(b*x) - 1) / (a^b - 1): https://www.desmos.com/calculator/maqhpttupp
   *
   * @param {number} score A value between 0 and maxScore, representing x.
   * @param {number} maxScore Highest possible score.
   * @param {number} factor The slope describing the curve to get to maxScore. A low slope value
   * [0, 0.5] results in a log-shaped curve, a high slope [0.5, 1] results in a exp-shaped curve,
   * a slope of exactly 0.5 is linear.
   * @param {number} ease Adjusts how much bend is in the curve i.e. how dramatic the maximum
   * effect of the slope can be. This represents b in the formula above.
   * @return {number} the final score
   */
  calculateScore(score, maxScore, factor, ease = 2) {
    let a = 0;
    let x = Math.max(0, score / maxScore);

    if (x >= 1) {
      return 1;
    }

    if (factor === 0.5) {
      return x;
    }

    if (factor < 0.5) {
      // We want a log-shaped curve so we scale "a" between 0 and .99
      a = (factor / 0.5) * 0.49;
    } else if (factor > 0.5) {
      // We want an exp-shaped curve so we scale "a" between 1.01 and 10
      a = 1 + (factor - 0.5) / 0.5 * 9;
    }

    return (Math.pow(a, ease * x) - 1) / (Math.pow(a, ease) - 1);
  }

  /**
   * Queries the visit counts in the given time segment.
   *
   * @param ts the time segment
   * @return the visit count object: {"a.com": 1, "b.com": 2}
   */
  queryVisits(ts) {
    const visitCounts = {};
    const query = history.getNewQuery();
    const wwwRegEx = /^www\./;

    query.beginTimeReference = query.TIME_RELATIVE_NOW;
    query.beginTime = (ts.startTime && ts.startTime !== 0) ? -(ts.startTime * 1000 * 1000) : -(Date.now() * 1000);

    query.endTimeReference = query.TIME_RELATIVE_NOW;
    query.endTime = (ts.endTime && ts.endTime !== 0) ? -(ts.endTime * 1000 * 1000) : 0;

    const options = history.getNewQueryOptions();
    options.sortingMode = options.SORT_BY_VISITCOUNT_DESCENDING;
    options.maxResults = this.maxHistoryQueryResults;

    const root = history.executeQuery(query, options).root;
    root.containerOpen = true;
    for (let i = 0; i < root.childCount; i++) {
      let node = root.getChild(i);
      let host = Services.io.newURI(node.uri).host.replace(wwwRegEx, "");
      if (!visitCounts[host]) {
        visitCounts[host] = 0;
      }
      visitCounts[host] += node.accessCount;
    }
    root.containerOpen = false;
    return visitCounts;
  }

  /**
   * Calculates an item's relevance score.
   *
   * @param item the item (story), must contain domain affinities, otherwise a
   * score of 1 is returned.
   * @return the calculated item's score or 1 if item has no domain_affinities
   * or references an unknown parameter set.
   */
  calculateItemRelevanceScore(item) {
    const params = this.parameterSets[item.parameter_set];
    if (!item.domain_affinities || !params) {
      return item.item_score;
    }

    const scores = Object
      .keys(item.domain_affinities)
      .reduce((acc, d) => {
        let userDomainAffinityScore = this.scores[d] ? this.scores[d][item.parameter_set] : false;
        if (userDomainAffinityScore) {
          acc.combinedDomainScore += userDomainAffinityScore * item.domain_affinities[d];
          acc.matchingDomainsCount++;
        }
        return acc;
      }, {combinedDomainScore: 0, matchingDomainsCount: 0});

    // Boost the score as configured in the provided parameter set
    const boostedCombinedDomainScore = scores.combinedDomainScore *
      Math.pow(params.multiDomainBoost + 1, scores.matchingDomainsCount);

    // Calculate what the score would be if the item score is ignored
    const normalizedCombinedDomainScore = this.calculateScore(boostedCombinedDomainScore,
      params.perfectCombinedDomainScore,
      params.combinedDomainFactor);

    // Calculate the final relevance score using the itemScoreFactor. The itemScoreFactor
    // allows weighting the item score in relation to the normalizedCombinedDomainScore:
    // An itemScoreFactor of 1 results in the item score and ignores the combined domain score
    // An itemScoreFactor of 0.5 results in the the average of item score and combined domain score
    // An itemScoreFactor of 0 results in the combined domain score and ignores the item score
    return params.itemScoreFactor * (item.item_score - normalizedCombinedDomainScore) + normalizedCombinedDomainScore;
  }

  /**
   * Returns an object holding the settings and affinity scores of this provider instance.
   */
  getAffinities() {
    return {
      timeSegments: this.timeSegments,
      parameterSets: this.parameterSets,
      maxHistoryQueryResults: this.maxHistoryQueryResults,
      version: this.version,
      scores: this.scores
    };
  }
};

this.EXPORTED_SYMBOLS = ["UserDomainAffinityProvider"];