Bug 1317223 (part 3) - a bookmark repair requestor. r=markh,tcsc
☠☠ backed out by 4c8faad8c333 ☠ ☠
authorMark Hammond <mhammond@skippinet.com.au>
Thu, 02 Mar 2017 17:14:40 +1100
changeset 374618 b7efe4ba02c25a3b04fa0a84bd02d1b4ba511947
parent 374617 705939ae18f00dd3ccba81fc4e77acefdb51561f
child 374619 e91a6d043fc6943c365d56ed34ce07e83f5fc4b9
push id10863
push userjlorenzo@mozilla.com
push dateMon, 06 Mar 2017 23:02:23 +0000
treeherdermozilla-aurora@0931190cd725 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersmarkh, tcsc
bugs1317223
milestone54.0a1
Bug 1317223 (part 3) - a bookmark repair requestor. r=markh,tcsc The bookmark repair requestor takes the validation results and possibly initiates a bookmark repair. MozReview-Commit-ID: 7rRNbBx8Vo3
services/sync/modules/bookmark_repair.js
services/sync/modules/collection_repair.js
services/sync/moz.build
services/sync/tests/unit/test_bookmark_repair_requestor.js
services/sync/tests/unit/xpcshell.ini
tools/lint/eslint/modules.json
new file mode 100644
--- /dev/null
+++ b/services/sync/modules/bookmark_repair.js
@@ -0,0 +1,533 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+"use strict";
+
+const Cu = Components.utils;
+
+this.EXPORTED_SYMBOLS = ["BookmarkRepairRequestor"];
+
+Cu.import("resource://gre/modules/Services.jsm");
+Cu.import("resource://gre/modules/Preferences.jsm");
+Cu.import("resource://gre/modules/Log.jsm");
+Cu.import("resource://services-sync/collection_repair.js");
+Cu.import("resource://services-sync/constants.js");
+Cu.import("resource://services-sync/resource.js");
+Cu.import("resource://services-sync/doctor.js");
+Cu.import("resource://services-common/utils.js");
+
+const log = Log.repository.getLogger("Sync.Engine.Bookmarks.Repair");
+
+const PREF_BRANCH = "services.sync.repairs.bookmarks.";
+
+// How long should we wait after sending a repair request before we give up?
+const RESPONSE_INTERVAL_TIMEOUT = 60 * 60 * 24 * 3; // 3 days
+
+// The maximum number of IDs we will request to be repaired. Beyond this
+// number we assume that trying to repair may do more harm than good and may
+// ask another client to wipe the server and reupload everything. Bug 1341972
+// is tracking that work.
+const MAX_REQUESTED_IDS = 1000;
+
+class AbortRepairError extends Error {
+  constructor(reason) {
+    super();
+    this.reason = reason;
+  }
+}
+
+// The states we can be in.
+const STATE = Object.freeze({
+  NOT_REPAIRING: "",
+
+  // We need to try to find another client to use.
+  NEED_NEW_CLIENT: "repair.need-new-client",
+
+  // We've sent the first request to a client.
+  SENT_REQUEST: "repair.sent",
+
+  // We've retried a request to a client.
+  SENT_SECOND_REQUEST: "repair.sent-again",
+
+  // There were no problems, but we've gone as far as we can.
+  FINISHED: "repair.finished",
+
+  // We've found an error that forces us to abort this entire repair cycle.
+  ABORTED: "repair.aborted",
+});
+
+// The preferences we use to hold our state.
+const PREF = Object.freeze({
+  // If a repair is in progress, this is the generated GUID for the "flow ID".
+  REPAIR_ID: "flowID",
+
+  // The IDs we are currently trying to obtain via the repair process, space sep'd.
+  REPAIR_MISSING_IDS: "ids",
+
+  // The ID of the client we're currently trying to get the missing items from.
+  REPAIR_CURRENT_CLIENT: "currentClient",
+
+  // The IDs of the clients we've previously tried to get the missing items
+  // from, space sep'd.
+  REPAIR_PREVIOUS_CLIENTS: "previousClients",
+
+  // The time, in seconds, when we initiated the most recent client request.
+  REPAIR_WHEN: "when",
+
+  // Our current state.
+  CURRENT_STATE: "state",
+});
+
+class BookmarkRepairRequestor extends CollectionRepairRequestor {
+  constructor(service = null) {
+    super(service);
+    this.prefs = new Preferences(PREF_BRANCH);
+  }
+
+  /* Exposed incase another module needs to understand our state
+  */
+  get STATE() {
+    return STATE;
+  }
+
+  /* Check if any other clients connected to our account are current performing
+     a repair. A thin wrapper which exists mainly for mocking during tests.
+  */
+  anyClientsRepairing(flowID) {
+    return Doctor.anyClientsRepairing(this.service, "bookmarks", flowID);
+  }
+
+  /* Return a set of IDs we should request.
+  */
+  getProblemIDs(validationInfo) {
+    // Set of ids of "known bad records". Many of the validation issues will
+    // report duplicates -- if the server is missing a record, it is unlikely
+    // to cause only a single problem.
+    let ids = new Set();
+
+    // Note that we allow any of the validation problem fields to be missing so
+    // that tests have a slightly easier time, hence the `|| []` in each loop.
+
+    // Missing children records when the parent exists but a child doesn't.
+    for (let { child } of validationInfo.problems.missingChildren || []) {
+      ids.add(child);
+    }
+    if (ids.size > MAX_REQUESTED_IDS) {
+      return ids; // might as well give up here - we aren't going to repair.
+    }
+
+    // Orphans are when the child exists but the parent doesn't.
+    // This could either be a problem in the child (it's wrong about the node
+    // that should be its parent), or the parent could simply be missing.
+    for (let { parent, id } of validationInfo.problems.orphans || []) {
+      // Request both, to handle both cases
+      ids.add(id);
+      ids.add(parent);
+    }
+    if (ids.size > MAX_REQUESTED_IDS) {
+      return ids; // might as well give up here - we aren't going to repair.
+    }
+
+    // Entries where we have the parent but know for certain that the child was
+    // deleted.
+    for (let { parent } of validationInfo.problems.deletedChildren || []) {
+      ids.add(parent);
+    }
+    if (ids.size > MAX_REQUESTED_IDS) {
+      return ids; // might as well give up here - we aren't going to repair.
+    }
+
+    // Entries where the child references a parent that we don't have, but we
+    // know why: the parent was deleted.
+    for (let { child } of validationInfo.problems.deletedParents || []) {
+      ids.add(child);
+    }
+    if (ids.size > MAX_REQUESTED_IDS) {
+      return ids; // might as well give up here - we aren't going to repair.
+    }
+
+    // Cases where the parent and child disagree about who the parent is.
+    for (let { parent, child } of validationInfo.problems.parentChildMismatches || []) {
+      // Request both, since we don't know which is right.
+      ids.add(parent);
+      ids.add(child);
+    }
+    if (ids.size > MAX_REQUESTED_IDS) {
+      return ids; // might as well give up here - we aren't going to repair.
+    }
+
+    // Cases where multiple parents reference a child. We re-request both the
+    // child, and all the parents that think they own it. This may be more than
+    // we need, but I don't think we can safely make the assumption that the
+    // child is right.
+    for (let { parents, child } of validationInfo.problems.multipleParents || []) {
+      for (let parent of parents) {
+        ids.add(parent);
+      }
+      ids.add(child);
+    }
+
+    // XXX - any others we should consider?
+    return ids;
+  }
+
+  /* See if the repairer is willing and able to begin a repair process given
+     the specified validation information.
+     Returns true if a repair was started and false otherwise.
+  */
+  startRepairs(validationInfo, flowID) {
+    if (this._currentState != STATE.NOT_REPAIRING) {
+      log.info(`Can't start a repair - repair with ID ${this._flowID} is already in progress`);
+      return false;
+    }
+
+    if (this.anyClientsRepairing()) {
+      log.info("Can't start repair, since other clients are already repairing bookmarks");
+      let extra = { flowID, reason: "other clients repairing" };
+      this.service.recordTelemetryEvent("repair", "aborted", undefined, extra)
+      return false;
+    }
+
+    let ids = this.getProblemIDs(validationInfo);
+    if (ids.size > MAX_REQUESTED_IDS) {
+      log.info("Not starting a repair as there are over " + MAX_REQUESTED_IDS + " problems");
+      let extra = { flowID, reason: `too many problems: ${ids.size}` };
+      this.service.recordTelemetryEvent("repair", "aborted", undefined, extra)
+      return false;
+    }
+
+    if (ids.size == 0) {
+      log.info("Not starting a repair as there are no problems");
+      return false;
+    }
+
+    log.info(`Starting a repair, looking for ${ids.size} missing item(s)`);
+    // setup our prefs to indicate we are on our way.
+    this._flowID = flowID;
+    this._currentIDs = Array.from(ids);
+    this._currentState = STATE.NEED_NEW_CLIENT;
+    this.service.recordTelemetryEvent("repair", "started", undefined, { flowID, numIDs: ids.size.toString() });
+    return this.continueRepairs();
+  }
+
+  /* Work out what state our current repair request is in, and whether it can
+     proceed to a new state.
+     Returns true if we could continue the repair - even if the state didn't
+     actually move. Returns false if we aren't actually repairing.
+  */
+  continueRepairs(response = null) {
+    // Note that "ABORTED" and "FINISHED" should never be current when this
+    // function returns - this function resets to NOT_REPAIRING in those cases.
+    if (this._currentState == STATE.NOT_REPAIRING) {
+      return false;
+    }
+
+    let state, newState;
+    let abortReason;
+    // we loop until the state doesn't change - but enforce a max of 10 times
+    // to prevent errors causing infinite loops.
+    for (let i = 0; i < 10; i++) {
+      state = this._currentState;
+      log.info("continueRepairs starting with state", state);
+      try {
+        newState = this._continueRepairs(state, response);
+        log.info("continueRepairs has next state", newState);
+      } catch (ex) {
+        if (!(ex instanceof AbortRepairError)) {
+          throw ex;
+        }
+        log.info(`Repair has been aborted: ${ex.reason}`);
+        newState = STATE.ABORTED;
+        abortReason = ex.reason;
+      }
+
+      if (newState == STATE.ABORTED) {
+        break;
+      }
+
+      this._currentState = newState;
+      Services.prefs.savePrefFile(null); // flush prefs.
+      if (state == newState) {
+        break;
+      }
+    }
+    if (state != newState) {
+      log.error("continueRepairs spun without getting a new state");
+    }
+    if (newState == STATE.FINISHED || newState == STATE.ABORTED) {
+      let object = newState == STATE.FINISHED ? "finished" : "aborted";
+      let extra = {
+        flowID: this._flowID,
+        numIDs: this._currentIDs.length.toString(),
+      };
+      if (abortReason) {
+        extra.reason = abortReason;
+      }
+      this.service.recordTelemetryEvent("repair", object, undefined, extra);
+      // reset our state and flush our prefs.
+      this.prefs.resetBranch();
+      Services.prefs.savePrefFile(null); // flush prefs.
+    }
+    return true;
+  }
+
+  _continueRepairs(state, response = null) {
+    if (this.anyClientsRepairing(this._flowID)) {
+      throw new AbortRepairError("other clients repairing");
+    }
+    switch (state) {
+      case STATE.SENT_REQUEST:
+      case STATE.SENT_SECOND_REQUEST:
+        let flowID = this._flowID;
+        let clientID = this._currentRemoteClient;
+        if (!clientID) {
+          throw new AbortRepairError(`In state ${state} but have no client IDs listed`);
+        }
+        if (response) {
+          // We got an explicit response - let's see how we went.
+          state = this._handleResponse(state, response);
+          break;
+        }
+        // So we've sent a request - and don't yet have a response. See if the
+        // client we sent it to has removed it from its list (ie, whether it
+        // has synced since we wrote the request.)
+        let client = this.service.clientsEngine.remoteClient(clientID);
+        if (!client) {
+          // hrmph - the client has disappeared.
+          log.info(`previously requested client "${clientID}" has vanished - moving to next step`);
+          state = STATE.NEED_NEW_CLIENT;
+          let extra = {
+            deviceID: this.service.identity.hashedDeviceID(clientID),
+            flowID,
+          }
+          this.service.recordTelemetryEvent("repair", "abandon", "missing", extra);
+          break;
+        }
+        if (this._isCommandPending(clientID, flowID)) {
+          // So the command we previously sent is still queued for the client
+          // (ie, that client is yet to have synced). Let's see if we should
+          // give up on that client.
+          let lastRequestSent = this.prefs.get(PREF.REPAIR_WHEN);
+          let timeLeft = lastRequestSent + RESPONSE_INTERVAL_TIMEOUT - this._now();
+          if (timeLeft <= 0) {
+            log.info(`previous request to client ${clientID} is pending, but has taken too long`);
+            state = STATE.NEED_NEW_CLIENT;
+            // XXX - should we remove the command?
+            let extra = {
+              deviceID: this.service.identity.hashedDeviceID(clientID),
+              flowID,
+            }
+            this.service.recordTelemetryEvent("repair", "abandon", "silent", extra);
+            break;
+          }
+          // Let's continue to wait for that client to respond.
+          log.trace(`previous request to client ${clientID} has ${timeLeft} seconds before we give up on it`);
+          break;
+        }
+        // The command isn't pending - if this was the first request, we give
+        // it another go (as that client may have cleared the command but is yet
+        // to complete the sync)
+        // XXX - note that this is no longer true - the responders don't remove
+        // their command until they have written a response. This might mean
+        // we could drop the entire STATE.SENT_SECOND_REQUEST concept???
+        if (state == STATE.SENT_REQUEST) {
+          log.info(`previous request to client ${clientID} was removed - trying a second time`);
+          state = STATE.SENT_SECOND_REQUEST;
+          this._writeRequest(clientID);
+        } else {
+          // this was the second time around, so give up on this client
+          log.info(`previous 2 requests to client ${clientID} were removed - need a new client`);
+          state = STATE.NEED_NEW_CLIENT;
+        }
+        break;
+
+      case STATE.NEED_NEW_CLIENT:
+        // We need to find a new client to request.
+        let newClientID = this._findNextClient();
+        if (!newClientID) {
+          state = STATE.FINISHED;
+          break;
+        }
+        this._addToPreviousRemoteClients(this._currentRemoteClient);
+        this._currentRemoteClient = newClientID;
+        this._writeRequest(newClientID);
+        state = STATE.SENT_REQUEST;
+        break;
+
+      case STATE.ABORTED:
+        break; // our caller will take the abort action.
+
+      case STATE.FINISHED:
+        break;
+
+      case NOT_REPAIRING:
+        // No repair is in progress. This is a common case, so only log trace.
+        log.trace("continue repairs called but no repair in progress.");
+        break;
+
+      default:
+        log.error(`continue repairs finds itself in an unknown state ${state}`);
+        state = STATE.ABORTED;
+        break;
+
+    }
+    return state;
+  }
+
+  /* Handle being in the SENT_REQUEST or SENT_SECOND_REQUEST state with an
+     explicit response.
+  */
+  _handleResponse(state, response) {
+    let clientID = this._currentRemoteClient;
+    let flowID = this._flowID;
+
+    if (response.flowID != flowID || response.clientID != clientID ||
+        response.request != "upload") {
+      log.info("got a response to a different repair request", response);
+      // hopefully just a stale request that finally came in (either from
+      // an entirely different repair flow, or from a client we've since
+      // given up on.) It doesn't mean we need to abort though...
+      return state;
+    }
+    // Pull apart the response and see if it provided everything we asked for.
+    let remainingIDs = Array.from(CommonUtils.difference(this._currentIDs, response.ids));
+    log.info(`repair response from ${clientID} provided "${response.ids}", remaining now "${remainingIDs}"`);
+    this._currentIDs = remainingIDs;
+    if (remainingIDs.length) {
+      // try a new client for the remaining ones.
+      state = STATE.NEED_NEW_CLIENT;
+    } else {
+      state = STATE.FINISHED;
+    }
+    // record telemetry about this
+    let extra = {
+      deviceID: this.service.identity.hashedDeviceID(clientID),
+      flowID,
+      numIDs: response.ids.length.toString(),
+    }
+    this.service.recordTelemetryEvent("repair", "response", "upload", extra);
+    return state;
+  }
+
+  /* Issue a repair request to a specific client.
+  */
+  _writeRequest(clientID) {
+    log.trace("writing repair request to client", clientID);
+    let ids = this._currentIDs;
+    if (!ids) {
+      throw new AbortRepairError("Attempting to write a request, but there are no IDs");
+    }
+    let flowID = this._flowID;
+    // Post a command to that client.
+    let request = {
+      collection: "bookmarks",
+      request: "upload",
+      requestor: this.service.clientsEngine.localID,
+      ids,
+      flowID,
+    }
+    this.service.clientsEngine.sendCommand("repairRequest", [request], clientID, { flowID });
+    this.prefs.set(PREF.REPAIR_WHEN, Math.floor(this._now()));
+    // record telemetry about this
+    let extra = {
+      deviceID: this.service.identity.hashedDeviceID(clientID),
+      flowID,
+      numIDs: ids.length.toString(),
+    }
+    this.service.recordTelemetryEvent("repair", "request", "upload", extra);
+  }
+
+  _findNextClient() {
+    let alreadyDone = this._getPreviousRemoteClients();
+    alreadyDone.push(this._currentRemoteClient);
+    let remoteClients = this.service.clientsEngine.remoteClients;
+    // we want to consider the most-recently synced clients first.
+    remoteClients.sort((a, b) => b.serverLastModified - a.serverLastModified);
+    for (let client of remoteClients) {
+      log.trace("findNextClient considering", client);
+      if (alreadyDone.indexOf(client.id) == -1 && this._isSuitableClient(client)) {
+        return client.id;
+      }
+    }
+    log.trace("findNextClient found no client");
+    return null;
+  }
+
+  /* Is the passed client record suitable as a repair responder?
+  */
+  _isSuitableClient(client) {
+    // filter only desktop firefox running > 53 (ie, any 54)
+    return (client.type == DEVICE_TYPE_DESKTOP &&
+            Services.vc.compare(client.version, 53) > 0);
+  }
+
+  /* Is our command still in the "commands" queue for the specific client?
+  */
+  _isCommandPending(clientID, flowID) {
+    // getClientCommands() is poorly named - it's only outgoing commands
+    // from us we have yet to write. For our purposes, we want to check
+    // them and commands previously written (which is in .commands)
+    let commands = [...this.service.clientsEngine.getClientCommands(clientID),
+                    ...this.service.clientsEngine.remoteClient(clientID).commands || []];
+    for (let command of commands) {
+      if (command.command != "repairRequest" || command.args.length != 1) {
+        continue;
+      }
+      let arg = command.args[0];
+      if (arg.collection == "bookmarks" && arg.request == "upload" &&
+          arg.flowID == flowID) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  // accessors for our prefs.
+  get _currentState() {
+    return this.prefs.get(PREF.CURRENT_STATE, STATE.NOT_REPAIRING);
+  }
+  set _currentState(newState) {
+    this.prefs.set(PREF.CURRENT_STATE, newState);
+  }
+
+  get _currentIDs() {
+    let ids = this.prefs.get(PREF.REPAIR_MISSING_IDS, "");
+    return ids.length ? ids.split(" ") : [];
+  }
+  set _currentIDs(arrayOfIDs) {
+    this.prefs.set(PREF.REPAIR_MISSING_IDS, arrayOfIDs.join(" "));
+  }
+
+  get _currentRemoteClient() {
+    return this.prefs.get(PREF.REPAIR_CURRENT_CLIENT);
+  }
+  set _currentRemoteClient(clientID) {
+    this.prefs.set(PREF.REPAIR_CURRENT_CLIENT, clientID);
+  }
+
+  get _flowID() {
+    return this.prefs.get(PREF.REPAIR_ID);
+  }
+  set _flowID(val) {
+    this.prefs.set(PREF.REPAIR_ID, val);
+  }
+
+  // use a function for this pref to offer somewhat sane semantics.
+  _getPreviousRemoteClients() {
+    let alreadyDone = this.prefs.get(PREF.REPAIR_PREVIOUS_CLIENTS, "");
+    return alreadyDone.length ? alreadyDone.split(" ") : [];
+  }
+  _addToPreviousRemoteClients(clientID) {
+    let arrayOfClientIDs = this._getPreviousRemoteClients();
+    arrayOfClientIDs.push(clientID);
+    this.prefs.set(PREF.REPAIR_PREVIOUS_CLIENTS, arrayOfClientIDs.join(" "));
+  }
+
+  /* Used for test mocks.
+  */
+  _now() {
+    // We use the server time, which is SECONDS
+    return AsyncResource.serverTime;
+  }
+}
--- a/services/sync/modules/collection_repair.js
+++ b/services/sync/modules/collection_repair.js
@@ -3,23 +3,27 @@
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 "use strict";
 
 const Cu = Components.utils;
 
 Cu.import("resource://gre/modules/XPCOMUtils.jsm");
 Cu.import("resource://services-sync/main.js");
 
+XPCOMUtils.defineLazyModuleGetter(this, "BookmarkRepairRequestor",
+  "resource://services-sync/bookmark_repair.js");
+
 this.EXPORTED_SYMBOLS = ["getRepairRequestor", "getAllRepairRequestors",
                          "CollectionRepairRequestor",
                          "getRepairResponder",
                          "CollectionRepairResponder"];
 
 // The individual requestors/responders, lazily loaded.
 const REQUESTORS = {
+  bookmarks: ["bookmark_repair.js", "BookmarkRepairRequestor"],
 }
 
 const RESPONDERS = {
 }
 
 // Should we maybe enforce the requestors being a singleton?
 function _getRepairConstructor(which, collection) {
   if (!(collection in which)) {
--- a/services/sync/moz.build
+++ b/services/sync/moz.build
@@ -14,16 +14,17 @@ XPCSHELL_TESTS_MANIFESTS += ['tests/unit
 EXTRA_COMPONENTS += [
     'SyncComponents.manifest',
     'Weave.js',
 ]
 
 EXTRA_JS_MODULES['services-sync'] += [
     'modules/addonsreconciler.js',
     'modules/addonutils.js',
+    'modules/bookmark_repair.js',
     'modules/bookmark_validator.js',
     'modules/browserid_identity.js',
     'modules/collection_repair.js',
     'modules/collection_validator.js',
     'modules/doctor.js',
     'modules/engines.js',
     'modules/keys.js',
     'modules/main.js',
new file mode 100644
--- /dev/null
+++ b/services/sync/tests/unit/test_bookmark_repair_requestor.js
@@ -0,0 +1,514 @@
+/* Any copyright is dedicated to the Public Domain.
+   http://creativecommons.org/publicdomain/zero/1.0/ */
+Cu.import("resource://services-sync/bookmark_repair.js");
+
+initTestLogging("Trace");
+
+function makeClientRecord(id, fields = {}) {
+  return {
+    id,
+    version: fields.version || "54.0a1",
+    type: fields.type || "desktop",
+    stale: fields.stale || false,
+    serverLastModified: fields.serverLastModified || 0,
+  }
+}
+
+class MockClientsEngine {
+  constructor(clientList) {
+    this._clientList = clientList;
+    this._sentCommands = {}
+  }
+
+  get remoteClients() {
+    return Object.values(this._clientList);
+  }
+
+  remoteClient(id) {
+    return this._clientList[id];
+  }
+
+  sendCommand(command, args, clientID) {
+    let cc = this._sentCommands[clientID] || [];
+    cc.push({ command, args });
+    this._sentCommands[clientID] = cc;
+  }
+
+  getClientCommands(clientID) {
+    return this._sentCommands[clientID] || [];
+  }
+}
+
+class MockIdentity {
+  hashedDeviceID(did) {
+    return did; // don't hash it to make testing easier.
+  }
+}
+
+class MockService {
+  constructor(clientList) {
+    this.clientsEngine = new MockClientsEngine(clientList);
+    this.identity = new MockIdentity();
+    this._recordedEvents = [];
+  }
+
+  recordTelemetryEvent(object, method, value, extra = undefined) {
+    this._recordedEvents.push({ method, object, value, extra });
+  }
+}
+
+function checkState(expected) {
+  equal(Services.prefs.getCharPref("services.sync.repairs.bookmarks.state"), expected);
+}
+
+function checkRepairFinished() {
+  try {
+    let state = Services.prefs.getCharPref("services.sync.repairs.bookmarks.state");
+    ok(false, state);
+  } catch (ex) {
+    ok(true, "no repair preference exists");
+  }
+}
+
+function checkOutgoingCommand(service, clientID) {
+  let sent = service.clientsEngine._sentCommands;
+  deepEqual(Object.keys(sent), [clientID]);
+  equal(sent[clientID].length, 1);
+  equal(sent[clientID][0].command, "repairRequest");
+}
+
+function NewBookmarkRepairRequestor(mockService) {
+  let req = new BookmarkRepairRequestor(mockService);
+  req._now = () => Date.now() / 1000; // _now() is seconds.
+  return req;
+}
+
+add_task(async function test_requestor_no_clients() {
+  let mockService = new MockService({ });
+  let requestor = NewBookmarkRepairRequestor(mockService);
+  let validationInfo = {
+    problems: {
+      missingChildren: [
+        {parent: "x", child: "a"},
+        {parent: "x", child: "b"},
+        {parent: "x", child: "c"}
+      ],
+      orphans: [],
+    }
+  }
+  let flowID = Utils.makeGUID();
+
+  requestor.startRepairs(validationInfo, flowID);
+  // there are no clients, so we should end up in "finished" (which we need to
+  // check via telemetry)
+  deepEqual(mockService._recordedEvents, [
+    { object: "repair",
+      method: "started",
+      value: undefined,
+      extra: { flowID, numIDs: 3 },
+    },
+    { object: "repair",
+      method: "finished",
+      value: undefined,
+      extra: { flowID, numIDs: 3, reason: undefined },
+    }
+  ]);
+});
+
+add_task(async function test_requestor_one_client_no_response() {
+  let mockService = new MockService({ "client-a": makeClientRecord("client-a") });
+  let requestor = NewBookmarkRepairRequestor(mockService);
+  let validationInfo = {
+    problems: {
+      missingChildren: [
+        {parent: "x", child: "a"},
+        {parent: "x", child: "b"},
+        {parent: "x", child: "c"}
+      ],
+      orphans: [],
+    }
+  }
+  let flowID = Utils.makeGUID();
+  requestor.startRepairs(validationInfo, flowID);
+  // the command should now be outgoing.
+  checkOutgoingCommand(mockService, "client-a");
+
+  checkState(requestor.STATE.SENT_REQUEST);
+  // asking it to continue stays in that state until we timeout or the command
+  // is removed.
+  requestor.continueRepairs();
+  checkState(requestor.STATE.SENT_REQUEST);
+
+  // now pretend that client synced.
+  mockService.clientsEngine._sentCommands = {};
+  requestor.continueRepairs();
+  checkState(requestor.STATE.SENT_SECOND_REQUEST);
+  // the command should be outgoing again.
+  checkOutgoingCommand(mockService, "client-a");
+
+  // pretend that client synced again without writing a command.
+  mockService.clientsEngine._sentCommands = {};
+  requestor.continueRepairs();
+  // There are no more clients, so we've given up.
+
+  checkRepairFinished();
+  deepEqual(mockService._recordedEvents, [
+    { object: "repair",
+      method: "started",
+      value: undefined,
+      extra: { flowID, numIDs: 3 },
+    },
+    { object: "repair",
+      method: "request",
+      value: "upload",
+      extra: { flowID, numIDs: 3, deviceID: "client-a" },
+    },
+    { object: "repair",
+      method: "request",
+      value: "upload",
+      extra: { flowID, numIDs: 3, deviceID: "client-a" },
+    },
+    { object: "repair",
+      method: "finished",
+      value: undefined,
+      extra: { flowID, numIDs: 3, reason: undefined },
+    }
+  ]);
+});
+
+add_task(async function test_requestor_one_client_no_sync() {
+  let mockService = new MockService({ "client-a": makeClientRecord("client-a") });
+  let requestor = NewBookmarkRepairRequestor(mockService);
+  let validationInfo = {
+    problems: {
+      missingChildren: [
+        {parent: "x", child: "a"},
+        {parent: "x", child: "b"},
+        {parent: "x", child: "c"}
+      ],
+      orphans: [],
+    }
+  }
+  let flowID = Utils.makeGUID();
+  requestor.startRepairs(validationInfo, flowID);
+  // the command should now be outgoing.
+  checkOutgoingCommand(mockService, "client-a");
+
+  checkState(requestor.STATE.SENT_REQUEST);
+
+  // pretend we are now in the future.
+  let theFuture = Date.now() + 300000000;
+  requestor._now = () => theFuture;
+
+  requestor.continueRepairs();
+
+  // We should be finished as we gave up in disgust.
+  checkRepairFinished();
+  deepEqual(mockService._recordedEvents, [
+    { object: "repair",
+      method: "started",
+      value: undefined,
+      extra: { flowID, numIDs: 3 },
+    },
+    { object: "repair",
+      method: "request",
+      value: "upload",
+      extra: { flowID, numIDs: 3, deviceID: "client-a" },
+    },
+    { object: "repair",
+      method: "abandon",
+      value: "silent",
+      extra: { flowID, deviceID: "client-a" },
+    },
+    { object: "repair",
+      method: "finished",
+      value: undefined,
+      extra: { flowID, numIDs: 3, reason: undefined },
+    }
+  ]);
+});
+
+add_task(async function test_requestor_latest_client_used() {
+  let mockService = new MockService({
+    "client-early": makeClientRecord("client-early", { serverLastModified: Date.now() - 10 }),
+    "client-late": makeClientRecord("client-late", { serverLastModified: Date.now() }),
+  });
+  let requestor = NewBookmarkRepairRequestor(mockService);
+  let validationInfo = {
+    problems: {
+      missingChildren: [
+        { parent: "x", child: "a" },
+      ],
+      orphans: [],
+    }
+  }
+  requestor.startRepairs(validationInfo, Utils.makeGUID());
+  // the repair command should be outgoing to the most-recent client.
+  checkOutgoingCommand(mockService, "client-late");
+  checkState(requestor.STATE.SENT_REQUEST);
+  // and this test is done - reset the repair.
+  requestor.prefs.resetBranch();
+});
+
+add_task(async function test_requestor_client_vanishes() {
+  let mockService = new MockService({
+    "client-a": makeClientRecord("client-a"),
+    "client-b": makeClientRecord("client-b"),
+  });
+  let requestor = NewBookmarkRepairRequestor(mockService);
+  let validationInfo = {
+    problems: {
+      missingChildren: [
+        {parent: "x", child: "a"},
+        {parent: "x", child: "b"},
+        {parent: "x", child: "c"}
+      ],
+      orphans: [],
+    }
+  }
+  let flowID = Utils.makeGUID();
+  requestor.startRepairs(validationInfo, flowID);
+  // the command should now be outgoing.
+  checkOutgoingCommand(mockService, "client-a");
+
+  checkState(requestor.STATE.SENT_REQUEST);
+
+  mockService.clientsEngine._sentCommands = {};
+  // Now let's pretend the client vanished.
+  delete mockService.clientsEngine._clientList["client-a"];
+
+  requestor.continueRepairs();
+  // We should have moved on to client-b.
+  checkState(requestor.STATE.SENT_REQUEST);
+  checkOutgoingCommand(mockService, "client-b");
+
+  // Now let's pretend client B wrote all missing IDs.
+  let response = {
+    collection: "bookmarks",
+    request: "upload",
+    flowID: requestor._flowID,
+    clientID: "client-b",
+    ids: ["a", "b", "c"],
+  }
+  requestor.continueRepairs(response);
+
+  // We should be finished as we got all our IDs.
+  checkRepairFinished();
+  deepEqual(mockService._recordedEvents, [
+    { object: "repair",
+      method: "started",
+      value: undefined,
+      extra: { flowID, numIDs: 3 },
+    },
+    { object: "repair",
+      method: "request",
+      value: "upload",
+      extra: { flowID, numIDs: 3, deviceID: "client-a" },
+    },
+    { object: "repair",
+      method: "abandon",
+      value: "missing",
+      extra: { flowID, deviceID: "client-a" },
+    },
+    { object: "repair",
+      method: "request",
+      value: "upload",
+      extra: { flowID, numIDs: 3, deviceID: "client-b" },
+    },
+    { object: "repair",
+      method: "response",
+      value: "upload",
+      extra: { flowID, deviceID: "client-b", numIDs: 3 },
+    },
+    { object: "repair",
+      method: "finished",
+      value: undefined,
+      extra: { flowID, numIDs: 0, reason: undefined },
+    }
+  ]);
+});
+
+add_task(async function test_requestor_success_responses() {
+  let mockService = new MockService({
+    "client-a": makeClientRecord("client-a"),
+    "client-b": makeClientRecord("client-b"),
+  });
+  let requestor = NewBookmarkRepairRequestor(mockService);
+  let validationInfo = {
+    problems: {
+      missingChildren: [
+        {parent: "x", child: "a"},
+        {parent: "x", child: "b"},
+        {parent: "x", child: "c"}
+      ],
+      orphans: [],
+    }
+  }
+  let flowID = Utils.makeGUID();
+  requestor.startRepairs(validationInfo, flowID);
+  // the command should now be outgoing.
+  checkOutgoingCommand(mockService, "client-a");
+
+  checkState(requestor.STATE.SENT_REQUEST);
+
+  mockService.clientsEngine._sentCommands = {};
+  // Now let's pretend the client wrote a response.
+  let response = {
+    collection: "bookmarks",
+    request: "upload",
+    clientID: "client-a",
+    flowID: requestor._flowID,
+    ids: ["a", "b"],
+  }
+  requestor.continueRepairs(response);
+  // We should have moved on to client 2.
+  checkState(requestor.STATE.SENT_REQUEST);
+  checkOutgoingCommand(mockService, "client-b");
+
+  // Now let's pretend client B write the missing ID.
+  response = {
+    collection: "bookmarks",
+    request: "upload",
+    clientID: "client-b",
+    flowID: requestor._flowID,
+    ids: ["c"],
+  }
+  requestor.continueRepairs(response);
+
+  // We should be finished as we got all our IDs.
+  checkRepairFinished();
+  deepEqual(mockService._recordedEvents, [
+    { object: "repair",
+      method: "started",
+      value: undefined,
+      extra: { flowID, numIDs: 3 },
+    },
+    { object: "repair",
+      method: "request",
+      value: "upload",
+      extra: { flowID, numIDs: 3, deviceID: "client-a" },
+    },
+    { object: "repair",
+      method: "response",
+      value: "upload",
+      extra: { flowID, deviceID: "client-a", numIDs: 2 },
+    },
+    { object: "repair",
+      method: "request",
+      value: "upload",
+      extra: { flowID, numIDs: 1, deviceID: "client-b" },
+    },
+    { object: "repair",
+      method: "response",
+      value: "upload",
+      extra: { flowID, deviceID: "client-b", numIDs: 1 },
+    },
+    { object: "repair",
+      method: "finished",
+      value: undefined,
+      extra: { flowID, numIDs: 0, reason: undefined },
+    }
+  ]);
+});
+
+add_task(async function test_client_suitability() {
+  let mockService = new MockService({
+    "client-a": makeClientRecord("client-a"),
+    "client-b": makeClientRecord("client-b", { type: "mobile" }),
+    "client-c": makeClientRecord("client-c", { version: "52.0a1" }),
+    "client-d": makeClientRecord("client-c", { version: "54.0a1" }),
+  });
+  let requestor = NewBookmarkRepairRequestor(mockService);
+  ok(requestor._isSuitableClient(mockService.clientsEngine.remoteClient("client-a")));
+  ok(!requestor._isSuitableClient(mockService.clientsEngine.remoteClient("client-b")));
+  ok(!requestor._isSuitableClient(mockService.clientsEngine.remoteClient("client-c")));
+  ok(requestor._isSuitableClient(mockService.clientsEngine.remoteClient("client-d")));
+});
+
+add_task(async function test_requestor_already_repairing_at_start() {
+  let mockService = new MockService({ });
+  let requestor = NewBookmarkRepairRequestor(mockService);
+  requestor.anyClientsRepairing = () => true;
+  let validationInfo = {
+    problems: {
+      missingChildren: [
+        {parent: "x", child: "a"},
+        {parent: "x", child: "b"},
+        {parent: "x", child: "c"}
+      ],
+      orphans: [],
+    }
+  }
+  let flowID = Utils.makeGUID();
+
+  ok(!requestor.startRepairs(validationInfo, flowID),
+     "Shouldn't start repairs");
+  equal(mockService._recordedEvents.length, 1);
+  equal(mockService._recordedEvents[0].method, "aborted");
+});
+
+add_task(async function test_requestor_already_repairing_continue() {
+  let clientB = makeClientRecord("client-b")
+  let mockService = new MockService({
+    "client-a": makeClientRecord("client-a"),
+    "client-b": clientB
+  });
+  let requestor = NewBookmarkRepairRequestor(mockService);
+  let validationInfo = {
+    problems: {
+      missingChildren: [
+        {parent: "x", child: "a"},
+        {parent: "x", child: "b"},
+        {parent: "x", child: "c"}
+      ],
+      orphans: [],
+    }
+  }
+  let flowID = Utils.makeGUID();
+  requestor.startRepairs(validationInfo, flowID);
+  // the command should now be outgoing.
+  checkOutgoingCommand(mockService, "client-a");
+
+  checkState(requestor.STATE.SENT_REQUEST);
+  mockService.clientsEngine._sentCommands = {};
+
+  // Now let's pretend the client wrote a response (it doesn't matter what's in here)
+  let response = {
+    collection: "bookmarks",
+    request: "upload",
+    clientID: "client-a",
+    flowID: requestor._flowID,
+    ids: ["a", "b"],
+  };
+
+  // and another client also started a request
+  clientB.commands = [{
+    args: [{ collection: "bookmarks", flowID: "asdf" }],
+    command: "repairRequest",
+  }];
+
+
+  requestor.continueRepairs(response);
+
+  // We should have aborted now
+  checkRepairFinished();
+  const expected = [
+    { method: "started",
+      object: "repair",
+      value: undefined,
+      extra: { flowID: flowID, numIDs: "3" },
+    },
+    { method: "request",
+      object: "repair",
+      value: "upload",
+      extra: { flowID: flowID, numIDs: "3", deviceID: "client-a" },
+    },
+    { method: "aborted",
+      object: "repair",
+      value: undefined,
+      extra: { flowID: flowID, numIDs: "3", reason: "other clients repairing" },
+    }
+  ];
+
+  deepEqual(mockService._recordedEvents, expected);
+});
--- a/services/sync/tests/unit/xpcshell.ini
+++ b/services/sync/tests/unit/xpcshell.ini
@@ -131,16 +131,17 @@ tags = addons
 [test_bookmark_duping.js]
 [test_bookmark_engine.js]
 [test_bookmark_invalid.js]
 [test_bookmark_legacy_microsummaries_support.js]
 [test_bookmark_livemarks.js]
 [test_bookmark_order.js]
 [test_bookmark_places_query_rewriting.js]
 [test_bookmark_record.js]
+[test_bookmark_repair_requestor.js]
 [test_bookmark_smart_bookmarks.js]
 [test_bookmark_store.js]
 [test_bookmark_decline_undecline.js]
 # Too many intermittent "ASSERTION: thread pool wasn't shutdown: '!mPool'" (bug 804479)
 skip-if = debug
 [test_bookmark_tracker.js]
 requesttimeoutfactor = 4
 [test_bookmark_validator.js]
--- a/tools/lint/eslint/modules.json
+++ b/tools/lint/eslint/modules.json
@@ -15,16 +15,17 @@
   "assertions.js": ["Assert", "Expect"],
   "async.js": ["Async"],
   "AsyncSpellCheckTestHelper.jsm": ["onSpellCheck"],
   "AutoMigrate.jsm": ["AutoMigrate"],
   "Battery.jsm": ["GetBattery", "Battery"],
   "blocklist-clients.js": ["AddonBlocklistClient", "GfxBlocklistClient", "OneCRLBlocklistClient", "PluginBlocklistClient"],
   "blocklist-updater.js": ["checkVersions", "addTestBlocklistClient"],
   "bogus_element_type.jsm": [],
+  "bookmark_repair.js": ["BookmarkRepairRequestor"],
   "bookmark_validator.js": ["BookmarkValidator", "BookmarkProblemData"],
   "bookmarks.js": ["BookmarksEngine", "PlacesItem", "Bookmark", "BookmarkFolder", "BookmarkQuery", "Livemark", "BookmarkSeparator"],
   "bookmarks.jsm": ["PlacesItem", "Bookmark", "Separator", "Livemark", "BookmarkFolder", "DumpBookmarks"],
   "BootstrapMonitor.jsm": ["monitor"],
   "browser-loader.js": ["BrowserLoader"],
   "browserid_identity.js": ["BrowserIDManager", "AuthenticationError"],
   "CertUtils.jsm": ["BadCertHandler", "checkCert", "readCertPrefs", "validateCert"],
   "clients.js": ["ClientEngine", "ClientsRec"],