Bug 1317223 (part 3) - a bookmark repair requestor. r=markh,tcsc
☠☠ backed out by 4c8faad8c333 ☠ ☠
authorMark Hammond <mhammond@skippinet.com.au>
Thu, 02 Mar 2017 17:14:40 +1100
changeset 345591 b7efe4ba02c25a3b04fa0a84bd02d1b4ba511947
parent 345590 705939ae18f00dd3ccba81fc4e77acefdb51561f
child 345592 e91a6d043fc6943c365d56ed34ce07e83f5fc4b9
push id31441
push userkwierso@gmail.com
push dateThu, 02 Mar 2017 22:57:54 +0000
treeherdermozilla-central@b23d6277acca [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersmarkh, tcsc
bugs1317223
milestone54.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1317223 (part 3) - a bookmark repair requestor. r=markh,tcsc The bookmark repair requestor takes the validation results and possibly initiates a bookmark repair. MozReview-Commit-ID: 7rRNbBx8Vo3
services/sync/modules/bookmark_repair.js
services/sync/modules/collection_repair.js
services/sync/moz.build
services/sync/tests/unit/test_bookmark_repair_requestor.js
services/sync/tests/unit/xpcshell.ini
tools/lint/eslint/modules.json
new file mode 100644
--- /dev/null
+++ b/services/sync/modules/bookmark_repair.js
@@ -0,0 +1,533 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+"use strict";
+
+const Cu = Components.utils;
+
+this.EXPORTED_SYMBOLS = ["BookmarkRepairRequestor"];
+
+Cu.import("resource://gre/modules/Services.jsm");
+Cu.import("resource://gre/modules/Preferences.jsm");
+Cu.import("resource://gre/modules/Log.jsm");
+Cu.import("resource://services-sync/collection_repair.js");
+Cu.import("resource://services-sync/constants.js");
+Cu.import("resource://services-sync/resource.js");
+Cu.import("resource://services-sync/doctor.js");
+Cu.import("resource://services-common/utils.js");
+
+const log = Log.repository.getLogger("Sync.Engine.Bookmarks.Repair");
+
+const PREF_BRANCH = "services.sync.repairs.bookmarks.";
+
+// How long should we wait after sending a repair request before we give up?
+const RESPONSE_INTERVAL_TIMEOUT = 60 * 60 * 24 * 3; // 3 days
+
+// The maximum number of IDs we will request to be repaired. Beyond this
+// number we assume that trying to repair may do more harm than good and may
+// ask another client to wipe the server and reupload everything. Bug 1341972
+// is tracking that work.
+const MAX_REQUESTED_IDS = 1000;
+
+class AbortRepairError extends Error {
+  constructor(reason) {
+    super();
+    this.reason = reason;
+  }
+}
+
+// The states we can be in.
+const STATE = Object.freeze({
+  NOT_REPAIRING: "",
+
+  // We need to try to find another client to use.
+  NEED_NEW_CLIENT: "repair.need-new-client",
+
+  // We've sent the first request to a client.
+  SENT_REQUEST: "repair.sent",
+
+  // We've retried a request to a client.
+  SENT_SECOND_REQUEST: "repair.sent-again",
+
+  // There were no problems, but we've gone as far as we can.
+  FINISHED: "repair.finished",
+
+  // We've found an error that forces us to abort this entire repair cycle.
+  ABORTED: "repair.aborted",
+});
+
+// The preferences we use to hold our state.
+const PREF = Object.freeze({
+  // If a repair is in progress, this is the generated GUID for the "flow ID".
+  REPAIR_ID: "flowID",
+
+  // The IDs we are currently trying to obtain via the repair process, space sep'd.
+  REPAIR_MISSING_IDS: "ids",
+
+  // The ID of the client we're currently trying to get the missing items from.
+  REPAIR_CURRENT_CLIENT: "currentClient",
+
+  // The IDs of the clients we've previously tried to get the missing items
+  // from, space sep'd.
+  REPAIR_PREVIOUS_CLIENTS: "previousClients",
+
+  // The time, in seconds, when we initiated the most recent client request.
+  REPAIR_WHEN: "when",
+
+  // Our current state.
+  CURRENT_STATE: "state",
+});
+
+class BookmarkRepairRequestor extends CollectionRepairRequestor {
+  constructor(service = null) {
+    super(service);
+    this.prefs = new Preferences(PREF_BRANCH);
+  }
+
+  /* Exposed incase another module needs to understand our state
+  */
+  get STATE() {
+    return STATE;
+  }
+
+  /* Check if any other clients connected to our account are current performing
+     a repair. A thin wrapper which exists mainly for mocking during tests.
+  */
+  anyClientsRepairing(flowID) {
+    return Doctor.anyClientsRepairing(this.service, "bookmarks", flowID);
+  }
+
+  /* Return a set of IDs we should request.
+  */
+  getProblemIDs(validationInfo) {
+    // Set of ids of "known bad records". Many of the validation issues will
+    // report duplicates -- if the server is missing a record, it is unlikely
+    // to cause only a single problem.
+    let ids = new Set();
+
+    // Note that we allow any of the validation problem fields to be missing so
+    // that tests have a slightly easier time, hence the `|| []` in each loop.
+
+    // Missing children records when the parent exists but a child doesn't.
+    for (let { child } of validationInfo.problems.missingChildren || []) {
+      ids.add(child);
+    }
+    if (ids.size > MAX_REQUESTED_IDS) {
+      return ids; // might as well give up here - we aren't going to repair.
+    }
+
+    // Orphans are when the child exists but the parent doesn't.
+    // This could either be a problem in the child (it's wrong about the node
+    // that should be its parent), or the parent could simply be missing.
+    for (let { parent, id } of validationInfo.problems.orphans || []) {
+      // Request both, to handle both cases
+      ids.add(id);
+      ids.add(parent);
+    }
+    if (ids.size > MAX_REQUESTED_IDS) {
+      return ids; // might as well give up here - we aren't going to repair.
+    }
+
+    // Entries where we have the parent but know for certain that the child was
+    // deleted.
+    for (let { parent } of validationInfo.problems.deletedChildren || []) {
+      ids.add(parent);
+    }
+    if (ids.size > MAX_REQUESTED_IDS) {
+      return ids; // might as well give up here - we aren't going to repair.
+    }
+
+    // Entries where the child references a parent that we don't have, but we
+    // know why: the parent was deleted.
+    for (let { child } of validationInfo.problems.deletedParents || []) {
+      ids.add(child);
+    }
+    if (ids.size > MAX_REQUESTED_IDS) {
+      return ids; // might as well give up here - we aren't going to repair.
+    }
+
+    // Cases where the parent and child disagree about who the parent is.
+    for (let { parent, child } of validationInfo.problems.parentChildMismatches || []) {
+      // Request both, since we don't know which is right.
+      ids.add(parent);
+      ids.add(child);
+    }
+    if (ids.size > MAX_REQUESTED_IDS) {
+      return ids; // might as well give up here - we aren't going to repair.
+    }
+
+    // Cases where multiple parents reference a child. We re-request both the
+    // child, and all the parents that think they own it. This may be more than
+    // we need, but I don't think we can safely make the assumption that the
+    // child is right.
+    for (let { parents, child } of validationInfo.problems.multipleParents || []) {
+      for (let parent of parents) {
+        ids.add(parent);
+      }
+      ids.add(child);
+    }
+
+    // XXX - any others we should consider?
+    return ids;
+  }
+
+  /* See if the repairer is willing and able to begin a repair process given
+     the specified validation information.
+     Returns true if a repair was started and false otherwise.
+  */
+  startRepairs(validationInfo, flowID) {
+    if (this._currentState != STATE.NOT_REPAIRING) {
+      log.info(`Can't start a repair - repair with ID ${this._flowID} is already in progress`);
+      return false;
+    }
+
+    if (this.anyClientsRepairing()) {
+      log.info("Can't start repair, since other clients are already repairing bookmarks");
+      let extra = { flowID, reason: "other clients repairing" };
+      this.service.recordTelemetryEvent("repair", "aborted", undefined, extra)
+      return false;
+    }
+
+    let ids = this.getProblemIDs(validationInfo);
+    if (ids.size > MAX_REQUESTED_IDS) {
+      log.info("Not starting a repair as there are over " + MAX_REQUESTED_IDS + " problems");
+      let extra = { flowID, reason: `too many problems: ${ids.size}` };
+      this.service.recordTelemetryEvent("repair", "aborted", undefined, extra)
+      return false;
+    }
+
+    if (ids.size == 0) {
+      log.info("Not starting a repair as there are no problems");
+      return false;
+    }
+
+    log.info(`Starting a repair, looking for ${ids.size} missing item(s)`);
+    // setup our prefs to indicate we are on our way.
+    this._flowID = flowID;
+    this._currentIDs = Array.from(ids);
+    this._currentState = STATE.NEED_NEW_CLIENT;
+    this.service.recordTelemetryEvent("repair", "started", undefined, { flowID, numIDs: ids.size.toString() });
+    return this.continueRepairs();
+  }
+
+  /* Work out what state our current repair request is in, and whether it can
+     proceed to a new state.
+     Returns true if we could continue the repair - even if the state didn't
+     actually move. Returns false if we aren't actually repairing.
+  */
+  continueRepairs(response = null) {
+    // Note that "ABORTED" and "FINISHED" should never be current when this
+    // function returns - this function resets to NOT_REPAIRING in those cases.
+    if (this._currentState == STATE.NOT_REPAIRING) {
+      return false;
+    }
+
+    let state, newState;
+    let abortReason;
+    // we loop until the state doesn't change - but enforce a max of 10 times
+    // to prevent errors causing infinite loops.
+    for (let i = 0; i < 10; i++) {
+      state = this._currentState;
+      log.info("continueRepairs starting with state", state);
+      try {
+        newState = this._continueRepairs(state, response);
+        log.info("continueRepairs has next state", newState);
+      } catch (ex) {
+        if (!(ex instanceof AbortRepairError)) {
+          throw ex;
+        }
+        log.info(`Repair has been aborted: ${ex.reason}`);
+        newState = STATE.ABORTED;
+        abortReason = ex.reason;
+      }
+
+      if (newState == STATE.ABORTED) {
+        break;
+      }
+
+      this._currentState = newState;
+      Services.prefs.savePrefFile(null); // flush prefs.
+      if (state == newState) {
+        break;
+      }
+    }
+    if (state != newState) {
+      log.error("continueRepairs spun without getting a new state");
+    }
+    if (newState == STATE.FINISHED || newState == STATE.ABORTED) {
+      let object = newState == STATE.FINISHED ? "finished" : "aborted";
+      let extra = {
+        flowID: this._flowID,
+        numIDs: this._currentIDs.length.toString(),
+      };
+      if (abortReason) {
+        extra.reason = abortReason;
+      }
+      this.service.recordTelemetryEvent("repair", object, undefined, extra);
+      // reset our state and flush our prefs.
+      this.prefs.resetBranch();
+      Services.prefs.savePrefFile(null); // flush prefs.
+    }
+    return true;
+  }
+
+  _continueRepairs(state, response = null) {
+    if (this.anyClientsRepairing(this._flowID)) {
+      throw new AbortRepairError("other clients repairing");
+    }
+    switch (state) {
+      case STATE.SENT_REQUEST:
+      case STATE.SENT_SECOND_REQUEST:
+        let flowID = this._flowID;
+        let clientID = this._currentRemoteClient;
+        if (!clientID) {
+          throw new AbortRepairError(`In state ${state} but have no client IDs listed`);
+        }
+        if (response) {
+          // We got an explicit response - let's see how we went.
+          state = this._handleResponse(state, response);
+          break;
+        }
+        // So we've sent a request - and don't yet have a response. See if the
+        // client we sent it to has removed it from its list (ie, whether it
+        // has synced since we wrote the request.)
+        let client = this.service.clientsEngine.remoteClient(clientID);
+        if (!client) {
+          // hrmph - the client has disappeared.
+          log.info(`previously requested client "${clientID}" has vanished - moving to next step`);
+          state = STATE.NEED_NEW_CLIENT;
+          let extra = {
+            deviceID: this.service.identity.hashedDeviceID(clientID),
+            flowID,
+          }
+          this.service.recordTelemetryEvent("repair", "abandon", "missing", extra);
+          break;
+        }
+        if (this._isCommandPending(clientID, flowID)) {
+          // So the command we previously sent is still queued for the client
+          // (ie, that client is yet to have synced). Let's see if we should
+          // give up on that client.
+          let lastRequestSent = this.prefs.get(PREF.REPAIR_WHEN);
+          let timeLeft = lastRequestSent + RESPONSE_INTERVAL_TIMEOUT - this._now();
+          if (timeLeft <= 0) {
+            log.info(`previous request to client ${clientID} is pending, but has taken too long`);
+            state = STATE.NEED_NEW_CLIENT;
+            // XXX - should we remove the command?
+            let extra = {
+              deviceID: this.service.identity.hashedDeviceID(clientID),
+              flowID,
+            }
+            this.service.recordTelemetryEvent("repair", "abandon", "silent", extra);
+            break;
+          }
+          // Let's continue to wait for that client to respond.
+          log.trace(`previous request to client ${clientID} has ${timeLeft} seconds before we give up on it`);
+          break;
+        }
+        // The command isn't pending - if this was the first request, we give
+        // it another go (as that client may have cleared the command but is yet
+        // to complete the sync)
+        // XXX - note that this is no longer true - the responders don't remove
+        // their command until they have written a response. This might mean
+        // we could drop the entire STATE.SENT_SECOND_REQUEST concept???
+        if (state == STATE.SENT_REQUEST) {
+          log.info(`previous request to client ${clientID} was removed - trying a second time`);
+          state = STATE.SENT_SECOND_REQUEST;
+          this._writeRequest(clientID);
+        } else {
+          // this was the second time around, so give up on this client
+          log.info(`previous 2 requests to client ${clientID} were removed - need a new client`);
+          state = STATE.NEED_NEW_CLIENT;
+        }
+        break;
+
+      case STATE.NEED_NEW_CLIENT:
+        // We need to find a new client to request.
+        let newClientID = this._findNextClient();
+        if (!newClientID) {
+          state = STATE.FINISHED;
+          break;
+        }
+        this._addToPreviousRemoteClients(this._currentRemoteClient);
+        this._currentRemoteClient = newClientID;
+        this._writeRequest(newClientID);
+        state = STATE.SENT_REQUEST;
+        break;
+
+      case STATE.ABORTED:
+        break; // our caller will take the abort action.
+
+      case STATE.FINISHED:
+        break;
+
+      case NOT_REPAIRING:
+        // No repair is in progress. This is a common case, so only log trace.
+        log.trace("continue repairs called but no repair in progress.");
+        break;
+
+      default:
+        log.error(`continue repairs finds itself in an unknown state ${state}`);
+        state = STATE.ABORTED;
+        break;
+
+    }
+    return state;
+  }
+
+  /* Handle being in the SENT_REQUEST or SENT_SECOND_REQUEST state with an
+     explicit response.
+  */
+  _handleResponse(state, response) {
+    let clientID = this._currentRemoteClient;
+    let flowID = this._flowID;
+
+    if (response.flowID != flowID || response.clientID != clientID ||
+        response.request != "upload") {
+      log.info("got a response to a different repair request", response);
+      // hopefully just a stale request that finally came in (either from
+      // an entirely different repair flow, or from a client we've since
+      // given up on.) It doesn't mean we need to abort though...
+      return state;
+    }
+    // Pull apart the response and see if it provided everything we asked for.
+    let remainingIDs = Array.from(CommonUtils.difference(this._currentIDs, response.ids));
+    log.info(`repair response from ${clientID} provided "${response.ids}", remaining now "${remainingIDs}"`);
+    this._currentIDs = remainingIDs;
+    if (remainingIDs.length) {
+      // try a new client for the remaining ones.
+      state = STATE.NEED_NEW_CLIENT;
+    } else {
+      state = STATE.FINISHED;
+    }
+    // record telemetry about this
+    let extra = {
+      deviceID: this.service.identity.hashedDeviceID(clientID),
+      flowID,
+      numIDs: response.ids.length.toString(),
+    }
+    this.service.recordTelemetryEvent("repair", "response", "upload", extra);
+    return state;
+  }
+
+  /* Issue a repair request to a specific client.
+  */
+  _writeRequest(clientID) {
+    log.trace("writing repair request to client", clientID);
+    let ids = this._currentIDs;
+    if (!ids) {
+      throw new AbortRepairError("Attempting to write a request, but there are no IDs");
+    }
+    let flowID = this._flowID;
+    // Post a command to that client.
+    let request = {
+      collection: "bookmarks",
+      request: "upload",
+      requestor: this.service.clientsEngine.localID,
+      ids,
+      flowID,
+    }
+    this.service.clientsEngine.sendCommand("repairRequest", [request], clientID, { flowID });
+    this.prefs.set(PREF.REPAIR_WHEN, Math.floor(this._now()));
+    // record telemetry about this
+    let extra = {
+      deviceID: this.service.identity.hashedDeviceID(clientID),
+      flowID,
+      numIDs: ids.length.toString(),
+    }
+    this.service.recordTelemetryEvent("repair", "request", "upload", extra);
+  }
+
+  _findNextClient() {
+    let alreadyDone = this._getPreviousRemoteClients();
+    alreadyDone.push(this._currentRemoteClient);
+    let remoteClients = this.service.clientsEngine.remoteClients;
+    // we want to consider the most-recently synced clients first.
+    remoteClients.sort((a, b) => b.serverLastModified - a.serverLastModified);
+    for (let client of remoteClients) {
+      log.trace("findNextClient considering", client);
+      if (alreadyDone.indexOf(client.id) == -1 && this._isSuitableClient(client)) {
+        return client.id;
+      }
+    }
+    log.trace("findNextClient found no client");
+    return null;
+  }
+
+  /* Is the passed client record suitable as a repair responder?
+  */
+  _isSuitableClient(client) {
+    // filter only desktop firefox running > 53 (ie, any 54)
+    return (client.type == DEVICE_TYPE_DESKTOP &&
+            Services.vc.compare(client.version, 53) > 0);
+  }
+
+  /* Is our command still in the "commands" queue for the specific client?
+  */
+  _isCommandPending(clientID, flowID) {
+    // getClientCommands() is poorly named - it's only outgoing commands
+    // from us we have yet to write. For our purposes, we want to check
+    // them and commands previously written (which is in .commands)
+    let commands = [...this.service.clientsEngine.getClientCommands(clientID),
+                    ...this.service.clientsEngine.remoteClient(clientID).commands || []];
+    for (let command of commands) {
+      if (command.command != "repairRequest" || command.args.length != 1) {
+        continue;
+      }
+      let arg = command.args[0];
+      if (arg.collection == "bookmarks" && arg.request == "upload" &&
+          arg.flowID == flowID) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  // accessors for our prefs.
+  get _currentState() {
+    return this.prefs.get(PREF.CURRENT_STATE, STATE.NOT_REPAIRING);
+  }
+  set _currentState(newState) {
+    this.prefs.set(PREF.CURRENT_STATE, newState);
+  }
+
+  get _currentIDs() {
+    let ids = this.prefs.get(PREF.REPAIR_MISSING_IDS, "");
+    return ids.length ? ids.split(" ") : [];
+  }
+  set _currentIDs(arrayOfIDs) {
+    this.prefs.set(PREF.REPAIR_MISSING_IDS, arrayOfIDs.join(" "));
+  }
+
+  get _currentRemoteClient() {
+    return this.prefs.get(PREF.REPAIR_CURRENT_CLIENT);
+  }
+  set _currentRemoteClient(clientID) {
+    this.prefs.set(PREF.REPAIR_CURRENT_CLIENT, clientID);
+  }
+
+  get _flowID() {
+    return this.prefs.get(PREF.REPAIR_ID);
+  }
+  set _flowID(val) {
+    this.prefs.set(PREF.REPAIR_ID, val);
+  }
+
+  // use a function for this pref to offer somewhat sane semantics.
+  _getPreviousRemoteClients() {
+    let alreadyDone = this.prefs.get(PREF.REPAIR_PREVIOUS_CLIENTS, "");
+    return alreadyDone.length ? alreadyDone.split(" ") : [];
+  }
+  _addToPreviousRemoteClients(clientID) {
+    let arrayOfClientIDs = this._getPreviousRemoteClients();
+    arrayOfClientIDs.push(clientID);
+    this.prefs.set(PREF.REPAIR_PREVIOUS_CLIENTS, arrayOfClientIDs.join(" "));
+  }
+
+  /* Used for test mocks.
+  */
+  _now() {
+    // We use the server time, which is SECONDS
+    return AsyncResource.serverTime;
+  }
+}
--- a/services/sync/modules/collection_repair.js
+++ b/services/sync/modules/collection_repair.js
@@ -3,23 +3,27 @@
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 "use strict";
 
 const Cu = Components.utils;
 
 Cu.import("resource://gre/modules/XPCOMUtils.jsm");
 Cu.import("resource://services-sync/main.js");
 
+XPCOMUtils.defineLazyModuleGetter(this, "BookmarkRepairRequestor",
+  "resource://services-sync/bookmark_repair.js");
+
 this.EXPORTED_SYMBOLS = ["getRepairRequestor", "getAllRepairRequestors",
                          "CollectionRepairRequestor",
                          "getRepairResponder",
                          "CollectionRepairResponder"];
 
 // The individual requestors/responders, lazily loaded.
 const REQUESTORS = {
+  bookmarks: ["bookmark_repair.js", "BookmarkRepairRequestor"],
 }
 
 const RESPONDERS = {
 }
 
 // Should we maybe enforce the requestors being a singleton?
 function _getRepairConstructor(which, collection) {
   if (!(collection in which)) {
--- a/services/sync/moz.build
+++ b/services/sync/moz.build
@@ -14,16 +14,17 @@ XPCSHELL_TESTS_MANIFESTS += ['tests/unit
 EXTRA_COMPONENTS += [
     'SyncComponents.manifest',
     'Weave.js',
 ]
 
 EXTRA_JS_MODULES['services-sync'] += [
     'modules/addonsreconciler.js',
     'modules/addonutils.js',
+    'modules/bookmark_repair.js',
     'modules/bookmark_validator.js',
     'modules/browserid_identity.js',
     'modules/collection_repair.js',
     'modules/collection_validator.js',
     'modules/doctor.js',
     'modules/engines.js',
     'modules/keys.js',
     'modules/main.js',
new file mode 100644
--- /dev/null
+++ b/services/sync/tests/unit/test_bookmark_repair_requestor.js
@@ -0,0 +1,514 @@
+/* Any copyright is dedicated to the Public Domain.
+   http://creativecommons.org/publicdomain/zero/1.0/ */
+Cu.import("resource://services-sync/bookmark_repair.js");
+
+initTestLogging("Trace");
+
+function makeClientRecord(id, fields = {}) {
+  return {
+    id,
+    version: fields.version || "54.0a1",
+    type: fields.type || "desktop",
+    stale: fields.stale || false,
+    serverLastModified: fields.serverLastModified || 0,
+  }
+}
+
+class MockClientsEngine {
+  constructor(clientList) {
+    this._clientList = clientList;
+    this._sentCommands = {}
+  }
+
+  get remoteClients() {
+    return Object.values(this._clientList);
+  }
+
+  remoteClient(id) {
+    return this._clientList[id];
+  }
+
+  sendCommand(command, args, clientID) {
+    let cc = this._sentCommands[clientID] || [];
+    cc.push({ command, args });
+    this._sentCommands[clientID] = cc;
+  }
+
+  getClientCommands(clientID) {
+    return this._sentCommands[clientID] || [];
+  }
+}
+
+class MockIdentity {
+  hashedDeviceID(did) {
+    return did; // don't hash it to make testing easier.
+  }
+}
+
+class MockService {
+  constructor(clientList) {
+    this.clientsEngine = new MockClientsEngine(clientList);
+    this.identity = new MockIdentity();
+    this._recordedEvents = [];
+  }
+
+  recordTelemetryEvent(object, method, value, extra = undefined) {
+    this._recordedEvents.push({ method, object, value, extra });
+  }
+}
+
+function checkState(expected) {
+  equal(Services.prefs.getCharPref("services.sync.repairs.bookmarks.state"), expected);
+}
+
+function checkRepairFinished() {
+  try {
+    let state = Services.prefs.getCharPref("services.sync.repairs.bookmarks.state");
+    ok(false, state);
+  } catch (ex) {
+    ok(true, "no repair preference exists");
+  }
+}
+
+function checkOutgoingCommand(service, clientID) {
+  let sent = service.clientsEngine._sentCommands;
+  deepEqual(Object.keys(sent), [clientID]);
+  equal(sent[clientID].length, 1);
+  equal(sent[clientID][0].command, "repairRequest");
+}
+
+function NewBookmarkRepairRequestor(mockService) {
+  let req = new BookmarkRepairRequestor(mockService);
+  req._now = () => Date.now() / 1000; // _now() is seconds.
+  return req;
+}
+
+add_task(async function test_requestor_no_clients() {
+  let mockService = new MockService({ });
+  let requestor = NewBookmarkRepairRequestor(mockService);
+  let validationInfo = {
+    problems: {
+      missingChildren: [
+        {parent: "x", child: "a"},
+        {parent: "x", child: "b"},
+        {parent: "x", child: "c"}
+      ],
+      orphans: [],
+    }
+  }
+  let flowID = Utils.makeGUID();
+
+  requestor.startRepairs(validationInfo, flowID);
+  // there are no clients, so we should end up in "finished" (which we need to
+  // check via telemetry)
+  deepEqual(mockService._recordedEvents, [
+    { object: "repair",
+      method: "started",
+      value: undefined,
+      extra: { flowID, numIDs: 3 },
+    },
+    { object: "repair",
+      method: "finished",
+      value: undefined,
+      extra: { flowID, numIDs: 3, reason: undefined },
+    }
+  ]);
+});
+
+add_task(async function test_requestor_one_client_no_response() {
+  let mockService = new MockService({ "client-a": makeClientRecord("client-a") });
+  let requestor = NewBookmarkRepairRequestor(mockService);
+  let validationInfo = {
+    problems: {
+      missingChildren: [
+        {parent: "x", child: "a"},
+        {parent: "x", child: "b"},
+        {parent: "x", child: "c"}
+      ],
+      orphans: [],
+    }
+  }
+  let flowID = Utils.makeGUID();
+  requestor.startRepairs(validationInfo, flowID);
+  // the command should now be outgoing.
+  checkOutgoingCommand(mockService, "client-a");
+
+  checkState(requestor.STATE.SENT_REQUEST);
+  // asking it to continue stays in that state until we timeout or the command
+  // is removed.
+  requestor.continueRepairs();
+  checkState(requestor.STATE.SENT_REQUEST);
+
+  // now pretend that client synced.
+  mockService.clientsEngine._sentCommands = {};
+  requestor.continueRepairs();
+  checkState(requestor.STATE.SENT_SECOND_REQUEST);
+  // the command should be outgoing again.
+  checkOutgoingCommand(mockService, "client-a");
+
+  // pretend that client synced again without writing a command.
+  mockService.clientsEngine._sentCommands = {};
+  requestor.continueRepairs();
+  // There are no more clients, so we've given up.
+
+  checkRepairFinished();
+  deepEqual(mockService._recordedEvents, [
+    { object: "repair",
+      method: "started",
+      value: undefined,
+      extra: { flowID, numIDs: 3 },
+    },
+    { object: "repair",
+      method: "request",
+      value: "upload",
+      extra: { flowID, numIDs: 3, deviceID: "client-a" },
+    },
+    { object: "repair",
+      method: "request",
+      value: "upload",
+      extra: { flowID, numIDs: 3, deviceID: "client-a" },
+    },
+    { object: "repair",
+      method: "finished",
+      value: undefined,
+      extra: { flowID, numIDs: 3, reason: undefined },
+    }
+  ]);
+});
+
+add_task(async function test_requestor_one_client_no_sync() {
+  let mockService = new MockService({ "client-a": makeClientRecord("client-a") });
+  let requestor = NewBookmarkRepairRequestor(mockService);
+  let validationInfo = {
+    problems: {
+      missingChildren: [
+        {parent: "x", child: "a"},
+        {parent: "x", child: "b"},
+        {parent: "x", child: "c"}
+      ],
+      orphans: [],
+    }
+  }
+  let flowID = Utils.makeGUID();
+  requestor.startRepairs(validationInfo, flowID);
+  // the command should now be outgoing.
+  checkOutgoingCommand(mockService, "client-a");
+
+  checkState(requestor.STATE.SENT_REQUEST);
+
+  // pretend we are now in the future.
+  let theFuture = Date.now() + 300000000;
+  requestor._now = () => theFuture;
+
+  requestor.continueRepairs();
+
+  // We should be finished as we gave up in disgust.
+  checkRepairFinished();
+  deepEqual(mockService._recordedEvents, [
+    { object: "repair",
+      method: "started",
+      value: undefined,
+      extra: { flowID, numIDs: 3 },
+    },
+    { object: "repair",
+      method: "request",
+      value: "upload",
+      extra: { flowID, numIDs: 3, deviceID: "client-a" },
+    },
+    { object: "repair",
+      method: "abandon",
+      value: "silent",
+      extra: { flowID, deviceID: "client-a" },
+    },
+    { object: "repair",
+      method: "finished",
+      value: undefined,
+      extra: { flowID, numIDs: 3, reason: undefined },
+    }
+  ]);
+});
+
+add_task(async function test_requestor_latest_client_used() {
+  let mockService = new MockService({
+    "client-early": makeClientRecord("client-early", { serverLastModified: Date.now() - 10 }),
+    "client-late": makeClientRecord("client-late", { serverLastModified: Date.now() }),
+  });
+  let requestor = NewBookmarkRepairRequestor(mockService);
+  let validationInfo = {
+    problems: {
+      missingChildren: [
+        { parent: "x", child: "a" },
+      ],
+      orphans: [],
+    }
+  }
+  requestor.startRepairs(validationInfo, Utils.makeGUID());
+  // the repair command should be outgoing to the most-recent client.
+  checkOutgoingCommand(mockService, "client-late");
+  checkState(requestor.STATE.SENT_REQUEST);
+  // and this test is done - reset the repair.
+  requestor.prefs.resetBranch();
+});
+
+add_task(async function test_requestor_client_vanishes() {
+  let mockService = new MockService({
+    "client-a": makeClientRecord("client-a"),
+    "client-b": makeClientRecord("client-b"),
+  });
+  let requestor = NewBookmarkRepairRequestor(mockService);
+  let validationInfo = {
+    problems: {
+      missingChildren: [
+        {parent: "x", child: "a"},
+        {parent: "x", child: "b"},
+        {parent: "x", child: "c"}
+      ],
+      orphans: [],
+    }
+  }
+  let flowID = Utils.makeGUID();
+  requestor.startRepairs(validationInfo, flowID);
+  // the command should now be outgoing.
+  checkOutgoingCommand(mockService, "client-a");
+
+  checkState(requestor.STATE.SENT_REQUEST);
+
+  mockService.clientsEngine._sentCommands = {};
+  // Now let's pretend the client vanished.
+  delete mockService.clientsEngine._clientList["client-a"];
+
+  requestor.continueRepairs();
+  // We should have moved on to client-b.
+  checkState(requestor.STATE.SENT_REQUEST);
+  checkOutgoingCommand(mockService, "client-b");
+
+  // Now let's pretend client B wrote all missing IDs.
+  let response = {
+    collection: "bookmarks",
+    request: "upload",
+    flowID: requestor._flowID,
+    clientID: "client-b",
+    ids: ["a", "b", "c"],
+  }
+  requestor.continueRepairs(response);
+
+  // We should be finished as we got all our IDs.
+  checkRepairFinished();
+  deepEqual(mockService._recordedEvents, [
+    { object: "repair",
+      method: "started",
+      value: undefined,
+      extra: { flowID, numIDs: 3 },
+    },
+    { object: "repair",
+      method: "request",
+      value: "upload",
+      extra: { flowID, numIDs: 3, deviceID: "client-a" },
+    },
+    { object: "repair",
+      method: "abandon",
+      value: "missing",
+      extra: { flowID, deviceID: "client-a" },
+    },
+    { object: "repair",
+      method: "request",
+      value: "upload",
+      extra: { flowID, numIDs: 3, deviceID: "client-b" },
+    },
+    { object: "repair",
+      method: "response",
+      value: "upload",
+      extra: { flowID, deviceID: "client-b", numIDs: 3 },
+    },
+    { object: "repair",
+      method: "finished",
+      value: undefined,
+      extra: { flowID, numIDs: 0, reason: undefined },
+    }
+  ]);
+});
+
+add_task(async function test_requestor_success_responses() {
+  let mockService = new MockService({
+    "client-a": makeClientRecord("client-a"),
+    "client-b": makeClientRecord("client-b"),
+  });
+  let requestor = NewBookmarkRepairRequestor(mockService);
+  let validationInfo = {
+    problems: {
+      missingChildren: [
+        {parent: "x", child: "a"},
+        {parent: "x", child: "b"},
+        {parent: "x", child: "c"}
+      ],
+      orphans: [],
+    }
+  }
+  let flowID = Utils.makeGUID();
+  requestor.startRepairs(validationInfo, flowID);
+  // the command should now be outgoing.
+  checkOutgoingCommand(mockService, "client-a");
+
+  checkState(requestor.STATE.SENT_REQUEST);
+
+  mockService.clientsEngine._sentCommands = {};
+  // Now let's pretend the client wrote a response.
+  let response = {
+    collection: "bookmarks",
+    request: "upload",
+    clientID: "client-a",
+    flowID: requestor._flowID,
+    ids: ["a", "b"],
+  }
+  requestor.continueRepairs(response);
+  // We should have moved on to client 2.
+  checkState(requestor.STATE.SENT_REQUEST);
+  checkOutgoingCommand(mockService, "client-b");
+
+  // Now let's pretend client B write the missing ID.
+  response = {
+    collection: "bookmarks",
+    request: "upload",
+    clientID: "client-b",
+    flowID: requestor._flowID,
+    ids: ["c"],
+  }
+  requestor.continueRepairs(response);
+
+  // We should be finished as we got all our IDs.
+  checkRepairFinished();
+  deepEqual(mockService._recordedEvents, [
+    { object: "repair",
+      method: "started",
+      value: undefined,
+      extra: { flowID, numIDs: 3 },
+    },
+    { object: "repair",
+      method: "request",
+      value: "upload",
+      extra: { flowID, numIDs: 3, deviceID: "client-a" },
+    },
+    { object: "repair",
+      method: "response",
+      value: "upload",
+      extra: { flowID, deviceID: "client-a", numIDs: 2 },
+    },
+    { object: "repair",
+      method: "request",
+      value: "upload",
+      extra: { flowID, numIDs: 1, deviceID: "client-b" },
+    },
+    { object: "repair",
+      method: "response",
+      value: "upload",
+      extra: { flowID, deviceID: "client-b", numIDs: 1 },
+    },
+    { object: "repair",
+      method: "finished",
+      value: undefined,
+      extra: { flowID, numIDs: 0, reason: undefined },
+    }
+  ]);
+});
+
+add_task(async function test_client_suitability() {
+  let mockService = new MockService({
+    "client-a": makeClientRecord("client-a"),
+    "client-b": makeClientRecord("client-b", { type: "mobile" }),
+    "client-c": makeClientRecord("client-c", { version: "52.0a1" }),
+    "client-d": makeClientRecord("client-c", { version: "54.0a1" }),
+  });
+  let requestor = NewBookmarkRepairRequestor(mockService);
+  ok(requestor._isSuitableClient(mockService.clientsEngine.remoteClient("client-a")));
+  ok(!requestor._isSuitableClient(mockService.clientsEngine.remoteClient("client-b")));
+  ok(!requestor._isSuitableClient(mockService.clientsEngine.remoteClient("client-c")));
+  ok(requestor._isSuitableClient(mockService.clientsEngine.remoteClient("client-d")));
+});
+
+add_task(async function test_requestor_already_repairing_at_start() {
+  let mockService = new MockService({ });
+  let requestor = NewBookmarkRepairRequestor(mockService);
+  requestor.anyClientsRepairing = () => true;
+  let validationInfo = {
+    problems: {
+      missingChildren: [
+        {parent: "x", child: "a"},
+        {parent: "x", child: "b"},
+        {parent: "x", child: "c"}
+      ],
+      orphans: [],
+    }
+  }
+  let flowID = Utils.makeGUID();
+
+  ok(!requestor.startRepairs(validationInfo, flowID),
+     "Shouldn't start repairs");
+  equal(mockService._recordedEvents.length, 1);
+  equal(mockService._recordedEvents[0].method, "aborted");
+});
+
+add_task(async function test_requestor_already_repairing_continue() {
+  let clientB = makeClientRecord("client-b")
+  let mockService = new MockService({
+    "client-a": makeClientRecord("client-a"),
+    "client-b": clientB
+  });
+  let requestor = NewBookmarkRepairRequestor(mockService);
+  let validationInfo = {
+    problems: {
+      missingChildren: [
+        {parent: "x", child: "a"},
+        {parent: "x", child: "b"},
+        {parent: "x", child: "c"}
+      ],
+      orphans: [],
+    }
+  }
+  let flowID = Utils.makeGUID();
+  requestor.startRepairs(validationInfo, flowID);
+  // the command should now be outgoing.
+  checkOutgoingCommand(mockService, "client-a");
+
+  checkState(requestor.STATE.SENT_REQUEST);
+  mockService.clientsEngine._sentCommands = {};
+
+  // Now let's pretend the client wrote a response (it doesn't matter what's in here)
+  let response = {
+    collection: "bookmarks",
+    request: "upload",
+    clientID: "client-a",
+    flowID: requestor._flowID,
+    ids: ["a", "b"],
+  };
+
+  // and another client also started a request
+  clientB.commands = [{
+    args: [{ collection: "bookmarks", flowID: "asdf" }],
+    command: "repairRequest",
+  }];
+
+
+  requestor.continueRepairs(response);
+
+  // We should have aborted now
+  checkRepairFinished();
+  const expected = [
+    { method: "started",
+      object: "repair",
+      value: undefined,
+      extra: { flowID: flowID, numIDs: "3" },
+    },
+    { method: "request",
+      object: "repair",
+      value: "upload",
+      extra: { flowID: flowID, numIDs: "3", deviceID: "client-a" },
+    },
+    { method: "aborted",
+      object: "repair",
+      value: undefined,
+      extra: { flowID: flowID, numIDs: "3", reason: "other clients repairing" },
+    }
+  ];
+
+  deepEqual(mockService._recordedEvents, expected);
+});
--- a/services/sync/tests/unit/xpcshell.ini
+++ b/services/sync/tests/unit/xpcshell.ini
@@ -131,16 +131,17 @@ tags = addons
 [test_bookmark_duping.js]
 [test_bookmark_engine.js]
 [test_bookmark_invalid.js]
 [test_bookmark_legacy_microsummaries_support.js]
 [test_bookmark_livemarks.js]
 [test_bookmark_order.js]
 [test_bookmark_places_query_rewriting.js]
 [test_bookmark_record.js]
+[test_bookmark_repair_requestor.js]
 [test_bookmark_smart_bookmarks.js]
 [test_bookmark_store.js]
 [test_bookmark_decline_undecline.js]
 # Too many intermittent "ASSERTION: thread pool wasn't shutdown: '!mPool'" (bug 804479)
 skip-if = debug
 [test_bookmark_tracker.js]
 requesttimeoutfactor = 4
 [test_bookmark_validator.js]
--- a/tools/lint/eslint/modules.json
+++ b/tools/lint/eslint/modules.json
@@ -15,16 +15,17 @@
   "assertions.js": ["Assert", "Expect"],
   "async.js": ["Async"],
   "AsyncSpellCheckTestHelper.jsm": ["onSpellCheck"],
   "AutoMigrate.jsm": ["AutoMigrate"],
   "Battery.jsm": ["GetBattery", "Battery"],
   "blocklist-clients.js": ["AddonBlocklistClient", "GfxBlocklistClient", "OneCRLBlocklistClient", "PluginBlocklistClient"],
   "blocklist-updater.js": ["checkVersions", "addTestBlocklistClient"],
   "bogus_element_type.jsm": [],
+  "bookmark_repair.js": ["BookmarkRepairRequestor"],
   "bookmark_validator.js": ["BookmarkValidator", "BookmarkProblemData"],
   "bookmarks.js": ["BookmarksEngine", "PlacesItem", "Bookmark", "BookmarkFolder", "BookmarkQuery", "Livemark", "BookmarkSeparator"],
   "bookmarks.jsm": ["PlacesItem", "Bookmark", "Separator", "Livemark", "BookmarkFolder", "DumpBookmarks"],
   "BootstrapMonitor.jsm": ["monitor"],
   "browser-loader.js": ["BrowserLoader"],
   "browserid_identity.js": ["BrowserIDManager", "AuthenticationError"],
   "CertUtils.jsm": ["BadCertHandler", "checkCert", "readCertPrefs", "validateCert"],
   "clients.js": ["ClientEngine", "ClientsRec"],