Bug 1055102 - Properly handle Unicode in Bagheera payloads. r=bsmedberg, a=lmandel
authorGregory Szorc <gps@mozilla.com>
Tue, 19 Aug 2014 09:12:12 -0700
changeset 208353 4f18903bc230
parent 208352 61980c2f6177
child 208354 fa7360fe9779
push id3836
push userryanvm@gmail.com
push date2014-08-20 23:01 +0000
treeherdermozilla-beta@f5d4b16203aa [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersbsmedberg, lmandel
bugs1055102, 915850
milestone32.0
Bug 1055102 - Properly handle Unicode in Bagheera payloads. r=bsmedberg, a=lmandel It was observed that FHR was sending invalid JSON payloads to the server. Specifically, JSON payloads contained invalid Unicode strings. Investigation revealed that the culprint was CommonUtils.convertString() silently swallowing high bytes. When the Bagheera client went to gzip the JSON payload, the input buffer into gzip was missing high bytes. This patch changes the bagheera client to UTF-8 encode strings before gzip, thus ensuring all data is preserved. A corresponding change was also added to the mock bagheera server implementation. Alternatively, we could have changed CommonUtils.convertString() to be high byte aware. However, many consumers rely on this function. This patch is written with the intent of being uplifted and the change performed is targeted at the specific problem. Tests for Unicode preserving behavior have been added to both the generic Bagheera client and to FHR. The latter test is arguably not necessary, but peace of mind is a good thing, especially with FHR. See also bug 915850.
services/common/bagheeraclient.js
services/common/modules-testing/bagheeraserver.js
services/common/tests/unit/test_bagheera_client.js
services/healthreport/tests/xpcshell/test_healthreporter.js
--- a/services/common/bagheeraclient.js
+++ b/services/common/bagheeraclient.js
@@ -170,16 +170,20 @@ BagheeraClient.prototype = Object.freeze
     if (options.deleteIDs && options.deleteIDs.length > 0) {
       deleteIDs = options.deleteIDs;
       this._log.debug("Will delete " + deleteIDs.join(", "));
       request.setHeader("X-Obsolete-Document", deleteIDs.join(","));
     }
 
     let deferred = Promise.defer();
 
+    // The string converter service used by CommonUtils.convertString()
+    // silently throws away high bytes. We need to convert the string to
+    // consist of only low bytes first.
+    data = CommonUtils.encodeUTF8(data);
     data = CommonUtils.convertString(data, "uncompressed", "deflate");
     if (options.telemetryCompressed) {
       try {
         let h = Services.telemetry.getHistogramById(options.telemetryCompressed);
         h.add(data.length);
       } catch (ex) {
         this._log.warn("Unable to record telemetry for compressed payload size: " +
                        CommonUtils.exceptionStr(ex));
--- a/services/common/modules-testing/bagheeraserver.js
+++ b/services/common/modules-testing/bagheeraserver.js
@@ -251,16 +251,18 @@ BagheeraServer.prototype = {
     }
 
     if (ct.startsWith("application/json+zlib")) {
       this._log.debug("Uncompressing entity body with deflate.");
       requestBody = CommonUtils.convertString(requestBody, "deflate",
                                               "uncompressed");
     }
 
+    requestBody = CommonUtils.decodeUTF8(requestBody);
+
     this._log.debug("HTTP request body: " + requestBody);
 
     let doc;
     try {
       doc = JSON.parse(requestBody);
     } catch(ex) {
       this._log.info("JSON parse error.");
       throw HTTP_400;
--- a/services/common/tests/unit/test_bagheera_client.js
+++ b/services/common/tests/unit/test_bagheera_client.js
@@ -64,16 +64,40 @@ add_test(function test_post_json_bad_dat
     function onResult(result) {
     do_check_true(result.transportSuccess);
     do_check_false(result.serverSuccess);
 
     server.stop(run_next_test);
   });
 });
 
+add_task(function* test_unicode_payload() {
+  let [client, server] = getClientAndServer();
+  server.createNamespace("foo");
+
+  const EXPECTED = "πόλλ' οἶδ' ἀλώπηξ, ἀλλ' ἐχῖνος ἓν μέγα";
+
+  let result = yield client.uploadJSON("foo", "bar", {test: EXPECTED});
+  Assert.ok(result.transportSuccess);
+  Assert.ok(result.serverSuccess);
+
+  let p = server.getDocument("foo", "bar");
+  Assert.equal(p.test, EXPECTED);
+
+  result = yield client.uploadJSON("foo", "baz", JSON.stringify({test: EXPECTED}));
+  Assert.ok(result.transportSuccess);
+  Assert.ok(result.serverSuccess);
+  p = server.getDocument("foo", "baz");
+  Assert.equal(p.test, EXPECTED);
+
+  let deferred = Promise.defer();
+  server.stop(() => deferred.resolve());
+  yield deferred.promise;
+});
+
 add_task(function test_post_delete_multiple_obsolete_documents () {
   let [client, server] = getClientAndServer();
   let namespace = "foo";
   let documents = [
     [namespace, "one", "{v:1}"],
     [namespace, "two", "{v:2}"],
     [namespace, "three", "{v:3}"],
     [namespace, "four", "{v:4}"],
--- a/services/healthreport/tests/xpcshell/test_healthreporter.js
+++ b/services/healthreport/tests/xpcshell/test_healthreporter.js
@@ -1197,8 +1197,64 @@ add_task(function* test_nonstring_client
     yield reporter.init();
 
     do_check_neq(reporter._state.clientID, null);
     do_check_eq(reporter._state.clientID.length, 36);
   } finally {
     reporter._shutdown();
   }
 });
+
+function UnicodeMeasurement() {
+  Metrics.Measurement.call(this);
+}
+
+UnicodeMeasurement.prototype = {
+  __proto__: Metrics.Measurement.prototype,
+
+  name: "unicode",
+  version: 1,
+
+  fields: {
+    "last-text": {type: Metrics.Storage.FIELD_LAST_TEXT},
+  },
+};
+
+function UnicodeProvider() {
+  Metrics.Provider.call(this);
+}
+
+UnicodeProvider.prototype = {
+  __proto__: Metrics.Provider.prototype,
+
+  name: "unicode",
+
+  measurementTypes: [UnicodeMeasurement],
+
+  collectConstantData: function () {
+    return this.enqueueStorageOperation(() => {
+      let m = this.getMeasurement("unicode", 1);
+      return m.setLastText("last-text", "ᄃᄄᄅ");
+    });
+  },
+};
+
+// Check for proper handling of Unicode in payload.
+add_task(function* test_unicode_payload() {
+  let [reporter, server] = yield getReporterAndServer("unicode_payload");
+  try {
+    yield reporter._providerManager.registerProviderFromType(UnicodeProvider);
+
+    let deferred = Promise.defer();
+    let request = new DataSubmissionRequest(deferred, new Date());
+    reporter.requestDataUpload(request);
+    yield deferred.promise;
+    Assert.equal(request.state, request.SUBMISSION_SUCCESS);
+    Assert.ok(server.hasDocument(reporter.serverNamespace, reporter.lastSubmitID));
+
+    let p = server.getDocument(reporter.serverNamespace, reporter.lastSubmitID);
+    let v = p.data.last['unicode.unicode']['last-text'];
+    Assert.equal(v, "ᄃᄄᄅ");
+  } finally {
+    yield shutdownServer(server);
+    reporter._shutdown();
+  }
+});