servo: Merge #17061 - EventSource: decode UTF-8 code points across network packets (from servo:utf8); r=nox
author Simon Sapin <simon.sapin@exyr.org>
Tue, 30 May 2017 03:50:18 -0500
changeset 409419 4c881f05785de7b390f476fcc01a0be9317a989d
parent 409418 f50f39148516d431a605816c374498b0b8bfc9e0
child 409420 8d98a318d6ecf6ecdbcfdf6c30f9cd8254fdea32
push id 7391
push user mtabara@mozilla.com
push date Mon, 12 Jun 2017 13:08:53 +0000
treeherdermozilla-beta@2191d7f87e2e [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewers nox
milestone 55.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
servo: Merge #17061 - EventSource: decode UTF-8 code points across network packets (from servo:utf8); r=nox

Source-Repo: https://github.com/servo/servo
Source-Revision: 25d0c561413a42fd375ea04d891855581fdf5168
servo/Cargo.lock
servo/components/devtools/Cargo.toml
servo/components/devtools/actors/network_event.rs
servo/components/devtools/lib.rs
servo/components/script/Cargo.toml
servo/components/script/body.rs
servo/components/script/dom/blob.rs
servo/components/script/dom/eventsource.rs
servo/components/script/dom/textencoder.rs
servo/components/script/lib.rs
--- a/servo/Cargo.lock
+++ b/servo/Cargo.lock
@@ -627,17 +627,16 @@ dependencies = [
  "blurz 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 
 [[package]]
 name = "devtools"
 version = "0.0.1"
 dependencies = [
  "devtools_traits 0.0.1",
- "encoding 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)",
  "hyper 0.10.10 (registry+https://github.com/rust-lang/crates.io-index)",
  "hyper_serde 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)",
  "ipc-channel 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)",
  "log 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)",
  "msg 0.0.1",
  "serde 0.9.15 (registry+https://github.com/rust-lang/crates.io-index)",
  "serde_derive 0.9.15 (registry+https://github.com/rust-lang/crates.io-index)",
  "serde_json 0.9.10 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -2369,16 +2368,17 @@ dependencies = [
  "smallvec 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
  "style 0.0.1",
  "style_traits 0.0.1",
  "swapper 0.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
  "time 0.1.37 (registry+https://github.com/rust-lang/crates.io-index)",
  "tinyfiledialogs 2.5.9 (registry+https://github.com/rust-lang/crates.io-index)",
  "unicode-segmentation 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "url 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "utf-8 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "uuid 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "webrender_traits 0.39.0 (git+https://github.com/servo/webrender)",
  "webvr 0.0.1",
  "webvr_traits 0.0.1",
  "xml5ever 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 
 [[package]]
--- a/servo/components/devtools/Cargo.toml
+++ b/servo/components/devtools/Cargo.toml
@@ -6,17 +6,16 @@ license = "MPL-2.0"
 publish = false
 
 [lib]
 name = "devtools"
 path = "lib.rs"
 
 [dependencies]
 devtools_traits = {path = "../devtools_traits"}
-encoding = "0.2"
 hyper = "0.10"
 hyper_serde = "0.6"
 ipc-channel = "0.7"
 log = "0.3.5"
 msg = {path = "../msg"}
 serde = "0.9"
 serde_derive = "0.9"
 serde_json = "0.9"
--- a/servo/components/devtools/actors/network_event.rs
+++ b/servo/components/devtools/actors/network_event.rs
@@ -4,18 +4,16 @@
 
 //! Liberally derived from the [Firefox JS implementation]
 //! (http://mxr.mozilla.org/mozilla-central/source/toolkit/devtools/server/actors/webconsole.js).
 //! Handles interaction with the remote web console on network events (HTTP requests, responses) in Servo.
 
 use actor::{Actor, ActorMessageStatus, ActorRegistry};
 use devtools_traits::HttpRequest as DevtoolsHttpRequest;
 use devtools_traits::HttpResponse as DevtoolsHttpResponse;
-use encoding::all::UTF_8;
-use encoding::types::{DecoderTrap, Encoding};
 use hyper::header::{ContentType, Cookie};
 use hyper::header::Headers;
 use hyper::http::RawStatus;
 use hyper::method::Method;
 use protocol::JsonPacketStream;
 use serde_json::{Map, Value};
 use std::borrow::Cow;
 use std::net::TcpStream;
@@ -356,17 +354,17 @@ impl NetworkEventActor {
         self.request.connect_time = request.connect_time;
         self.request.send_time = request.send_time;
         self.is_xhr = request.is_xhr;
     }
 
     pub fn add_response(&mut self, response: DevtoolsHttpResponse) {
         self.response.headers = response.headers.clone();
         self.response.status = response.status.as_ref().map(|&(s, ref st)| {
-            let status_text = UTF_8.decode(st, DecoderTrap::Replace).unwrap();
+            let status_text = String::from_utf8_lossy(st).into_owned();
             RawStatus(s, Cow::from(status_text))
         });
         self.response.body = response.body.clone();
     }
 
     pub fn event_actor(&self) -> EventActor {
         // TODO: Send the correct values for startedDateTime, isXHR, private
         EventActor {
--- a/servo/components/devtools/lib.rs
+++ b/servo/components/devtools/lib.rs
@@ -10,17 +10,16 @@
 #![crate_name = "devtools"]
 #![crate_type = "rlib"]
 
 #![allow(non_snake_case)]
 #![deny(unsafe_code)]
 #![feature(box_syntax)]
 
 extern crate devtools_traits;
-extern crate encoding;
 extern crate hyper;
 extern crate ipc_channel;
 #[macro_use]
 extern crate log;
 extern crate msg;
 extern crate serde;
 #[macro_use]
 extern crate serde_derive;
--- a/servo/components/script/Cargo.toml
+++ b/servo/components/script/Cargo.toml
@@ -84,13 +84,14 @@ servo_rand = {path = "../rand"}
 servo_url = {path = "../url"}
 smallvec = "0.3"
 style = {path = "../style"}
 style_traits = {path = "../style_traits"}
 swapper = "0.0.4"
 time = "0.1.12"
 unicode-segmentation = "1.1.0"
 url = {version = "1.2", features = ["heap_size", "query_encoding"]}
+utf-8 = "0.7"
 uuid = {version = "0.4", features = ["v4"]}
 xml5ever = {version = "0.7", features = ["unstable"]}
 webrender_traits = {git = "https://github.com/servo/webrender", features = ["ipc"]}
 webvr = {path = "../webvr"}
 webvr_traits = {path = "../webvr_traits"}
--- a/servo/components/script/body.rs
+++ b/servo/components/script/body.rs
@@ -6,18 +6,16 @@ use dom::bindings::codegen::Bindings::Fo
 use dom::bindings::error::{Error, Fallible};
 use dom::bindings::js::Root;
 use dom::bindings::reflector::DomObject;
 use dom::bindings::str::USVString;
 use dom::blob::{Blob, BlobImpl};
 use dom::formdata::FormData;
 use dom::globalscope::GlobalScope;
 use dom::promise::Promise;
-use encoding::all::UTF_8;
-use encoding::types::{DecoderTrap, Encoding};
 use js::jsapi::JSContext;
 use js::jsapi::JS_ClearPendingException;
 use js::jsapi::JS_ParseJSON;
 use js::jsapi::Value as JSValue;
 use js::jsval::UndefinedValue;
 use mime::{Mime, TopLevel, SubLevel};
 use std::cell::Ref;
 use std::rc::Rc;
@@ -105,24 +103,23 @@ fn run_package_data_algorithm<T: BodyOpe
         BodyType::Text => run_text_data_algorithm(bytes),
         BodyType::Json => run_json_data_algorithm(cx, bytes),
         BodyType::Blob => run_blob_data_algorithm(&global, bytes, mime),
         BodyType::FormData => run_form_data_algorithm(&global, bytes, mime),
     }
 }
 
 fn run_text_data_algorithm(bytes: Vec<u8>) -> Fallible<FetchedData> {
-    let text = UTF_8.decode(&bytes, DecoderTrap::Replace).unwrap();
-    Ok(FetchedData::Text(text))
+    Ok(FetchedData::Text(String::from_utf8_lossy(&bytes).into_owned()))
 }
 
 #[allow(unsafe_code)]
 fn run_json_data_algorithm(cx: *mut JSContext,
                            bytes: Vec<u8>) -> Fallible<FetchedData> {
-    let json_text = UTF_8.decode(&bytes, DecoderTrap::Replace).unwrap();
+    let json_text = String::from_utf8_lossy(&bytes);
     let json_text: Vec<u16> = json_text.encode_utf16().collect();
     rooted!(in(cx) let mut rval = UndefinedValue());
     unsafe {
         if !JS_ParseJSON(cx,
                          json_text.as_ptr(),
                          json_text.len() as u32,
                          rval.handle_mut()) {
             JS_ClearPendingException(cx);
--- a/servo/components/script/dom/blob.rs
+++ b/servo/components/script/dom/blob.rs
@@ -7,18 +7,16 @@ use dom::bindings::codegen::Bindings::Bl
 use dom::bindings::codegen::Bindings::BlobBinding::BlobMethods;
 use dom::bindings::codegen::UnionTypes::BlobOrString;
 use dom::bindings::error::{Error, Fallible};
 use dom::bindings::js::{JS, Root};
 use dom::bindings::reflector::{DomObject, Reflector, reflect_dom_object};
 use dom::bindings::str::DOMString;
 use dom::globalscope::GlobalScope;
 use dom_struct::dom_struct;
-use encoding::all::UTF_8;
-use encoding::types::{EncoderTrap, Encoding};
 use ipc_channel::ipc;
 use net_traits::{CoreResourceMsg, IpcSend};
 use net_traits::blob_url_store::{BlobBuf, get_blob_origin};
 use net_traits::filemanager_thread::{FileManagerThreadMsg, ReadFileProgress, RelativePos};
 use std::mem;
 use std::ops::Index;
 use std::path::PathBuf;
 use uuid::Uuid;
@@ -332,22 +330,21 @@ fn read_file(global: &GlobalScope, id: U
 /// Extract bytes from BlobParts, used by Blob and File constructor
 /// https://w3c.github.io/FileAPI/#constructorBlob
 pub fn blob_parts_to_bytes(blobparts: Vec<BlobOrString>) -> Result<Vec<u8>, ()> {
     let mut ret = vec![];
 
     for blobpart in &blobparts {
         match blobpart {
             &BlobOrString::String(ref s) => {
-                let mut bytes = UTF_8.encode(s, EncoderTrap::Replace).map_err(|_|())?;
-                ret.append(&mut bytes);
+                ret.extend(s.as_bytes());
             },
             &BlobOrString::Blob(ref b) => {
-                let mut bytes = b.get_bytes().unwrap_or(vec![]);
-                ret.append(&mut bytes);
+                let bytes = b.get_bytes().unwrap_or(vec![]);
+                ret.extend(bytes);
             },
         }
     }
 
     Ok(ret)
 }
 
 impl BlobMethods for Blob {
--- a/servo/components/script/dom/eventsource.rs
+++ b/servo/components/script/dom/eventsource.rs
@@ -11,18 +11,16 @@ use dom::bindings::js::Root;
 use dom::bindings::refcounted::Trusted;
 use dom::bindings::reflector::{DomObject, reflect_dom_object};
 use dom::bindings::str::DOMString;
 use dom::event::Event;
 use dom::eventtarget::EventTarget;
 use dom::globalscope::GlobalScope;
 use dom::messageevent::MessageEvent;
 use dom_struct::dom_struct;
-use encoding::Encoding;
-use encoding::all::UTF_8;
 use euclid::length::Length;
 use hyper::header::{Accept, qitem};
 use ipc_channel::ipc;
 use ipc_channel::router::ROUTER;
 use js::conversions::ToJSValConvertible;
 use js::jsapi::JSAutoCompartment;
 use js::jsval::UndefinedValue;
 use mime::{Mime, TopLevel, SubLevel};
@@ -34,16 +32,17 @@ use script_thread::Runnable;
 use servo_atoms::Atom;
 use servo_url::ServoUrl;
 use std::cell::Cell;
 use std::mem;
 use std::str::{Chars, FromStr};
 use std::sync::{Arc, Mutex};
 use task_source::TaskSource;
 use timers::OneshotTimerCallback;
+use utf8;
 
 header! { (LastEventId, "Last-Event-ID") => [String] }
 
 const DEFAULT_RECONNECTION_TIME: u64 = 5000;
 
 #[derive(JSTraceable, PartialEq, Copy, Clone, Debug, HeapSizeOf)]
 struct GenerationId(u32);
 
@@ -71,16 +70,18 @@ pub struct EventSource {
 enum ParserState {
     Field,
     Comment,
     Value,
     Eol
 }
 
 struct EventSourceContext {
+    incomplete_utf8: Option<utf8::Incomplete>,
+
     event_source: Trusted<EventSource>,
     gen_id: GenerationId,
     action_sender: ipc::IpcSender<FetchResponseMsg>,
 
     parser_state: ParserState,
     field: String,
     value: String,
     origin: String,
@@ -288,22 +289,51 @@ impl FetchResponseListener for EventSour
             }
             Err(_) => {
                 self.reestablish_the_connection();
             }
         }
     }
 
     fn process_response_chunk(&mut self, chunk: Vec<u8>) {
-        let mut stream = String::new();
-        UTF_8.raw_decoder().raw_feed(&chunk, &mut stream);
-        self.parse(stream.chars())
+        let mut input = &*chunk;
+        if let Some(mut incomplete) = self.incomplete_utf8.take() {
+            match incomplete.try_complete(input) {
+                None => return,
+                Some((result, remaining_input)) => {
+                    self.parse(result.unwrap_or("\u{FFFD}").chars());
+                    input = remaining_input;
+                }
+            }
+        }
+
+        while !input.is_empty() {
+            match utf8::decode(&input) {
+                Ok(s) => {
+                    self.parse(s.chars());
+                    return
+                }
+                Err(utf8::DecodeError::Invalid { valid_prefix, remaining_input, .. }) => {
+                    self.parse(valid_prefix.chars());
+                    self.parse("\u{FFFD}".chars());
+                    input = remaining_input;
+                }
+                Err(utf8::DecodeError::Incomplete { valid_prefix, incomplete_suffix }) => {
+                    self.parse(valid_prefix.chars());
+                    self.incomplete_utf8 = Some(incomplete_suffix);
+                    return
+                }
+            }
+        }
     }
 
     fn process_response_eof(&mut self, _response: Result<(), NetworkError>) {
+        if let Some(_) = self.incomplete_utf8.take() {
+            self.parse("\u{FFFD}".chars());
+        }
         self.reestablish_the_connection();
     }
 }
 
 impl PreInvoke for EventSourceContext {
     fn should_invoke(&self) -> bool {
         self.event_source.root().generation_id.get() == self.gen_id
     }
@@ -373,16 +403,18 @@ impl EventSource {
         request.headers.set(Accept(vec![qitem(mime!(Text / EventStream))]));
         // Step 11
         request.cache_mode = CacheMode::NoStore;
         // Step 12
         *ev.request.borrow_mut() = Some(request.clone());
         // Step 14
         let (action_sender, action_receiver) = ipc::channel().unwrap();
         let context = EventSourceContext {
+            incomplete_utf8: None,
+
             event_source: Trusted::new(&ev),
             gen_id: ev.generation_id.get(),
             action_sender: action_sender.clone(),
 
             parser_state: ParserState::Eol,
             field: String::new(),
             value: String::new(),
             origin: String::new(),
--- a/servo/components/script/dom/textencoder.rs
+++ b/servo/components/script/dom/textencoder.rs
@@ -6,19 +6,16 @@ use core::nonzero::NonZero;
 use dom::bindings::codegen::Bindings::TextEncoderBinding;
 use dom::bindings::codegen::Bindings::TextEncoderBinding::TextEncoderMethods;
 use dom::bindings::error::Fallible;
 use dom::bindings::js::Root;
 use dom::bindings::reflector::{Reflector, reflect_dom_object};
 use dom::bindings::str::{DOMString, USVString};
 use dom::globalscope::GlobalScope;
 use dom_struct::dom_struct;
-use encoding::EncoderTrap;
-use encoding::Encoding;
-use encoding::all::UTF_8;
 use js::jsapi::{JSContext, JSObject};
 use js::typedarray::{Uint8Array, CreateWith};
 use std::ptr;
 
 #[dom_struct]
 pub struct TextEncoder {
     reflector_: Reflector,
 }
@@ -40,22 +37,22 @@ impl TextEncoder {
     pub fn Constructor(global: &GlobalScope) -> Fallible<Root<TextEncoder>> {
         Ok(TextEncoder::new(global))
     }
 }
 
 impl TextEncoderMethods for TextEncoder {
     // https://encoding.spec.whatwg.org/#dom-textencoder-encoding
     fn Encoding(&self) -> DOMString {
-        DOMString::from(UTF_8.name())
+        DOMString::from("utf-8")
     }
 
     #[allow(unsafe_code)]
     // https://encoding.spec.whatwg.org/#dom-textencoder-encode
     unsafe fn Encode(&self, cx: *mut JSContext, input: USVString) -> NonZero<*mut JSObject> {
-        let encoded = UTF_8.encode(&input.0, EncoderTrap::Strict).unwrap();
+        let encoded = input.0.as_bytes();
 
         rooted!(in(cx) let mut js_object = ptr::null_mut());
         assert!(Uint8Array::create(cx, CreateWith::Slice(&encoded), js_object.handle_mut()).is_ok());
 
         NonZero::new(js_object.get())
     }
 }
--- a/servo/components/script/lib.rs
+++ b/servo/components/script/lib.rs
@@ -97,16 +97,17 @@ extern crate smallvec;
 extern crate style;
 extern crate style_traits;
 extern crate swapper;
 extern crate time;
 #[cfg(any(target_os = "macos", target_os = "linux", target_os = "windows"))]
 extern crate tinyfiledialogs;
 extern crate unicode_segmentation;
 extern crate url;
+extern crate utf8;
 extern crate uuid;
 extern crate webrender_traits;
 extern crate webvr_traits;
 extern crate xml5ever;
 
 mod body;
 pub mod clipboard_provider;
 mod devtools;