servo: Merge #17061 - EventSource: decode UTF-8 code points across network packets (from servo:utf8); r=nox
author Simon Sapin <simon.sapin@exyr.org>
Tue, 30 May 2017 03:50:18 -0500
changeset 1141447 4c881f05785de7b390f476fcc01a0be9317a989d
parent 1141446 f50f39148516d431a605816c374498b0b8bfc9e0
child 1141448 8d98a318d6ecf6ecdbcfdf6c30f9cd8254fdea32
push id 193843
push user aramine@mozilla.com
push date Tue, 30 May 2017 14:10:14 +0000
treeherder try@d3ab8b33176a [default view] [failures only]
reviewers nox
milestone 55.0a1
servo: Merge #17061 - EventSource: decode UTF-8 code points across network packets (from servo:utf8); r=nox
Source-Repo: https://github.com/servo/servo
Source-Revision: 25d0c561413a42fd375ea04d891855581fdf5168
servo/Cargo.lock
servo/components/devtools/Cargo.toml
servo/components/devtools/actors/network_event.rs
servo/components/devtools/lib.rs
servo/components/script/Cargo.toml
servo/components/script/body.rs
servo/components/script/dom/blob.rs
servo/components/script/dom/eventsource.rs
servo/components/script/dom/textencoder.rs
servo/components/script/lib.rs
--- a/servo/Cargo.lock
+++ b/servo/Cargo.lock
@@ -627,17 +627,16 @@ dependencies = [
  "blurz 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 
 [[package]]
 name = "devtools"
 version = "0.0.1"
 dependencies = [
  "devtools_traits 0.0.1",
- "encoding 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)",
  "hyper 0.10.10 (registry+https://github.com/rust-lang/crates.io-index)",
  "hyper_serde 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)",
  "ipc-channel 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)",
  "log 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)",
  "msg 0.0.1",
  "serde 0.9.15 (registry+https://github.com/rust-lang/crates.io-index)",
  "serde_derive 0.9.15 (registry+https://github.com/rust-lang/crates.io-index)",
  "serde_json 0.9.10 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -2369,16 +2368,17 @@ dependencies = [
  "smallvec 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
  "style 0.0.1",
  "style_traits 0.0.1",
  "swapper 0.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
  "time 0.1.37 (registry+https://github.com/rust-lang/crates.io-index)",
  "tinyfiledialogs 2.5.9 (registry+https://github.com/rust-lang/crates.io-index)",
  "unicode-segmentation 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "url 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "utf-8 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "uuid 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "webrender_traits 0.39.0 (git+https://github.com/servo/webrender)",
  "webvr 0.0.1",
  "webvr_traits 0.0.1",
  "xml5ever 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 
 [[package]]
--- a/servo/components/devtools/Cargo.toml
+++ b/servo/components/devtools/Cargo.toml
@@ -6,17 +6,16 @@ license = "MPL-2.0"
 publish = false
 
 [lib]
 name = "devtools"
 path = "lib.rs"
 
 [dependencies]
 devtools_traits = {path = "../devtools_traits"}
-encoding = "0.2"
 hyper = "0.10"
 hyper_serde = "0.6"
 ipc-channel = "0.7"
 log = "0.3.5"
 msg = {path = "../msg"}
 serde = "0.9"
 serde_derive = "0.9"
 serde_json = "0.9"
--- a/servo/components/devtools/actors/network_event.rs
+++ b/servo/components/devtools/actors/network_event.rs
@@ -4,18 +4,16 @@
 
 //! Liberally derived from the [Firefox JS implementation]
 //! (http://mxr.mozilla.org/mozilla-central/source/toolkit/devtools/server/actors/webconsole.js).
 //! Handles interaction with the remote web console on network events (HTTP requests, responses) in Servo.
 
 use actor::{Actor, ActorMessageStatus, ActorRegistry};
 use devtools_traits::HttpRequest as DevtoolsHttpRequest;
 use devtools_traits::HttpResponse as DevtoolsHttpResponse;
-use encoding::all::UTF_8;
-use encoding::types::{DecoderTrap, Encoding};
 use hyper::header::{ContentType, Cookie};
 use hyper::header::Headers;
 use hyper::http::RawStatus;
 use hyper::method::Method;
 use protocol::JsonPacketStream;
 use serde_json::{Map, Value};
 use std::borrow::Cow;
 use std::net::TcpStream;
@@ -356,17 +354,17 @@ impl NetworkEventActor {
         self.request.connect_time = request.connect_time;
         self.request.send_time = request.send_time;
         self.is_xhr = request.is_xhr;
     }
 
     pub fn add_response(&mut self, response: DevtoolsHttpResponse) {
         self.response.headers = response.headers.clone();
         self.response.status = response.status.as_ref().map(|&(s, ref st)| {
-            let status_text = UTF_8.decode(st, DecoderTrap::Replace).unwrap();
+            let status_text = String::from_utf8_lossy(st).into_owned();
             RawStatus(s, Cow::from(status_text))
         });
         self.response.body = response.body.clone();
     }
 
     pub fn event_actor(&self) -> EventActor {
         // TODO: Send the correct values for startedDateTime, isXHR, private
         EventActor {
--- a/servo/components/devtools/lib.rs
+++ b/servo/components/devtools/lib.rs
@@ -10,17 +10,16 @@
 #![crate_name = "devtools"]
 #![crate_type = "rlib"]
 
 #![allow(non_snake_case)]
 #![deny(unsafe_code)]
 #![feature(box_syntax)]
 
 extern crate devtools_traits;
-extern crate encoding;
 extern crate hyper;
 extern crate ipc_channel;
 #[macro_use]
 extern crate log;
 extern crate msg;
 extern crate serde;
 #[macro_use]
 extern crate serde_derive;
--- a/servo/components/script/Cargo.toml
+++ b/servo/components/script/Cargo.toml
@@ -84,13 +84,14 @@ servo_rand = {path = "../rand"}
 servo_url = {path = "../url"}
 smallvec = "0.3"
 style = {path = "../style"}
 style_traits = {path = "../style_traits"}
 swapper = "0.0.4"
 time = "0.1.12"
 unicode-segmentation = "1.1.0"
 url = {version = "1.2", features = ["heap_size", "query_encoding"]}
+utf-8 = "0.7"
 uuid = {version = "0.4", features = ["v4"]}
 xml5ever = {version = "0.7", features = ["unstable"]}
 webrender_traits = {git = "https://github.com/servo/webrender", features = ["ipc"]}
 webvr = {path = "../webvr"}
 webvr_traits = {path = "../webvr_traits"}
--- a/servo/components/script/body.rs
+++ b/servo/components/script/body.rs
@@ -6,18 +6,16 @@ use dom::bindings::codegen::Bindings::Fo
 use dom::bindings::error::{Error, Fallible};
 use dom::bindings::js::Root;
 use dom::bindings::reflector::DomObject;
 use dom::bindings::str::USVString;
 use dom::blob::{Blob, BlobImpl};
 use dom::formdata::FormData;
 use dom::globalscope::GlobalScope;
 use dom::promise::Promise;
-use encoding::all::UTF_8;
-use encoding::types::{DecoderTrap, Encoding};
 use js::jsapi::JSContext;
 use js::jsapi::JS_ClearPendingException;
 use js::jsapi::JS_ParseJSON;
 use js::jsapi::Value as JSValue;
 use js::jsval::UndefinedValue;
 use mime::{Mime, TopLevel, SubLevel};
 use std::cell::Ref;
 use std::rc::Rc;
@@ -105,24 +103,23 @@ fn run_package_data_algorithm<T: BodyOpe
         BodyType::Text => run_text_data_algorithm(bytes),
         BodyType::Json => run_json_data_algorithm(cx, bytes),
         BodyType::Blob => run_blob_data_algorithm(&global, bytes, mime),
         BodyType::FormData => run_form_data_algorithm(&global, bytes, mime),
     }
 }
 
 fn run_text_data_algorithm(bytes: Vec<u8>) -> Fallible<FetchedData> {
-    let text = UTF_8.decode(&bytes, DecoderTrap::Replace).unwrap();
-    Ok(FetchedData::Text(text))
+    Ok(FetchedData::Text(String::from_utf8_lossy(&bytes).into_owned()))
 }
 
 #[allow(unsafe_code)]
 fn run_json_data_algorithm(cx: *mut JSContext,
                            bytes: Vec<u8>) -> Fallible<FetchedData> {
-    let json_text = UTF_8.decode(&bytes, DecoderTrap::Replace).unwrap();
+    let json_text = String::from_utf8_lossy(&bytes);
     let json_text: Vec<u16> = json_text.encode_utf16().collect();
     rooted!(in(cx) let mut rval = UndefinedValue());
     unsafe {
         if !JS_ParseJSON(cx,
                          json_text.as_ptr(),
                          json_text.len() as u32,
                          rval.handle_mut()) {
             JS_ClearPendingException(cx);
--- a/servo/components/script/dom/blob.rs
+++ b/servo/components/script/dom/blob.rs
@@ -7,18 +7,16 @@ use dom::bindings::codegen::Bindings::Bl
 use dom::bindings::codegen::Bindings::BlobBinding::BlobMethods;
 use dom::bindings::codegen::UnionTypes::BlobOrString;
 use dom::bindings::error::{Error, Fallible};
 use dom::bindings::js::{JS, Root};
 use dom::bindings::reflector::{DomObject, Reflector, reflect_dom_object};
 use dom::bindings::str::DOMString;
 use dom::globalscope::GlobalScope;
 use dom_struct::dom_struct;
-use encoding::all::UTF_8;
-use encoding::types::{EncoderTrap, Encoding};
 use ipc_channel::ipc;
 use net_traits::{CoreResourceMsg, IpcSend};
 use net_traits::blob_url_store::{BlobBuf, get_blob_origin};
 use net_traits::filemanager_thread::{FileManagerThreadMsg, ReadFileProgress, RelativePos};
 use std::mem;
 use std::ops::Index;
 use std::path::PathBuf;
 use uuid::Uuid;
@@ -332,22 +330,21 @@ fn read_file(global: &GlobalScope, id: U
 /// Extract bytes from BlobParts, used by Blob and File constructor
 /// https://w3c.github.io/FileAPI/#constructorBlob
 pub fn blob_parts_to_bytes(blobparts: Vec<BlobOrString>) -> Result<Vec<u8>, ()> {
     let mut ret = vec![];
 
     for blobpart in &blobparts {
         match blobpart {
             &BlobOrString::String(ref s) => {
-                let mut bytes = UTF_8.encode(s, EncoderTrap::Replace).map_err(|_|())?;
-                ret.append(&mut bytes);
+                ret.extend(s.as_bytes());
             },
             &BlobOrString::Blob(ref b) => {
-                let mut bytes = b.get_bytes().unwrap_or(vec![]);
-                ret.append(&mut bytes);
+                let bytes = b.get_bytes().unwrap_or(vec![]);
+                ret.extend(bytes);
             },
         }
     }
 
     Ok(ret)
 }
 
 impl BlobMethods for Blob {
--- a/servo/components/script/dom/eventsource.rs
+++ b/servo/components/script/dom/eventsource.rs
@@ -11,18 +11,16 @@ use dom::bindings::js::Root;
 use dom::bindings::refcounted::Trusted;
 use dom::bindings::reflector::{DomObject, reflect_dom_object};
 use dom::bindings::str::DOMString;
 use dom::event::Event;
 use dom::eventtarget::EventTarget;
 use dom::globalscope::GlobalScope;
 use dom::messageevent::MessageEvent;
 use dom_struct::dom_struct;
-use encoding::Encoding;
-use encoding::all::UTF_8;
 use euclid::length::Length;
 use hyper::header::{Accept, qitem};
 use ipc_channel::ipc;
 use ipc_channel::router::ROUTER;
 use js::conversions::ToJSValConvertible;
 use js::jsapi::JSAutoCompartment;
 use js::jsval::UndefinedValue;
 use mime::{Mime, TopLevel, SubLevel};
@@ -34,16 +32,17 @@ use script_thread::Runnable;
 use servo_atoms::Atom;
 use servo_url::ServoUrl;
 use std::cell::Cell;
 use std::mem;
 use std::str::{Chars, FromStr};
 use std::sync::{Arc, Mutex};
 use task_source::TaskSource;
 use timers::OneshotTimerCallback;
+use utf8;
 
 header! { (LastEventId, "Last-Event-ID") => [String] }
 
 const DEFAULT_RECONNECTION_TIME: u64 = 5000;
 
 #[derive(JSTraceable, PartialEq, Copy, Clone, Debug, HeapSizeOf)]
 struct GenerationId(u32);
 
@@ -71,16 +70,18 @@ pub struct EventSource {
 enum ParserState {
     Field,
     Comment,
     Value,
     Eol
 }
 
 struct EventSourceContext {
+    incomplete_utf8: Option<utf8::Incomplete>,
+
     event_source: Trusted<EventSource>,
     gen_id: GenerationId,
     action_sender: ipc::IpcSender<FetchResponseMsg>,
 
     parser_state: ParserState,
     field: String,
     value: String,
     origin: String,
@@ -288,22 +289,51 @@ impl FetchResponseListener for EventSour
             }
             Err(_) => {
                 self.reestablish_the_connection();
             }
         }
     }
 
     fn process_response_chunk(&mut self, chunk: Vec<u8>) {
-        let mut stream = String::new();
-        UTF_8.raw_decoder().raw_feed(&chunk, &mut stream);
-        self.parse(stream.chars())
+        let mut input = &*chunk;
+        if let Some(mut incomplete) = self.incomplete_utf8.take() {
+            match incomplete.try_complete(input) {
+                None => return,
+                Some((result, remaining_input)) => {
+                    self.parse(result.unwrap_or("\u{FFFD}").chars());
+                    input = remaining_input;
+                }
+            }
+        }
+
+        while !input.is_empty() {
+            match utf8::decode(&input) {
+                Ok(s) => {
+                    self.parse(s.chars());
+                    return
+                }
+                Err(utf8::DecodeError::Invalid { valid_prefix, remaining_input, .. }) => {
+                    self.parse(valid_prefix.chars());
+                    self.parse("\u{FFFD}".chars());
+                    input = remaining_input;
+                }
+                Err(utf8::DecodeError::Incomplete { valid_prefix, incomplete_suffix }) => {
+                    self.parse(valid_prefix.chars());
+                    self.incomplete_utf8 = Some(incomplete_suffix);
+                    return
+                }
+            }
+        }
     }
 
     fn process_response_eof(&mut self, _response: Result<(), NetworkError>) {
+        if let Some(_) = self.incomplete_utf8.take() {
+            self.parse("\u{FFFD}".chars());
+        }
         self.reestablish_the_connection();
     }
 }
 
 impl PreInvoke for EventSourceContext {
     fn should_invoke(&self) -> bool {
         self.event_source.root().generation_id.get() == self.gen_id
     }
@@ -373,16 +403,18 @@ impl EventSource {
         request.headers.set(Accept(vec![qitem(mime!(Text / EventStream))]));
         // Step 11
         request.cache_mode = CacheMode::NoStore;
         // Step 12
         *ev.request.borrow_mut() = Some(request.clone());
         // Step 14
         let (action_sender, action_receiver) = ipc::channel().unwrap();
         let context = EventSourceContext {
+            incomplete_utf8: None,
+
             event_source: Trusted::new(&ev),
             gen_id: ev.generation_id.get(),
             action_sender: action_sender.clone(),
 
             parser_state: ParserState::Eol,
             field: String::new(),
             value: String::new(),
             origin: String::new(),
--- a/servo/components/script/dom/textencoder.rs
+++ b/servo/components/script/dom/textencoder.rs
@@ -6,19 +6,16 @@ use core::nonzero::NonZero;
 use dom::bindings::codegen::Bindings::TextEncoderBinding;
 use dom::bindings::codegen::Bindings::TextEncoderBinding::TextEncoderMethods;
 use dom::bindings::error::Fallible;
 use dom::bindings::js::Root;
 use dom::bindings::reflector::{Reflector, reflect_dom_object};
 use dom::bindings::str::{DOMString, USVString};
 use dom::globalscope::GlobalScope;
 use dom_struct::dom_struct;
-use encoding::EncoderTrap;
-use encoding::Encoding;
-use encoding::all::UTF_8;
 use js::jsapi::{JSContext, JSObject};
 use js::typedarray::{Uint8Array, CreateWith};
 use std::ptr;
 
 #[dom_struct]
 pub struct TextEncoder {
     reflector_: Reflector,
 }
@@ -40,22 +37,22 @@ impl TextEncoder {
     pub fn Constructor(global: &GlobalScope) -> Fallible<Root<TextEncoder>> {
         Ok(TextEncoder::new(global))
     }
 }
 
 impl TextEncoderMethods for TextEncoder {
     // https://encoding.spec.whatwg.org/#dom-textencoder-encoding
     fn Encoding(&self) -> DOMString {
-        DOMString::from(UTF_8.name())
+        DOMString::from("utf-8")
     }
 
     #[allow(unsafe_code)]
     // https://encoding.spec.whatwg.org/#dom-textencoder-encode
     unsafe fn Encode(&self, cx: *mut JSContext, input: USVString) -> NonZero<*mut JSObject> {
-        let encoded = UTF_8.encode(&input.0, EncoderTrap::Strict).unwrap();
+        let encoded = input.0.as_bytes();
 
         rooted!(in(cx) let mut js_object = ptr::null_mut());
         assert!(Uint8Array::create(cx, CreateWith::Slice(&encoded), js_object.handle_mut()).is_ok());
 
         NonZero::new(js_object.get())
     }
 }
--- a/servo/components/script/lib.rs
+++ b/servo/components/script/lib.rs
@@ -97,16 +97,17 @@ extern crate smallvec;
 extern crate style;
 extern crate style_traits;
 extern crate swapper;
 extern crate time;
 #[cfg(any(target_os = "macos", target_os = "linux", target_os = "windows"))]
 extern crate tinyfiledialogs;
 extern crate unicode_segmentation;
 extern crate url;
+extern crate utf8;
 extern crate uuid;
 extern crate webrender_traits;
 extern crate webvr_traits;
 extern crate xml5ever;
 
 mod body;
 pub mod clipboard_provider;
 mod devtools;