servo: Merge #14794 - Refactor HSTSList to use HashMap (from mrnayak:hsts-refactor); r=emilio
authorRaghav <rmuddur@gmail.com>
Fri, 30 Dec 2016 10:33:40 -0800
changeset 340449 8b58f577c79e4bae767720e5f99ed3cc749560a6
parent 340448 fbfd4758421668a91c8f553214bc5efeea325cb4
child 340450 228278d0d319013c0ae6169ca1f3ccf8b8ca915a
push id31307
push usergszorc@mozilla.com
push dateSat, 04 Feb 2017 00:59:06 +0000
treeherdermozilla-central@94079d43835f [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersemilio
servo: Merge #14794 - Refactor HSTSList to use HashMap (from mrnayak:hsts-refactor); r=emilio Refactored HSTSList to use HashMap, where the key of HashMap is the base domain. Every time when we check if a host is secure, we find the base domain of the host and get a vector of HSTS entries associated with the base domain. While this will not give O(1) look up time, we would have a smaller list to iterate for every lookup. I have added one unit test to validate `HashMap` changes. <!-- Please describe your changes on the following line: --> --- <!-- Thank you for contributing to Servo! Please replace each `[ ]` by `[X]` when the step is complete, and replace `__` with appropriate data: --> - [X] `./mach build -d` does not report any errors - [X] `./mach test-tidy` does not report any errors - [X] These changes fix #14756 <!-- Either: --> - [X] There are tests for these changes OR <!-- Pull requests that do not address these steps are welcome, but they will require additional verification as part of the review process. --> Source-Repo: https://github.com/servo/servo Source-Revision: b06f4aec67ff93b1d667a5817084b0952e56664e
servo/components/net/hsts.rs
servo/tests/unit/net/hsts.rs
--- a/servo/components/net/hsts.rs
+++ b/servo/components/net/hsts.rs
@@ -1,15 +1,17 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use net_traits::IncludeSubdomains;
+use net_traits::pub_domains::reg_suffix;
 use rustc_serialize::json::decode;
 use servo_config::resource_files::read_resource_file;
+use std::collections::HashMap;
 use std::net::{Ipv4Addr, Ipv6Addr};
 use std::str::from_utf8;
 use time;
 use url::Url;
 
 #[derive(RustcDecodable, RustcEncodable, Clone)]
 pub struct HstsEntry {
     pub host: String,
@@ -31,92 +33,106 @@ impl HstsEntry {
             })
         }
     }
 
     pub fn is_expired(&self) -> bool {
         match (self.max_age, self.timestamp) {
             (Some(max_age), Some(timestamp)) => {
                 (time::get_time().sec as u64) - timestamp >= max_age
-            },
+            }
 
             _ => false
         }
     }
 
     fn matches_domain(&self, host: &str) -> bool {
         !self.is_expired() && self.host == host
     }
 
     fn matches_subdomain(&self, host: &str) -> bool {
         !self.is_expired() && host.ends_with(&format!(".{}", self.host))
     }
 }
 
 #[derive(RustcDecodable, RustcEncodable, Clone)]
 pub struct HstsList {
-    pub entries: Vec<HstsEntry>
+    pub entries_map: HashMap<String, Vec<HstsEntry>>,
 }
 
 impl HstsList {
     pub fn new() -> HstsList {
-        HstsList {
-            entries: vec![]
-        }
+        HstsList { entries_map: HashMap::new() }
     }
 
     /// Create an `HstsList` from the bytes of a JSON preload file.
     pub fn from_preload(preload_content: &[u8]) -> Option<HstsList> {
-        from_utf8(&preload_content)
+        #[derive(RustcDecodable)]
+        struct HstsEntries {
+            entries: Vec<HstsEntry>,
+        }
+
+        let hsts_entries: Option<HstsEntries> = from_utf8(&preload_content)
             .ok()
-            .and_then(|c| decode(c).ok())
+            .and_then(|c| decode(c).ok());
+
+        hsts_entries.map_or(None, |hsts_entries| {
+            let mut hsts_list: HstsList = HstsList::new();
+
+            for hsts_entry in hsts_entries.entries {
+                hsts_list.push(hsts_entry);
+            }
+
+            return Some(hsts_list);
+        })
     }
 
     pub fn from_servo_preload() -> HstsList {
         let file_bytes = read_resource_file("hsts_preload.json")
                             .expect("Could not find Servo HSTS preload file");
         HstsList::from_preload(&file_bytes)
             .expect("Servo HSTS preload file is invalid")
     }
 
     pub fn is_host_secure(&self, host: &str) -> bool {
-        // TODO - Should this be faster than O(n)? The HSTS list is only a few
-        // hundred or maybe thousand entries...
-        //
-        // Could optimise by searching for exact matches first (via a map or
-        // something), then checking for subdomains.
-        self.entries.iter().any(|e| {
-            if e.include_subdomains {
-                e.matches_subdomain(host) || e.matches_domain(host)
-            } else {
-                e.matches_domain(host)
-            }
+        let base_domain = reg_suffix(host);
+        self.entries_map.get(base_domain).map_or(false, |entries| {
+            entries.iter().any(|e| {
+                if e.include_subdomains {
+                    e.matches_subdomain(host) || e.matches_domain(host)
+                } else {
+                    e.matches_domain(host)
+                }
+            })
+        })
+    }
+
+    fn has_domain(&self, host: &str, base_domain: &str) -> bool {
+        self.entries_map.get(base_domain).map_or(false, |entries| {
+            entries.iter().any(|e| e.matches_domain(&host))
         })
     }
 
-    fn has_domain(&self, host: &str) -> bool {
-        self.entries.iter().any(|e| {
-            e.matches_domain(&host)
-        })
-    }
-
-    fn has_subdomain(&self, host: &str) -> bool {
-        self.entries.iter().any(|e| {
-            e.matches_subdomain(host)
-        })
+    fn has_subdomain(&self, host: &str, base_domain: &str) -> bool {
+       self.entries_map.get(base_domain).map_or(false, |entries| {
+            entries.iter().any(|e| e.matches_subdomain(host))
+       })
     }
 
     pub fn push(&mut self, entry: HstsEntry) {
-        let have_domain = self.has_domain(&entry.host);
-        let have_subdomain = self.has_subdomain(&entry.host);
+        let host = entry.host.clone();
+        let base_domain = reg_suffix(&host);
+        let have_domain = self.has_domain(&entry.host, base_domain);
+        let have_subdomain = self.has_subdomain(&entry.host, base_domain);
 
+        let entries = self.entries_map.entry(base_domain.to_owned()).or_insert(vec![]);
         if !have_domain && !have_subdomain {
-            self.entries.push(entry);
+            entries.push(entry);
         } else if !have_subdomain {
-            for e in &mut self.entries {
+            for e in entries {
                 if e.matches_domain(&entry.host) {
                     e.include_subdomains = entry.include_subdomains;
                     e.max_age = entry.max_age;
                 }
             }
         }
     }
 }
--- a/servo/tests/unit/net/hsts.rs
+++ b/servo/tests/unit/net/hsts.rs
@@ -1,15 +1,16 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use net::hsts::{HstsEntry, HstsList};
 use net::hsts::secure_url;
 use net_traits::IncludeSubdomains;
+use std::collections::HashMap;
 use time;
 use url::Url;
 
 #[test]
 fn test_hsts_entry_is_not_expired_when_it_has_no_timestamp() {
     let entry = HstsEntry {
         host: "mozilla.org".to_owned(),
         include_subdomains: false,
@@ -58,73 +59,100 @@ fn test_hsts_entry_cant_be_created_with_
     let entry = HstsEntry::new(
         "4.4.4.4".to_owned(), IncludeSubdomains::NotIncluded, None
     );
 
     assert!(entry.is_none(), "able to create HstsEntry with IPv4 host");
 }
 
 #[test]
-fn test_push_entry_with_0_max_age_evicts_entry_from_list() {
+fn test_base_domain_in_entries_map() {
+    let entries_map = HashMap::new();
+
     let mut list = HstsList {
-        entries: vec!(HstsEntry::new("mozilla.org".to_owned(),
-            IncludeSubdomains::NotIncluded, Some(500000u64)).unwrap())
+        entries_map: entries_map
+    };
+
+    list.push(HstsEntry::new("servo.mozilla.org".to_owned(),
+        IncludeSubdomains::NotIncluded, None).unwrap());
+    list.push(HstsEntry::new("firefox.mozilla.org".to_owned(),
+        IncludeSubdomains::NotIncluded, None).unwrap());
+    list.push(HstsEntry::new("bugzilla.org".to_owned(),
+        IncludeSubdomains::NotIncluded, None).unwrap());
+
+    assert_eq!(list.entries_map.len(), 2);
+    assert_eq!(list.entries_map.get("mozilla.org").unwrap().len(), 2);
+}
+
+#[test]
+fn test_push_entry_with_0_max_age_evicts_entry_from_list() {
+    let mut entries_map = HashMap::new();
+    entries_map.insert("mozilla.org".to_owned(), vec!(HstsEntry::new("mozilla.org".to_owned(),
+            IncludeSubdomains::NotIncluded, Some(500000u64)).unwrap()));
+    let mut list = HstsList {
+        entries_map: entries_map
     };
 
     list.push(HstsEntry::new("mozilla.org".to_owned(),
         IncludeSubdomains::NotIncluded, Some(0)).unwrap());
 
     assert!(list.is_host_secure("mozilla.org") == false)
 }
 
 #[test]
 fn test_push_entry_to_hsts_list_should_not_add_subdomains_whose_superdomain_is_already_matched() {
+    let mut entries_map = HashMap::new();
+    entries_map.insert("mozilla.org".to_owned(), vec!(HstsEntry::new("mozilla.org".to_owned(),
+            IncludeSubdomains::Included, None).unwrap()));
     let mut list = HstsList {
-        entries: vec!(HstsEntry::new("mozilla.org".to_owned(),
-            IncludeSubdomains::Included, None).unwrap())
+        entries_map: entries_map
     };
 
     list.push(HstsEntry::new("servo.mozilla.org".to_owned(),
         IncludeSubdomains::NotIncluded, None).unwrap());
 
-    assert!(list.entries.len() == 1)
+    assert!(list.entries_map.get("mozilla.org").unwrap().len() == 1)
 }
 
 #[test]
 fn test_push_entry_to_hsts_list_should_update_existing_domain_entrys_include_subdomains() {
+    let mut entries_map = HashMap::new();
+    entries_map.insert("mozilla.org".to_owned(), vec!(HstsEntry::new("mozilla.org".to_owned(),
+            IncludeSubdomains::Included, None).unwrap()));
     let mut list = HstsList {
-        entries: vec!(HstsEntry::new("mozilla.org".to_owned(),
-            IncludeSubdomains::Included, None).unwrap())
+        entries_map: entries_map
     };
 
     assert!(list.is_host_secure("servo.mozilla.org"));
 
     list.push(HstsEntry::new("mozilla.org".to_owned(),
         IncludeSubdomains::NotIncluded, None).unwrap());
 
     assert!(!list.is_host_secure("servo.mozilla.org"))
 }
 
 #[test]
 fn test_push_entry_to_hsts_list_should_not_create_duplicate_entry() {
+    let mut entries_map = HashMap::new();
+    entries_map.insert("mozilla.org".to_owned(), vec!(HstsEntry::new("mozilla.org".to_owned(),
+            IncludeSubdomains::NotIncluded, None).unwrap()));
     let mut list = HstsList {
-        entries: vec!(HstsEntry::new("mozilla.org".to_owned(),
-            IncludeSubdomains::NotIncluded, None).unwrap())
+        entries_map: entries_map
     };
 
     list.push(HstsEntry::new("mozilla.org".to_owned(),
         IncludeSubdomains::NotIncluded, None).unwrap());
 
-    assert!(list.entries.len() == 1)
+    assert!(list.entries_map.get("mozilla.org").unwrap().len() == 1)
 }
 
 #[test]
 fn test_push_multiple_entrie_to_hsts_list_should_add_them_all() {
     let mut list = HstsList {
-        entries: Vec::new()
+        entries_map: HashMap::new()
     };
 
     assert!(!list.is_host_secure("mozilla.org"));
     assert!(!list.is_host_secure("bugzilla.org"));
 
     list.push(HstsEntry::new("mozilla.org".to_owned(),
         IncludeSubdomains::Included, None).unwrap());
     list.push(HstsEntry::new("bugzilla.org".to_owned(),
@@ -132,17 +160,17 @@ fn test_push_multiple_entrie_to_hsts_lis
 
     assert!(list.is_host_secure("mozilla.org"));
     assert!(list.is_host_secure("bugzilla.org"));
 }
 
 #[test]
 fn test_push_entry_to_hsts_list_should_add_an_entry() {
     let mut list = HstsList {
-        entries: Vec::new()
+        entries_map: HashMap::new()
     };
 
     assert!(!list.is_host_secure("mozilla.org"));
 
     list.push(HstsEntry::new("mozilla.org".to_owned(),
         IncludeSubdomains::Included, None).unwrap());
 
     assert!(list.is_host_secure("mozilla.org"));
@@ -150,113 +178,126 @@ fn test_push_entry_to_hsts_list_should_a
 
 #[test]
 fn test_parse_hsts_preload_should_return_none_when_json_invalid() {
     let mock_preload_content = b"derp";
     assert!(HstsList::from_preload(mock_preload_content).is_none(), "invalid preload list should not have parsed")
 }
 
 #[test]
-fn test_parse_hsts_preload_should_return_none_when_json_contains_no_entries_key() {
+fn test_parse_hsts_preload_should_return_none_when_json_contains_no_entries_map_key() {
     let mock_preload_content = b"{\"nothing\": \"to see here\"}";
     assert!(HstsList::from_preload(mock_preload_content).is_none(), "invalid preload list should not have parsed")
 }
 
 #[test]
 fn test_parse_hsts_preload_should_decode_host_and_includes_subdomains() {
     let mock_preload_content = b"{\
                                      \"entries\": [\
                                         {\"host\": \"mozilla.org\",\
                                          \"include_subdomains\": false}\
                                      ]\
                                  }";
     let hsts_list = HstsList::from_preload(mock_preload_content);
-    let entries = hsts_list.unwrap().entries;
+    let entries_map = hsts_list.unwrap().entries_map;
 
-    assert_eq!(entries[0].host, "mozilla.org");
-    assert!(!entries[0].include_subdomains);
+    assert_eq!(entries_map.get("mozilla.org").unwrap()[0].host, "mozilla.org");
+    assert!(!entries_map.get("mozilla.org").unwrap()[0].include_subdomains);
 }
 
 #[test]
-fn test_hsts_list_with_no_entries_does_not_is_host_secure() {
+fn test_hsts_list_with_no_entries_map_does_not_is_host_secure() {
     let hsts_list = HstsList {
-        entries: Vec::new()
+        entries_map: HashMap::new()
     };
 
     assert!(!hsts_list.is_host_secure("mozilla.org"));
 }
 
 #[test]
 fn test_hsts_list_with_exact_domain_entry_is_is_host_secure() {
+    let mut entries_map = HashMap::new();
+    entries_map.insert("mozilla.org".to_owned(),  vec![HstsEntry::new("mozilla.org".to_owned(),
+            IncludeSubdomains::NotIncluded, None).unwrap()]);
+
     let hsts_list = HstsList {
-        entries: vec![HstsEntry::new("mozilla.org".to_owned(),
-            IncludeSubdomains::NotIncluded, None).unwrap()]
+        entries_map: entries_map
     };
 
     assert!(hsts_list.is_host_secure("mozilla.org"));
 }
 
 #[test]
 fn test_hsts_list_with_subdomain_when_include_subdomains_is_true_is_is_host_secure() {
+    let mut entries_map = HashMap::new();
+    entries_map.insert("mozilla.org".to_owned(), vec![HstsEntry::new("mozilla.org".to_owned(),
+            IncludeSubdomains::Included, None).unwrap()]);
     let hsts_list = HstsList {
-        entries: vec![HstsEntry::new("mozilla.org".to_owned(),
-            IncludeSubdomains::Included, None).unwrap()]
+        entries_map: entries_map
     };
 
     assert!(hsts_list.is_host_secure("servo.mozilla.org"));
 }
 
 #[test]
 fn test_hsts_list_with_subdomain_when_include_subdomains_is_false_is_not_is_host_secure() {
+    let mut entries_map = HashMap::new();
+    entries_map.insert("mozilla.org".to_owned(), vec![HstsEntry::new("mozilla.org".to_owned(),
+            IncludeSubdomains::NotIncluded, None).unwrap()]);
     let hsts_list = HstsList {
-        entries: vec![HstsEntry::new("mozilla.org".to_owned(),
-            IncludeSubdomains::NotIncluded, None).unwrap()]
+        entries_map: entries_map
     };
 
     assert!(!hsts_list.is_host_secure("servo.mozilla.org"));
 }
 
 #[test]
 fn test_hsts_list_with_subdomain_when_host_is_not_a_subdomain_is_not_is_host_secure() {
+    let mut entries_map = HashMap::new();
+    entries_map.insert("mozilla.org".to_owned(), vec![HstsEntry::new("mozilla.org".to_owned(),
+            IncludeSubdomains::Included, None).unwrap()]);
     let hsts_list = HstsList {
-        entries: vec![HstsEntry::new("mozilla.org".to_owned(),
-            IncludeSubdomains::Included, None).unwrap()]
+        entries_map: entries_map
     };
 
     assert!(!hsts_list.is_host_secure("servo-mozilla.org"));
 }
 
 #[test]
 fn test_hsts_list_with_subdomain_when_host_is_exact_match_is_is_host_secure() {
+    let mut entries_map = HashMap::new();
+    entries_map.insert("mozilla.org".to_owned(), vec![HstsEntry::new("mozilla.org".to_owned(),
+            IncludeSubdomains::Included, None).unwrap()]);
     let hsts_list = HstsList {
-        entries: vec![HstsEntry::new("mozilla.org".to_owned(),
-            IncludeSubdomains::Included, None).unwrap()]
+        entries_map: entries_map
     };
 
     assert!(hsts_list.is_host_secure("mozilla.org"));
 }
 
 #[test]
 fn test_hsts_list_with_expired_entry_is_not_is_host_secure() {
-    let hsts_list = HstsList {
-        entries: vec![HstsEntry {
+    let mut entries_map = HashMap::new();
+    entries_map.insert("mozilla.org".to_owned(), vec![HstsEntry {
             host: "mozilla.org".to_owned(),
             include_subdomains: false,
             max_age: Some(20),
             timestamp: Some(time::get_time().sec as u64 - 100u64)
-        }]
+        }]);
+    let hsts_list = HstsList {
+        entries_map: entries_map
     };
 
     assert!(!hsts_list.is_host_secure("mozilla.org"));
 }
 
 #[test]
 fn test_preload_hsts_domains_well_formed() {
     let hsts_list = HstsList::from_servo_preload();
-    assert!(!hsts_list.entries.is_empty());
+    assert!(!hsts_list.entries_map.is_empty());
 }
 
 #[test]
 fn test_secure_url_does_not_change_explicit_port() {
     let url = Url::parse("http://mozilla.org:8080/").unwrap();
     let secure = secure_url(&url);
 
     assert!(secure.port().unwrap() == 8080u16);