Bug 1497898 - Use testfile mtimes to pre-filter files iterated over for the manifest update, r=ato
☠☠ backed out by fe1c2bb6cfbc ☠ ☠
author: Ahilya Sinha <ahilyasinha99@gmail.com>
Thu, 11 Oct 2018 12:26:30 +0000
changeset 500618 5e3b8ad4c8f42342d9d16723236261501aafd040
parent 500617 95679778f4ed58a60e3c5befcb99a9f38661234a
child 500619 9afac925aef8a815e674ffb2e56c4220f7913004
push id: 1864
push user: ffxbld-merge
push date: Mon, 03 Dec 2018 15:51:40 +0000
treeherder: mozilla-release@f040763d99ad [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: ato
bugs: 1497898
milestone: 64.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1497898 - Use testfile mtimes to pre-filter files iterated over for the manifest update, r=ato Differential Revision: https://phabricator.services.mozilla.com/D8221
testing/web-platform/tests/tools/manifest/commands.json
testing/web-platform/tests/tools/manifest/item.py
testing/web-platform/tests/tools/manifest/manifest.py
testing/web-platform/tests/tools/manifest/tests/test_manifest.py
testing/web-platform/tests/tools/manifest/update.py
testing/web-platform/tests/tools/manifest/vcs.py
--- a/testing/web-platform/tests/tools/manifest/commands.json
+++ b/testing/web-platform/tests/tools/manifest/commands.json
@@ -1,6 +1,5 @@
 {"manifest":
  {"path": "update.py", "script": "run", "parser": "create_parser", "help": "Update the MANIFEST.json file",
   "virtualenv": false},
  "manifest-download":
- {"path": "download.py", "script": "run", "parser": "create_parser", "help": "Download recent pregenerated MANIFEST.json file",
-  "virtualenv": false}}
+ {"path": "download.py", "script": "run", "parser": "create_parser", "help": "Download recent pregenerated MANIFEST.json file", "virtualenv": false}}
--- a/testing/web-platform/tests/tools/manifest/item.py
+++ b/testing/web-platform/tests/tools/manifest/item.py
@@ -1,25 +1,27 @@
 from six.moves.urllib.parse import urljoin, urlparse
 from abc import ABCMeta, abstractproperty
 
 
-def get_source_file(source_files, tests_root, manifest, path):
-    def make_new():
+class SourceFileCache(object):
+    def __init__(self):
+        self.source_files = {}
+
+    def make_new(self, tests_root, path, url_base):
         from .sourcefile import SourceFile
 
-        return SourceFile(tests_root, path, manifest.url_base)
+        return SourceFile(tests_root, path, url_base)
 
-    if source_files is None:
-        return make_new()
+    def get(self, tests_root, manifest, path):
 
-    if path not in source_files:
-        source_files[path] = make_new()
+        if path not in self.source_files:
+            self.source_files[path] = self.make_new(tests_root, path, manifest.url_base)
 
-    return source_files[path]
+        return self.source_files[path]
 
 
 item_types = {}
 
 
 class ManifestItemMeta(ABCMeta):
     """Custom metaclass that registers all the subclasses in the
     item_types dictionary according to the value of their item_type
@@ -32,16 +34,18 @@ class ManifestItemMeta(ABCMeta):
         return rv
 
 
 class ManifestItem(object):
     __metaclass__ = ManifestItemMeta
 
     item_type = None
 
+    source_file_cache = SourceFileCache()
+
     def __init__(self, source_file, manifest=None):
         self.manifest = manifest
         self.source_file = source_file
 
     @abstractproperty
     def id(self):
         """The test's id (usually its url)"""
         pass
@@ -79,18 +83,18 @@ class ManifestItem(object):
 
     def __repr__(self):
         return "<%s.%s id=%s, path=%s>" % (self.__module__, self.__class__.__name__, self.id, self.path)
 
     def to_json(self):
         return [{}]
 
     @classmethod
-    def from_json(cls, manifest, tests_root, path, obj, source_files=None):
-        source_file = get_source_file(source_files, tests_root, manifest, path)
+    def from_json(cls, manifest, tests_root, path, obj):
+        source_file = cls.source_file_cache.get(tests_root, manifest, path)
         return cls(source_file,
                    manifest=manifest)
 
 
 class URLManifestItem(ManifestItem):
     def __init__(self, source_file, url, url_base="/", manifest=None):
         ManifestItem.__init__(self, source_file, manifest=manifest)
         self._url = url
@@ -108,18 +112,18 @@ class URLManifestItem(ManifestItem):
     def url(self):
         return urljoin(self.url_base, self._url)
 
     def to_json(self):
         rv = [self._url, {}]
         return rv
 
     @classmethod
-    def from_json(cls, manifest, tests_root, path, obj, source_files=None):
-        source_file = get_source_file(source_files, tests_root, manifest, path)
+    def from_json(cls, manifest, tests_root, path, obj):
+        source_file = cls.source_file_cache.get(tests_root, manifest, path)
         url, extras = obj
         return cls(source_file,
                    url,
                    url_base=manifest.url_base,
                    manifest=manifest)
 
 
 class TestharnessTest(URLManifestItem):
@@ -140,18 +144,18 @@ class TestharnessTest(URLManifestItem):
             rv[-1]["timeout"] = self.timeout
         if self.testdriver:
             rv[-1]["testdriver"] = self.testdriver
         if self.jsshell:
             rv[-1]["jsshell"] = True
         return rv
 
     @classmethod
-    def from_json(cls, manifest, tests_root, path, obj, source_files=None):
-        source_file = get_source_file(source_files, tests_root, manifest, path)
+    def from_json(cls, manifest, tests_root, path, obj):
+        source_file = cls.source_file_cache.get(tests_root, manifest, path)
 
         url, extras = obj
         return cls(source_file,
                    url,
                    url_base=manifest.url_base,
                    timeout=extras.get("timeout"),
                    testdriver=bool(extras.get("testdriver")),
                    jsshell=bool(extras.get("jsshell")),
@@ -182,18 +186,18 @@ class RefTestNode(URLManifestItem):
             extras["timeout"] = self.timeout
         if self.viewport_size is not None:
             extras["viewport_size"] = self.viewport_size
         if self.dpi is not None:
             extras["dpi"] = self.dpi
         return rv
 
     @classmethod
-    def from_json(cls, manifest, tests_root, path, obj, source_files=None):
-        source_file = get_source_file(source_files, tests_root, manifest, path)
+    def from_json(cls, manifest, tests_root, path, obj):
+        source_file = cls.source_file_cache.get(tests_root, manifest, path)
         url, references, extras = obj
         return cls(source_file,
                    url,
                    references,
                    url_base=manifest.url_base,
                    timeout=extras.get("timeout"),
                    viewport_size=extras.get("viewport_size"),
                    dpi=extras.get("dpi"),
@@ -243,18 +247,18 @@ class WebDriverSpecTest(URLManifestItem)
 
     def to_json(self):
         rv = URLManifestItem.to_json(self)
         if self.timeout is not None:
             rv[-1]["timeout"] = self.timeout
         return rv
 
     @classmethod
-    def from_json(cls, manifest, tests_root, path, obj, source_files=None):
-        source_file = get_source_file(source_files, tests_root, manifest, path)
+    def from_json(cls, manifest, tests_root, path, obj):
+        source_file = cls.source_file_cache.get(tests_root, manifest, path)
 
         url, extras = obj
         return cls(source_file,
                    url,
                    url_base=manifest.url_base,
                    timeout=extras.get("timeout"),
                    manifest=manifest)
 
--- a/testing/web-platform/tests/tools/manifest/manifest.py
+++ b/testing/web-platform/tests/tools/manifest/manifest.py
@@ -1,18 +1,22 @@
 import itertools
-import json
 import os
 from collections import defaultdict
-from six import iteritems, itervalues, viewkeys, string_types
+from six import iteritems, iterkeys, itervalues, string_types
 
-from .item import ManualTest, WebDriverSpecTest, Stub, RefTestNode, RefTest, TestharnessTest, SupportFile, ConformanceCheckerTest, VisualTest
+from .item import (ManualTest, WebDriverSpecTest, Stub, RefTestNode, RefTest,
+                   TestharnessTest, SupportFile, ConformanceCheckerTest, VisualTest)
 from .log import get_logger
 from .utils import from_os_path, to_os_path
 
+try:
+    import ujson as json
+except ImportError:
+    import json
 
 CURRENT_VERSION = 5
 
 
 class ManifestError(Exception):
     pass
 
 
@@ -22,32 +26,183 @@ class ManifestVersionMismatch(ManifestEr
 
 def iterfilter(filters, iter):
     for f in filters:
         iter = f(iter)
     for item in iter:
         yield item
 
 
+item_classes = {"testharness": TestharnessTest,
+                "reftest": RefTest,
+                "reftest_node": RefTestNode,
+                "manual": ManualTest,
+                "stub": Stub,
+                "wdspec": WebDriverSpecTest,
+                "conformancechecker": ConformanceCheckerTest,
+                "visual": VisualTest,
+                "support": SupportFile}
+
+
+class TypeData(object):
+    def __init__(self, manifest, type_cls):
+        """Dict-like object containing the TestItems for each test type.
+
+        Loading an actual Item class for each test is unnecessarily
+        slow, so this class allows lazy-loading of the test
+        items. When the manifest is loaded we store the raw json
+        corresponding to the test type, and only create an Item
+        subclass when the test is accessed. In order to remain
+        API-compatible with consumers that depend on getting an Item
+        from iteration, we do eagerly load all items when iterating
+        over the class.
+
+        """
+        self.manifest = manifest
+        self.type_cls = type_cls
+        self.data = {}
+        self.json_data = None
+        self.tests_root = None
+
+    def __getitem__(self, key):
+        if key not in self.data:
+            self.load(key)
+        return self.data[key]
+
+    def __bool__(self):
+        return bool(self.data)
+
+    def __len__(self):
+        return len(self.data)
+
+    def __delitem__(self, key):
+        del self.data[key]
+
+    def __setitem__(self, key, value):
+        self.data[key] = value
+
+    def __contains__(self, key):
+        self.load_all()
+        return key in self.data
+
+    def __iter__(self):
+        self.load_all()
+        return self.data.__iter__()
+
+    def pop(self, key, default=None):
+        try:
+            value = self[key]
+        except ValueError:
+            value = default
+        else:
+            del self.data[key]
+        return value
+
+    def get(self, key, default=None):
+        try:
+            return self[key]
+        except ValueError:
+            return default
+
+    def itervalues(self):
+        self.load_all()
+        return itervalues(self.data)
+
+    def iteritems(self):
+        self.load_all()
+        for path, tests in iteritems(self.data):
+            yield path, tests
+
+    def load(self, key):
+        """Load a specific Item given a path"""
+        if self.json_data is not None:
+            data = set()
+            path = from_os_path(key)
+            for test in self.json_data.get(path, []):
+                manifest_item = self.type_cls.from_json(self.manifest,
+                                                        self.tests_root,
+                                                        path,
+                                                        test)
+                data.add(manifest_item)
+            self.data[key] = data
+        else:
+            raise ValueError
+
+    def load_all(self):
+        """Load all test items in this class"""
+        if self.json_data is not None:
+            for path, value in iteritems(self.json_data):
+                key = to_os_path(path)
+                if key in self.data:
+                    continue
+                data = set()
+                for test in self.json_data.get(path, []):
+                    manifest_item = self.type_cls.from_json(self.manifest,
+                                                            self.tests_root,
+                                                            path,
+                                                            test)
+                    data.add(manifest_item)
+                self.data[key] = data
+            self.json_data = None
+
+    def set_json(self, tests_root, data):
+        if not isinstance(data, dict):
+            raise ValueError("Got a %s expected a dict" % (type(data)))
+        self.tests_root = tests_root
+        self.json_data = data
+
+    def paths(self):
+        """Get a list of all paths containing items of this type,
+        without actually constructing all the items"""
+        rv = set(iterkeys(self.data))
+        if self.json_data:
+            rv |= set(to_os_path(item) for item in iterkeys(self.json_data))
+        return rv
+
+
+class ManifestData(dict):
+    def __init__(self, manifest, meta_filters=None):
+        """Dictionary subclass containing a TypeData instance for each test type,
+        keyed by type name"""
+        self.initialized = False
+        for key, value in item_classes.iteritems():
+            self[key] = TypeData(manifest, value, meta_filters=meta_filters)
+        self.initialized = True
+        self.json_obj = None
+
+    def __setitem__(self, key, value):
+        if self.initialized:
+            raise AttributeError
+        dict.__setitem__(self, key, value)
+
+    def paths(self):
+        """Get a list of all paths containing test items
+        without actually constructing all the items"""
+        rv = set()
+        for item_data in itervalues(self):
+            rv |= set(item_data.paths())
+        return rv
+
+
 class Manifest(object):
     def __init__(self, url_base="/"):
         assert url_base is not None
         self._path_hash = {}
-        self._data = defaultdict(dict)
+        self._data = ManifestData(self)
         self._reftest_nodes_by_url = None
         self.url_base = url_base
 
     def __iter__(self):
         return self.itertypes()
 
     def itertypes(self, *types):
         if not types:
             types = sorted(self._data.keys())
         for item_type in types:
-            for path, tests in sorted(iteritems(self._data[item_type])):
+            for path, tests in sorted(self._data[item_type]):
                 yield item_type, path, tests
 
     def iterpath(self, path):
         for type_tests in self._data.values():
             for test in type_tests.get(path, set()):
                 yield test
 
     def iterdir(self, dir_name):
@@ -69,71 +224,85 @@ class Manifest(object):
                     by_url[node.url] = node
             self._reftest_nodes_by_url = by_url
         return self._reftest_nodes_by_url
 
     def get_reference(self, url):
         return self.reftest_nodes_by_url.get(url)
 
     def update(self, tree):
-        new_data = defaultdict(dict)
-        new_hashes = {}
+        """Update the manifest given an iterable of items that make up the updated manifest.
 
+        The iterable must either generate tuples of the form (SourceFile, True) for paths
+        that are to be updated, or (path, False) for items that are not to be updated. This
+        unusual API is designed as an optimisation meaning that SourceFile items need not be
+        constructed in the case we are not updating a path, but the absence of an item from
+        the iterator may be used to remove defunct entries from the manifest."""
         reftest_nodes = []
-        old_files = defaultdict(set, {k: set(viewkeys(v)) for k, v in iteritems(self._data)})
+        seen_files = set()
 
         changed = False
         reftest_changes = False
 
-        for source_file in tree:
-            rel_path = source_file.rel_path
-            file_hash = source_file.hash
+        prev_files = self._data.paths()
+
+        reftest_types = ("reftest", "reftest_node")
 
-            is_new = rel_path not in self._path_hash
-            hash_changed = False
+        for source_file, update in tree:
+            if not update:
+                rel_path = source_file
+                seen_files.add(rel_path)
+            else:
+                rel_path = source_file.rel_path
+                seen_files.add(rel_path)
+
+                file_hash = source_file.hash
+
+                is_new = rel_path not in self._path_hash
+                hash_changed = False
 
-            if not is_new:
-                old_hash, old_type = self._path_hash[rel_path]
-                old_files[old_type].remove(rel_path)
-                if old_hash != file_hash:
-                    new_type, manifest_items = source_file.manifest_items()
-                    hash_changed = True
+                if not is_new:
+                    old_hash, old_type = self._path_hash[rel_path]
+                    if old_hash != file_hash:
+                        new_type, manifest_items = source_file.manifest_items()
+                        hash_changed = True
+                    else:
+                        new_type, manifest_items = old_type, self._data[old_type][rel_path]
+                    if old_type in reftest_types and new_type != old_type:
+                        reftest_changes = True
                 else:
-                    new_type, manifest_items = old_type, self._data[old_type][rel_path]
-                if old_type in ("reftest", "reftest_node") and new_type != old_type:
-                    reftest_changes = True
-            else:
-                new_type, manifest_items = source_file.manifest_items()
+                    new_type, manifest_items = source_file.manifest_items()
 
-            if new_type in ("reftest", "reftest_node"):
-                reftest_nodes.extend(manifest_items)
+                if new_type in ("reftest", "reftest_node"):
+                    reftest_nodes.extend(manifest_items)
+                    if is_new or hash_changed:
+                        reftest_changes = True
+                elif new_type:
+                    self._data[new_type][rel_path] = set(manifest_items)
+
+                self._path_hash[rel_path] = (file_hash, new_type)
+
                 if is_new or hash_changed:
-                    reftest_changes = True
-            elif new_type:
-                new_data[new_type][rel_path] = set(manifest_items)
+                    changed = True
 
-            new_hashes[rel_path] = (file_hash, new_type)
+        deleted = prev_files - seen_files
+        if deleted:
+            changed = True
+            for rel_path in deleted:
+                _, old_type = self._path_hash[rel_path]
+                if old_type in reftest_types:
+                    reftest_changes = True
+                del self._path_hash[rel_path]
+                del self._data[old_type][rel_path]
 
-            if is_new or hash_changed:
-                changed = True
-
-        if reftest_changes or old_files["reftest"] or old_files["reftest_node"]:
+        if reftest_changes:
             reftests, reftest_nodes, changed_hashes = self._compute_reftests(reftest_nodes)
-            new_data["reftest"] = reftests
-            new_data["reftest_node"] = reftest_nodes
-            new_hashes.update(changed_hashes)
-        else:
-            new_data["reftest"] = self._data["reftest"]
-            new_data["reftest_node"] = self._data["reftest_node"]
-
-        if any(itervalues(old_files)):
-            changed = True
-
-        self._data = new_data
-        self._path_hash = new_hashes
+            self._data["reftest"].data = reftests
+            self._data["reftest_node"].data = reftest_nodes
+            self._path_hash.update(changed_hashes)
 
         return changed
 
     def _compute_reftests(self, reftest_nodes):
         self._reftest_nodes_by_url = {}
         has_inbound = set()
         for item in reftest_nodes:
             for ref_url, ref_type in item.references:
@@ -163,17 +332,17 @@ class Manifest(object):
 
     def to_json(self):
         out_items = {
             test_type: {
                 from_os_path(path):
                 [t for t in sorted(test.to_json() for test in tests)]
                 for path, tests in iteritems(type_paths)
             }
-            for test_type, type_paths in iteritems(self._data)
+            for test_type, type_paths in self._data.iteritems() if type_paths
         }
         rv = {"url_base": self.url_base,
               "paths": {from_os_path(k): v for k, v in iteritems(self._path_hash)},
               "items": out_items,
               "version": CURRENT_VERSION}
         return rv
 
     @classmethod
@@ -183,49 +352,26 @@ class Manifest(object):
             raise ManifestVersionMismatch
 
         self = cls(url_base=obj.get("url_base", "/"))
         if not hasattr(obj, "items") and hasattr(obj, "paths"):
             raise ManifestError
 
         self._path_hash = {to_os_path(k): v for k, v in iteritems(obj["paths"])}
 
-        item_classes = {"testharness": TestharnessTest,
-                        "reftest": RefTest,
-                        "reftest_node": RefTestNode,
-                        "manual": ManualTest,
-                        "stub": Stub,
-                        "wdspec": WebDriverSpecTest,
-                        "conformancechecker": ConformanceCheckerTest,
-                        "visual": VisualTest,
-                        "support": SupportFile}
-
         meta_filters = meta_filters or []
 
-        source_files = {}
-
         for test_type, type_paths in iteritems(obj["items"]):
             if test_type not in item_classes:
                 raise ManifestError
 
             if types and test_type not in types:
                 continue
 
-            test_cls = item_classes[test_type]
-            tests = defaultdict(set)
-            for path, manifest_tests in iteritems(type_paths):
-                path = to_os_path(path)
-                for test in iterfilter(meta_filters, manifest_tests):
-                    manifest_item = test_cls.from_json(self,
-                                                       tests_root,
-                                                       path,
-                                                       test,
-                                                       source_files=source_files)
-                    tests[path].add(manifest_item)
-            self._data[test_type] = tests
+            self._data[test_type].set_json(tests_root, type_paths)
 
         return self
 
 
 def load(tests_root, manifest, types=None, meta_filters=None):
     logger = get_logger()
 
     # "manifest" is a path or file-like object.
@@ -247,10 +393,10 @@ def load(tests_root, manifest, types=Non
     return Manifest.from_json(tests_root, json.load(manifest), types=types, meta_filters=meta_filters)
 
 
 def write(manifest, manifest_path):
     dir_name = os.path.dirname(manifest_path)
     if not os.path.exists(dir_name):
         os.makedirs(dir_name)
     with open(manifest_path, "wb") as f:
-        json.dump(manifest.to_json(), f, sort_keys=True, indent=1, separators=(',', ': '))
+        json.dump(manifest.to_json(), f, sort_keys=True, indent=1)
         f.write("\n")
--- a/testing/web-platform/tests/tools/manifest/tests/test_manifest.py
+++ b/testing/web-platform/tests/tools/manifest/tests/test_manifest.py
@@ -67,17 +67,17 @@ def sourcefile_strategy(draw):
 
 @h.given(hs.lists(sourcefile_strategy(),
                   min_size=1, average_size=10, max_size=1000,
                   unique_by=lambda x: x.rel_path))
 @h.example([SourceFileWithTest("a", "0"*40, item.ConformanceCheckerTest)])
 def test_manifest_to_json(s):
     m = manifest.Manifest()
 
-    assert m.update(s) is True
+    assert m.update((item, True) for item in s) is True
 
     json_str = m.to_json()
     loaded = manifest.Manifest.from_json("/", json_str)
 
     assert list(loaded) == list(m)
 
     assert loaded.to_json() == json_str
 
@@ -85,228 +85,226 @@ def test_manifest_to_json(s):
 @h.given(hs.lists(sourcefile_strategy(),
                   min_size=1, average_size=10,
                   unique_by=lambda x: x.rel_path))
 @h.example([SourceFileWithTest("a", "0"*40, item.TestharnessTest)])
 @h.example([SourceFileWithTest("a", "0"*40, item.RefTest, [("/aa", "==")])])
 def test_manifest_idempotent(s):
     m = manifest.Manifest()
 
-    assert m.update(s) is True
+    assert m.update((item, True) for item in s) is True
 
     m1 = list(m)
 
-    assert m.update(s) is False
+    assert m.update((item, True) for item in s) is False
 
     assert list(m) == m1
 
 
 def test_manifest_to_json_forwardslash():
     m = manifest.Manifest()
 
     s = SourceFileWithTest("a/b", "0"*40, item.TestharnessTest)
 
-    assert m.update([s]) is True
+    assert m.update([(s, True)]) is True
 
     assert m.to_json() == {
         'paths': {
             'a/b': ('0000000000000000000000000000000000000000', 'testharness')
         },
         'version': 5,
         'url_base': '/',
         'items': {
-            'reftest': {},
-            'reftest_node': {},
             'testharness': {
                 'a/b': [['/a/b', {}]]
             }
         }
     }
 
 
 def test_manifest_to_json_backslash():
     m = manifest.Manifest()
 
     s = SourceFileWithTest("a\\b", "0"*40, item.TestharnessTest)
 
     if os.path.sep == "\\":
-        assert m.update([s]) is True
+        assert m.update([(s, True)]) is True
 
         assert m.to_json() == {
             'paths': {
                 'a/b': ('0000000000000000000000000000000000000000', 'testharness')
             },
             'version': 5,
             'url_base': '/',
             'items': {
-                'reftest': {},
-                'reftest_node': {},
                 'testharness': {
                     'a/b': [['/a/b', {}]]
                 }
             }
         }
     else:
         with pytest.raises(ValueError):
             # one of these must raise ValueError
             # the first must return True if it doesn't raise
-            assert m.update([s]) is True
+            assert m.update([(s, True)]) is True
             m.to_json()
 
 
 def test_manifest_from_json_backslash():
     json_obj = {
         'paths': {
             'a\\b': ('0000000000000000000000000000000000000000', 'testharness')
         },
         'version': 5,
         'url_base': '/',
         'items': {
-            'reftest': {},
-            'reftest_node': {},
             'testharness': {
                 'a\\b': [['/a/b', {}]]
             }
         }
     }
 
     with pytest.raises(ValueError):
         manifest.Manifest.from_json("/", json_obj)
 
 
 def test_reftest_computation_chain():
     m = manifest.Manifest()
 
     s1 = SourceFileWithTest("test1", "0"*40, item.RefTest, [("/test2", "==")])
     s2 = SourceFileWithTest("test2", "0"*40, item.RefTest, [("/test3", "==")])
 
-    m.update([s1, s2])
+    m.update([(s1, True), (s2, True)])
 
     test1 = s1.manifest_items()[1][0]
     test2 = s2.manifest_items()[1][0]
     test2_node = test2.to_RefTestNode()
 
     assert list(m) == [("reftest", test1.path, {test1}),
                        ("reftest_node", test2.path, {test2_node})]
 
 
 def test_reftest_computation_chain_update_add():
     m = manifest.Manifest()
 
     s2 = SourceFileWithTest("test2", "0"*40, item.RefTest, [("/test3", "==")])
     test2 = s2.manifest_items()[1][0]
 
-    assert m.update([s2]) is True
+    assert m.update([(s2, True)]) is True
 
     assert list(m) == [("reftest", test2.path, {test2})]
 
     s1 = SourceFileWithTest("test1", "0"*40, item.RefTest, [("/test2", "==")])
     test1 = s1.manifest_items()[1][0]
 
     # s2's hash is unchanged, but it has gone from a test to a node
-    assert m.update([s1, s2]) is True
+    assert m.update([(s1, True), (s2, True)]) is True
 
     test2_node = test2.to_RefTestNode()
 
     assert list(m) == [("reftest", test1.path, {test1}),
                        ("reftest_node", test2.path, {test2_node})]
 
 
 def test_reftest_computation_chain_update_remove():
     m = manifest.Manifest()
 
     s1 = SourceFileWithTest("test1", "0"*40, item.RefTest, [("/test2", "==")])
     s2 = SourceFileWithTest("test2", "0"*40, item.RefTest, [("/test3", "==")])
 
-    assert m.update([s1, s2]) is True
+    assert m.update([(s1, True), (s2, True)]) is True
 
     test1 = s1.manifest_items()[1][0]
     test2 = s2.manifest_items()[1][0]
     test2_node = test2.to_RefTestNode()
 
     assert list(m) == [("reftest", test1.path, {test1}),
                        ("reftest_node", test2.path, {test2_node})]
 
     # s2's hash is unchanged, but it has gone from a node to a test
-    assert m.update([s2]) is True
+    assert m.update([(s2, True)]) is True
 
     assert list(m) == [("reftest", test2.path, {test2})]
 
 
 def test_reftest_computation_chain_update_test_type():
     m = manifest.Manifest()
 
     s1 = SourceFileWithTest("test", "0"*40, item.RefTest, [("/test-ref", "==")])
 
-    assert m.update([s1]) is True
+    assert m.update([(s1, True)]) is True
 
     test1 = s1.manifest_items()[1][0]
 
     assert list(m) == [("reftest", test1.path, {test1})]
 
     # test becomes a testharness test (hash change because that is determined
     # based on the file contents). The updated manifest should not includes the
     # old reftest.
     s2 = SourceFileWithTest("test", "1"*40, item.TestharnessTest)
-    assert m.update([s2]) is True
+    assert m.update([(s2, True)]) is True
 
     test2 = s2.manifest_items()[1][0]
 
     assert list(m) == [("testharness", test2.path, {test2})]
 
 
 def test_reftest_computation_chain_update_node_change():
     m = manifest.Manifest()
 
     s1 = SourceFileWithTest("test1", "0"*40, item.RefTest, [("/test2", "==")])
     s2 = SourceFileWithTest("test2", "0"*40, item.RefTestNode, [("/test3", "==")])
 
-    assert m.update([s1, s2]) is True
+    assert m.update([(s1, True), (s2, True)]) is True
 
     test1 = s1.manifest_items()[1][0]
     test2 = s2.manifest_items()[1][0]
 
     assert list(m) == [("reftest", test1.path, {test1}),
                        ("reftest_node", test2.path, {test2})]
 
     #test2 changes to support type
     s2 = SourceFileWithTest("test2", "1"*40, item.SupportFile)
 
-    assert m.update([s1,s2]) is True
+    assert m.update([(s1, True), (s2, True)]) is True
     test3 = s2.manifest_items()[1][0]
 
     assert list(m) == [("reftest", test1.path, {test1}),
                        ("support", test3.path, {test3})]
 
 
 def test_iterpath():
     m = manifest.Manifest()
 
+    # This has multiple test types from the same file, which isn't really supported,
+    # so pretend they have different hashes
     sources = [SourceFileWithTest("test1", "0"*40, item.RefTest, [("/test1-ref", "==")]),
                SourceFileWithTest("test2", "0"*40, item.RefTest, [("/test2-ref", "==")]),
-               SourceFileWithTests("test2", "0"*40, item.TestharnessTest, [("/test2-1.html",),
+               SourceFileWithTests("test2", "1"*40, item.TestharnessTest, [("/test2-1.html",),
                                                                            ("/test2-2.html",)]),
                SourceFileWithTest("test3", "0"*40, item.TestharnessTest)]
-    m.update(sources)
+    m.update([(s, True) for s in sources])
 
     assert set(item.url for item in m.iterpath("test2")) == set(["/test2",
                                                                  "/test2-1.html",
                                                                  "/test2-2.html"])
     assert set(m.iterpath("missing")) == set()
 
 
 def test_filter():
     m = manifest.Manifest()
 
+    # This has multiple test types from the same file, which isn't really supported,
+    # so pretend they have different hashes
     sources = [SourceFileWithTest("test1", "0"*40, item.RefTest, [("/test1-ref", "==")]),
-               SourceFileWithTest("test2", "0"*40, item.RefTest, [("/test2-ref", "==")]),
+               SourceFileWithTest("test2", "1"*40, item.RefTest, [("/test2-ref", "==")]),
                SourceFileWithTests("test2", "0"*40, item.TestharnessTest, [("/test2-1.html",),
                                                                            ("/test2-2.html",)]),
                SourceFileWithTest("test3", "0"*40, item.TestharnessTest)]
-    m.update(sources)
+    m.update([(s, True) for s in sources])
 
     json = m.to_json()
 
     def filter(it):
         for test in it:
             if test[0] in ["/test2-2.html", "/test3"]:
                 yield test
 
@@ -323,19 +321,60 @@ def test_filter():
 
 
 def test_reftest_node_by_url():
+    """Check the URL -> reftest mapping exposed as ``reftest_nodes_by_url``.
+
+    The mapping is compared twice: once straight after the update and once
+    after ``_reftest_nodes_by_url`` has been reset to None.
+    """
     m = manifest.Manifest()
 
     s1 = SourceFileWithTest("test1", "0"*40, item.RefTest, [("/test2", "==")])
     s2 = SourceFileWithTest("test2", "0"*40, item.RefTest, [("/test3", "==")])
 
-    m.update([s1, s2])
+    # update() now takes (source_file, flag) pairs rather than bare source files.
+    m.update([(s1, True), (s2, True)])
 
     test1 = s1.manifest_items()[1][0]
     test2 = s2.manifest_items()[1][0]
     test2_node = test2.to_RefTestNode()
 
     assert m.reftest_nodes_by_url == {"/test1": test1,
                                       "/test2": test2_node}
+    # Presumably this private attribute backs the property, so clearing it
+    # forces the mapping to be recomputed - confirm against Manifest.
     m._reftest_nodes_by_url = None
     assert m.reftest_nodes_by_url == {"/test1": test1,
                                       "/test2": test2_node}
+
+
+def test_no_update():
+    """An entry passed as ``(rel_path, False)`` keeps its existing items."""
+    m = manifest.Manifest()
+
+    s1 = SourceFileWithTest("test1", "0"*40, item.TestharnessTest)
+    s2 = SourceFileWithTest("test2", "0"*40, item.TestharnessTest)
+
+    m.update([(s1, True), (s2, True)])
+
+    test1 = s1.manifest_items()[1][0]
+    test2 = s2.manifest_items()[1][0]
+
+    assert list(m) == [("testharness", test1.path, {test1}),
+                       ("testharness", test2.path, {test2})]
+
+    # Same path as s1 but built with a different hash argument, standing in
+    # for the file having been edited since the first update.
+    s1_1 = SourceFileWithTest("test1", "1"*40, item.TestharnessTest)
+
+    # The edited file is passed as a source file flagged True; test2 is passed
+    # by path only and flagged False, so its items must survive unchanged.
+    m.update([(s1_1, True), (s2.rel_path, False)])
+
+    test1_1 = s1_1.manifest_items()[1][0]
+
+    assert list(m) == [("testharness", test1_1.path, {test1_1}),
+                       ("testharness", test2.path, {test2})]
+
+
+def test_no_update_delete():
+    """A path left out of a later update is removed from the manifest."""
+    m = manifest.Manifest()
+
+    s1 = SourceFileWithTest("test1", "0"*40, item.TestharnessTest)
+    s2 = SourceFileWithTest("test2", "0"*40, item.TestharnessTest)
+
+    m.update([(s1, True), (s2, True)])
+
+    # Same path as s1 but built with a different hash argument, standing in
+    # for the file having been edited since the first update.
+    s1_1 = SourceFileWithTest("test1", "1"*40, item.TestharnessTest)
+
+    # test2 is not mentioned at all this time, neither as changed nor as
+    # unchanged, so only test1 may remain afterwards.
+    m.update([(s1_1, True)])
+
+    test1_1 = s1_1.manifest_items()[1][0]
+
+    assert list(m) == [("testharness", test1_1.path, {test1_1})]
--- a/testing/web-platform/tests/tools/manifest/update.py
+++ b/testing/web-platform/tests/tools/manifest/update.py
@@ -8,25 +8,39 @@ from .log import get_logger
 from .download import download_from_github
 
 here = os.path.dirname(__file__)
 
 wpt_root = os.path.abspath(os.path.join(here, os.pardir, os.pardir))
 
 logger = get_logger()
 
-def update(tests_root, manifest, working_copy=False):
+
+def update(tests_root, manifest, working_copy=False, cache_root=None, rebuild=False):
+    """Update ``manifest`` from the tree rooted at ``tests_root``.
+
+    :param tests_root: Path handed to the tree object as its root.
+    :param manifest: Manifest to update; its ``url_base`` is passed to the tree.
+    :param working_copy: When false, a git-backed tree is tried first and the
+                         filesystem tree is used only if none is returned.
+    :param cache_root: Directory for cache files. Defaults to
+                       ``<tests_root>/.cache`` and is created when missing;
+                       if it cannot be created, None is passed on instead.
+    :param rebuild: Passed through to the tree together with the cache path.
+    :returns: The return value of ``manifest.update`` for the chosen tree.
+    """
     logger.info("Updating manifest")
     tree = None
+    if cache_root is None:
+        cache_root = os.path.join(tests_root, ".cache")
+    if not os.path.exists(cache_root):
+        try:
+            os.makedirs(cache_root)
+        except OSError:
+            # os.makedirs signals failure with OSError, which is a different
+            # class from IOError on Python 2. If the directory appeared in the
+            # meantime it is still usable; otherwise carry on without a cache.
+            if not os.path.isdir(cache_root):
+                cache_root = None
+
     if not working_copy:
-        tree = vcs.Git.for_path(tests_root, manifest.url_base)
+        tree = vcs.Git.for_path(tests_root, manifest.url_base,
+                                cache_path=cache_root, rebuild=rebuild)
     if tree is None:
-        tree = vcs.FileSystem(tests_root, manifest.url_base)
+        tree = vcs.FileSystem(tests_root, manifest.url_base,
+                              cache_path=cache_root, rebuild=rebuild)
 
-    return manifest.update(tree)
+    try:
+        return manifest.update(tree)
+    finally:
+        # Runs whether or not the update raised, so the caches are written
+        # out in both cases.
+        tree.dump_caches()
 
 
 def update_from_cli(**kwargs):
     tests_root = kwargs["tests_root"]
     path = kwargs["path"]
     assert tests_root is not None
 
     m = None
@@ -41,17 +55,19 @@ def update_from_cli(**kwargs):
             logger.info("Manifest version changed, rebuilding")
             m = None
 
     if m is None:
         m = manifest.Manifest(kwargs["url_base"])
 
     changed = update(tests_root,
                      m,
-                     working_copy=kwargs["work"])
+                     working_copy=kwargs["work"],
+                     cache_root=kwargs["cache_root"],
+                     rebuild=kwargs["rebuild"])
     if changed:
         manifest.write(m, path)
 
 
 def abs_path(path):
+    """Return ``path`` with a leading ``~`` expanded and made absolute."""
     return os.path.abspath(os.path.expanduser(path))
 
 
@@ -68,33 +84,35 @@ def create_parser():
         "--work", action="store_true", default=False,
         help="Build from the working tree rather than the latest commit")
     parser.add_argument(
         "--url-base", action="store", default="/",
         help="Base url to use as the mount point for tests in this manifest.")
     parser.add_argument(
         "--no-download", dest="download", action="store_false", default=True,
         help="Never attempt to download the manifest.")
+    parser.add_argument(
+        "--cache-root", action="store", default=os.path.join(wpt_root, ".wptcache"),
+        help="Path in which to store any caches (default <tests_root>/.wptcache/")
     return parser
 
 
 def find_top_repo():
+    """Return the outermost directory above this module that is a git repo.
+
+    Starts at this module's directory and moves to the parent on each step.
+    Every directory for which ``vcs.is_git_repo`` returns a true value
+    overwrites the previous result, so the last match found is returned,
+    or None when there was no match.
+
+    NOTE(review): the loop ends only when the path equals "/", and "/"
+    itself is never tested - confirm the behaviour on platforms whose
+    filesystem root is spelled differently.
+    """
     path = here
     rv = None
     while path != "/":
         if vcs.is_git_repo(path):
             rv = path
         path = os.path.abspath(os.path.join(path, os.pardir))
 
     return rv
 
 
-def run(**kwargs):
+def run(*args, **kwargs):
+    """Entry point named as "script" for update.py in commands.json.
+
+    Positional arguments are accepted but not used. When ``kwargs["path"]``
+    is None it is set to ``MANIFEST.json`` under ``kwargs["tests_root"]``;
+    all keyword arguments are then forwarded to ``update_from_cli``.
+    """
     if kwargs["path"] is None:
         kwargs["path"] = os.path.join(kwargs["tests_root"], "MANIFEST.json")
-
     update_from_cli(**kwargs)
 
 
 def main():
+    """Parse options with ``create_parser()`` and pass them to ``run()``."""
     opts = create_parser().parse_args()
 
     run(**vars(opts))
--- a/testing/web-platform/tests/tools/manifest/vcs.py
+++ b/testing/web-platform/tests/tools/manifest/vcs.py
@@ -1,17 +1,18 @@
+import json
 import os
+import platform
 import subprocess
-import platform
 
 from .sourcefile import SourceFile
 
 
 class Git(object):
-    def __init__(self, repo_root, url_base):
+    def __init__(self, repo_root, url_base, filters=None):
         self.root = os.path.abspath(repo_root)
         self.git = Git.get_func(repo_root)
         self.url_base = url_base
 
     @staticmethod
     def get_func(repo_path):
         def git(cmd, *args):
             full_cmd = ["git", cmd] + list(args)
@@ -69,32 +70,78 @@ class Git(object):
                 if rel_path in local_changes:
                     contents = self._show_file(rel_path)
                 else:
                     contents = None
                 yield SourceFile(self.root,
                                  rel_path,
                                  self.url_base,
                                  hash,
-                                 contents=contents)
+                                 contents=contents), True
 
 
 class FileSystem(object):
+    """Test tree read from the filesystem.
+
+    Iterating yields ``(SourceFile, True)`` for every file that has to be
+    processed and ``(path, False)`` for files the mtime cache reports as
+    unchanged.
+    """
+
-    def __init__(self, root, url_base):
+    def __init__(self, root, url_base, mtime_filter):
+        """Store the tree location and set up the path and mtime filters.
+
+        NOTE(review): update.py in this changeset constructs FileSystem with
+        ``cache_path=`` and ``rebuild=`` keywords, which this signature does
+        not accept - confirm which side is intended.
+        """
         self.root = root
         self.url_base = url_base
         from gitignore import gitignore
         self.path_filter = gitignore.PathFilter(self.root, extras=[".git/"])
+        self.mtime_filter = mtime_filter
+        # __iter__ and dump_caches read these two attributes, so they have to
+        # exist on every instance.
+        # NOTE(review): assumes mtime_filter is the mtime cache object (or
+        # None) - confirm against the callers.
+        self.mtime_cache = mtime_filter
+        self.ignore_cache = None
 
     def __iter__(self):
-        paths = self.get_paths()
-        for path in paths:
-            yield SourceFile(self.root, path, self.url_base)
+        mtime_cache = self.mtime_cache
+        # NOTE(review): walk is neither defined nor imported in the visible
+        # part of this module; from its use here it presumably yields
+        # (dirpath, dirnames, [(filename, stat), ...]) - confirm.
+        for dirpath, dirnames, filenames in self.path_filter(walk(".")):
+            for filename, path_stat in filenames:
+                # NOTE(review): nothing here strips a leading "./"; the paths
+                # are presumably already relative to the root - confirm.
+                path = os.path.join(dirpath, filename)
+                # CacheFile.update() records the new mtime and returns True
+                # when it differs from the stored one. `updated` on the cache
+                # is its set of seen paths, not a method.
+                if mtime_cache is None or mtime_cache.update(path, path_stat):
+                    yield SourceFile(self.root, path, self.url_base), True
+                else:
+                    yield path, False
+        if self.ignore_cache is not None:
+            self.ignore_cache.dump()
+
+    def dump_caches(self):
+        """Write out every cache that is configured (i.e. not None)."""
+        for cache in [self.mtime_cache, self.ignore_cache]:
+            if cache is not None:
+                cache.dump()
+
+
+class CacheFile(object):
+    """JSON-backed mapping from relative path to the last mtime seen.
+
+    Subclasses must set ``file_name``; it is joined onto the cache directory
+    to form the path of the JSON file.
+    """
+    file_name = None
+
+    def __init__(self, cache_root, rebuild=False, root=None):
+        """Create the cache directory if needed and load existing data.
+
+        :param cache_root: Directory holding the cache file.
+        :param rebuild: When true, start from an empty cache instead of
+                        reading the file.
+        :param root: Directory that ``update`` resolves relative paths
+                     against when it has to stat a file itself; defaults to
+                     the current directory.
+                     NOTE(review): confirm the intended base directory.
+        """
+        if not os.path.exists(cache_root):
+            os.makedirs(cache_root)
+        self.path = os.path.join(cache_root, self.file_name)
+        self.root = os.curdir if root is None else root
+        # Paths passed to update() during this run; dump() drops the rest.
+        self.updated = set()
+        self.data = self.load(rebuild)
+        self.modified = False
 
-    def get_paths(self):
-        for dirpath, dirnames, filenames in os.walk(self.root):
-            for filename in filenames:
-                path = os.path.relpath(os.path.join(dirpath, filename), self.root)
-                if self.path_filter(path):
-                    yield path
+    def dump(self):
+        """Prune entries not seen this run and write the cache if it changed.
+
+        Dropping an entry is always safe: update() reports a path with no
+        stored mtime as changed.
+        """
+        missing = set(self.data.keys()) - self.updated
+        # Skip the write only when there is nothing to prune AND nothing was
+        # modified; either condition alone means the file is out of date.
+        if not missing and not self.modified:
+            return
+        for item in missing:
+            del self.data[item]
+        with open(self.path, 'w') as f:
+            json.dump(self.data, f, indent=1)
+
+    def load(self, rebuild=False):
+        """Return the stored mapping, or an empty one.
+
+        An empty dict is returned when ``rebuild`` is true, or when the file
+        cannot be opened or does not contain valid JSON.
+        """
+        if rebuild:
+            return {}
+        try:
+            with open(self.path, 'r') as f:
+                return json.load(f)
+        except (IOError, ValueError):
+            return {}
 
-            dirnames[:] = [item for item in dirnames if self.path_filter(
-                           os.path.relpath(os.path.join(dirpath, item), self.root) + "/")]
+    def update(self, rel_path, stat=None):
+        """Mark ``rel_path`` as seen and report whether its mtime changed.
+
+        :param rel_path: Key for the file in the cache.
+        :param stat: Object with an ``st_mtime`` attribute; when None the
+                     file is stat'ed relative to ``self.root``.
+        :returns: True when the mtime differs from the stored one (the new
+                  value is then stored) or the file cannot be stat'ed,
+                  otherwise False.
+        """
+        self.updated.add(rel_path)
+        if stat is None:
+            try:
+                stat = os.stat(os.path.join(self.root, rel_path))
+            except OSError:
+                # Best effort: a file that cannot be stat'ed is reported as
+                # changed so the caller looks at it again.
+                return True
+
+        mtime = stat.st_mtime
+        if mtime != self.data.get(rel_path):
+            self.modified = True
+            self.data[rel_path] = mtime
+            return True
+        return False