Bug 1305877 - Make HashChunker stable; r?jgraham draft
author: Gregory Szorc <gps@mozilla.com>
Tue, 27 Sep 2016 16:31:26 -0700
changeset 419185 5857432b28a0cb6d6eb1cdfafe0a9ed377b37e08
parent 419033 9baec74b3db1bf005c66ae2f50bafbdb02c3be38
child 419186 a5858576decc3b2f7e122c3d3c428af4cfb59e3b
push id: 30877
push user: bmo:gps@mozilla.com
push date: Thu, 29 Sep 2016 21:44:46 +0000
reviewers: jgraham
bugs: 1305877
milestone: 52.0a1
Bug 1305877 - Make HashChunker stable; r?jgraham

The built-in hash() function uses the backing memory address for hashing. This is essentially random. Switch to md5 so input is consistently hashed across processes.

MozReview-Commit-ID: D52uzttE5hc
testing/web-platform/harness/wptrunner/testloader.py
--- a/testing/web-platform/harness/wptrunner/testloader.py
+++ b/testing/web-platform/harness/wptrunner/testloader.py
@@ -1,11 +1,11 @@
+import hashlib
 import json
 import os
-import sys
 import urlparse
 from abc import ABCMeta, abstractmethod
 from Queue import Empty
 from collections import defaultdict, OrderedDict, deque
 from multiprocessing import Queue
 
 import manifestinclude
 import manifestexpected
@@ -38,22 +38,24 @@ class Unchunked(TestChunker):
         assert self.total_chunks == 1
 
     def __call__(self, manifest):
         for item in manifest:
             yield item
 
 
 class HashChunker(TestChunker):
-    def __call__(self):
+    def __call__(self, manifest):
         chunk_index = self.chunk_number - 1
         for test_path, tests in manifest:
-            if hash(test_path) % self.total_chunks == chunk_index:
+            h = int(hashlib.md5(test_path).hexdigest(), 16)
+            if h % self.total_chunks == chunk_index:
                 yield test_path, tests
 
+
 class EqualTimeChunker(TestChunker):
     def _group_by_directory(self, manifest_items):
         """Split the list of manifest items into a ordered dict that groups tests in
         so that anything in the same subdirectory beyond a depth of 3 is in the same
         group. So all tests in a/b/c, a/b/c/d and a/b/c/e will be grouped together
         and separate to tests in a/b/f
 
         Returns: tuple (ordered dict of {test_dir: PathData}, total estimated runtime)