Bug 866241 - Fix some issues with repo manifest parsing. r=catlee
author: Ted Mielczarek <ted@mielczarek.org>
Fri, 26 Apr 2013 15:30:19 -0400
changeset 141460 2b526c0d0b587d5cb34f5822b4eccc5219b159cf
parent 141459 680afe05ac35d59d43c522b7c87a82e58242490a
child 141461 e13bb42d287235c2e8d3ebb3f6d84d21918aa896
push id: 2579
push user: akeybl@mozilla.com
push date: Mon, 24 Jun 2013 18:52:47 +0000
treeherder: mozilla-beta@b69b7de8a05a [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: catlee
bugs: 866241
milestone: 23.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 866241 - Fix some issues with repo manifest parsing. r=catlee
toolkit/crashreporter/tools/symbolstore.py
--- a/toolkit/crashreporter/tools/symbolstore.py
+++ b/toolkit/crashreporter/tools/symbolstore.py
@@ -218,38 +218,35 @@ class GitFileInfo(VCSFileInfo):
 
     def GetFilename(self):
         if self.revision and self.clean_root:
             return "git:%s:%s:%s" % (self.clean_root, self.file, self.revision)
         return self.file
 
 # Utility functions
 
-# A cache of repo info for each srcdir.
-srcdirRepoInfo = {}
-
 # A cache of files for which VCS info has already been determined. Used to
 # prevent extra filesystem activity or process launching.
 vcsFileInfoCache = {}
 
 def IsInDir(file, dir):
     # the lower() is to handle win32+vc8, where
     # the source filenames come out all lowercase,
     # but the srcdir can be mixed case
     return os.path.abspath(file).lower().startswith(os.path.abspath(dir).lower())
 
 def GetVCSFilenameFromSrcdir(file, srcdir):
-    if srcdir not in srcdirRepoInfo:
+    if srcdir not in Dumper.srcdirRepoInfo:
         # Not in cache, so find it adnd cache it
         if os.path.isdir(os.path.join(srcdir, '.hg')):
-            srcdirRepoInfo[srcdir] = HGRepoInfo(srcdir)
+            Dumper.srcdirRepoInfo[srcdir] = HGRepoInfo(srcdir)
         else:
             # Unknown VCS or file is not in a repo.
             return None
-    return srcdirRepoInfo[srcdir].GetFileInfo(file)
+    return Dumper.srcdirRepoInfo[srcdir].GetFileInfo(file)
 
 def GetVCSFilename(file, srcdirs):
     """Given a full path to a file, and the top source directory,
     look for version control information about this file, and return
     a tuple containing
     1) a specially formatted filename that contains the VCS type,
     VCS location, relative filename, and revision number, formatted like:
     vcs:vcs location:filename:revision
@@ -301,21 +298,22 @@ def SourceIndex(fileStream, outputPath, 
     pdbStreamFile.write('''SRCSRV: ini ------------------------------------------------\r\nVERSION=2\r\nINDEXVERSION=2\r\nVERCTRL=http\r\nSRCSRV: variables ------------------------------------------\r\nHGSERVER=''')
     pdbStreamFile.write(vcs_root)
     pdbStreamFile.write('''\r\nSRCSRVVERCTRL=http\r\nHTTP_EXTRACT_TARGET=%hgserver%/raw-file/%var3%/%var2%\r\nSRCSRVTRG=%http_extract_target%\r\nSRCSRV: source files ---------------------------------------\r\n''')
     pdbStreamFile.write(fileStream) # can't do string interpolation because the source server also uses this and so there are % in the above
     pdbStreamFile.write("SRCSRV: end ------------------------------------------------\r\n\n")
     pdbStreamFile.close()
     return result
 
-def WorkerInitializer(cls, lock):
+def WorkerInitializer(cls, lock, srcdirRepoInfo):
     """Windows worker processes won't have run GlobalInit, and due to a lack of fork(),
-    won't inherit the class variables from the parent. The only one they need is the lock,
-    so we run an initializer to set it. Redundant but harmless on other platforms."""
+    won't inherit the class variables from the parent. They only need a few variables,
+    so we run an initializer to set them. Redundant but harmless on other platforms."""
     cls.lock = lock
+    cls.srcdirRepoInfo = srcdirRepoInfo
 
 def StartProcessFilesWork(dumper, files, arch_num, arch, vcs_root, after, after_arg):
     """multiprocessing can't handle methods as Process targets, so we define
     a simple wrapper function around the work method."""
     return dumper.ProcessFilesWork(files, arch_num, arch, vcs_root, after, after_arg)
 
 class Dumper:
     """This class can dump symbols from a file with debug info, and
@@ -377,17 +375,19 @@ class Dumper:
             # assume a dual core machine if we can't find out for some reason
             # probably better on single core anyway due to I/O constraints
             num_cpus = 2
 
         # have to create any locks etc before the pool
         cls.manager = module.Manager()
         cls.jobs_condition = Dumper.manager.Condition()
         cls.lock = Dumper.manager.RLock()
-        cls.pool = module.Pool(num_cpus, WorkerInitializer, (cls, cls.lock))
+        cls.srcdirRepoInfo = Dumper.manager.dict()
+        cls.pool = module.Pool(num_cpus, WorkerInitializer,
+                               (cls, cls.lock, cls.srcdirRepoInfo))
 
     def JobStarted(self, file_key):
         """Increments the number of submitted jobs for the specified key file,
         defined as the original file we processed; note that a single key file
         can generate up to 1 + len(self.archs) jobs in the Mac case."""
         with Dumper.jobs_condition:
             self.jobs_record[file_key] += 1
             Dumper.jobs_condition.notify_all()
@@ -419,17 +419,21 @@ class Dumper:
         """
         Parse an XML manifest of repository info as produced
         by the `repo manifest -r` command.
         """
         doc = parse(repo_manifest)
         if doc.firstChild.tagName != "manifest":
             return
         # First, get remotes.
-        remotes = dict([(r.getAttribute("name"), r.getAttribute("fetch")) for r in doc.getElementsByTagName("remote")])
+        def ensure_slash(u):
+            if not u.endswith("/"):
+                return u + "/"
+            return u
+        remotes = dict([(r.getAttribute("name"), ensure_slash(r.getAttribute("fetch"))) for r in doc.getElementsByTagName("remote")])
         # And default remote.
         default_remote = None
         if doc.getElementsByTagName("default"):
             default_remote = doc.getElementsByTagName("default")[0].getAttribute("remote")
         # Now get projects. Assume they're relative to repo_manifest.
         base_dir = os.path.abspath(os.path.dirname(repo_manifest))
         for proj in doc.getElementsByTagName("project"):
             # name is the repository URL relative to the remote path.
@@ -454,17 +458,17 @@ class Dumper:
             if remote.startswith("git:"):
                 remote = "http" + remote[3:]
             # Add this project to srcdirs.
             srcdir = os.path.join(base_dir, path)
             self.srcdirs.append(srcdir)
             # And cache its VCS file info. Currently all repos mentioned
             # in a repo manifest are assumed to be git.
             root = urlparse.urljoin(remote, name)
-            srcdirRepoInfo[srcdir] = GitRepoInfo(srcdir, rev, root)
+            Dumper.srcdirRepoInfo[srcdir] = GitRepoInfo(srcdir, rev, root)
 
     # subclasses override this
     def ShouldProcess(self, file):
         return not any(fnmatch.fnmatch(os.path.basename(file), exclude) for exclude in self.exclude)
 
     # and can override this
     def ShouldSkipDir(self, dir):
         return False