Bug 866241 - Fix some issues with repo manifest parsing. r=catlee
author Ted Mielczarek <ted@mielczarek.org>
Fri, 26 Apr 2013 15:30:19 -0400
changeset 141460 2b526c0d0b587d5cb34f5822b4eccc5219b159cf
parent 141459 680afe05ac35d59d43c522b7c87a82e58242490a
child 141461 e13bb42d287235c2e8d3ebb3f6d84d21918aa896
push id 2579
push user akeybl@mozilla.com
push date Mon, 24 Jun 2013 18:52:47 +0000
--- a/toolkit/crashreporter/tools/symbolstore.py
+++ b/toolkit/crashreporter/tools/symbolstore.py
@@ -218,38 +218,35 @@ class GitFileInfo(VCSFileInfo):
     def GetFilename(self):
         if self.revision and self.clean_root:
             return "git:%s:%s:%s" % (self.clean_root, self.file, self.revision)
         return self.file
 # Utility functions
-# A cache of repo info for each srcdir.
-srcdirRepoInfo = {}
 # A cache of files for which VCS info has already been determined. Used to
 # prevent extra filesystem activity or process launching.
 vcsFileInfoCache = {}
 def IsInDir(file, dir):
     # the lower() is to handle win32+vc8, where
     # the source filenames come out all lowercase,
     # but the srcdir can be mixed case
     return os.path.abspath(file).lower().startswith(os.path.abspath(dir).lower())
 def GetVCSFilenameFromSrcdir(file, srcdir):
-    if srcdir not in srcdirRepoInfo:
+    if srcdir not in Dumper.srcdirRepoInfo:
         # Not in cache, so find it adnd cache it
         if os.path.isdir(os.path.join(srcdir, '.hg')):
-            srcdirRepoInfo[srcdir] = HGRepoInfo(srcdir)
+            Dumper.srcdirRepoInfo[srcdir] = HGRepoInfo(srcdir)
             # Unknown VCS or file is not in a repo.
             return None
-    return srcdirRepoInfo[srcdir].GetFileInfo(file)
+    return Dumper.srcdirRepoInfo[srcdir].GetFileInfo(file)
 def GetVCSFilename(file, srcdirs):
     """Given a full path to a file, and the top source directory,
     look for version control information about this file, and return
     a tuple containing
     1) a specially formatted filename that contains the VCS type,
     VCS location, relative filename, and revision number, formatted like:
     vcs:vcs location:filename:revision
@@ -301,21 +298,22 @@ def SourceIndex(fileStream, outputPath, 
     pdbStreamFile.write('''SRCSRV: ini ------------------------------------------------\r\nVERSION=2\r\nINDEXVERSION=2\r\nVERCTRL=http\r\nSRCSRV: variables ------------------------------------------\r\nHGSERVER=''')
     pdbStreamFile.write('''\r\nSRCSRVVERCTRL=http\r\nHTTP_EXTRACT_TARGET=%hgserver%/raw-file/%var3%/%var2%\r\nSRCSRVTRG=%http_extract_target%\r\nSRCSRV: source files ---------------------------------------\r\n''')
     pdbStreamFile.write(fileStream) # can't do string interpolation because the source server also uses this and so there are % in the above
     pdbStreamFile.write("SRCSRV: end ------------------------------------------------\r\n\n")
     return result
-def WorkerInitializer(cls, lock):
+def WorkerInitializer(cls, lock, srcdirRepoInfo):
     """Windows worker processes won't have run GlobalInit, and due to a lack of fork(),
-    won't inherit the class variables from the parent. The only one they need is the lock,
-    so we run an initializer to set it. Redundant but harmless on other platforms."""
+    won't inherit the class variables from the parent. They only need a few variables,
+    so we run an initializer to set them. Redundant but harmless on other platforms."""
     cls.lock = lock
+    cls.srcdirRepoInfo = srcdirRepoInfo
 def StartProcessFilesWork(dumper, files, arch_num, arch, vcs_root, after, after_arg):
     """multiprocessing can't handle methods as Process targets, so we define
     a simple wrapper function around the work method."""
     return dumper.ProcessFilesWork(files, arch_num, arch, vcs_root, after, after_arg)
 class Dumper:
     """This class can dump symbols from a file with debug info, and
@@ -377,17 +375,19 @@ class Dumper:
             # assume a dual core machine if we can't find out for some reason
             # probably better on single core anyway due to I/O constraints
             num_cpus = 2
         # have to create any locks etc before the pool
         cls.manager = module.Manager()
         cls.jobs_condition = Dumper.manager.Condition()
         cls.lock = Dumper.manager.RLock()
-        cls.pool = module.Pool(num_cpus, WorkerInitializer, (cls, cls.lock))
+        cls.srcdirRepoInfo = Dumper.manager.dict()
+        cls.pool = module.Pool(num_cpus, WorkerInitializer,
+                               (cls, cls.lock, cls.srcdirRepoInfo))
     def JobStarted(self, file_key):
         """Increments the number of submitted jobs for the specified key file,
         defined as the original file we processed; note that a single key file
         can generate up to 1 + len(self.archs) jobs in the Mac case."""
         with Dumper.jobs_condition:
             self.jobs_record[file_key] += 1
@@ -419,17 +419,21 @@ class Dumper:
         Parse an XML manifest of repository info as produced
         by the `repo manifest -r` command.
         doc = parse(repo_manifest)
         if doc.firstChild.tagName != "manifest":
         # First, get remotes.
-        remotes = dict([(r.getAttribute("name"), r.getAttribute("fetch")) for r in doc.getElementsByTagName("remote")])
+        def ensure_slash(u):
+            if not u.endswith("/"):
+                return u + "/"
+            return u
+        remotes = dict([(r.getAttribute("name"), ensure_slash(r.getAttribute("fetch"))) for r in doc.getElementsByTagName("remote")])
         # And default remote.
         default_remote = None
         if doc.getElementsByTagName("default"):
             default_remote = doc.getElementsByTagName("default")[0].getAttribute("remote")
         # Now get projects. Assume they're relative to repo_manifest.
         base_dir = os.path.abspath(os.path.dirname(repo_manifest))
         for proj in doc.getElementsByTagName("project"):
             # name is the repository URL relative to the remote path.
@@ -454,17 +458,17 @@ class Dumper:
             if remote.startswith("git:"):
                 remote = "http" + remote[3:]
             # Add this project to srcdirs.
             srcdir = os.path.join(base_dir, path)
             # And cache its VCS file info. Currently all repos mentioned
             # in a repo manifest are assumed to be git.
             root = urlparse.urljoin(remote, name)
-            srcdirRepoInfo[srcdir] = GitRepoInfo(srcdir, rev, root)
+            Dumper.srcdirRepoInfo[srcdir] = GitRepoInfo(srcdir, rev, root)
     # subclasses override this
     def ShouldProcess(self, file):
         return not any(fnmatch.fnmatch(os.path.basename(file), exclude) for exclude in self.exclude)
     # and can override this
     def ShouldSkipDir(self, dir):
         return False