changegroup: fix treemanifests on merges
authorMartin von Zweigbergk <martinvonz@google.com>
Fri, 12 Feb 2016 23:09:09 -0800
changeset 30425 1ac8ce137377102012adeeddec661954c578cde6
parent 30424 7279e01323470eed3aacc991b7cd1c6cf7023907
child 30426 a4286175ecba60920dc29bbf4916b55d0daeeccf
push id196
push usergszorc@mozilla.com
push dateTue, 01 Mar 2016 22:10:24 +0000
changegroup: fix treemanifests on merges The current code for generating treemanifest revisions takes the list of files in the changeset and finds the directories from them. This does not work for merges, since a merge may pick file A from one side and file B from another and neither of them would appear in the changeset's "files" list, but the manifest would still change. Fix this by instead walking the root manifest log for all needed revisions, storing all needed file and subdirectory revisions, then recursively visiting the subdirectories. This also turns out to be faster: cloning a version of hg core converted to treemanifests went from ~28s to ~19s (timing somewhat unfair: before this patch, timed until crash; after this patch, timed until manifests complete). The new algorithm is used only on treemanifest repos. Although it works equally well on flat manifests, we leave the iteration over files in the changeset for flat manifests for now.
mercurial/changegroup.py
mercurial/manifest.py
tests/test-treemanifest.t
--- a/mercurial/changegroup.py
+++ b/mercurial/changegroup.py
@@ -548,37 +548,16 @@ class headerlessfixup(object):
     def read(self, n):
         if self._h:
             d, self._h = self._h[:n], self._h[n:]
             if len(d) < n:
                 d += readexactly(self._fh, n - len(d))
             return d
         return readexactly(self._fh, n)
 
-def _moddirs(files):
-    """Given a set of modified files, find the list of modified directories.
-
-    This returns a list of (path to changed dir, changed dir) tuples,
-    as that's what the one client needs anyway.
-
-    >>> _moddirs(['a/b/c.py', 'a/b/c.txt', 'a/d/e/f/g.txt', 'i.txt', ])
-    [('/', 'a/'), ('a/', 'b/'), ('a/', 'd/'), ('a/d/', 'e/'), ('a/d/e/', 'f/')]
-
-    """
-    alldirs = set()
-    for f in files:
-        path = f.split('/')[:-1]
-        for i in xrange(len(path) - 1, -1, -1):
-            dn = '/'.join(path[:i])
-            current = dn + '/', path[i] + '/'
-            if current in alldirs:
-                break
-            alldirs.add(current)
-    return sorted(alldirs)
-
 class cg1packer(object):
     deltaheader = _CHANGEGROUPV1_DELTA_HEADER
     version = '01'
     def __init__(self, repo, bundlecaps=None):
         """Given a source repo, construct a bundler.
 
         bundlecaps is optional and can be used to specify the set of
         capabilities which can be used to build the bundle.
@@ -750,91 +729,79 @@ class cg1packer(object):
         yield self.close()
 
         if clnodes:
             repo.hook('outgoing', node=hex(clnodes[0]), source=source)
 
     def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev, mfs,
                           mfchangedfiles, fnodes):
         repo = self._repo
-        ml = repo.manifest
+        dirlog = repo.manifest.dirlog
         tmfnodes = {'': mfs}
 
         # Callback for the manifest, used to collect linkrevs for filelog
         # revisions.
         # Returns the linkrev node (collected in lookupcl).
         def makelookupmflinknode(dir):
             if fastpathlinkrev:
                 assert not dir
                 return mfs.__getitem__
 
-            if dir:
-                return tmfnodes[dir].get
-
             def lookupmflinknode(x):
                 """Callback for looking up the linknode for manifests.
 
                 Returns the linkrev node for the specified manifest.
 
                 SIDE EFFECT:
 
                 1) fclnodes gets populated with the list of relevant
                    file nodes if we're not using fastpathlinkrev
                 2) When treemanifests are in use, collects treemanifest nodes
                    to send
 
                 Note that this means manifests must be completely sent to
                 the client before you can trust the list of files and
                 treemanifests to send.
                 """
-                clnode = mfs[x]
-                # We no longer actually care about reading deltas of
-                # the manifest here, because we already know the list
-                # of changed files, so for treemanifests (which
-                # lazily-load anyway to *generate* a readdelta) we can
-                # just load them with read() and then we'll actually
-                # be able to correctly load node IDs from the
-                # submanifest entries.
+                clnode = tmfnodes[dir][x]
+                mdata = dirlog(dir).readshallowfast(x)
                 if 'treemanifest' in repo.requirements:
-                    mdata = ml.read(x)
+                    for p, n, fl in mdata.iterentries():
+                        if fl == 't': # subdirectory manifest
+                            subdir = dir + p + '/'
+                            tmfclnodes = tmfnodes.setdefault(subdir, {})
+                            tmfclnode = tmfclnodes.setdefault(n, clnode)
+                            if clrevorder[clnode] < clrevorder[tmfclnode]:
+                                tmfclnodes[n] = clnode
+                        else:
+                            f = dir + p
+                            fclnodes = fnodes.setdefault(f, {})
+                            fclnode = fclnodes.setdefault(n, clnode)
+                            if clrevorder[clnode] < clrevorder[fclnode]:
+                                fclnodes[n] = clnode
                 else:
-                    mdata = ml.readfast(x)
-                for f in mfchangedfiles[x]:
-                    try:
-                        n = mdata[f]
-                    except KeyError:
-                        continue
-                    # record the first changeset introducing this filelog
-                    # version
-                    fclnodes = fnodes.setdefault(f, {})
-                    fclnode = fclnodes.setdefault(n, clnode)
-                    if clrevorder[clnode] < clrevorder[fclnode]:
-                        fclnodes[n] = clnode
-                # gather list of changed treemanifest nodes
-                if 'treemanifest' in repo.requirements:
-                    submfs = {'/': mdata}
-                    for dn, bn in _moddirs(mfchangedfiles[x]):
+                    for f in mfchangedfiles[x]:
                         try:
-                            submf = submfs[dn]
-                            submf = submf._dirs[bn]
+                            n = mdata[f]
                         except KeyError:
-                            continue # deleted directory, so nothing to send
-                        submfs[submf.dir()] = submf
-                        tmfclnodes = tmfnodes.setdefault(submf.dir(), {})
-                        tmfclnode = tmfclnodes.setdefault(submf._node, clnode)
-                        if clrevorder[clnode] < clrevorder[tmfclnode]:
-                            tmfclnodes[n] = clnode
+                            continue
+                        # record the first changeset introducing this filelog
+                        # version
+                        fclnodes = fnodes.setdefault(f, {})
+                        fclnode = fclnodes.setdefault(n, clnode)
+                        if clrevorder[clnode] < clrevorder[fclnode]:
+                            fclnodes[n] = clnode
                 return clnode
             return lookupmflinknode
 
         size = 0
         while tmfnodes:
             dir = min(tmfnodes)
             nodes = tmfnodes[dir]
-            prunednodes = self.prune(ml.dirlog(dir), nodes, commonrevs)
+            prunednodes = self.prune(dirlog(dir), nodes, commonrevs)
             for x in self._packmanifests(dir, prunednodes,
                                          makelookupmflinknode(dir)):
                 size += len(x)
                 yield x
             del tmfnodes[dir]
         self._verbosenote(_('%8.i (manifests)\n') % size)
         yield self._manifestsdone()
 
--- a/mercurial/manifest.py
+++ b/mercurial/manifest.py
@@ -980,16 +980,25 @@ class manifest(revlog.revlog):
         to retrieve the data.
         '''
         r = self.rev(node)
         deltaparent = self.deltaparent(r)
         if deltaparent != revlog.nullrev and deltaparent in self.parentrevs(r):
             return self.readdelta(node)
         return self.read(node)
 
+    def readshallowfast(self, node):
+        '''like readfast(), but calls readshallowdelta() instead of readdelta()
+        '''
+        r = self.rev(node)
+        deltaparent = self.deltaparent(r)
+        if deltaparent != revlog.nullrev and deltaparent in self.parentrevs(r):
+            return self.readshallowdelta(node)
+        return self.readshallow(node)
+
     def read(self, node):
         if node == revlog.nullid:
             return self._newmanifest() # don't upset local cache
         if node in self._mancache:
             return self._mancache[node][0]
         if self._treeondisk:
             def gettext():
                 return self.revision(node)
@@ -1001,16 +1010,23 @@ class manifest(revlog.revlog):
             arraytext = None
         else:
             text = self.revision(node)
             m = self._newmanifest(text)
             arraytext = array.array('c', text)
         self._mancache[node] = (m, arraytext)
         return m
 
+    def readshallow(self, node):
+        '''Reads the manifest in this directory. When using flat manifests,
+        this manifest will generally have files in subdirectories in it. Does
+        not cache the manifest as the callers generally do not read the same
+        version twice.'''
+        return manifestdict(self.revision(node))
+
     def find(self, node, f):
         '''look up entry for a single file efficiently.
         return (node, flags) pair if found, (None, None) if not.'''
         m = self.read(node)
         try:
             return m.find(f)
         except KeyError:
             return None, None
--- a/tests/test-treemanifest.t
+++ b/tests/test-treemanifest.t
@@ -358,16 +358,23 @@ Pushing to an empty repo works
   pushing to clone
   searching for changes
   adding changesets
   adding manifests
   adding file changes
   added 11 changesets with 15 changes to 10 files (+3 heads)
   $ grep treemanifest clone/.hg/requires
   treemanifest
+  $ hg -R clone verify
+  checking changesets
+  checking manifests
+  checking directory manifests
+  crosschecking files in changesets and manifests
+  checking files
+  10 files, 11 changesets, 15 total revisions
 
 Create deeper repo with tree manifests.
 
   $ hg --config experimental.treemanifest=True init deeprepo
   $ cd deeprepo
 
   $ mkdir .A
   $ mkdir b