treemanifest: store submanifest revlog per directory
authorMartin von Zweigbergk <martinvonz@google.com>
Mon, 13 Apr 2015 23:21:02 -0700
changeset 25153 b5052fc73300faf5c6f2146dee68d8d4f82a0f6c
parent 25152 252412e24551bdabc55fd0287a781b7adca0da9b
child 25154 f41539418b413b5aec512aa3fcf48996a39e81ac
push id13
push usergszorc@mozilla.com
push dateTue, 26 May 2015 00:39:25 +0000
treemanifest: store submanifest revlog per directory With this change, when tree manifests are enabled (in .hg/requires), commits will be written with one manifest revlog per directory. The manifest revlogs are stored in .hg/store/meta/$dir/00manifest.[id]. Flat manifests can still be read and interacted with as usual (they are also read into treemanifest instances). The functionality for writing treemanifest as a flat manifest to disk is still left in the code; tests still pass with '_treeinmem=True' hardcoded. Exchange is not yet implemented.
mercurial/manifest.py
mercurial/store.py
tests/test-treemanifest.t
--- a/mercurial/manifest.py
+++ b/mercurial/manifest.py
@@ -439,37 +439,56 @@ def _splittopdir(f):
         dir, subpath = f.split('/', 1)
         return dir + '/', subpath
     else:
         return '', f
 
 class treemanifest(object):
     def __init__(self, dir='', text=''):
         self._dir = dir
+        self._node = revlog.nullid
         self._dirs = {}
         # Using _lazymanifest here is a little slower than plain old dicts
         self._files = {}
         self._flags = {}
-        self.parse(text)
+        def readsubtree(subdir, subm):
+            raise AssertionError('treemanifest constructor only accepts '
+                                 'flat manifests')
+        self.parse(text, readsubtree)
 
     def _subpath(self, path):
         return self._dir + path
 
     def __len__(self):
         size = len(self._files)
         for m in self._dirs.values():
             size += m.__len__()
         return size
 
     def _isempty(self):
         return (not self._files and (not self._dirs or
                 util.all(m._isempty() for m in self._dirs.values())))
 
     def __str__(self):
-        return '<treemanifest dir=%s>' % self._dir
+        return ('<treemanifest dir=%s, node=%s>' %
+                (self._dir, revlog.hex(self._node)))
+
+    def dir(self):
+        '''The directory that this tree manifest represents, including a
+        trailing '/'. Empty string for the repo root directory.'''
+        return self._dir
+
+    def node(self):
+        '''This node of this instance. nullid for unsaved instances. Should
+        be updated when the instance is read or written from a revlog.
+        '''
+        return self._node
+
+    def setnode(self, node):
+        self._node = node
 
     def iteritems(self):
         for p, n in sorted(self._dirs.items() + self._files.items()):
             if p in self._files:
                 yield self._subpath(p), n
             else:
                 for f, sn in n.iteritems():
                     yield f, sn
@@ -552,26 +571,28 @@ class treemanifest(object):
             if dir not in self._dirs:
                 self._dirs[dir] = treemanifest(self._subpath(dir))
             self._dirs[dir].__setitem__(subpath, n)
         else:
             self._files[f] = n[:21] # to match manifestdict's behavior
 
     def setflag(self, f, flags):
         """Set the flags (symlink, executable) for path f."""
+        assert 'd' not in flags
         dir, subpath = _splittopdir(f)
         if dir:
             if dir not in self._dirs:
                 self._dirs[dir] = treemanifest(self._subpath(dir))
             self._dirs[dir].setflag(subpath, flags)
         else:
             self._flags[f] = flags
 
     def copy(self):
         copy = treemanifest(self._dir)
+        copy._node = self._node
         for d in self._dirs:
             copy._dirs[d] = self._dirs[d].copy()
         copy._files = dict.copy(self._files)
         copy._flags = dict.copy(self._flags)
         return copy
 
     def filesnotin(self, m2):
         '''Set of files in this manifest that are not in the other'''
@@ -732,50 +753,80 @@ class treemanifest(object):
             for fn, n2 in t2._files.iteritems():
                 if fn not in t1._files:
                     fl2 = t2._flags.get(fn, '')
                     result[t2._subpath(fn)] = ((None, ''), (n2, fl2))
 
         _diff(self, m2)
         return result
 
-    def parse(self, text):
+    def parse(self, text, readsubtree):
         for f, n, fl in _parse(text):
-            self[f] = n
-            if fl:
-                self.setflag(f, fl)
+            if fl == 'd':
+                f = f + '/'
+                self._dirs[f] = readsubtree(self._subpath(f), n)
+            else:
+                # Use __setitem__ and setflag rather than assigning directly
+                # to _files and _flags, thereby letting us parse flat manifests
+                # as well as tree manifests.
+                self[f] = n
+                if fl:
+                    self.setflag(f, fl)
 
     def text(self, usemanifestv2=False):
         """Get the full data of this manifest as a bytestring."""
         flags = self.flags
         return _text(((f, self[f], flags(f)) for f in self.keys()),
                      usemanifestv2)
 
+    def dirtext(self, usemanifestv2=False):
+        """Get the full data of this directory as a bytestring. Make sure that
+        any submanifests have been written first, so their nodeids are correct.
+        """
+        flags = self.flags
+        dirs = [(d[:-1], self._dirs[d]._node, 'd') for d in self._dirs]
+        files = [(f, self._files[f], flags(f)) for f in self._files]
+        return _text(sorted(dirs + files), usemanifestv2)
+
+    def writesubtrees(self, m1, m2, writesubtree):
+        emptytree = treemanifest()
+        for d, subm in self._dirs.iteritems():
+            subp1 = m1._dirs.get(d, emptytree)._node
+            subp2 = m2._dirs.get(d, emptytree)._node
+            if subp1 == revlog.nullid:
+                subp1, subp2 = subp2, subp1
+            writesubtree(subm, subp1, subp2)
+
 class manifest(revlog.revlog):
-    def __init__(self, opener):
+    def __init__(self, opener, dir=''):
         # During normal operations, we expect to deal with not more than four
         # revs at a time (such as during commit --amend). When rebasing large
         # stacks of commits, the number can go up, hence the config knob below.
         cachesize = 4
         usetreemanifest = False
         usemanifestv2 = False
         opts = getattr(opener, 'options', None)
         if opts is not None:
             cachesize = opts.get('manifestcachesize', cachesize)
             usetreemanifest = opts.get('treemanifest', usetreemanifest)
             usemanifestv2 = opts.get('manifestv2', usemanifestv2)
         self._mancache = util.lrucachedict(cachesize)
-        revlog.revlog.__init__(self, opener, "00manifest.i")
         self._treeinmem = usetreemanifest
         self._treeondisk = usetreemanifest
         self._usemanifestv2 = usemanifestv2
+        indexfile = "00manifest.i"
+        if dir:
+            assert self._treeondisk
+            indexfile = "meta/" + dir + "00manifest.i"
+        revlog.revlog.__init__(self, opener, indexfile)
+        self._dir = dir
 
     def _newmanifest(self, data=''):
         if self._treeinmem:
-            return treemanifest('', data)
+            return treemanifest(self._dir, data)
         return manifestdict(data)
 
     def _slowreaddelta(self, node):
         r0 = self.deltaparent(self.rev(node))
         m0 = self.read(self.node(r0))
         m1 = self.read(node)
         md = self._newmanifest()
         for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).iteritems():
@@ -807,18 +858,27 @@ class manifest(revlog.revlog):
         return self.read(node)
 
     def read(self, node):
         if node == revlog.nullid:
             return self._newmanifest() # don't upset local cache
         if node in self._mancache:
             return self._mancache[node][0]
         text = self.revision(node)
-        arraytext = array.array('c', text)
-        m = self._newmanifest(text)
+        if self._treeondisk:
+            def readsubtree(dir, subm):
+                sublog = manifest(self.opener, dir)
+                return sublog.read(subm)
+            m = self._newmanifest()
+            m.parse(text, readsubtree)
+            m.setnode(node)
+            arraytext = None
+        else:
+            m = self._newmanifest(text)
+            arraytext = array.array('c', text)
         self._mancache[node] = (m, arraytext)
         return m
 
     def find(self, node, f):
         '''look up entry for a single file efficiently.
         return (node, flags) pair if found, (None, None) if not.'''
         m = self.read(node)
         try:
@@ -846,15 +906,39 @@ class manifest(revlog.revlog):
             cachedelta = self.rev(p1), deltatext
             text = util.buffer(arraytext)
             n = self.addrevision(text, transaction, link, p1, p2, cachedelta)
         else:
             # The first parent manifest isn't already loaded, so we'll
             # just encode a fulltext of the manifest and pass that
             # through to the revlog layer, and let it handle the delta
             # process.
-            text = m.text(self._usemanifestv2)
-            arraytext = array.array('c', text)
-            n = self.addrevision(text, transaction, link, p1, p2)
+            if self._treeondisk:
+                m1 = self.read(p1)
+                m2 = self.read(p2)
+                n = self._addtree(m, transaction, link, m1, m2)
+                arraytext = None
+            else:
+                text = m.text(self._usemanifestv2)
+                n = self.addrevision(text, transaction, link, p1, p2)
+                arraytext = array.array('c', text)
 
         self._mancache[n] = (m, arraytext)
 
         return n
+
+    def _addtree(self, m, transaction, link, m1, m2):
+        def writesubtree(subm, subp1, subp2):
+            sublog = manifest(self.opener, subm.dir())
+            sublog.add(subm, transaction, link, subp1, subp2, None, None)
+        m.writesubtrees(m1, m2, writesubtree)
+        text = m.dirtext(self._usemanifestv2)
+        # If the manifest is unchanged compared to one parent,
+        # don't write a new revision
+        if text == m1.dirtext(self._usemanifestv2):
+            n = m1.node()
+        elif text == m2.dirtext(self._usemanifestv2):
+            n = m2.node()
+        else:
+            n = self.addrevision(text, transaction, link, m1.node(), m2.node())
+        # Save nodeid so parent manifest can calculate its nodeid
+        m.setnode(n)
+        return n
--- a/mercurial/store.py
+++ b/mercurial/store.py
@@ -182,17 +182,17 @@ def _auxencode(path, dotencode):
     return path
 
 _maxstorepathlen = 120
 _dirprefixlen = 8
 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
 
 def _hashencode(path, dotencode):
     digest = _sha(path).hexdigest()
-    le = lowerencode(path[5:]).split('/') # skips prefix 'data/'
+    le = lowerencode(path[5:]).split('/') # skips prefix 'data/' or 'meta/'
     parts = _auxencode(le, dotencode)
     basename = parts[-1]
     _root, ext = os.path.splitext(basename)
     sdirs = []
     sdirslen = 0
     for p in parts[:-1]:
         d = p[:_dirprefixlen]
         if d[-1] in '. ':
new file mode 100644
--- /dev/null
+++ b/tests/test-treemanifest.t
@@ -0,0 +1,278 @@
+
+Set up repo
+
+  $ hg --config experimental.treemanifest=True init repo
+  $ cd repo
+
+Requirements get set on init
+
+  $ grep treemanifest .hg/requires
+  treemanifest
+
+Without directories, looks like any other repo
+
+  $ echo 0 > a
+  $ echo 0 > b
+  $ hg ci -Aqm initial
+  $ hg debugdata -m 0
+  a\x00362fef284ce2ca02aecc8de6d5e8a1c3af0556fe (esc)
+  b\x00362fef284ce2ca02aecc8de6d5e8a1c3af0556fe (esc)
+
+Submanifest is stored in separate revlog
+
+  $ mkdir dir1
+  $ echo 1 > dir1/a
+  $ echo 1 > dir1/b
+  $ echo 1 > e
+  $ hg ci -Aqm 'add dir1'
+  $ hg debugdata -m 1
+  a\x00362fef284ce2ca02aecc8de6d5e8a1c3af0556fe (esc)
+  b\x00362fef284ce2ca02aecc8de6d5e8a1c3af0556fe (esc)
+  dir1\x008b3ffd73f901e83304c83d33132c8e774ceac44ed (esc)
+  e\x00b8e02f6433738021a065f94175c7cd23db5f05be (esc)
+  $ hg debugdata .hg/store/meta/dir1/00manifest.i 0
+  a\x00b8e02f6433738021a065f94175c7cd23db5f05be (esc)
+  b\x00b8e02f6433738021a065f94175c7cd23db5f05be (esc)
+
+Can add nested directories
+
+  $ mkdir dir1/dir1
+  $ echo 2 > dir1/dir1/a
+  $ echo 2 > dir1/dir1/b
+  $ mkdir dir1/dir2
+  $ echo 2 > dir1/dir2/a
+  $ echo 2 > dir1/dir2/b
+  $ hg ci -Aqm 'add dir1/dir1'
+  $ hg files -r .
+  a
+  b
+  dir1/a
+  dir1/b
+  dir1/dir1/a
+  dir1/dir1/b
+  dir1/dir2/a
+  dir1/dir2/b
+  e
+
+Revision is not created for unchanged directory
+
+  $ mkdir dir2
+  $ echo 3 > dir2/a
+  $ hg add dir2
+  adding dir2/a
+  $ hg debugindex .hg/store/meta/dir1/00manifest.i > before
+  $ hg ci -qm 'add dir2'
+  $ hg debugindex .hg/store/meta/dir1/00manifest.i > after
+  $ diff before after
+  $ rm before after
+
+Removing directory does not create an revlog entry
+
+  $ hg rm dir1/dir1
+  removing dir1/dir1/a
+  removing dir1/dir1/b
+  $ hg debugindex .hg/store/meta/dir1/dir1/00manifest.i > before
+  $ hg ci -qm 'remove dir1/dir1'
+  $ hg debugindex .hg/store/meta/dir1/dir1/00manifest.i > after
+  $ diff before after
+  $ rm before after
+
+Check that hg files (calls treemanifest.walk()) works
+
+  $ hg co 'desc("add dir2")'
+  2 files updated, 0 files merged, 0 files removed, 0 files unresolved
+  $ hg files -r . dir1
+  dir1/a
+  dir1/b
+  dir1/dir1/a
+  dir1/dir1/b
+  dir1/dir2/a
+  dir1/dir2/b
+
+Check that status between revisions works (calls treemanifest.matches())
+
+  $ hg status --rev 'desc("add dir1")' --rev . dir1
+  A dir1/dir1/a
+  A dir1/dir1/b
+  A dir1/dir2/a
+  A dir1/dir2/b
+
+Merge creates 2-parent revision of directory revlog
+
+  $ echo 5 > dir1/a
+  $ hg ci -Aqm 'modify dir1/a'
+  $ hg co '.^'
+  1 files updated, 0 files merged, 0 files removed, 0 files unresolved
+  $ echo 6 > dir1/b
+  $ hg ci -Aqm 'modify dir1/b'
+  $ hg merge 'desc("modify dir1/a")'
+  1 files updated, 0 files merged, 0 files removed, 0 files unresolved
+  (branch merge, don't forget to commit)
+  $ hg ci -m 'conflict-free merge involving dir1/'
+  $ cat dir1/a
+  5
+  $ cat dir1/b
+  6
+  $ hg debugindex .hg/store/meta/dir1/00manifest.i
+     rev    offset  length   base linkrev nodeid       p1           p2
+       0         0      54      0       1 8b3ffd73f901 000000000000 000000000000
+       1        54      68      0       2 b66d046c644f 8b3ffd73f901 000000000000
+       2       122      12      0       4 b87265673c8a b66d046c644f 000000000000
+       3       134      95      0       5 aa5d3adcec72 b66d046c644f 000000000000
+       4       229      81      0       6 e29b066b91ad b66d046c644f 000000000000
+       5       310     107      5       7 a120ce2b83f5 e29b066b91ad aa5d3adcec72
+
+Merge keeping directory from parent 1 does not create revlog entry. (Note that
+dir1's manifest does change, but only because dir1/a's filelog changes.)
+
+  $ hg co 'desc("add dir2")'
+  2 files updated, 0 files merged, 0 files removed, 0 files unresolved
+  $ echo 8 > dir2/a
+  $ hg ci -m 'modify dir2/a'
+  created new head
+
+  $ hg debugindex .hg/store/meta/dir2/00manifest.i > before
+  $ hg merge 'desc("modify dir1/a")'
+  1 files updated, 0 files merged, 0 files removed, 0 files unresolved
+  (branch merge, don't forget to commit)
+  $ hg revert -r 'desc("modify dir2/a")' .
+  reverting dir1/a (glob)
+  $ hg ci -m 'merge, keeping parent 1'
+  $ hg debugindex .hg/store/meta/dir2/00manifest.i > after
+  $ diff before after
+  $ rm before after
+
+Merge keeping directory from parent 2 does not create revlog entry. (Note that
+dir2's manifest does change, but only because dir2/a's filelog changes.)
+
+  $ hg co 'desc("modify dir2/a")'
+  1 files updated, 0 files merged, 0 files removed, 0 files unresolved
+  $ hg debugindex .hg/store/meta/dir1/00manifest.i > before
+  $ hg merge 'desc("modify dir1/a")'
+  1 files updated, 0 files merged, 0 files removed, 0 files unresolved
+  (branch merge, don't forget to commit)
+  $ hg revert -r 'desc("modify dir1/a")' .
+  reverting dir2/a (glob)
+  $ hg ci -m 'merge, keeping parent 2'
+  created new head
+  $ hg debugindex .hg/store/meta/dir1/00manifest.i > after
+  $ diff before after
+  $ rm before after
+
+Create flat source repo for tests with mixed flat/tree manifests
+
+  $ cd ..
+  $ hg init repo-flat
+  $ cd repo-flat
+
+Create a few commits with flat manifest
+
+  $ echo 0 > a
+  $ echo 0 > b
+  $ echo 0 > e
+  $ for d in dir1 dir1/dir1 dir1/dir2 dir2
+  > do
+  >   mkdir $d
+  >   echo 0 > $d/a
+  >   echo 0 > $d/b
+  > done
+  $ hg ci -Aqm initial
+
+  $ echo 1 > a
+  $ echo 1 > dir1/a
+  $ echo 1 > dir1/dir1/a
+  $ hg ci -Aqm 'modify on branch 1'
+
+  $ hg co 0
+  3 files updated, 0 files merged, 0 files removed, 0 files unresolved
+  $ echo 2 > b
+  $ echo 2 > dir1/b
+  $ echo 2 > dir1/dir1/b
+  $ hg ci -Aqm 'modify on branch 2'
+
+  $ hg merge 1
+  3 files updated, 0 files merged, 0 files removed, 0 files unresolved
+  (branch merge, don't forget to commit)
+  $ hg ci -m 'merge of flat manifests to new flat manifest'
+
+Create clone with tree manifests enabled
+
+  $ cd ..
+  $ hg clone --pull --config experimental.treemanifest=1 repo-flat repo-mixed
+  requesting all changes
+  adding changesets
+  adding manifests
+  adding file changes
+  added 4 changesets with 17 changes to 11 files
+  updating to branch default
+  11 files updated, 0 files merged, 0 files removed, 0 files unresolved
+  $ cd repo-mixed
+  $ test -f .hg/store/meta
+  [1]
+  $ grep treemanifest .hg/requires
+  treemanifest
+
+Commit should store revlog per directory
+
+  $ hg co 1
+  3 files updated, 0 files merged, 0 files removed, 0 files unresolved
+  $ echo 3 > a
+  $ echo 3 > dir1/a
+  $ echo 3 > dir1/dir1/a
+  $ hg ci -m 'first tree'
+  created new head
+  $ find .hg/store/meta | sort
+  .hg/store/meta
+  .hg/store/meta/dir1
+  .hg/store/meta/dir1/00manifest.i
+  .hg/store/meta/dir1/dir1
+  .hg/store/meta/dir1/dir1/00manifest.i
+  .hg/store/meta/dir1/dir2
+  .hg/store/meta/dir1/dir2/00manifest.i
+  .hg/store/meta/dir2
+  .hg/store/meta/dir2/00manifest.i
+
+Merge of two trees
+
+  $ hg co 2
+  6 files updated, 0 files merged, 0 files removed, 0 files unresolved
+  $ hg merge 1
+  3 files updated, 0 files merged, 0 files removed, 0 files unresolved
+  (branch merge, don't forget to commit)
+  $ hg ci -m 'merge of flat manifests to new tree manifest'
+  created new head
+  $ hg diff -r 3
+
+Parent of tree root manifest should be flat manifest, and two for merge
+
+  $ hg debugindex -m
+     rev    offset  length   base linkrev nodeid       p1           p2
+       0         0      80      0       0 40536115ed9e 000000000000 000000000000
+       1        80      83      0       1 f3376063c255 40536115ed9e 000000000000
+       2       163     103      0       2 5d9b9da231a2 40536115ed9e 000000000000
+       3       266      83      0       3 d17d663cbd8a 5d9b9da231a2 f3376063c255
+       4       349     132      4       4 c05a51345f86 f3376063c255 000000000000
+       5       481     110      4       5 82594b1f557d 5d9b9da231a2 f3376063c255
+
+
+Status across flat/tree boundary should work
+
+  $ hg status --rev '.^' --rev .
+  M a
+  M dir1/a
+  M dir1/dir1/a
+
+
+Turning off treemanifest config has no effect
+
+  $ hg debugindex .hg/store/meta/dir1/00manifest.i
+     rev    offset  length   base linkrev nodeid       p1           p2
+       0         0     125      0       4 63c9c0557d24 000000000000 000000000000
+       1       125     109      0       5 23d12a1f6e0e 000000000000 000000000000
+  $ echo 2 > dir1/a
+  $ hg --config experimental.treemanifest=False ci -qm 'modify dir1/a'
+  $ hg debugindex .hg/store/meta/dir1/00manifest.i
+     rev    offset  length   base linkrev nodeid       p1           p2
+       0         0     125      0       4 63c9c0557d24 000000000000 000000000000
+       1       125     109      0       5 23d12a1f6e0e 000000000000 000000000000
+       2       234      55      0       6 3cb2d87b4250 23d12a1f6e0e 000000000000