wireprotov2: unify file revision collection and linknode derivation
author Gregory Szorc <gregory.szorc@gmail.com>
Mon, 10 Dec 2018 18:55:08 +0000
changeset 53696 08cfa77d7288
parent 53695 3ed77780f4a6
child 53697 008f3491dc53
push id 1081
push user gszorc@mozilla.com
push date Mon, 10 Dec 2018 21:46:46 +0000
wireprotov2: unify file revision collection and linknode derivation

The old mechanism for choosing which file revisions to send in the
haveparents=True case was buggy in multiple ways - the most severe of
which being that file revisions were excluded when they shouldn't have
been.

This commit unifies the logic for deriving the filenodes that will be
sent by the "filesdata" command. We now consistently read files data
from manifests. The "haveparents" argument now controls whether we
iterate ctx.files() or use the full manifest to derive relevant files.

The logic here is still woefully lacking to fully support shallow
clones. It will require an API break to fully address. This commit
should at least make the server APIs emit proper data, which is
strictly better than before.

Differential Revision: https://phab.mercurial-scm.org/D5406
mercurial/help/internals/wireprotocolv2.txt
mercurial/wireprotov2server.py
tests/test-wireproto-command-filesdata.t
tests/test-wireproto-exchangev2.t
--- a/mercurial/help/internals/wireprotocolv2.txt
+++ b/mercurial/help/internals/wireprotocolv2.txt
@@ -421,18 +421,20 @@ revisions introduced by the set of chang
 requested. In other words, the command may assume that all file revisions
 for all relevant paths for ancestors of the requested changeset revisions
 are present on the receiver.
 
 When ``haveparents`` is false, the command MUST assume that the receiver
 has no file revisions data. This means that all referenced file revisions
 in the queried set of changeset revisions will be sent.
 
-TODO we'll probably want a more complicated mechanism for the client to
-specify which ancestor revisions are known.
+TODO we want a more complicated mechanism for the client to specify which
+ancestor revisions are known. This is needed so intelligent deltas can be
+emitted and so updated linknodes can be sent if the client needs to adjust
+its linknodes for existing file nodes to older changeset revisions.
 TODO we may want to make linknodes an array so multiple changesets can be
 marked as introducing a file revision, since this can occur with e.g. hidden
 changesets.
 
 heads
 -----
 
 Obtain DAG heads in the repository.
--- a/mercurial/wireprotov2server.py
+++ b/mercurial/wireprotov2server.py
@@ -1151,58 +1151,48 @@ def filesdatacapabilities(repo, proto):
     extracapabilitiesfn=filesdatacapabilities)
 def filesdata(repo, proto, haveparents, fields, pathfilter, revisions):
     # TODO This should operate on a repo that exposes obsolete changesets. There
     # is a race between a client making a push that obsoletes a changeset and
     # another client fetching files data for that changeset. If a client has a
     # changeset, it should probably be allowed to access files data for that
     # changeset.
 
-    cl = repo.changelog
-    clnode = cl.node
     outgoing = resolvenodes(repo, revisions)
     filematcher = makefilematcher(repo, pathfilter)
 
-    # Figure out what needs to be emitted.
-    changedpaths = set()
     # path -> {fnode: linknode}
     fnodes = collections.defaultdict(dict)
 
+    # We collect the set of relevant file revisions by iterating the changeset
+    # revisions and either walking the set of files recorded in the changeset
+    # or by walking the manifest at that revision. There is probably room for a
+    # storage-level API to request this data, as it can be expensive to compute
+    # and would benefit from caching or alternate storage from what revlogs
+    # provide.
     for node in outgoing:
         ctx = repo[node]
-        changedpaths.update(ctx.files())
-
-    changedpaths = sorted(p for p in changedpaths if filematcher(p))
+        mctx = ctx.manifestctx()
+        md = mctx.read()
 
-    # If ancestors are known, we send file revisions having a linkrev in the
-    # outgoing set of changeset revisions.
-    if haveparents:
-        outgoingclrevs = set(cl.rev(n) for n in outgoing)
-
-        for path in changedpaths:
-            try:
-                store = getfilestore(repo, proto, path)
-            except FileAccessError as e:
-                raise error.WireprotoCommandError(e.msg, e.args)
+        if haveparents:
+            checkpaths = ctx.files()
+        else:
+            checkpaths = md.keys()
 
-            for rev in store:
-                linkrev = store.linkrev(rev)
-
-                if linkrev in outgoingclrevs:
-                    fnodes[path].setdefault(store.node(rev), clnode(linkrev))
+        for path in checkpaths:
+            fnode = md[path]
 
-    # If ancestors aren't known, we walk the manifests and send all
-    # encountered file revisions.
-    else:
-        for node in outgoing:
-            mctx = repo[node].manifestctx()
+            if path in fnodes and fnode in fnodes[path]:
+                continue
 
-            for path, fnode in mctx.read().items():
-                if filematcher(path):
-                    fnodes[path].setdefault(fnode, node)
+            if not filematcher(path):
+                continue
+
+            fnodes[path].setdefault(fnode, node)
 
     yield {
         b'totalpaths': len(fnodes),
         b'totalitems': sum(len(v) for v in fnodes.values())
     }
 
     for path, filenodes in sorted(fnodes.items()):
         try:
--- a/tests/test-wireproto-command-filesdata.t
+++ b/tests/test-wireproto-command-filesdata.t
@@ -1262,31 +1262,37 @@ rewritten accordingly
       b'totalitems': 1
     },
     {
       b'linknode': b'G\xfc0X\t\x11#,\xb2dg[@(\x19\xde\xdd\xf6\xc6\xf0',
       b'node': b'.\xd2\xa3\x91*\x0b$P C\xea\xe8N\xe4\xb2y\xc1\x8b\x90\xdd'
     }
   ]
 
-TODO this is buggy
-
   $ sendhttpv2peer << EOF
   > command filesdata
   >     revisions eval:[{
   >         b'type': b'changesetexplicit',
   >         b'nodes': [
   >             b'\x47\xfc\x30\x58\x09\x11\x23\x2c\xb2\x64\x67\x5b\x40\x28\x19\xde\xdd\xf6\xc6\xf0',
   >         ]}]
   >     fields eval:[b'linknode']
   >     haveparents eval:True
   >     pathfilter eval:{b'include': [b'path:dupe-file']}
   > EOF
   creating http peer for wire protocol version 2
   sending filesdata command
   response: gen[
     {
-      b'totalitems': 0,
-      b'totalpaths': 0
+      b'totalitems': 1,
+      b'totalpaths': 1
+    },
+    {
+      b'path': b'dupe-file',
+      b'totalitems': 1
+    },
+    {
+      b'linknode': b'G\xfc0X\t\x11#,\xb2dg[@(\x19\xde\xdd\xf6\xc6\xf0',
+      b'node': b'.\xd2\xa3\x91*\x0b$P C\xea\xe8N\xe4\xb2y\xc1\x8b\x90\xdd'
     }
   ]
 
   $ cat error.log
--- a/tests/test-wireproto-exchangev2.t
+++ b/tests/test-wireproto-exchangev2.t
@@ -1295,26 +1295,11 @@ Perform an incremental pull of both head
   $ hg -R client-linknode-2 pull -r 639c8990d6a5
   pulling from http://localhost:$HGPORT/
   searching for changes
   new changesets 639c8990d6a5
   (run 'hg update' to get a working copy)
 
 #if reporevlogstore
   $ hg -R client-linknode-2 debugrevlogindex dupe-file
-  abort: revlog 'dupe-file' not found
-  [255]
+     rev linkrev nodeid       p1           p2
+       0       2 2ed2a3912a0b 000000000000 000000000000
 #endif
-
-  $ hg -R client-linknode-2 verify
-  checking changesets
-  checking manifests
-  crosschecking files in changesets and manifests
-  checking files
-   warning: revlog 'data/dupe-file.i' not in fncache!
-   2: empty or missing dupe-file
-   dupe-file@2: manifest refers to unknown revision 2ed2a3912a0b
-  checked 3 changesets with 2 changes to 3 files
-  1 warnings encountered!
-  hint: run "hg debugrebuildfncache" to recover from corrupt fncache
-  2 integrity errors encountered!
-  (first damaged changeset appears to be 2)
-  [1]