wireprotov2: send linknodes to emitfilerevisions()
author Gregory Szorc <gregory.szorc@gmail.com>
Mon, 10 Dec 2018 18:04:12 +0000
changeset 53695 3ed77780f4a6
parent 53694 ca6372b7e566
child 53696 08cfa77d7288
push id 1081
push user gszorc@mozilla.com
push date Mon, 10 Dec 2018 21:46:46 +0000
wireprotov2: send linknodes to emitfilerevisions() Previously, linknodes were calculated within emitfilerevisions() by using filectx.introrev(), which would always use the linkrev/linknode as recorded by storage. This is wrong for cases where the receiver doesn't have the changeset the linknode refers to. This commit changes the logic for linknode emission so the mapping of filenode to linknode is computed by the caller and passed into emitfilerevisions(). As part of the change, linknodes for "filesdata" in the haveparents=False case are now correct: the existing code performed a manifest walk and it was trivial to plug in the correct linknode. However, behavior for the haveparents=True case is still wrong because it relies on filtering linkrevs against the outgoing set in order to determine what to send. This will be fixed in a subsequent commit. The changed test test-wireproto-exchangev2-shallow.t is a bit wonky. The test repo has 6 revisions. The changed test is performing a shallow clone with depth=1. So, only file data for revision 5 is present locally. So, the new behavior of associating the linknode with revision 5 for every file revision seems correct. Of course, when backfilling old revisions, we'll want to update the linknode. But this problem requires wire protocol support and we'll cross that bridge later. Differential Revision: https://phab.mercurial-scm.org/D5405
mercurial/wireprotov2server.py
tests/test-wireproto-command-filesdata.t
tests/test-wireproto-exchangev2-shallow.t
--- a/mercurial/wireprotov2server.py
+++ b/mercurial/wireprotov2server.py
@@ -979,35 +979,27 @@ def getfilestore(repo, proto, path):
     # "empty" files and return an error.
     fl = repo.file(path)
 
     if not len(fl):
         raise FileAccessError(path, 'unknown file: %s', (path,))
 
     return fl
 
-def emitfilerevisions(repo, path, revisions, fields):
-    clnode = repo.changelog.node
-
+def emitfilerevisions(repo, path, revisions, linknodes, fields):
     for revision in revisions:
         d = {
             b'node': revision.node,
         }
 
         if b'parents' in fields:
             d[b'parents'] = [revision.p1node, revision.p2node]
 
         if b'linknode' in fields:
-            # TODO by creating the filectx against a specific file revision
-            # instead of changeset, linkrev() is always used. This is wrong for
-            # cases where linkrev() may refer to a hidden changeset. We need an
-            # API for performing linkrev adjustment that takes this into
-            # account.
-            fctx = repo.filectx(path, fileid=revision.node)
-            d[b'linknode'] = clnode(fctx.introrev())
+            d[b'linknode'] = linknodes[revision.node]
 
         followingmeta = []
         followingdata = []
 
         if b'revision' in fields:
             if revision.revision is not None:
                 followingmeta.append((b'revision', len(revision.revision)))
                 followingdata.append(revision.revision)
@@ -1081,33 +1073,44 @@ def filedata(repo, proto, haveparents, n
     # API should be deleted?
 
     try:
         # Extensions may wish to access the protocol handler.
         store = getfilestore(repo, proto, path)
     except FileAccessError as e:
         raise error.WireprotoCommandError(e.msg, e.args)
 
+    clnode = repo.changelog.node
+    linknodes = {}
+
     # Validate requested nodes.
     for node in nodes:
         try:
             store.rev(node)
         except error.LookupError:
             raise error.WireprotoCommandError('unknown file node: %s',
                                               (hex(node),))
 
+        # TODO by creating the filectx against a specific file revision
+        # instead of changeset, linkrev() is always used. This is wrong for
+        # cases where linkrev() may refer to a hidden changeset. But since this
+        # API doesn't know anything about changesets, we're not sure how to
+        # disambiguate the linknode. Perhaps we should delete this API?
+        fctx = repo.filectx(path, fileid=node)
+        linknodes[node] = clnode(fctx.introrev())
+
     revisions = store.emitrevisions(nodes,
                                     revisiondata=b'revision' in fields,
                                     assumehaveparentrevisions=haveparents)
 
     yield {
         b'totalitems': len(nodes),
     }
 
-    for o in emitfilerevisions(repo, path, revisions, fields):
+    for o in emitfilerevisions(repo, path, revisions, linknodes, fields):
         yield o
 
 def filesdatacapabilities(repo, proto):
     batchsize = repo.ui.configint(
         b'experimental', b'server.filesdata.recommended-batch-size')
     return {
         b'recommendedbatchsize': batchsize,
     }
@@ -1149,22 +1152,24 @@ def filesdatacapabilities(repo, proto):
 def filesdata(repo, proto, haveparents, fields, pathfilter, revisions):
     # TODO This should operate on a repo that exposes obsolete changesets. There
     # is a race between a client making a push that obsoletes a changeset and
     # another client fetching files data for that changeset. If a client has a
     # changeset, it should probably be allowed to access files data for that
     # changeset.
 
     cl = repo.changelog
+    clnode = cl.node
     outgoing = resolvenodes(repo, revisions)
     filematcher = makefilematcher(repo, pathfilter)
 
     # Figure out what needs to be emitted.
     changedpaths = set()
-    fnodes = collections.defaultdict(set)
+    # path -> {fnode: linknode}
+    fnodes = collections.defaultdict(dict)
 
     for node in outgoing:
         ctx = repo[node]
         changedpaths.update(ctx.files())
 
     changedpaths = sorted(p for p in changedpaths if filematcher(p))
 
     # If ancestors are known, we send file revisions having a linkrev in the
@@ -1177,27 +1182,27 @@ def filesdata(repo, proto, haveparents, 
                 store = getfilestore(repo, proto, path)
             except FileAccessError as e:
                 raise error.WireprotoCommandError(e.msg, e.args)
 
             for rev in store:
                 linkrev = store.linkrev(rev)
 
                 if linkrev in outgoingclrevs:
-                    fnodes[path].add(store.node(rev))
+                    fnodes[path].setdefault(store.node(rev), clnode(linkrev))
 
     # If ancestors aren't known, we walk the manifests and send all
     # encountered file revisions.
     else:
         for node in outgoing:
             mctx = repo[node].manifestctx()
 
             for path, fnode in mctx.read().items():
                 if filematcher(path):
-                    fnodes[path].add(fnode)
+                    fnodes[path].setdefault(fnode, node)
 
     yield {
         b'totalpaths': len(fnodes),
         b'totalitems': sum(len(v) for v in fnodes.values())
     }
 
     for path, filenodes in sorted(fnodes.items()):
         try:
@@ -1205,21 +1210,21 @@ def filesdata(repo, proto, haveparents, 
         except FileAccessError as e:
             raise error.WireprotoCommandError(e.msg, e.args)
 
         yield {
             b'path': path,
             b'totalitems': len(filenodes),
         }
 
-        revisions = store.emitrevisions(filenodes,
+        revisions = store.emitrevisions(filenodes.keys(),
                                         revisiondata=b'revision' in fields,
                                         assumehaveparentrevisions=haveparents)
 
-        for o in emitfilerevisions(repo, path, revisions, fields):
+        for o in emitfilerevisions(repo, path, revisions, filenodes, fields):
             yield o
 
 @wireprotocommand(
     'heads',
     args={
         'publiconly': {
             'type': 'bool',
             'default': lambda: False,
--- a/tests/test-wireproto-command-filesdata.t
+++ b/tests/test-wireproto-command-filesdata.t
@@ -1234,17 +1234,16 @@ Request for changeset introducing fileno
     {
       b'linknode': b'\xb1l\xce)g\xc1t\x9e\xf4\xf4\xe3\x08j\x80l\xfb\xad\x8a:\xf7',
       b'node': b'.\xd2\xa3\x91*\x0b$P C\xea\xe8N\xe4\xb2y\xc1\x8b\x90\xdd'
     }
   ]
 
 Request for changeset where recorded linknode isn't in DAG ancestry will get
 rewritten accordingly
-TODO this is buggy
 
   $ sendhttpv2peer << EOF
   > command filesdata
   >     revisions eval:[{
   >         b'type': b'changesetexplicit',
   >         b'nodes': [
   >             b'\x47\xfc\x30\x58\x09\x11\x23\x2c\xb2\x64\x67\x5b\x40\x28\x19\xde\xdd\xf6\xc6\xf0',
   >         ]}]
@@ -1258,17 +1257,17 @@ TODO this is buggy
       b'totalitems': 1,
       b'totalpaths': 1
     },
     {
       b'path': b'dupe-file',
       b'totalitems': 1
     },
     {
-      b'linknode': b'\xb1l\xce)g\xc1t\x9e\xf4\xf4\xe3\x08j\x80l\xfb\xad\x8a:\xf7',
+      b'linknode': b'G\xfc0X\t\x11#,\xb2dg[@(\x19\xde\xdd\xf6\xc6\xf0',
       b'node': b'.\xd2\xa3\x91*\x0b$P C\xea\xe8N\xe4\xb2y\xc1\x8b\x90\xdd'
     }
   ]
 
 TODO this is buggy
 
   $ sendhttpv2peer << EOF
   > command filesdata
--- a/tests/test-wireproto-exchangev2-shallow.t
+++ b/tests/test-wireproto-exchangev2-shallow.t
@@ -191,44 +191,44 @@ Shallow clone pulls down latest revision
     flags = 2
   
        id = 3
      path = dir0/c
    revnum = 0
      node = I\x1d\xa1\xbb\x89\xeax\xc0\xc0\xa2s[\x16\xce}\x93\x1d\xc8\xe2\r (esc)
     p1rev = -1
     p2rev = -1
-  linkrev = 4
+  linkrev = 5
     flags = 2
   
        id = 4
      path = dir0/d
    revnum = 0
      node = S\x82\x06\xdc\x97\x1eR\x15@\xd6\x84:\xbf\xe6\xd1`2\xf6\xd4& (esc)
     p1rev = -1
     p2rev = -1
-  linkrev = 1
+  linkrev = 5
     flags = 0
   
        id = 5
      path = dir1/e
    revnum = 0
      node = ]\xf3\xac\xd8\xd0\xc7\xfaP\x98\xd0'\x9a\x044\xc3\x02\x9e+x\xe1 (esc)
     p1rev = -1
     p2rev = -1
-  linkrev = 4
+  linkrev = 5
     flags = 2
   
        id = 6
      path = dir1/f
    revnum = 0
      node = (\xc7v\xae\x08\xd0\xd5^\xb4\x06H\xb4\x01\xb9\x0f\xf5DH4\x8e (esc)
     p1rev = -1
     p2rev = -1
-  linkrev = 4
+  linkrev = 5
     flags = 2
 
 Test a shallow clone with only some files
 
   $ hg --debug clone --depth 1 --include dir0/ http://localhost:$HGPORT client-shallow-narrow-1
   using http://localhost:$HGPORT/
   sending capabilities command
   query 1; heads
@@ -337,26 +337,26 @@ Test a shallow clone with only some file
   > SELECT id, path, revnum, node, p1rev, p2rev, linkrev, flags FROM filedata ORDER BY id ASC;
   > EOF
        id = 1
      path = dir0/c
    revnum = 0
      node = I\x1d\xa1\xbb\x89\xeax\xc0\xc0\xa2s[\x16\xce}\x93\x1d\xc8\xe2\r (esc)
     p1rev = -1
     p2rev = -1
-  linkrev = 4
+  linkrev = 5
     flags = 2
   
        id = 2
      path = dir0/d
    revnum = 0
      node = S\x82\x06\xdc\x97\x1eR\x15@\xd6\x84:\xbf\xe6\xd1`2\xf6\xd4& (esc)
     p1rev = -1
     p2rev = -1
-  linkrev = 1
+  linkrev = 5
     flags = 0
 
 Cloning an old revision with depth=1 works
 
   $ hg --debug clone --depth 1 -r 97765fc3cd624fd1fa0176932c21ffd16adf432e http://localhost:$HGPORT client-shallow-2
   using http://localhost:$HGPORT/
   sending capabilities command
   sending 1 commands