index: don't include nullid in len()
authorMartin von Zweigbergk <martinvonz@google.com>
Fri, 20 Jul 2018 08:10:32 -0700
changeset 47283 781b2720d2ac005e2806b46d8cb91abacfe4b901
parent 47282 6104b203bec89f1fa7fd1c0ab00a1a65e66d2627
child 47284 a3dacabd476b6bed04932b08962d7aa2fd2fad41
push id840
push usergszorc@mozilla.com
push dateMon, 06 Aug 2018 16:06:50 +0000
index: don't include nullid in len() I suspect the reason the nullid is in the index in the last position is that it lets index[i] for regular revision number, even when index was just a regular Python list. An alternative solution would have been to reserve revision number 0 for the null revision. I don't know why that wasn't done. Now that we have classes backing the index, we can easily make index[-1] get the nullid without having to put it last in the list and including it in the len(). This patch just hides the nullid -- it will still be accessible at index[len(index)]. I realize that this will be annoying when checking out across this commit for debugging (including bisection). Differential Revision: https://phab.mercurial-scm.org/D4022
mercurial/cext/parsers.c
mercurial/cext/revlog.c
mercurial/policy.py
mercurial/pure/parsers.py
mercurial/repoview.py
mercurial/revlog.py
--- a/mercurial/cext/parsers.c
+++ b/mercurial/cext/parsers.c
@@ -708,17 +708,17 @@ static PyMethodDef methods[] = {
     {"fm1readmarkers", fm1readmarkers, METH_VARARGS,
      "parse v1 obsolete markers\n"},
     {NULL, NULL}};
 
 void dirs_module_init(PyObject *mod);
 void manifest_module_init(PyObject *mod);
 void revlog_module_init(PyObject *mod);
 
-static const int version = 6;
+static const int version = 7;
 
 static void module_init(PyObject *mod)
 {
 	PyModule_AddIntConstant(mod, "version", version);
 
 	/* This module constant has two purposes.  First, it lets us unit test
 	 * the ImportError raised without hard-coding any error text.  This
 	 * means we can change the text in the future without breaking tests,
--- a/mercurial/cext/revlog.c
+++ b/mercurial/cext/revlog.c
@@ -72,18 +72,18 @@ typedef struct {
 	int ntlookups;         /* # lookups */
 	int ntmisses;          /* # lookups that miss the cache */
 	int inlined;
 } indexObject;
 
 static Py_ssize_t index_length(const indexObject *self)
 {
 	if (self->added == NULL)
-		return self->length;
-	return self->length + PyList_GET_SIZE(self->added);
+		return self->length - 1;
+	return self->length + PyList_GET_SIZE(self->added) - 1;
 }
 
 static PyObject *nullentry;
 static const char nullid[20];
 
 static Py_ssize_t inline_scan(indexObject *self, const char **offsets);
 
 #if LONG_MAX == 0x7fffffffL
@@ -150,17 +150,17 @@ static inline int index_get_parents(inde
  *   32 bytes: nodeid (only 20 bytes used)
  */
 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
 {
 	uint64_t offset_flags;
 	int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
 	const char *c_node_id;
 	const char *data;
-	Py_ssize_t length = index_length(self);
+	Py_ssize_t length = index_length(self) + 1;
 	PyObject *entry;
 
 	if (pos == -1 || pos == length - 1) {
 		Py_INCREF(nullentry);
 		return nullentry;
 	}
 
 	if (pos < 0 || pos >= length) {
@@ -220,17 +220,17 @@ static PyObject *index_get(indexObject *
 	return entry;
 }
 
 /*
  * Return the 20-byte SHA of the node corresponding to the given rev.
  */
 static const char *index_node(indexObject *self, Py_ssize_t pos)
 {
-	Py_ssize_t length = index_length(self);
+	Py_ssize_t length = index_length(self) + 1;
 	const char *data;
 
 	if (pos == length - 1 || pos == -1)
 		return nullid;
 
 	if (pos >= length)
 		return NULL;
 
@@ -280,17 +280,17 @@ static PyObject *index_append(indexObjec
 	if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
 		PyErr_SetString(PyExc_TypeError, "8-tuple required");
 		return NULL;
 	}
 
 	if (node_check(PyTuple_GET_ITEM(obj, 7), &node) == -1)
 		return NULL;
 
-	len = index_length(self);
+	len = index_length(self) + 1;
 
 	if (self->added == NULL) {
 		self->added = PyList_New(0);
 		if (self->added == NULL)
 			return NULL;
 	}
 
 	if (PyList_Append(self->added, obj) == -1)
@@ -430,17 +430,17 @@ static int check_filter(PyObject *filter
 	}
 }
 
 static Py_ssize_t add_roots_get_min(indexObject *self, PyObject *list,
                                     Py_ssize_t marker, char *phases)
 {
 	PyObject *iter = NULL;
 	PyObject *iter_item = NULL;
-	Py_ssize_t min_idx = index_length(self) + 1;
+	Py_ssize_t min_idx = index_length(self) + 2;
 	long iter_item_long;
 
 	if (PyList_GET_SIZE(list) != 0) {
 		iter = PyObject_GetIter(list);
 		if (iter == NULL)
 			return -2;
 		while ((iter_item = PyIter_Next(iter))) {
 			iter_item_long = PyInt_AS_LONG(iter_item);
@@ -472,17 +472,17 @@ static PyObject *reachableroots2(indexOb
 	PyObject *includepatharg = NULL;
 	int includepath = 0;
 	/* heads and roots are lists */
 	PyObject *heads = NULL;
 	PyObject *roots = NULL;
 	PyObject *reachable = NULL;
 
 	PyObject *val;
-	Py_ssize_t len = index_length(self) - 1;
+	Py_ssize_t len = index_length(self);
 	long revnum;
 	Py_ssize_t k;
 	Py_ssize_t i;
 	Py_ssize_t l;
 	int r;
 	int parents[2];
 
 	/* Internal data structure:
@@ -624,17 +624,17 @@ static PyObject *compute_phases_map_sets
 {
 	PyObject *roots = Py_None;
 	PyObject *ret = NULL;
 	PyObject *phasessize = NULL;
 	PyObject *phaseroots = NULL;
 	PyObject *phaseset = NULL;
 	PyObject *phasessetlist = NULL;
 	PyObject *rev = NULL;
-	Py_ssize_t len = index_length(self) - 1;
+	Py_ssize_t len = index_length(self);
 	Py_ssize_t numphase = 0;
 	Py_ssize_t minrevallphases = 0;
 	Py_ssize_t minrevphase = 0;
 	Py_ssize_t i = 0;
 	char *phases = NULL;
 	long phase;
 
 	if (!PyArg_ParseTuple(args, "O", &roots))
@@ -735,17 +735,17 @@ static PyObject *index_headrevs(indexObj
 		filter = PyObject_GetAttrString(filteredrevs, "__contains__");
 		if (!filter) {
 			PyErr_SetString(PyExc_TypeError,
 				"filteredrevs has no attribute __contains__");
 			goto bail;
 		}
 	}
 
-	len = index_length(self) - 1;
+	len = index_length(self);
 	heads = PyList_New(0);
 	if (heads == NULL)
 		goto bail;
 	if (len == 0) {
 		PyObject *nullid = PyInt_FromLong(-1);
 		if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
 			Py_XDECREF(nullid);
 			goto bail;
@@ -839,17 +839,17 @@ static inline int index_baserev(indexObj
 
 static PyObject *index_deltachain(indexObject *self, PyObject *args)
 {
 	int rev, generaldelta;
 	PyObject *stoparg;
 	int stoprev, iterrev, baserev = -1;
 	int stopped;
 	PyObject *chain = NULL, *result = NULL;
-	const Py_ssize_t length = index_length(self);
+	const Py_ssize_t length = index_length(self) + 1;
 
 	if (!PyArg_ParseTuple(args, "iOi", &rev, &stoparg, &generaldelta)) {
 		return NULL;
 	}
 
 	if (PyInt_Check(stoparg)) {
 		stoprev = (int)PyInt_AsLong(stoparg);
 		if (stoprev == -1 && PyErr_Occurred()) {
@@ -1093,17 +1093,17 @@ static int nt_init(indexObject *self)
 			? 4 : (int)self->raw_length / 2;
 
 		self->nt = calloc(self->ntcapacity, sizeof(nodetree));
 		if (self->nt == NULL) {
 			PyErr_NoMemory();
 			return -1;
 		}
 		self->ntlength = 1;
-		self->ntrev = (int)index_length(self) - 1;
+		self->ntrev = (int)index_length(self);
 		self->ntlookups = 1;
 		self->ntmisses = 0;
 		if (nt_insert(self, nullid, -1) == -1) {
 			free(self->nt);
 			self->nt = NULL;
 			return -1;
 		}
 	}
@@ -1388,17 +1388,17 @@ static PyObject *index_m_get(indexObject
 }
 
 static int index_contains(indexObject *self, PyObject *value)
 {
 	char *node;
 
 	if (PyInt_Check(value)) {
 		long rev = PyInt_AS_LONG(value);
-		return rev >= -1 && rev < index_length(self);
+		return rev >= -1 && rev < index_length(self) + 1;
 	}
 
 	if (node_check(value, &node) == -1)
 		return -1;
 
 	switch (index_find_node(self, node, 20)) {
 	case -3:
 		return -1;
@@ -1668,17 +1668,17 @@ static PyObject *index_commonancestorshe
 	bitmask repeat = 0;
 	int revcount = 0;
 	int *revs;
 
 	argcount = PySequence_Length(args);
 	revs = PyMem_Malloc(argcount * sizeof(*revs));
 	if (argcount > 0 && revs == NULL)
 		return PyErr_NoMemory();
-	len = index_length(self) - 1;
+	len = index_length(self);
 
 	for (i = 0; i < argcount; i++) {
 		static const int capacity = 24;
 		PyObject *obj = PySequence_GetItem(args, i);
 		bitmask x;
 		long val;
 
 		if (!PyInt_Check(obj)) {
@@ -1790,17 +1790,17 @@ static void nt_invalidate_added(indexObj
 
 /*
  * Delete a numeric range of revs, which must be at the end of the
  * range, but exclude the sentinel nullid entry.
  */
 static int index_slice_del(indexObject *self, PyObject *item)
 {
 	Py_ssize_t start, stop, step, slicelength;
-	Py_ssize_t length = index_length(self);
+	Py_ssize_t length = index_length(self) + 1;
 	int ret = 0;
 
 /* Argument changed from PySliceObject* to PyObject* in Python 3. */
 #ifdef IS_PY3K
 	if (PySlice_GetIndicesEx(item, length,
 #else
 	if (PySlice_GetIndicesEx((PySliceObject*)item, length,
 #endif
--- a/mercurial/policy.py
+++ b/mercurial/policy.py
@@ -64,17 +64,17 @@ def _importfrom(pkgname, modname):
     return fakelocals[modname]
 
 # keep in sync with "version" in C modules
 _cextversions = {
     (r'cext', r'base85'): 1,
     (r'cext', r'bdiff'): 3,
     (r'cext', r'mpatch'): 1,
     (r'cext', r'osutil'): 4,
-    (r'cext', r'parsers'): 6,
+    (r'cext', r'parsers'): 7,
 }
 
 # map import request to other package or module
 _modredirects = {
     (r'cext', r'charencode'): (r'cext', r'parsers'),
     (r'cffi', r'base85'): (r'pure', r'base85'),
     (r'cffi', r'charencode'): (r'pure', r'charencode'),
     (r'cffi', r'parsers'): (r'pure', r'parsers'),
--- a/mercurial/pure/parsers.py
+++ b/mercurial/pure/parsers.py
@@ -34,30 +34,30 @@ indexsize = struct.calcsize(indexformatn
 def gettype(q):
     return int(q & 0xFFFF)
 
 def offset_type(offset, type):
     return int(int(offset) << 16 | type)
 
 class BaseIndexObject(object):
     def __len__(self):
-        return self._lgt + len(self._extra) + 1
+        return self._lgt + len(self._extra)
 
     def append(self, tup):
         self._extra.append(tup)
 
     def _fix_index(self, i):
         if not isinstance(i, int):
             raise TypeError("expecting int indexes")
-        if i < 0 or i >= len(self):
+        if i < 0 or i >= len(self) + 1:
             raise IndexError
         return i
 
     def __getitem__(self, i):
-        if i == -1 or i == len(self) - 1:
+        if i == -1 or i == len(self):
             return (0, 0, 0, -1, -1, -1, -1, nullid)
         i = self._fix_index(i)
         if i >= self._lgt:
             return self._extra[i - self._lgt]
         index = self._calculate_index(i)
         r = struct.unpack(indexformatng, self._data[index:index + indexsize])
         if i == 0:
             e = list(r)
--- a/mercurial/repoview.py
+++ b/mercurial/repoview.py
@@ -205,17 +205,17 @@ class repoview(object):
         """return a filtered version of the changeset
 
         this changelog must not be used for writing"""
         # some cache may be implemented later
         unfi = self._unfilteredrepo
         unfichangelog = unfi.changelog
         # bypass call to changelog.method
         unfiindex = unfichangelog.index
-        unfilen = len(unfiindex) - 1
+        unfilen = len(unfiindex)
         unfinode = unfiindex[unfilen - 1][7]
 
         revs = filterrevs(unfi, self.filtername, self._visibilityexceptions)
         cl = self._clcache
         newkey = (unfilen, unfinode, hash(revs), unfichangelog._delayed)
         # if cl.index is not unfiindex, unfi.changelog would be
         # recreated, and our clcache refers to garbage object
         if (cl is not None and
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -786,20 +786,18 @@ class _revisioninfo(object):
 # 20 bytes: parent 1 nodeid
 # 20 bytes: parent 2 nodeid
 # 20 bytes: nodeid
 indexformatv0 = struct.Struct(">4l20s20s20s")
 indexformatv0_pack = indexformatv0.pack
 indexformatv0_unpack = indexformatv0.unpack
 
 class revlogoldindex(list):
-    def __len__(self):
-        return list.__len__(self) + 1
     def __getitem__(self, i):
-        if i == -1 or i == len(self) - 1:
+        if i == -1 or i == len(self):
             return (0, 0, 0, -1, -1, -1, -1, nullid)
         return list.__getitem__(self, i)
 
 class revlogoldio(object):
     def __init__(self):
         self.size = indexformatv0.size
 
     def parseindex(self, data, inline):
@@ -1061,21 +1059,21 @@ class revlog(object):
             if self._inline:
                 func = self._indexfp
             else:
                 func = self._datafp
             with func() as fp:
                 yield fp
 
     def tip(self):
-        return self.node(len(self.index) - 2)
+        return self.node(len(self.index) - 1)
     def __contains__(self, rev):
         return 0 <= rev < len(self)
     def __len__(self):
-        return len(self.index) - 1
+        return len(self.index)
     def __iter__(self):
         return iter(pycompat.xrange(len(self)))
     def revs(self, start=0, stop=None):
         """iterate over all rev in this revlog (from start to stop)"""
         step = 1
         length = len(self)
         if stop is not None:
             if start > stop:
@@ -1134,17 +1132,17 @@ class revlog(object):
                 raise error.WdirUnsupported
             raise LookupError(node, self.indexfile, _('no node'))
         except KeyError:
             # pure python cache lookup failed
             n = self._nodecache
             i = self.index
             p = self._nodepos
             if p is None:
-                p = len(i) - 2
+                p = len(i) - 1
             else:
                 assert p < len(i)
             for r in pycompat.xrange(p, -1, -1):
                 v = i[r][7]
                 n[v] = r
                 if v == node:
                     self._nodepos = r - 1
                     return r