Attempt to improve parser performance: rather than iterating one character at a time, move the token-finding code into the iterator functions and use regular expressions to find all the tokens at once. I'm not sure this is a win, but I'm saving it for testing on my other machine.
author      Benjamin Smedberg <benjamin@smedbergs.us>
date        Fri, 13 Feb 2009 09:11:19 -0500
branch      parser-perf
changeset   113 df9087ec3e089c3f5126999731d6b40cd1cc318f
parent      112 33c6baf451b8ff7e34f1fc9317c2c61151c95365
child       114 e308b8e36c260cc5cb2103515a7146893fe1656a
push id     61
push user   bsmedberg@mozilla.com
push date   Fri, 13 Feb 2009 14:11:26 +0000
pymake/data.py
pymake/parser.py
tests/parsertests.py
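
The change replaces the per-character scan with a single compiled alternation of the escaped stop tokens, searched repeatedly with a moving position. A minimal standalone sketch of that scanning loop (the name scan_tokens is illustrative only, not pymake API), assuming tokens are plain strings:

    import re

    def scan_tokens(text, tokens):
        # Yield (flatstr, token, tokenstart, tokenend); token is None for the
        # trailing run of text that contains no further tokens, mirroring the
        # 4-tuple contract used by the new iterators below.
        pattern = re.compile('|'.join(re.escape(t) for t in tokens))
        offset = 0
        while offset < len(text):
            m = pattern.search(text, offset)
            if m is None:
                yield text[offset:], None, None, None
                return
            yield text[offset:m.start()], m.group(), m.start(), m.end()
            offset = m.end()

    # e.g. list(scan_tokens("echo $(VAR)", ('$', ')')))
    #   -> [('echo ', '$', 5, 6), ('(VAR', ')', 10, 11)]
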
--- a/pymake/data.py
+++ b/pymake/data.py
@@ -85,16 +85,19 @@ class Expansion(object):
         e = Expansion()
         e.append(s)
         return e
 
     def append(self, object):
         if not isinstance(object, (str, pymake.functions.Function)):
             raise DataError("Expansions can contain only strings or functions, got %s" % (type(object),))
 
+        if object == '':
+            return
+
         if len(self) and isinstance(object, str) and isinstance(self[-1], str):
             self[-1] += object
         else:
             self._elements.append(object)
 
     def concat(self, o):
         """Concatenate the other expansion on to this one."""
         if len(self) > 0 and len(o) > 0 and isinstance(self[-1], str) and isinstance(o[0], str):
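
The guard added above makes Expansion.append('') a no-op: the regex-driven iterators hand back the flat text between matches, which is the empty string whenever two tokens are adjacent, and appending those would only grow the element list. A simplified stand-in for the coalescing behaviour (not the real class, which also accepts function objects):

    class MiniExpansion(object):
        def __init__(self):
            self._elements = []

        def append(self, obj):
            if obj == '':
                return                       # empty flat strings are dropped
            if self._elements and isinstance(obj, str) and isinstance(self._elements[-1], str):
                self._elements[-1] += obj    # adjacent strings are merged
            else:
                self._elements.append(obj)

    e = MiniExpansion()
    for chunk in ('echo ', '', 'hello', ' world'):
        e.append(chunk)
    assert e._elements == ['echo hello world']
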
--- a/pymake/parser.py
+++ b/pymake/parser.py
@@ -13,17 +13,17 @@ Lines with command syntax do not condens
 
 Lines with an initial tab are commands if they can be (there is a rule or a command immediately preceding).
 Otherwise, they are parsed as makefile syntax.
 
 After splitting data into parseable chunks, we use a recursive-descent parser to
 nest parenthesized syntax.
 """
 
-import logging
+import logging, re
 from pymake import data, functions
 from cStringIO import StringIO
 
 tabwidth = 4
 
 log = logging.getLogger('pymake.parser')
 
 class SyntaxError(Exception):
@@ -155,111 +155,157 @@ class Data(object):
             if self.data[o:end] == t:
                 if not needws:
                     return t, end
                 elif end == len(self.data) or self.data[end].isspace():
                     end = self.skipwhitespace(end)
                     return t, end
         return None, o
 
-def iterdata(d, offset):
+makefiletokensescaped = [r'\\\\#', r'\\#', '\\\\\n', '\\\\\\s+\\\\\n', r'\\.', '#', '\n']
+continuationtokensescaped = ['\\\\\n', r'\\.', '\n']
+
+class TokenList(object):
     """
-    A Data iterator yielding (char, offset) without any escaping.
+    A list of tokens to search. Because these lists are static, we can perform
+    optimizations (such as escaping and compiling regexes) on construction.
     """
-    while offset < len(d.data):
-        yield d.data[offset], offset
-        offset += 1
+    def __init__(self, tlist):
+        self.emptylist = len(tlist) == 0
+        escapedlist = [re.escape(t) for t in tlist]
+        self.simplere = re.compile('|'.join(escapedlist))
+        self.makefilere = re.compile('|'.join(escapedlist + makefiletokensescaped))
+        self.continuationre = re.compile('|'.join(escapedlist + continuationtokensescaped))
+
+    imap = {}
+
+    @staticmethod
+    def get(s):
+        if s in TokenList.imap:
+            return TokenList.imap[s]
+
+        i = TokenList(s)
+        TokenList.imap[s] = i
+        return i
 
-def itermakefilechars(d, offset):
-    """
-    A Data generator yielding (char, offset). It will escape comments and newline
-    continuations according to makefile syntax rules.
-    """
+emptytokenlist = TokenList.get('')
+
+# The following four iterators handle line continuations and comments in
+# different ways, but share a similar behavior:
+#
+# Called with (data, startoffset, tokenlist)
+#
+# yield 4-tuples (flatstr, token, tokenoffset, afteroffset)
+# flatstr is data, guaranteed to have no tokens (may be '')
+# token, tokenoffset, afteroffset *may be None*. That means no token was found
+# in this yield; more flat text may follow from a later yield.
+
+def iterdata(d, offset, tokenlist):
+    if tokenlist.emptylist:
+        yield d.data, None, None, None
+        return
+
+    s = tokenlist.simplere
 
     while offset < len(d.data):
-        c = d.data[offset]
-        if c == '\n':
-            assert offset == len(d.data) - 1
+        m = s.search(d.data, pos=offset)
+        if m is None:
+            yield d.data[offset:], None, None, None
+            return
+
+        yield d.data[offset:m.start(0)], m.group(0), m.start(0), m.end(0)
+        offset = m.end(0)
+
+def itermakefilechars(d, offset, tokenlist):
+    s = tokenlist.makefilere
+
+    while offset < len(d.data):
+        m = s.search(d.data, pos=offset)
+        if m is None:
+            yield d.data[offset:], None, None, None
             return
 
-        if c == '#':
-            while offset < len(d.data):
-                c = d.data[offset]
-                if c == '\\' and offset < len(d.data) - 1:
-                    offset += 1
-                    c = d.data[offset]
-                    if c == '\n':
-                        assert offset == len(d.data) - 1, 'unexpected newline'
-                        d.readline()
-                offset += 1
+        token = m.group(0)
+        start = m.start(0)
+        end = m.end(0)
+
+        if token == '\n':
+            assert end == len(d.data)
+            yield d.data[offset:start], None, None, None
+            return
+
+        if token == '#':
+            yield d.data[offset:start], None, None, None
+            for s in itermakefilechars(d, end, emptytokenlist): pass
+            return
+
+        if token == '\\\\#':
+            # see escape-chars.mk VARAWFUL
+            yield d.data[offset:start + 1], None, None, None
+            for s in itermakefilechars(d, end, emptytokenlist): pass
             return
-        elif c == '\\' and offset < len(d.data) - 1:
-            c2 = d.data[offset + 1]
-            if c2 == '#':
-                offset += 1
-                yield '#', offset
-                offset += 1
-            elif d[offset:offset + 3] == '\\\\#':
-                # see escape-chars.mk VARAWFUL
-                offset += 1
-                yield '\\', offset
-                offset += 1
-            elif c2 == '\n':
-                yield ' ', offset
-                d.readline()
-                offset = d.skipwhitespace(offset + 2)
-            elif c2 == '\\':
-                yield '\\', offset
-                offset += 1
-                yield '\\', offset
-                offset += 1
-            else:
-                yield c, offset
-                offset += 1
+
+        if token == '\\\n':
+            yield d.data[offset:start].rstrip() + ' ', None, None, None
+            d.readline()
+            offset = d.skipwhitespace(end)
+            continue
+
+        if token.startswith('\\') and token.endswith('\n'):
+            assert end == len(d.data)
+            yield d.data[offset:start] + '\\ ', None, None, None
+            d.readline()
+            offset = d.skipwhitespace(end)
+            continue
+
+        if token == '\\#':
+            yield d.data[offset:start] + '#', None, None, None
+        elif token.startswith('\\'):
+            yield d.data[offset:end], None, None, None
         else:
-            if c.isspace():
-                o = d.skipwhitespace(offset)
-                if d.data[o:o+2] == '\\\n':
-                    offset = o
-                    continue
+            yield d.data[offset:start], token, start, end
 
-            yield c, offset
-            offset += 1
+        offset = end
 
-def itercommandchars(d, offset):
-    """
-    A Data generator yielding (char, offset). It will process escapes and newlines
-    according to command parsing rules.
-    """
+def itercommandchars(d, offset, tokenlist):
+    s = tokenlist.continuationre
 
     while offset < len(d.data):
-        c = d.data[offset]
-        if c == '\n':
-            assert offset == len(d.data) - 1
+        m = s.search(d.data, pos=offset)
+        if m is None:
+            yield d.data[offset:], None, None, None
+            return
+
+        token = m.group(0)
+        start = m.start(0)
+        end = m.end(0)
+
+        if token == '\n':
+            assert end == len(d.data)
+            yield d.data[offset:start], None, None, None
             return
 
-        yield c, offset
-        offset += 1
-
-        if c == '\\':
-            if offset == len(d.data):
-                return
-
-            c = d.data[offset]
-            yield c, offset
+        if token == '\\\n':
+            print "found newline"
+            yield d.data[offset:end], None, None, None
+            d.readline()
+            offset = end
+            print "new len: %s offset: %s" % (len(d.data), offset)
+            if offset < len(d.data) and d.data[offset] == '\t':
+                offset += 1
+            continue
+
+        if token.startswith('\\'):
+            yield d.data[offset:end], None, None, None
+        else:
+            yield d.data[offset:start], token, start, end
 
-            offset += 1
+        offset = end
 
-            if c == '\n':
-                assert offset == len(d.data)
-                d.readline()
-                if offset < len(d.data) and d.data[offset] == '\t':
-                    offset += 1
-
-def iterdefinechars(d, offset):
+def iterdefinechars(d, offset, tokenlist):
     """
     A Data generator yielding (char, offset). It will process define/endef
     according to define parsing rules.
     """
 
     def checkfortoken(o):
         """
         Check for a define or endef token on the line starting at o.
@@ -281,52 +327,61 @@ def iterdefinechars(d, offset):
         
         return 0
 
     startoffset = offset
     definecount = 1 + checkfortoken(offset)
     if definecount == 0:
         return
 
+    s = tokenlist.continuationre
+
     while offset < len(d.data):
-        c = d.data[offset]
+        m = s.search(d.data, pos=offset)
+        if m is None:
+            yield d.data[offset:], None, None, None
+            break
+
+        token = m.group(0)
+        start = m.start(0)
+        end = m.end(0)
 
-        if c == '\n':
+        if token == '\\\n':
+            yield d.data[offset:start].rstrip() + ' ', None, None, None
             d.readline()
-            definecount += checkfortoken(offset + 1)
+            offset = d.skipwhitespace(end)
+            continue
+
+        if token == '\n':
+            assert end == len(d.data)
+            d.readline()
+            definecount += checkfortoken(end)
             if definecount == 0:
+                yield d.data[offset:start], None, None, None
                 return
 
-        if c == '\\' and offset < len(d.data) - 1 and d.data[offset+1] == '\n':
-            yield ' ', offset
-            d.readline()
-            offset = d.skipwhitespace(offset + 2)
-            continue
+            yield d.data[offset:end], None, None, None
+        elif token.startswith('\\'):
+            yield d.data[offset:end], None, None, None
+        else:
+            yield d.data[offset:start], token, start, end
 
-        if c.isspace():
-            o = d.skipwhitespace(offset)
-            if d.data[o:o+2] == '\\\n':
-                offset = o
-                continue
-
-        yield c, offset
-        offset += 1
-
+        offset = end
 
     # Unlike the other iterators, if you fall off this one there is an unterminated
     # define.
     raise SyntaxError("Unterminated define", d.getloc(startoffset))
 
 def ensureend(d, offset, msg, ifunc=itermakefilechars):
     """
     Ensure that only whitespace remains in this data.
     """
 
-    for c, o in ifunc(d, offset):
-        if not c.isspace():
+    for c, t, o, oo in ifunc(d, offset, emptytokenlist):
+        if c != '' and not c.isspace():
             raise SyntaxError(msg, d.getloc(o))
 
 def iterlines(fd):
     """Yield (lineno, line) for each line in fd"""
 
     lineno = 0
     for line in fd:
         lineno += 1
@@ -346,33 +401,33 @@ def setvariable(resolvevariables, setvar
     assert isinstance(setvariables, data.Variables)
 
     # print "setvariable: %r resvariables: %r setvariables: %r" % (vname, resolvevariables, setvariables)
 
     if len(vname) == 0:
         raise SyntaxError("Empty variable name", loc=d.getloc(offset))
 
     if token == '+=':
-        val = ''.join((c for c, o, in iterfunc(d, offset)))
+        val = ''.join((c for c, t, o, oo in iterfunc(d, offset, emptytokenlist)))
         if skipwhitespace:
             val = val.lstrip()
         setvariables.append(vname, source, val, resolvevariables)
         return
 
     if token == '?=':
         flavor = data.Variables.FLAVOR_RECURSIVE
-        val = ''.join((c for c, o, in iterfunc(d, offset)))
+        val = ''.join((c for c, t, o, oo in iterfunc(d, offset, emptytokenlist)))
         if skipwhitespace:
             val = val.lstrip()
         oldflavor, oldsource, oldval = setvariables.get(vname, expand=False)
         if oldval is not None:
             return
     elif token == '=':
         flavor = data.Variables.FLAVOR_RECURSIVE
-        val = ''.join((c for c, o, in iterfunc(d, offset)))
+        val = ''.join((c for c, t, o, oo in iterfunc(d, offset, emptytokenlist)))
         if skipwhitespace:
             val = val.lstrip()
     else:
         assert token == ':='
 
         flavor = data.Variables.FLAVOR_SIMPLE
         e, t, o = parsemakesyntax(d, offset, (), itermakefilechars)
         if skipwhitespace:
@@ -564,17 +619,17 @@ def parsestream(fd, filename, makefile):
 
             if kword in conditionkeywords:
                 m = conditionkeywords[kword](d, offset, makefile)
                 condstack.append(Condition(m, d.getloc(offset)))
                 continue
 
             if any((not c.active for c in condstack)):
                 log.debug('%s: skipping line because of active conditions' % (d.getloc(0),))
-                for c in itermakefilechars(d, offset):
+                for c in itermakefilechars(d, offset, emptytokenlist):
                     pass
                 continue
 
             if kword == 'endef':
                 raise SyntaxError("Unmatched endef", d.getloc(offset))
 
             if kword == 'define':
                 e, t, i = parsemakesyntax(d, offset, (), itermakefilechars)
@@ -706,17 +761,17 @@ def parsestream(fd, filename, makefile):
                                         vname, token, d, offset)
                 elif token == '|':
                     raise NotImplementedError('order-only prerequisites not implemented')
                 else:
                     assert token == ':'
 
                     # static pattern rule
                     if ispattern:
-                        raise SyntaxError("static pattern rules must have static targets")
+                        raise SyntaxError("static pattern rules must have static targets", d.getloc(0))
 
                     patstr = e.resolve(makefile.variables)
                     patterns = data.splitwords(patstr)
                     if len(patterns) != 1:
                         raise SyntaxError("A static pattern rule may have only one pattern", d.getloc(offset))
 
                     pattern = data.Pattern(patterns[0])
 
@@ -747,20 +802,20 @@ PARSESTATE_TOPLEVEL = 0    # at the top 
 PARSESTATE_FUNCTION = 1    # expanding a function call. data is function
 
 # For the following three, data is a tuple of Expansions: (varname, substfrom, substto)
 PARSESTATE_VARNAME = 2     # expanding a variable expansion.
 PARSESTATE_SUBSTFROM = 3   # expanding a variable expansion substitution "from" value
 PARSESTATE_SUBSTTO = 4     # expanding a variable expansion substitution "to" value
 
 class ParseStackFrame(object):
-    def __init__(self, parsestate, expansion, stopon, closebrace, **kwargs):
+    def __init__(self, parsestate, expansion, tokenlist, closebrace, **kwargs):
         self.parsestate = parsestate
         self.expansion = expansion
-        self.stopon = stopon
+        self.tokenlist = tokenlist
         self.closebrace = closebrace
         for key, value in kwargs.iteritems():
             setattr(self, key, value)
 
 functiontokens = [k for k in functions.functionmap.iterkeys()]
 functiontokens.sort(key=len, reverse=True)
 
 def parsemakesyntax(d, startat, stopon, iterfunc):
@@ -781,134 +836,126 @@ def parsemakesyntax(d, startat, stopon, 
     """
 
     # print "parsemakesyntax(%r)" % d.data
 
     assert callable(iterfunc)
 
     stack = [
         ParseStackFrame(PARSESTATE_TOPLEVEL, data.Expansion(loc=d.getloc(startat)),
-                        stopon, closebrace=None)
+                        tokenlist=TokenList.get(stopon + ('$',)),
+                        stopon=stopon, closebrace=None)
     ]
 
-    di = iterfunc(d, startat)
-    offset = startat
-
+    di = iterfunc(d, startat, stack[-1].tokenlist)
     while True: # this is not a for loop because `di` changes during the function
         stacktop = stack[-1]
         try:
-            c, offset = di.next()
+            s, token, tokenoffset, offset = di.next()
         except StopIteration:
             break
 
-        # print "  %i: stacklen=%i parsestate=%s looking for %r" % (offset, len(stack),
-        #                                                           stacktop.parsestate, stacktop.stopon),
-
-        token, offset = d.findtoken(offset, stacktop.stopon, False)
-        if token is not None:
-            c = 'dangerwillrobinson!'
-            di = iterfunc(d, offset)
-
-            if stacktop.parsestate == PARSESTATE_TOPLEVEL:
-                assert len(stack) == 1
-                return stacktop.expansion, token, offset
-
-            if stacktop.parsestate == PARSESTATE_FUNCTION:
-                if token == ',':
-                    stacktop.expansion = data.Expansion()
-                    stacktop.function.append(stacktop.expansion)
+        stacktop.expansion.append(s)
+        if token is None:
+            continue
 
-                    if len(stacktop.function) == stacktop.function.maxargs:
-                        stacktop.stopon = (stacktop.closebrace,)
-                elif token in (')', '}'):
-                    stacktop.function.setup()
-                    stack.pop()
-                    stack[-1].expansion.append(stacktop.function)
-                else:
-                    assert False, "Not reached, PARSESTATE_FUNCTION"
-            elif stacktop.parsestate == PARSESTATE_VARNAME:
-                if token == ':':
-                    stacktop.varname = stacktop.expansion
-                    stacktop.parsestate = PARSESTATE_SUBSTFROM
-                    stacktop.expansion = data.Expansion()
-                    stacktop.stopon = ('=', stacktop.closebrace)
-                elif token in (')', '}'):
-                    stack.pop()
-                    stack[-1].expansion.append(functions.VariableRef(stacktop.loc, stacktop.expansion))
-                else:
-                    assert False, "Not reached, PARSESTATE_VARNAME"
-            elif stacktop.parsestate == PARSESTATE_SUBSTFROM:
-                if token == '=':
-                    stacktop.substfrom = stacktop.expansion
-                    stacktop.parsestate = PARSESTATE_SUBSTTO
-                    stacktop.expansion = data.Expansion()
-                    stacktop.stopon = (stacktop.closebrace,)
-                elif token in (')', '}'):
-                    # A substitution of the form $(VARNAME:.ee) is probably a mistake, but make
-                    # parses it. Issue a warning. Combine the varname and substfrom expansions to
-                    # make the compatible varname. See tests/var-substitutions.mk SIMPLE3SUBSTNAME
-                    log.warning("%s: Variable reference looks like substitution without =" % (stacktop.loc, ))
-                    stacktop.varname.append(':')
-                    stacktop.varname.concat(stacktop.expansion)
-                    stack.pop()
-                    stack[-1].expansion.append(functions.VariableRef(stacktop.loc, stacktop.varname))
-                else:
-                    assert False, "Not reached, PARSESTATE_SUBSTFROM"
-            elif stacktop.parsestate == PARSESTATE_SUBSTTO:
-                assert token in  (')','}'), "Not reached, PARSESTATE_SUBSTTO"
-
-                stack.pop()
-                stack[-1].expansion.append(functions.SubstitutionRef(stacktop.loc, stacktop.varname,
-                                                                     stacktop.substfrom, stacktop.expansion))
-            else:
-                assert False, "Unexpected parse state %s" % stacktop.parsestate
-
-            continue
-        elif c == '$':
-            loc = d.getloc(offset)
-            try:
-                c, offset = di.next()
-            except StopIteration:
-                # an un-terminated $ expands to nothing
+        if token == '$':
+            if len(d.data) == offset:
+                # an unterminated $ expands to nothing
                 break
 
+            loc = d.getloc(tokenoffset)
+
+            c = d.data[offset]
             if c == '$':
                 stacktop.expansion.append('$')
-                continue
-
-            if c in ('(', '{'):
+                offset = offset + 1
+            elif c in ('(', '{'):
                 closebrace = c == '(' and ')' or '}'
 
                 # look forward for a function name
                 fname, offset = d.findtoken(offset + 1, functiontokens, True)
                 if fname is not None:
                     fn = functions.functionmap[fname](loc)
                     e = data.Expansion()
                     fn.append(e)
                     if len(fn) == fn.maxargs:
-                        stopon = (')',)
+                        tokenlist = TokenList.get((closebrace, '$'))
                     else:
-                        stopon = (',', ')')
+                        tokenlist = TokenList.get((',', closebrace, '$'))
 
                     stack.append(ParseStackFrame(PARSESTATE_FUNCTION,
-                                                 e, stopon, function=fn,
+                                                 e, tokenlist, function=fn,
                                                  closebrace=closebrace))
-                    di = iterfunc(d, offset)
+                    di = iterfunc(d, offset, tokenlist)
                     continue
 
                 e = data.Expansion()
-                stack.append(ParseStackFrame(PARSESTATE_VARNAME, e, (':', closebrace), closebrace=closebrace, loc=loc))
+                tokenlist = TokenList.get((':', closebrace, '$'))
+                stack.append(ParseStackFrame(PARSESTATE_VARNAME, e, tokenlist, closebrace=closebrace, loc=loc))
+                di = iterfunc(d, offset, tokenlist)
                 continue
+            else:
+                e = data.Expansion.fromstring(c)
+                stacktop.expansion.append(functions.VariableRef(loc, e))
+                offset += 1
+        elif stacktop.parsestate == PARSESTATE_TOPLEVEL:
+            assert len(stack) == 1
+            return stacktop.expansion, token, offset
+        elif stacktop.parsestate == PARSESTATE_FUNCTION:
+            if token == ',':
+                stacktop.expansion = data.Expansion()
+                stacktop.function.append(stacktop.expansion)
 
-            fe = data.Expansion()
-            fe.append(c)
-            stacktop.expansion.append(functions.VariableRef(loc, fe))
-            continue
+                if len(stacktop.function) == stacktop.function.maxargs:
+                    tokenlist = TokenList.get((stacktop.closebrace, '$'))
+                    stacktop.tokenlist = tokenlist
+            elif token in (')', '}'):
+                stacktop.function.setup()
+                stack.pop()
+                stack[-1].expansion.append(stacktop.function)
+            else:
+                assert False, "Not reached, PARSESTATE_FUNCTION"
+        elif stacktop.parsestate == PARSESTATE_VARNAME:
+            if token == ':':
+                stacktop.varname = stacktop.expansion
+                stacktop.parsestate = PARSESTATE_SUBSTFROM
+                stacktop.expansion = data.Expansion()
+                stacktop.tokenlist = TokenList.get(('=', stacktop.closebrace, '$'))
+            elif token in (')', '}'):
+                stack.pop()
+                stack[-1].expansion.append(functions.VariableRef(stacktop.loc, stacktop.expansion))
+            else:
+                assert False, "Not reached, PARSESTATE_VARNAME"
+        elif stacktop.parsestate == PARSESTATE_SUBSTFROM:
+            if token == '=':
+                stacktop.substfrom = stacktop.expansion
+                stacktop.parsestate = PARSESTATE_SUBSTTO
+                stacktop.expansion = data.Expansion()
+                stacktop.tokenlist = TokenList.get((stacktop.closebrace, '$'))
+            elif token in (')', '}'):
+                # A substitution of the form $(VARNAME:.ee) is probably a mistake, but make
+                # parses it. Issue a warning. Combine the varname and substfrom expansions to
+                # make the compatible varname. See tests/var-substitutions.mk SIMPLE3SUBSTNAME
+                log.warning("%s: Variable reference looks like substitution without =" % (stacktop.loc, ))
+                stacktop.varname.append(':')
+                stacktop.varname.concat(stacktop.expansion)
+                stack.pop()
+                stack[-1].expansion.append(functions.VariableRef(stacktop.loc, stacktop.varname))
+            else:
+                assert False, "Not reached, PARSESTATE_SUBSTFROM"
+        elif stacktop.parsestate == PARSESTATE_SUBSTTO:
+            assert token in  (')','}'), "Not reached, PARSESTATE_SUBSTTO"
 
+            stack.pop()
+            stack[-1].expansion.append(functions.SubstitutionRef(stacktop.loc, stacktop.varname,
+                                                                 stacktop.substfrom, stacktop.expansion))
         else:
-            stacktop.expansion.append(c)
+            assert False, "Unexpected parse state %s" % stacktop.parsestate
+
+        di = iterfunc(d, offset, stack[-1].tokenlist)
 
     if len(stack) != 1:
         raise SyntaxError("Unterminated function call", d.getloc(offset))
 
     assert stack[0].parsestate == PARSESTATE_TOPLEVEL
 
     return stack[0].expansion, None, None
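
End to end, parsemakesyntax now passes each stack frame's TokenList to the iterator and restarts the iterator whenever the active token list changes. A rough driver, modelled on the Data setup the test harness below uses (the input string and printed values are illustrative only):

    from cStringIO import StringIO
    import pymake.parser

    fd = StringIO("echo $(VAR) tail\n")
    d = pymake.parser.Data(pymake.parser.iterlines(fd), 'example-data')
    d.readline()

    # No extra stop tokens: parsing runs to the end of the data and returns
    # the accumulated Expansion; token is None because no stop token was hit.
    expansion, token, offset = pymake.parser.parsemakesyntax(
        d, 0, (), pymake.parser.itermakefilechars)
    print((token, offset))
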
--- a/tests/parsertests.py
+++ b/tests/parsertests.py
@@ -77,16 +77,21 @@ class IterTest(TestBase):
             "VAR = val\\\\\n",
             "VAR = val\\\\",
             ),
         'makecontinuation': (
             pymake.parser.itermakefilechars,
             "VAR = VAL  \\\n  continuation # comment \\\n  continuation",
             "VAR = VAL continuation "
             ),
+        'makecontinuation2': (
+            pymake.parser.itermakefilechars,
+            "VAR = VAL  \\  \\\n continuation",
+            "VAR = VAL  \\ continuation"
+            ),
         'makeawful': (
             pymake.parser.itermakefilechars,
             "VAR = VAL  \\\\# comment\n",
             "VAR = VAL  \\"
             ),
         'command': (
             pymake.parser.itercommandchars,
             "echo boo # comment\n",
@@ -128,17 +133,17 @@ endef\n""",
 
     def runSingle(self, ifunc, idata, expected):
         fd = StringIO(idata)
         lineiter = pymake.parser.iterlines(fd)
 
         d = pymake.parser.Data(lineiter, 'PlainIterTest-data')
         d.readline()
 
-        actual = ''.join( (c for c, offset, location in ifunc(d, 0)) )
+        actual = ''.join( (c for c, t, o, oo in ifunc(d, 0, pymake.parser.emptytokenlist)) )
         self.assertEqual(actual, expected)
 
         self.assertRaises(StopIteration, lambda: fd.next())
 multitest(IterTest)
 
 class MakeSyntaxTest(TestBase):
     # (string, startat, stopat, stopoffset, expansion
     testdata = {
@@ -155,17 +160,17 @@ class MakeSyntaxTest(TestBase):
                        '[0]': ['FOO']}
                       ]),
         'escapedollar': ('hello$$world', 0, (), None, ['hello$world']),
         'varref': ('echo $(VAR)', 0, (), None,
                    ['echo ',
                     {'type': 'VariableRef',
                      '.vname': ['VAR']}
                     ]),
-        'dynamicvarname': ('echo $($(VARNAME):.c=.o)', 0, (':'), None,
+        'dynamicvarname': ('echo $($(VARNAME):.c=.o)', 0, (':',), None,
                            ['echo ',
                             {'type': 'SubstitutionRef',
                              '.vname': [{'type': 'VariableRef',
                                          '.vname': ['VARNAME']}
                                         ],
                              '.substfrom': ['.c'],
                              '.substto': ['.o']}
                             ]),
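
The one-character test fix for 'dynamicvarname' above matters more than it looks: (':') is just the string ':', not a tuple, and the rewritten parsemakesyntax builds its TokenList from stopon + ('$',), which raises TypeError for a bare string. A quick illustration of the pitfall:

    stopon = (':')                 # no trailing comma: this is just the str ':'
    assert stopon == ':'

    stopon = (':',)                # a one-element tuple
    assert stopon + ('$',) == (':', '$')   # what TokenList.get(stopon + ('$',)) receives

    try:
        ':' + ('$',)               # what the old test value would have produced
    except TypeError:
        pass                       # str + tuple is not allowed
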