This is preparatory work for separating parsemakesyntax from the comment and line-continuation rules. The data iterators here are not actually being used yet anywhere except the tests.
author Benjamin Smedberg <benjamin@smedbergs.us>
Mon, 09 Feb 2009 12:14:52 -0500
changeset 66 cbeb0eba9087
parent 65 5f3bbfab4910
child 67 63531e755f52
push id 39
push user bsmedberg@mozilla.com
push date 2009-02-09 21:45 +0000
This is preparatory work for separating parsemakesyntax from the comment and line-continuation rules. The data iterators here are not actually being used yet anywhere except the tests.
pymake/parser.py
tests/parsertests.py
--- a/pymake/parser.py
+++ b/pymake/parser.py
@@ -121,17 +121,185 @@ class Data(object):
         Get the location of an offset within data.
         """
         if offset >= len(self.data):
             offset = len(self.data) - 1
 
         begin, loc = findlast(lambda (o, l): o <= offset, self._locs)
         return loc + self.data[begin:offset]
 
-def _iterlines(fd):
+    def skipwhitespace(self, offset):
+        """
+        Return the offset into data after skipping whitespace.
+        """
+        while offset < len(self.data):
+            c = self.data[offset]
+            if not c.isspace():
+                break
+            offset += 1
+        return offset
+
+    def findtoken(self, o, tlist):
+        """
+        Check data at position o for any of the tokens in tlist followed by whitespace
+        or end-of-data.
+
+        If a token is found, skip trailing whitespace and return (token, newoffset).
+        Otherwise return None, oldoffset
+        """
+        for t in tlist:
+            end = o + len(t)
+            if self.data[o:end] == t and (end == len(self.data) or self.data[end].isspace()):
+                end = self.skipwhitespace(end)
+                return t, end
+        return None, o
+
+def iterdata(d, offset):
+    """
+    A Data iterator yielding (char, offset, location) without any escaping.
+    """
+    while offset < len(d.data):
+        yield d.data[offset], offset, d.getloc(offset)
+        offset += 1
+
+def itermakefilechars(d, offset):
+    """
+    A Data generator yielding (char, offset, location). It will escape comments and newline
+    continuations according to makefile syntax rules.
+    """
+
+    while offset < len(d.data):
+        c = d.data[offset]
+        if c == '\n':
+            assert offset == len(d.data) - 1
+            return
+
+        if c == '#':
+            while offset < len(d.data):
+                c = d.data[offset]
+                if c == '\\' and offset < len(d.data) - 1:
+                    offset += 1
+                    c = d.data[offset]
+                    if c == '\n':
+                        assert offset == len(d.data) - 1, 'unexpected newline'
+                        d.readline()
+                offset += 1
+            return
+        elif c == '\\' and offset < len(d.data) - 1:
+            c2 = d.data[offset + 1]
+            if c2 == '#':
+                offset += 1
+                yield '#', offset, d.getloc(offset)
+                offset += 1
+            elif d[offset:offset + 3] == '\\\\#':
+                # see escape-chars.mk VARAWFUL
+                offset += 1
+                yield '\\', offset, d.getloc(offset)
+                offset += 1
+            elif c2 == '\n':
+                yield ' ', offset, d.getloc(offset)
+                d.readline()
+                offset = d.skipwhitespace(offset + 2)
+            else:
+                yield c, offset, d.getloc(offset)
+                offset += 1
+        else:
+            if c.isspace():
+                o = d.skipwhitespace(offset)
+                if d.data[o:o+2] == '\\\n':
+                    offset = o
+                    continue
+
+            yield c, offset, d.getloc(offset)
+            offset += 1
+
+def itercommandchars(d, offset):
+    """
+    A Data generator yielding (char, offset, location). It will process escapes and newlines
+    according to command parsing rules.
+    """
+
+    while offset < len(d.data):
+        c = d.data[offset]
+        if c == '\n':
+            assert offset == len(d.data) - 1
+            return
+
+        yield c, offset, d.getloc(offset)
+        offset += 1
+
+        if c == '\\':
+            if offset == len(d.data):
+                return
+
+            c = d.data[offset]
+            yield c, offset, d.getloc(offset)
+
+            offset += 1
+
+            if c == '\n':
+                assert offset == len(d.data)
+                d.readline()
+                if offset < len(d.data) and d.data[offset] == '\t':
+                    offset += 1
+
+def iterdefinechars(d, offset):
+    """
+    A Data generator yielding (char, offset, location). It will process define/endef
+    according to define parsing rules.
+    """
+
+    def checkfortoken(o):
+        """
+        Check for a define or endef token on the line starting at o.
+        Return an integer for the direction of definecount.
+        """
+        if o >= len(d.data):
+            return 0
+
+        if d.data[o] == '\t':
+            return 0
+
+        o = d.skipwhitespace(o)
+        token, o = d.findtoken(o, ('define', 'endef'))
+        if token == 'define':
+            return 1
+
+        if token == 'endef':
+            return -1
+        
+        return 0
+
+    startoffset = offset
+    definecount = 1 + checkfortoken(offset)
+    if definecount == 0:
+        return
+
+    while offset < len(d.data):
+        c = d.data[offset]
+
+        if c == '\n':
+            d.readline()
+            definecount += checkfortoken(offset + 1)
+            if definecount == 0:
+                return
+
+        yield c, offset, d.getloc(offset)
+        offset += 1
+
+        if c == '\\' and offset < len(d.data) and d.data[offset] == '\n':
+            yield '\n', offset, d.getloc(offset)
+            d.readline()
+            offset += 1
+
+    # Unlike the other iterators, if you fall off this one there is an unterminated
+    # define.
+    raise SyntaxError("Unterminated define", d.getloc(startoffset))
+
+def iterlines(fd):
     """Yield (lineno, line) for each line in fd"""
 
     lineno = 0
     for line in fd:
         lineno += 1
 
         if line.endswith('\r\n'):
             line = line[:-2] + '\n'
@@ -145,30 +313,19 @@ def getkeyword(d, offset):
     """
     i = offset
     while True:
         c = d[i]
         if c == '-' or (c >= 'a' and c <= 'z'):
             i += 1
             continue
         if i > offset and (c is None or c.isspace()):
-            return d[offset:i], skipwhitespace(d, i)
+            return d[offset:i], d.skipwhitespace(i)
         return None, i
 
-def skipwhitespace(d, offset):
-    """
-    Return the offset into data after skipping whitespace.
-    """
-    while True:
-        c = d[offset]
-        if c is None or not c.isspace():
-            break
-        offset += 1
-    return offset
-
 def setvariable(variables, vname, recursive, value, fromcl=False):
     """
     Parse the remaining data at d[offset] into a variables object.
 
     @param vname an string holding the variable name
     """
     if len(vname) == 0:
         raise SyntaxError("Empty variable name", loc=d.getloc(offset))
@@ -215,32 +372,32 @@ def parsecommandlineargs(makefile, args)
     return r
 
 def ifeq(d, offset, makefile):
     # the variety of formats for this directive is rather maddening
     if d[offset] == '(':
         arg1, offset = parsemakesyntax(d, offset + 1, ',', PARSESTYLE_MAKEFILE)
         if offset == -1:
             raise SyntaxError("Unexpected text in conditional", d.getloc(len(d) - 1))
-        offset = skipwhitespace(d, offset + 1)
+        offset = d.skipwhitespace(offset + 1)
         arg2, offset = parsemakesyntax(d, offset, ')', PARSESTYLE_MAKEFILE)
         if offset == -1:
             raise SyntaxError("Unexpected text in conditional", d.getloc(len(d) - 1))
-        offset = skipwhitespace(d, offset + 1)
+        offset = d.skipwhitespace(offset + 1)
         if d[offset] not in ('#', None):
             raise SyntaxError("Unexpected text after conditional", d.getloc(offset))
     elif d[offset] in '\'"':
         arg1, offset = parsemakesyntax(d, offset + 1, d[offset], PARSESTYLE_MAKEFILE)
         if offset == -1:
             raise SyntaxError("Unexpected text in conditional", d.getloc(len(d) - 1))
-        offset = skipwhitespace(d, offset + 1)
+        offset = d.skipwhitespace(offset + 1)
         if d[offset] not in '\'"':
             raise SyntaxError("Unexpected text in conditional", d.getloc(offset))
         arg2, offset = parsemakesyntax(d, offset + 1, d[offset], PARSESTYLE_MAKEFILE)
-        offset = skipwhitespace(d, offset + 1)
+        offset = d.skipwhitespace(offset + 1)
         if d[offset] not in ('#', None):
             raise SyntaxError("Unexpected text after conditional: %c" % (d[offset],), d.getloc(offset))
 
     val1 = arg1.resolve(makefile.variables, None)
     val2 = arg2.resolve(makefile.variables, None)
     return val1 == val2
 
 def ifneq(d, offset, makefile):
@@ -295,17 +452,17 @@ def parsestream(fd, filename, makefile):
     Parse a stream of makefile into a makefile data structure.
 
     @param fd A file-like object containing the makefile data.
     """
 
     currule = None
     condstack = []
 
-    fdlines = _iterlines(fd)
+    fdlines = iterlines(fd)
 
     for lineno, line in fdlines:
         d = Data(fdlines, filename)
         if line.startswith('\t') and currule is not None:
             if any((not c.active for c in condstack)):
                 log.info('skipping line %i, ifdefed away' % lineno)
                 continue
 
@@ -316,17 +473,17 @@ def parsestream(fd, filename, makefile):
         else:
             # To parse Makefile syntax, we first strip leading whitespace and
             # look for initial keywords. If there are no keywords, it's either
             # setting a variable or writing a rule.
 
             d = Data(fdlines, filename)
             d.append(line, Location(filename, lineno, 0))
 
-            offset = skipwhitespace(d, 0)
+            offset = d.skipwhitespace(0)
 
             kword, kwoffset = getkeyword(d, offset)
             if kword == 'endif':
                 if d[kwoffset] not in ('#', None):
                     raise SyntaxError("Unexpected data after 'endif' directive.",
                                       d.getloc(kwoffset))
 
                 if not len(condstack):
@@ -538,17 +695,17 @@ def parsemakesyntax(d, startat, stopon, 
         c = d[i]
 
         # print "i=%i c=%c parsestate=%i len(d)=%i" % (i, c, stacktop.parsestate, len(d))
 
         if parsestyle == PARSESTYLE_DEFINE and linebegin:
             linebegin = False
             if d[i] != '\t':
                 # look for endef/define
-                j = skipwhitespace(d, i)
+                j = d.skipwhitespace(i)
                 kword, j = getkeyword(d, j)
                 if kword == 'define':
                     print "incrementing definecount at %s" % d.getloc(j)
                     definecount += 1
                 elif kword == 'endef':
                     if not d[j] in ('#', None):
                         raise SyntaxError("Extraneous text after endef directive.",
                                           d.getloc(j))
@@ -605,17 +762,17 @@ def parsemakesyntax(d, startat, stopon, 
 
                 if parsestyle == PARSESTYLE_COMMAND:
                     stacktop.expansion.append('\\\n')
                     if d[i] == '\t':
                         i += 1
                 else:
                     stacktop.expansion.rstrip()
                     stacktop.expansion.append(' ')
-                    i = skipwhitespace(d, i)
+                    i = d.skipwhitespace(i)
                 continue
             else:
                 stacktop.expansion.append(c)
                 i += 1
                 continue
         elif c == '\n':
             assert i + 1 == len(d), "newline isn't last character? i = %i d = %r" % (i, d.data)
             if parsestyle == PARSESTYLE_DEFINE:
--- a/tests/parsertests.py
+++ b/tests/parsertests.py
@@ -1,14 +1,22 @@
 import pymake.data, pymake.parser, pymake.functions
 import unittest
 import logging
 
 from cStringIO import StringIO
 
+def multitest(cls):
+    for i in xrange(0, len(cls.testdata)):
+        def m(self, i=i):
+            return self.runSingle(*self.testdata[i])
+
+        setattr(cls, 'test_%i' % i, m)
+    return cls
+
 class TestBase(unittest.TestCase):
     def assertEqual(self, a, b, msg=""):
         """Actually print the values which weren't equal, if things don't work out!"""
         unittest.TestCase.assertEqual(self, a, b, "%s got %r expected %r" % (msg, a, b))
 
 class DataTest(TestBase):
     testdata = (
         ((("He\tllo", "f", 1, 0),),
@@ -23,16 +31,95 @@ class DataTest(TestBase):
             for line, file, lineno, col in datas:
                 d.append(line, pymake.parser.Location(file, lineno, col))
             for pos, file, lineno, col in results:
                 loc = d.getloc(pos)
                 self.assertEqual(loc.path, file, "data file")
                 self.assertEqual(loc.line, lineno, "data line")
                 self.assertEqual(loc.column, col, "data %r col, got %i expected %i" % (d.data, loc.column, col))
 
+class IterTest(TestBase):
+    testdata = (
+        (
+            pymake.parser.iterdata,
+            "plaindata # test\n",
+            "plaindata # test\n"
+        ),
+        (
+            pymake.parser.itermakefilechars,
+            "VAR = val # comment",
+            "VAR = val "
+        ),
+        (
+            pymake.parser.itermakefilechars,
+            "VAR = val \# escaped hash\n",
+            "VAR = val # escaped hash"
+        ),
+        (
+            pymake.parser.itermakefilechars,
+            "VAR = VAL  \\\n  continuation # comment \\\n  continuation",
+            "VAR = VAL continuation "
+        ),
+        (
+            pymake.parser.itermakefilechars,
+            "VAR = VAL  \\\\# comment\n",
+            "VAR = VAL  \\"
+        ),
+        (
+            pymake.parser.itercommandchars,
+            "echo boo # comment\n",
+            "echo boo # comment",
+        ),
+        (
+            pymake.parser.itercommandchars,
+            "echo boo \# comment\n",
+            "echo boo \# comment",
+        ),
+        (
+            pymake.parser.itercommandchars,
+            "echo boo # \\\n\t  command 2\n",
+            "echo boo # \\\n  command 2"
+        ),
+        (
+            pymake.parser.iterdefinechars,
+            "endef",
+            ""
+        ),
+        (
+            pymake.parser.iterdefinechars,
+            """define BAR # comment
+random text
+endef not what you think!
+endef # comment is ok\n""",
+            """define BAR # comment
+random text
+endef not what you think!"""
+        ),
+        (
+            pymake.parser.iterdefinechars,
+            """value \\
+endef
+endef\n""",
+            "value \\\nendef"
+        ),
+    )
+
+    def runSingle(self, ifunc, idata, expected):
+        fd = StringIO(idata)
+        lineiter = pymake.parser.iterlines(fd)
+
+        d = pymake.parser.Data(lineiter, 'PlainIterTest-data')
+        d.readline()
+
+        actual = ''.join( (c for c, offset, location in ifunc(d, 0)) )
+        self.assertEqual(actual, expected)
+
+        self.assertRaises(StopIteration, lambda: fd.next())
+multitest(IterTest)
+
 class MakeSyntaxTest(TestBase):
     # (string, startat, stopat, stopoffset, expansion
     testdata = (
         ('hello world', 0, '', -1, ['hello world']),
         ('hello $W', 0, '', -1,
          ['hello ',
           {'type': 'VariableRef',
            '.vname': ['W']}
@@ -166,13 +253,12 @@ all:: test test2 $(VAR)
 
         irules = m.implicitrules
         self.assertEqual(len(irules), 1, "Number of implicit rules")
 
         irule = irules[0]
         self.assertEqual(len(irule.targetpatterns), 1, "%.o target pattern count")
         self.assertEqual(len(irule.prerequisites), 1, "%.o prerequisite count")
         self.assertEqual(irule.targetpatterns[0].match('foo.o'), 'foo', "%.o stem")
-        self.assertEqual(irule.prerequisites[0].resolve(irule.targetpatterns[0].match('foo.o')), 'foo.c')
 
 if __name__ == '__main__':
     logging.basicConfig(level=logging.DEBUG)
     unittest.main()