Expansion optimizations:
author Benjamin Smedberg <benjamin@smedbergs.us>
Thu, 26 Feb 2009 14:59:36 -0500
changeset 188 f9df0708b6d6
parent 187 5d7346eb92cd
child 189 b0864b858e97
push id 109
push user bsmedberg@mozilla.com
push date Thu, 26 Feb 2009 19:59:44 +0000
Expansion optimizations:
* Store expansion elements with an "isfunc" flag, instead of using isinstance() on them, which is slow.
* If empty lines don't have any interesting expand data in them, ignore them completely to avoid expansion costs during parsedata execution.
pymake/data.py
pymake/parser.py
--- a/pymake/data.py
+++ b/pymake/data.py
@@ -62,109 +62,116 @@ def _if_else(c, t, f):
 
 class Expansion(object):
     """
     A representation of expanded data, such as that for a recursively-expanded variable, a command, etc.
     """
 
     def __init__(self, loc=None):
         # Each element is either a string or a function
-        self._elements = []
+        self._elements = [] # element, isfunc
         self.loc = loc
 
     @staticmethod
     def fromstring(s):
         e = Expansion()
-        e.append(s)
+        e.appendstr(s)
         return e
 
     def clone(self):
         e = Expansion()
         e._elements = list(self._elements)
         return e
 
-    def append(self, object):
-        if not isinstance(object, (str, functions.Function)):
-            raise DataError("Expansions can contain only strings or functions, got %s" % (type(object),))
+    def _lastisstring(self):
+        return len(self._elements) and not self._elements[-1][1]
 
-        if object == '':
+    def _firstisstring(self):
+        return len(self._elements) and not self._elements[0][1]
+
+    def appendstr(self, s):
+        assert isinstance(s, str)
+        if s == '':
             return
 
-        if len(self) and isinstance(object, str) and isinstance(self[-1], str):
-            self[-1] += object
+        if self._lastisstring():
+            s = self._elements[-1][0] + s
+            self._elements[-1] = s, False
         else:
-            self._elements.append(object)
+            self._elements.append((s, False))
+
+    def appendfunc(self, func):
+        assert isinstance(func, functions.Function)
+        self._elements.append((func, True))
 
     def concat(self, o):
         """Concatenate the other expansion on to this one."""
-        if len(self) > 0 and len(o) > 0 and isinstance(self[-1], str) and isinstance(o[0], str):
-            self[-1] += o[0]
-            self._elements.extend(o[1:])
+        if o._firstisstring() and self._lastisstring():
+            mystr = self._elements[-1][0]
+            ostr = o._elements[0][0]
+            self._elements[-1] = mystr + ostr, False
+            self._elements.extend(o._elements[1:])
         else:
-            self._elements.extend(o)
+            self._elements.extend(o._elements)
+
+    def isempty(self):
+        return (not len(self._elements)) or self._elements[0] == ('', False)
 
     def lstrip(self):
         """Strip leading literal whitespace from this expansion."""
-        if len(self) > 0 and isinstance(self[0], str):
-            assert len(self) == 1 or not isinstance(self[1], str), "Strings didn't fold"
-            self[0] = self[0].lstrip()
+        if self._firstisstring():
+            s = self._elements[0][0].lstrip()
+            self._elements[0] = s, False
 
     def rstrip(self):
         """Strip trailing literal whitespace from this expansion."""
-        if len(self) > 0 and isinstance(self[-1], str):
-            assert len(self) == 1 or not isinstance(self[-2], str), "Strings didn't fold"
-            self[-1] = self[-1].rstrip()
-
-    def trimlastnewline(self):
-        """Strip only the last newline, if present."""
-        if len(self) > 0 and isinstance(self[-1], str) and self[-1][-1] == '\n':
-            self[-1] = self[-1][:-1]
+        if self._lastisstring():
+            s = self._elements[-1][0].rstrip()
+            self._elements[-1] = s, False
 
     def resolve(self, makefile, variables, setting=[]):
         """
         Resolve this variable into a value, by interpolating the value
         of other variables.
 
         @param setting (Variable instance) the variable currently
                being set, if any. Setting variables must avoid self-referential
                loops.
         """
         assert isinstance(makefile, Makefile)
         assert isinstance(variables, Variables)
         assert isinstance(setting, list)
 
-        for i in self._elements:
-            if isinstance(i, str):
-                yield i
+        for e, isfunc in self._elements:
+            if isfunc:
+                it = e.resolve(makefile, variables, setting)
+                assert not isinstance(it, str)
+                for j in it:
+                    yield j
             else:
-                for j in i.resolve(makefile, variables, setting):
-                    yield j
+                assert isinstance(e, str)
+                yield e
                     
     def resolvestr(self, makefile, variables, setting=[]):
         s = ''
         for i in self.resolve(makefile, variables, setting):
-            if i != '':
-                s += i
+            try:
+                if i != '':
+                    s += i
+            except:
+                print "error appending i: %r" % (i,)
+                raise
         return s
 
     def resolvesplit(self, makefile, variables, setting=[]):
         return util.itersplit(self.resolve(makefile, variables, setting))
 
     def __len__(self):
         return len(self._elements)
 
-    def __getitem__(self, key):
-        return self._elements[key]
-
-    def __setitem__(self, key, v):
-        self._elements[key] = v
-
-    def __iter__(self):
-        return iter(self._elements)
-
     def __repr__(self):
         return "<Expansion with elements: %r>" % (self._elements,)
 
 class Variables(object):
     """
     A mapping from variable names to variables. Variables have flavor, source, and value. The value is an 
     expansion object.
     """
@@ -218,17 +225,17 @@ class Variables(object):
                     if source > psource:
                         # TODO: log a warning?
                         return pflavor, psource, pvalue
 
                     if not expand:
                         return pflavor, psource, pvalue + ' ' + valuestr
 
                     pvalue = pvalue.clone()
-                    pvalue.append(' ')
+                    pvalue.appendstr(' ')
                     pvalue.concat(valueexp)
 
                     return pflavor, psource, pvalue
                     
             if not expand:
                 return flavor, source, valuestr
 
             if flavor == self.FLAVOR_RECURSIVE:
--- a/pymake/parser.py
+++ b/pymake/parser.py
@@ -581,17 +581,20 @@ def parsestream(fd, filename):
 
             if kword == 'unexport':
                 raise SyntaxError("unexporting variables is not supported", d.getloc(offset))
 
             assert kword is None, "unexpected kword: %r" % (kword,)
 
             e, token, offset = parsemakesyntax(d, offset, varsettokens + ('::', ':'), itermakefilechars)
             if token is None:
-                condstack[-1].append(parserdata.EmptyDirective(e))
+                e.rstrip()
+                e.lstrip()
+                if not e.isempty():
+                    condstack[-1].append(parserdata.EmptyDirective(e))
                 continue
 
             # if we encountered real makefile syntax, the current rule is over
             currule = False
 
             if token in varsettokens:
                 e.lstrip()
                 e.rstrip()
@@ -716,30 +719,30 @@ def parsemakesyntax(d, startat, stopon, 
     di = iterfunc(d, startat, stack[-1].tokenlist)
     while True: # this is not a for loop because `di` changes during the function
         stacktop = stack[-1]
         try:
             s, token, tokenoffset, offset = di.next()
         except StopIteration:
             break
 
-        stacktop.expansion.append(s)
+        stacktop.expansion.appendstr(s)
         if token is None:
             continue
 
         if token == '$':
             if len(d.data) == offset:
                 # an unterminated $ expands to nothing
                 break
 
             loc = d.getloc(tokenoffset)
 
             c = d.data[offset]
             if c == '$':
-                stacktop.expansion.append('$')
+                stacktop.expansion.appendstr('$')
                 offset = offset + 1
             elif c in ('(', '{'):
                 closebrace = _matchingbrace[c]
 
                 # look forward for a function name
                 fname, offset = d.findtoken(offset + 1, _functiontokenlist, True)
                 if fname is not None:
                     fn = functions.functionmap[fname](loc)
@@ -759,81 +762,81 @@ def parsemakesyntax(d, startat, stopon, 
                 e = data.Expansion()
                 tokenlist = TokenList.get((':', c, closebrace, '$'))
                 stack.append(ParseStackFrame(PARSESTATE_VARNAME, e, tokenlist,
                                              openbrace=c, closebrace=closebrace, loc=loc))
                 di = iterfunc(d, offset, tokenlist)
                 continue
             else:
                 e = data.Expansion.fromstring(c)
-                stacktop.expansion.append(functions.VariableRef(loc, e))
+                stacktop.expansion.appendfunc(functions.VariableRef(loc, e))
                 offset += 1
         elif token in ('(', '{'):
             assert token == stacktop.openbrace
 
-            stacktop.expansion.append(token)
+            stacktop.expansion.appendstr(token)
             stack.append(ParseStackFrame(PARSESTATE_PARENMATCH,
                                          stacktop.expansion,
                                          TokenList.get((token, stacktop.closebrace,)),
                                          openbrace=token, closebrace=stacktop.closebrace, loc=d.getloc(tokenoffset)))
         elif stacktop.parsestate == PARSESTATE_PARENMATCH:
             assert token == stacktop.closebrace
-            stacktop.expansion.append(token)
+            stacktop.expansion.appendstr(token)
             stack.pop()
         elif stacktop.parsestate == PARSESTATE_TOPLEVEL:
             assert len(stack) == 1
             return stacktop.expansion, token, offset
         elif stacktop.parsestate == PARSESTATE_FUNCTION:
             if token == ',':
                 stacktop.expansion = data.Expansion()
                 stacktop.function.append(stacktop.expansion)
 
                 if len(stacktop.function) == stacktop.function.maxargs:
                     tokenlist = TokenList.get((stacktop.openbrace, stacktop.closebrace, '$'))
                     stacktop.tokenlist = tokenlist
             elif token in (')', '}'):
                     stacktop.function.setup()
                     stack.pop()
-                    stack[-1].expansion.append(stacktop.function)
+                    stack[-1].expansion.appendfunc(stacktop.function)
             else:
                 assert False, "Not reached, PARSESTATE_FUNCTION"
         elif stacktop.parsestate == PARSESTATE_VARNAME:
             if token == ':':
                 stacktop.varname = stacktop.expansion
                 stacktop.parsestate = PARSESTATE_SUBSTFROM
                 stacktop.expansion = data.Expansion()
                 stacktop.tokenlist = TokenList.get(('=', stacktop.openbrace, stacktop.closebrace, '$'))
             elif token in (')', '}'):
                 stack.pop()
-                stack[-1].expansion.append(functions.VariableRef(stacktop.loc, stacktop.expansion))
+                stack[-1].expansion.appendfunc(functions.VariableRef(stacktop.loc, stacktop.expansion))
             else:
                 assert False, "Not reached, PARSESTATE_VARNAME"
         elif stacktop.parsestate == PARSESTATE_SUBSTFROM:
             if token == '=':
                 stacktop.substfrom = stacktop.expansion
                 stacktop.parsestate = PARSESTATE_SUBSTTO
                 stacktop.expansion = data.Expansion()
                 stacktop.tokenlist = TokenList.get((stacktop.openbrace, stacktop.closebrace, '$'))
             elif token in (')', '}'):
                 # A substitution of the form $(VARNAME:.ee) is probably a mistake, but make
                 # parses it. Issue a warning. Combine the varname and substfrom expansions to
                 # make the compatible varname. See tests/var-substitutions.mk SIMPLE3SUBSTNAME
                 _log.warning("%s: Variable reference looks like substitution without =", stacktop.loc)
-                stacktop.varname.append(':')
+                stacktop.varname.appendstr(':')
                 stacktop.varname.concat(stacktop.expansion)
                 stack.pop()
-                stack[-1].expansion.append(functions.VariableRef(stacktop.loc, stacktop.varname))
+                stack[-1].expansion.appendfunc(functions.VariableRef(stacktop.loc, stacktop.varname))
             else:
                 assert False, "Not reached, PARSESTATE_SUBSTFROM"
         elif stacktop.parsestate == PARSESTATE_SUBSTTO:
             assert token in  (')','}'), "Not reached, PARSESTATE_SUBSTTO"
 
             stack.pop()
-            stack[-1].expansion.append(functions.SubstitutionRef(stacktop.loc, stacktop.varname,
-                                                                 stacktop.substfrom, stacktop.expansion))
+            stack[-1].expansion.appendfunc(functions.SubstitutionRef(stacktop.loc, stacktop.varname,
+                                                                     stacktop.substfrom, stacktop.expansion))
         else:
             assert False, "Unexpected parse state %s" % stacktop.parsestate
 
         di = iterfunc(d, offset, stack[-1].tokenlist)
 
     if len(stack) != 1:
         raise SyntaxError("Unterminated function call", d.getloc(offset))