Bug 769976 - Statement reformatting and equality operators
author Gregory Szorc <gps@mozilla.com>
Mon, 06 Aug 2012 10:23:25 -0700
changeset 317 cf7d1d604b8f1575fc83ca8f33c91c80292e4c2c
parent 316 9c2d6ceb800558cd8efd4ec219e46c6368b262cb
child 318 7f59c27d4ad9bc74f81050407676a70e1e6a076a
push id 195
push user gszorc@mozilla.com
push date Mon, 06 Aug 2012 17:25:31 +0000
bugs 769976
Bug 769976 - Statement reformatting and equality operators You can now call to_source() on Expansion, StringExpansion, Function (and derived), Statement (and derived), Condition (and derived), and StatementList to obtain make "source code" for that entity. This means you can write out make files by constructing an appropriate set of Statement instances. This also implements __eq__ and __ne__ on all of the above. This is being used in the tests to verify that the reformatting code works properly (produces an equivalent StatementList). khuey gave permission to land without review.
mkformat.py
pymake/data.py
pymake/functions.py
pymake/parserdata.py
tests/datatests.py
tests/formattingtests.py
new file mode 100755
--- /dev/null
+++ b/mkformat.py
@@ -0,0 +1,13 @@
+#!/usr/bin/env python
+
+import sys
+import pymake.parser
+
+filename = sys.argv[1]
+source = None
+
+with open(filename, 'rU') as fh:
+    source = fh.read()
+
+statements = pymake.parser.parsestring(source, filename)
+print statements.to_source()
--- a/pymake/data.py
+++ b/pymake/data.py
@@ -99,16 +99,34 @@ class StringExpansion(object):
 
     def __getitem__(self, i):
         assert i == 0
         return self.s, False
 
     def __repr__(self):
         return "Exp<%s>(%r)" % (self.loc, self.s)
 
+    def __eq__(self, other):
+        """We only compare the string contents."""
+        return self.s == other
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+
+    def to_source(self, escape_variables=False, escape_comments=False):
+        s = self.s
+
+        if escape_comments:
+            s = s.replace('#', '\\#')
+
+        if escape_variables:
+            return s.replace('$', '$$')
+
+        return s
+
 class Expansion(list):
     """
     A representation of expanded data, such as that for a recursively-expanded variable, a command, etc.
     """
 
     __slots__ = ('loc',)
     simple = False
 
@@ -230,16 +248,78 @@ class Expansion(list):
         return fd.getvalue()
 
     def resolvesplit(self, makefile, variables, setting=[]):
         return self.resolvestr(makefile, variables, setting).split()
 
     def __repr__(self):
         return "<Expansion with elements: %r>" % ([e for e, isfunc in self],)
 
+    def to_source(self, escape_variables=False, escape_comments=False):
+        parts = []
+        for e, is_func in self:
+            if is_func:
+                parts.append(e.to_source())  # functions emit their own syntax
+                continue
+
+            if escape_comments:  # same literal escaping as StringExpansion
+                e = e.replace('#', '\\#')
+            if escape_variables:
+                e = e.replace('$', '$$')
+            parts.append(e)
+
+        return ''.join(parts)
+
+    def __eq__(self, other):
+        if not isinstance(other, (Expansion, StringExpansion)):
+            return False
+
+        # Expansions are equivalent if adjacent string literals normalize to
+        # the same value. So, we must normalize before any comparisons are
+        # made. We normalize clones so the operands themselves are untouched.
+        a = self.clone().finish()
+
+        if isinstance(other, StringExpansion):
+            if isinstance(a, StringExpansion):
+                return a == other
+
+            # A normalized Expansion != StringExpansion.
+            return False
+
+        b = other.clone().finish()
+
+        # b could be a StringExpansion now.
+        if isinstance(b, StringExpansion):
+            if isinstance(a, StringExpansion):
+                return a == b
+
+            # Our normalized Expansion != normalized StringExpansion.
+            return False
+
+        if len(a) != len(b):
+            return False
+
+        for i in xrange(len(a)):  # a, not self: finish() may have merged items
+            e1, is_func1 = a[i]
+            e2, is_func2 = b[i]
+
+            if is_func1 != is_func2:
+                return False
+
+            if type(e1) != type(e2):
+                return False
+
+            if e1 != e2:
+                return False
+
+        return True
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+
 class Variables(object):
     """
     A mapping from variable names to variables. Variables have flavor, source, and value. The value is an 
     expansion object.
     """
 
     __slots__ = ('parent', '_map')
 
--- a/pymake/functions.py
+++ b/pymake/functions.py
@@ -39,26 +39,94 @@ class Function(object):
             raise data.DataError("Not enough arguments to function %s, requires %s" % (self.name, self.minargs), self.loc)
 
         assert self.maxargs == 0 or argc <= self.maxargs, "Parser screwed up, gave us too many args"
 
     def append(self, arg):
         assert isinstance(arg, (data.Expansion, data.StringExpansion))
         self._arguments.append(arg)
 
+    def to_source(self):
+        """Convert the function back to make file "source" code."""
+        if not hasattr(self, 'name'):
+            raise Exception("%s must implement to_source()." % self.__class__)
+
+        # The default implementation simply prints the function name and all
+        # the arguments joined by a comma.
+        # According to the GNU make manual Section 8.1, whitespace around
+        # arguments is *not* part of the argument's value. So, we trim excess
+        # white space so we have consistent behavior.
+        args = []
+        curly = False
+        for i, arg in enumerate(self._arguments):
+            arg = arg.to_source()
+
+            if i == 0:
+                arg = arg.lstrip()
+
+            # Are balanced parens even OK?
+            if arg.count('(') != arg.count(')'):
+                curly = True
+
+            args.append(arg)
+
+        if curly:
+            return '${%s %s}' % (self.name, ','.join(args))
+
+        return '$(%s %s)' % (self.name, ','.join(args))
+
     def __len__(self):
         return len(self._arguments)
 
     def __repr__(self):
         return "%s<%s>(%r)" % (
             self.__class__.__name__, self.loc,
             ','.join([repr(a) for a in self._arguments]),
             )
 
+    def __eq__(self, other):
+        if not hasattr(self, 'name'):
+            raise Exception("%s must implement __eq__." % self.__class__)
+
+        if type(self) != type(other):
+            return False
+
+        if self.name != other.name:
+            return False
+
+        if len(self._arguments) != len(other._arguments):
+            return False
+
+        for i in xrange(len(self._arguments)):
+            # According to the GNU make manual Section 8.1, whitespace around
+            # arguments is *not* part of the argument's value. So, we do a
+            # whitespace-agnostic comparison.
+            if i == 0:
+                a = self._arguments[i]
+                a.lstrip()  # NOTE(review): result unused; presumably strips self's argument in place - confirm
+
+                b = other._arguments[i]
+                b.lstrip()  # NOTE(review): likewise presumably mutates other's argument - confirm
+
+                if a != b:
+                    return False
+
+                continue
+
+            if self._arguments[i] != other._arguments[i]:
+                return False
+
+        return True
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+
 class VariableRef(Function):
+    AUTOMATIC_VARIABLES = set(['@', '%', '<', '?', '^', '+', '|', '*'])
+
     __slots__ = ('vname', 'loc')
 
     def __init__(self, loc, vname):
         self.loc = loc
         assert isinstance(vname, (data.Expansion, data.StringExpansion))
         self.vname = vname
         
     def setup(self):
@@ -71,19 +139,34 @@ class VariableRef(Function):
 
         flavor, source, value = variables.get(vname)
         if value is None:
             log.debug("%s: variable '%s' was not set" % (self.loc, vname))
             return
 
         value.resolve(makefile, variables, fd, setting + [vname])
 
+    def to_source(self):
+        if isinstance(self.vname, data.StringExpansion):
+            if self.vname.s in self.AUTOMATIC_VARIABLES:
+                return '$%s' % self.vname.s
+
+            return '$(%s)' % self.vname.s
+
+        return '$(%s)' % self.vname.to_source()
+
     def __repr__(self):
         return "VariableRef<%s>(%r)" % (self.loc, self.vname)
 
+    def __eq__(self, other):
+        if not isinstance(other, VariableRef):
+            return False
+
+        return self.vname == other.vname
+
 class SubstitutionRef(Function):
     """$(VARNAME:.c=.o) and $(VARNAME:%.c=%.o)"""
 
     __slots__ = ('loc', 'vname', 'substfrom', 'substto')
 
     def __init__(self, loc, varname, substfrom, substto):
         self.loc = loc
         self.vname = varname
@@ -109,20 +192,33 @@ class SubstitutionRef(Function):
         f = data.Pattern(substfrom)
         if not f.ispattern():
             f = data.Pattern('%' + substfrom)
             substto = '%' + substto
 
         fd.write(' '.join([f.subst(substto, word, False)
                            for word in value.resolvesplit(makefile, variables, setting + [vname])]))
 
+    def to_source(self):
+        return '$(%s:%s=%s)' % (
+            self.vname.to_source(),
+            self.substfrom.to_source(),
+            self.substto.to_source())
+
     def __repr__(self):
         return "SubstitutionRef<%s>(%r:%r=%r)" % (
             self.loc, self.vname, self.substfrom, self.substto,)
 
+    def __eq__(self, other):
+        if not isinstance(other, SubstitutionRef):
+            return False
+
+        return self.vname == other.vname and self.substfrom == other.substfrom \
+                and self.substto == other.substto
+
 class SubstFunction(Function):
     name = 'subst'
     minargs = 3
     maxargs = 3
 
     __slots__ = Function.__slots__
 
     def resolve(self, makefile, variables, fd, setting):
@@ -347,29 +443,29 @@ class BasenameFunction(Function):
                 base = suffix
 
             yield dir + base
 
     def resolve(self, makefile, variables, fd, setting):
         util.joiniter(fd, self.basenames(self._arguments[0].resolvesplit(makefile, variables, setting)))
 
 class AddSuffixFunction(Function):
-    name = 'addprefix'
+    name = 'addsuffix'
     minargs = 2
     maxargs = 2
 
     __slots__ = Function.__slots__
 
     def resolve(self, makefile, variables, fd, setting):
         suffix = self._arguments[0].resolvestr(makefile, variables, setting)
 
         fd.write(' '.join([w + suffix for w in self._arguments[1].resolvesplit(makefile, variables, setting)]))
 
 class AddPrefixFunction(Function):
-    name = 'addsuffix'
+    name = 'addprefix'
     minargs = 2
     maxargs = 2
 
     def resolve(self, makefile, variables, fd, setting):
         prefix = self._arguments[0].resolvestr(makefile, variables, setting)
 
         fd.write(' '.join([prefix + w for w in self._arguments[1].resolvesplit(makefile, variables, setting)]))
 
@@ -402,18 +498,16 @@ class WildcardFunction(Function):
 
     def resolve(self, makefile, variables, fd, setting):
         patterns = self._arguments[0].resolvesplit(makefile, variables, setting)
 
         fd.write(' '.join([x.replace('\\','/')
                            for p in patterns
                            for x in glob(makefile.workdir, p)]))
 
-    __slots__ = Function.__slots__
-
 class RealpathFunction(Function):
     name = 'realpath'
     minargs = 1
     maxargs = 1
 
     def resolve(self, makefile, variables, fd, setting):
         fd.write(' '.join([os.path.realpath(os.path.join(makefile.workdir, path)).replace('\\', '/')
                            for path in self._arguments[0].resolvesplit(makefile, variables, setting)]))
--- a/pymake/parserdata.py
+++ b/pymake/parserdata.py
@@ -97,21 +97,36 @@ def parsecommandlineargs(args):
 
     return stmts, r, ' '.join(overrides)
 
 class Statement(object):
     """
     Represents parsed make file syntax.
 
     This is an abstract base class. Child classes are expected to implement
-    `execute()`.
+    basic methods defined below.
     """
 
     def execute(self, makefile, context):
-        raise Exception("Must implement execute() in child classes.")
+        """Executes this Statement within a make file execution context."""
+        raise Exception("%s must implement execute()." % self.__class__)
+
+    def to_source(self):
+        """Obtain the make file "source" representation of the Statement.
+
+        This converts an individual Statement back to a string that can again
+        be parsed into this Statement.
+        """
+        raise Exception("%s must implement to_source()." % self.__class__)
+
+    def __eq__(self, other):
+        raise Exception("%s must implement __eq__." % self.__class__)
+
+    def __ne__(self, other):
+        return not self.__eq__(other)  # must be the inverse of __eq__
 
 class DummyRule(object):
     __slots__ = ()
 
     def addcommand(self, r):
         pass
 
 class Rule(Statement):
@@ -167,16 +182,39 @@ class Rule(Statement):
 
             makefile.foundtarget(targets[0].gettarget())
 
         context.currule = rule
 
     def dump(self, fd, indent):
         print >>fd, "%sRule %s: %s" % (indent, self.targetexp, self.depexp)
 
+    def to_source(self):
+        sep = ':'
+
+        if self.doublecolon:
+            sep = '::'
+
+        deps = self.depexp.to_source()
+        if len(deps) > 0 and not deps[0].isspace():
+            sep += ' '
+
+        return '\n%s%s%s' % (
+            self.targetexp.to_source(escape_variables=True),
+            sep,
+            deps)
+
+    def __eq__(self, other):
+        if not isinstance(other, Rule):
+            return False
+
+        return self.targetexp == other.targetexp \
+                and self.depexp == other.depexp \
+                and self.doublecolon == other.doublecolon
+
 class StaticPatternRule(Statement):
     """
     Static pattern rules are rules which specify multiple targets based on a
     string pattern.
 
     See https://www.gnu.org/software/make/manual/make.html#Static-Pattern
 
     They are like `Rule` instances except an added property, `patternexp` is
@@ -222,16 +260,43 @@ class StaticPatternRule(Statement):
             makefile.gettarget(t).addrule(data.PatternRuleInstance(rule, '', stem, pattern.ismatchany()))
 
         makefile.foundtarget(targets[0])
         context.currule = rule
 
     def dump(self, fd, indent):
         print >>fd, "%sStaticPatternRule %s: %s: %s" % (indent, self.targetexp, self.patternexp, self.depexp)
 
+    def to_source(self):
+        sep = ':'
+
+        if self.doublecolon:
+            sep = '::'
+
+        pattern = self.patternexp.to_source()
+        deps = self.depexp.to_source()
+
+        if len(pattern) > 0 and pattern[0] not in (' ', '\t'):
+            sep += ' '
+
+        return '\n%s%s%s:%s' % (
+            self.targetexp.to_source(escape_variables=True),
+            sep,
+            pattern,
+            deps)
+
+    def __eq__(self, other):
+        if not isinstance(other, StaticPatternRule):
+            return False
+
+        return self.targetexp == other.targetexp \
+                and self.patternexp == other.patternexp \
+                and self.depexp == other.depexp \
+                and self.doublecolon == other.doublecolon
+
 class Command(Statement):
     """
     Commands are things that get executed by a rule.
 
     A rule's recipe is composed of 0 or more Commands.
 
     A command is simply an expansion. Commands typically represent strings to
     be executed in a shell (e.g. via system()). Although, since make files
@@ -249,16 +314,33 @@ class Command(Statement):
         if context.weak:
             raise data.DataError("rules not allowed in includedeps", self.exp.loc)
 
         context.currule.addcommand(self.exp)
 
     def dump(self, fd, indent):
         print >>fd, "%sCommand %s" % (indent, self.exp,)
 
+    def to_source(self):
+        # Commands have some interesting quirks when it comes to source
+        # formatting. First, they can be multi-line. Second, a tab needs to be
+        # inserted at the beginning of every line. Finally, there might be
+        # variable references inside the command. This means we need to escape
+        # variable references inside command strings. Luckily, this is handled
+        # by the Expansion.
+        s = self.exp.to_source(escape_variables=True)
+
+        return '\n'.join(['\t%s' % line for line in s.split('\n')])
+
+    def __eq__(self, other):
+        if not isinstance(other, Command):
+            return False
+
+        return self.exp == other.exp
+
 class SetVariable(Statement):
     """
     Represents a variable assignment.
 
     Variable assignment comes in two different flavors.
 
     Simple assignment has the form:
 
@@ -331,27 +413,93 @@ class SetVariable(Statement):
                 e, t, o = parser.parsemakesyntax(d, 0, (), parser.iterdata)
                 value = e.resolvestr(makefile, makefile.variables)
 
             v.set(vname, flavor, self.source, value)
 
     def dump(self, fd, indent):
         print >>fd, "%sSetVariable<%s> %s %s\n%s %r" % (indent, self.valueloc, self.vnameexp, self.token, indent, self.value)
 
+    def __eq__(self, other):
+        if not isinstance(other, SetVariable):
+            return False
+
+        return self.vnameexp == other.vnameexp \
+                and self.token == other.token \
+                and self.value == other.value \
+                and self.targetexp == other.targetexp \
+                and self.source == other.source
+
+    def to_source(self):
+        chars = []
+        for i in xrange(0, len(self.value)):
+            c = self.value[i]
+
+            # Literal # is escaped in variable assignment otherwise it would be
+            # a comment.
+            if c == '#':
+                # If a backslash precedes this, we need to escape it as well.
+                if i > 0 and self.value[i-1] == '\\':
+                    chars.append('\\')
+
+                chars.append('\\#')
+                continue
+
+            chars.append(c)
+
+        value = ''.join(chars)
+
+        prefix = ''
+        if self.source == data.Variables.SOURCE_OVERRIDE:
+            prefix = 'override '
+
+        # SetVariable come in two flavors: simple and target-specific.
+
+        # We handle the target-specific syntax first.
+        if self.targetexp is not None:
+            return '%s: %s %s %s' % (
+                self.targetexp.to_source(),
+                self.vnameexp.to_source(),
+                self.token,
+                value)
+
+        # The variable could be multi-line or have leading whitespace. For
+        # regular variable assignment, whitespace after the token but before
+        # the value is ignored. If we see leading whitespace in the value here,
+        # the variable must have come from a define.
+        if value.count('\n') > 0 or (len(value) and value[0].isspace()):
+            # The parser holds the token in vnameexp for whatever reason.
+            return '%sdefine %s\n%s\nendef' % (
+                prefix,
+                self.vnameexp.to_source(),
+                value)
+
+        return '%s%s %s %s' % (
+                prefix,
+                self.vnameexp.to_source(),
+                self.token,
+                value)
+
 class Condition(object):
     """
     An abstract "condition", either ifeq or ifdef, perhaps negated.
 
     See https://www.gnu.org/software/make/manual/make.html#Conditional-Syntax
 
     Subclasses must implement:
 
     def evaluate(self, makefile)
     """
 
+    def __eq__(self, other):
+        raise Exception("%s must implement __eq__." % self.__class__)
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+
 class EqCondition(Condition):
     """
     Represents an ifeq or ifneq conditional directive.
 
     This directive consists of two Expansions which are compared for equality.
 
     The `expected` field is a bool indicating what the condition must evaluate
     to in order for its body to be executed. If True, this is an "ifeq"
@@ -370,16 +518,24 @@ class EqCondition(Condition):
     def evaluate(self, makefile):
         r1 = self.exp1.resolvestr(makefile, makefile.variables)
         r2 = self.exp2.resolvestr(makefile, makefile.variables)
         return (r1 == r2) == self.expected
 
     def __str__(self):
         return "ifeq (expected=%s) %s %s" % (self.expected, self.exp1, self.exp2)
 
+    def __eq__(self, other):
+        if not isinstance(other, EqCondition):
+            return False
+
+        return self.exp1 == other.exp1 \
+                and self.exp2 == other.exp2 \
+                and self.expected == other.expected
+
 class IfdefCondition(Condition):
     """
     Represents an ifdef or ifndef conditional directive.
 
     This directive consists of a single expansion which represents the name of
     a variable (without the leading '$') which will be checked for definition.
 
     The `expected` field is a bool and has the same behavior as EqCondition.
@@ -399,28 +555,37 @@ class IfdefCondition(Condition):
         if value is None:
             return not self.expected
 
         return (len(value) > 0) == self.expected
 
     def __str__(self):
         return "ifdef (expected=%s) %s" % (self.expected, self.exp)
 
+    def __eq__(self, other):
+        if not isinstance(other, IfdefCondition):
+            return False
+
+        return self.exp == other.exp and self.expected == other.expected
+
 class ElseCondition(Condition):
     """
     Represents the transition between branches in a ConditionBlock.
     """
     __slots__ = ()
 
     def evaluate(self, makefile):
         return True
 
     def __str__(self):
         return "else"
 
+    def __eq__(self, other):
+        return isinstance(other, ElseCondition)
+
 class ConditionBlock(Statement):
     """
     A set of related Conditions.
 
     This is essentially a list of 2-tuples of (Condition, list(Statement)).
 
     The parser creates a ConditionBlock for all statements related to the same
     conditional group. If iterating over the parser's output, where you think
@@ -468,16 +633,116 @@ class ConditionBlock(Statement):
 
         indent2 = indent + '  '
         for c, statements in self._groups:
             print >>fd, "%s Condition %s" % (indent, c)
             statements.dump(fd, indent2)
             print >>fd, "%s ~Condition" % (indent,)
         print >>fd, "%s~ConditionBlock" % (indent,)
 
+    def to_source(self):
+        lines = []
+        index = 0
+        for condition, statements in self:
+            lines.append(ConditionBlock.condition_source(condition, index))
+            index += 1
+
+            for statement in statements:
+                lines.append(statement.to_source())
+
+        lines.append('endif')
+
+        return '\n'.join(lines)
+
+    def __eq__(self, other):
+        if not isinstance(other, ConditionBlock):
+            return False
+
+        if len(self) != len(other):
+            return False
+
+        for i in xrange(0, len(self)):
+            our_condition, our_statements = self[i]
+            other_condition, other_statements = other[i]
+
+            if our_condition != other_condition:
+                return False
+
+            if our_statements != other_statements:
+                return False
+
+        return True
+
+    @staticmethod
+    def condition_source(statement, index):
+        """Convert a condition to its source representation.
+
+        The index argument defines the index of this condition inside a
+        ConditionBlock. If it is greater than 0, an "else" will be prepended
+        to the result, if necessary.
+        """
+        prefix = ''
+        if isinstance(statement, (EqCondition, IfdefCondition)) and index > 0:
+            prefix = 'else '
+
+        if isinstance(statement, IfdefCondition):
+            s = statement.exp.to_source()  # works for Expansion and StringExpansion
+
+            if statement.expected:
+                return '%sifdef %s' % (prefix, s)
+
+            return '%sifndef %s' % (prefix, s)
+
+        if isinstance(statement, EqCondition):
+            args = [
+                statement.exp1.to_source(escape_comments=True),
+                statement.exp2.to_source(escape_comments=True)]
+
+            use_quotes = False
+            single_quote_present = False
+            double_quote_present = False
+            for arg in args:
+                if len(arg) > 0 and (arg[0].isspace() or arg[-1].isspace()):
+                    use_quotes = True
+                # Every argument gets wrapped, so check all of them for quotes.
+                if "'" in arg:
+                    single_quote_present = True
+
+                if '"' in arg:
+                    double_quote_present = True
+
+            # Quoting is impossible if both quote characters are in use.
+            if use_quotes and single_quote_present and double_quote_present:
+                raise Exception('Cannot format condition with multiple quotes.')
+
+            if use_quotes:
+                for i, arg in enumerate(args):
+                    # Wrap with the quote character the arguments do not use.
+                    if single_quote_present:
+                        args[i] = '"' + arg + '"'
+                    else:
+                        args[i] = "'" + arg + "'"
+
+            body = None
+            if use_quotes:
+                body = ' '.join(args)
+            else:
+                body = '(%s)' % ','.join(args)
+
+            if statement.expected:
+                return '%sifeq %s' % (prefix, body)
+
+            return '%sifneq %s' % (prefix, body)
+
+        if isinstance(statement, ElseCondition):
+            return 'else'
+
+        raise Exception('Unhandled Condition statement: %s' %
+                statement.__class__)
+
     def __iter__(self):
         return iter(self._groups)
 
     def __len__(self):
         return len(self._groups)
 
     def __getitem__(self, i):
         return self._groups[i]
@@ -503,16 +768,30 @@ class Include(Statement):
     def execute(self, makefile, context):
         files = self.exp.resolvesplit(makefile, makefile.variables)
         for f in files:
             makefile.include(f, self.required, loc=self.exp.loc, weak=self.weak)
 
     def dump(self, fd, indent):
         print >>fd, "%sInclude %s" % (indent, self.exp)
 
+    def to_source(self):
+        prefix = ''
+
+        if not self.required:
+            prefix = '-'
+
+        return '%sinclude %s' % (prefix, self.exp.to_source())
+
+    def __eq__(self, other):
+        if not isinstance(other, Include):
+            return False
+
+        return self.exp == other.exp and self.required == other.required
+
 class VPathDirective(Statement):
     """
     Represents the vpath directive.
 
     See https://www.gnu.org/software/make/manual/make.html#Selective-Search
     """
     __slots__ = ('exp',)
 
@@ -536,16 +815,25 @@ class VPathDirective(Statement):
                     dirs.extend((dir for dir in mpath.split(os.pathsep)
                                  if dir != ''))
                 if len(dirs):
                     makefile.addvpath(pattern, dirs)
 
     def dump(self, fd, indent):
         print >>fd, "%sVPath %s" % (indent, self.exp)
 
+    def to_source(self):
+        return 'vpath %s' % self.exp.to_source()
+
+    def __eq__(self, other):
+        if not isinstance(other, VPathDirective):
+            return False
+
+        return self.exp == other.exp
+
 class ExportDirective(Statement):
     """
     Represents the "export" directive.
 
     This is used to control exporting variables to sub makes.
 
     See https://www.gnu.org/software/make/manual/make.html#Variables_002fRecursion
 
@@ -572,16 +860,27 @@ class ExportDirective(Statement):
                 raise data.DataError("Exporting all variables is not supported", self.exp.loc)
 
         for v in vlist:
             makefile.exportedvars[v] = True
 
     def dump(self, fd, indent):
         print >>fd, "%sExport (single=%s) %s" % (indent, self.single, self.exp)
 
+    def to_source(self):
+        return ('export %s' % self.exp.to_source()).rstrip()
+
+    def __eq__(self, other):
+        if not isinstance(other, ExportDirective):
+            return False
+
+        # single is irrelevant because it just says whether the next Statement
+        # contains a variable definition.
+        return self.exp == other.exp
+
 class UnexportDirective(Statement):
     """
     Represents the "unexport" directive.
 
     This is the opposite of ExportDirective.
     """
     __slots__ = ('exp',)
 
@@ -591,16 +890,25 @@ class UnexportDirective(Statement):
     def execute(self, makefile, context):
         vlist = list(self.exp.resolvesplit(makefile, makefile.variables))
         for v in vlist:
             makefile.exportedvars[v] = False
 
     def dump(self, fd, indent):
         print >>fd, "%sUnexport %s" % (indent, self.exp)
 
+    def to_source(self):
+        return 'unexport %s' % self.exp.to_source()
+
+    def __eq__(self, other):
+        if not isinstance(other, UnexportDirective):
+            return False
+
+        return self.exp == other.exp
+
 class EmptyDirective(Statement):
     """
     Represents a standalone statement, usually an Expansion.
 
     You will encounter EmptyDirective instances if there is a function
     or similar at the top-level of a make file (e.g. outside of a rule or
     variable assignment). You can also find them as the bodies of
     ConditionBlock branches.
@@ -614,16 +922,25 @@ class EmptyDirective(Statement):
     def execute(self, makefile, context):
         v = self.exp.resolvestr(makefile, makefile.variables)
         if v.strip() != '':
             raise data.DataError("Line expands to non-empty value", self.exp.loc)
 
     def dump(self, fd, indent):
         print >>fd, "%sEmptyDirective: %s" % (indent, self.exp)
 
+    def to_source(self):
+        return self.exp.to_source()
+
+    def __eq__(self, other):
+        if not isinstance(other, EmptyDirective):
+            return False
+
+        return self.exp == other.exp
+
 class _EvalContext(object):
     __slots__ = ('currule', 'weak')
 
     def __init__(self, weak):
         self.weak = weak
 
 class StatementList(list):
     """
@@ -651,14 +968,17 @@ class StatementList(list):
         for s in self:
             s.dump(fd, indent)
 
     def __str__(self):
         fd = StringIO()
         self.dump(fd, '')
         return fd.getvalue()
 
+    def to_source(self):
+        return '\n'.join([s.to_source() for s in self])
+
 def iterstatements(stmts):
     for s in stmts:
         yield s
         if isinstance(s, ConditionBlock):
             for c, sl in s:
                 for s2 in iterstatments(sl): yield s2
--- a/tests/datatests.py
+++ b/tests/datatests.py
@@ -1,9 +1,9 @@
-import pymake.data, pymake.util
+import pymake.data, pymake.functions, pymake.util
 import unittest
 import re
 from cStringIO import StringIO
 
 def multitest(cls):
     for name in cls.testdata.iterkeys():
         def m(self, name=name):
             return self.runSingle(*self.testdata[name])
@@ -69,10 +69,56 @@ class LRUTest(unittest.TestCase):
             v = c.get(k)
             self.assertEqual(v, e)
             self.assertEqual(self.funccount, fc,
                              "funccount, iteration %i, got %i expected %i" % (i, self.funccount, fc))
             goti = tuple(c.debugitems())
             self.assertEqual(goti, di,
                              "debugitems, iteration %i, got %r expected %r" % (i, goti, di))
 
+class EqualityTest(unittest.TestCase):
+    def test_string_expansion(self):
+        s1 = pymake.data.StringExpansion('foo bar', None)
+        s2 = pymake.data.StringExpansion('foo bar', None)
+
+        self.assertEqual(s1, s2)
+
+    def test_expansion_simple(self):
+        s1 = pymake.data.Expansion(None)
+        s2 = pymake.data.Expansion(None)
+
+        self.assertEqual(s1, s2)
+
+        s1.appendstr('foo')
+        s2.appendstr('foo')
+        self.assertEqual(s1, s2)
+
+    def test_expansion_string_finish(self):
+        """Adjacent strings should normalize to same value."""
+        s1 = pymake.data.Expansion(None)
+        s2 = pymake.data.Expansion(None)
+
+        s1.appendstr('foo')
+        s2.appendstr('foo')
+
+        s1.appendstr(' bar')
+        s1.appendstr(' baz')
+        s2.appendstr(' bar baz')
+
+        self.assertEqual(s1, s2)
+
+    def test_function(self):
+        s1 = pymake.data.Expansion(None)
+        s2 = pymake.data.Expansion(None)
+
+        n1 = pymake.data.StringExpansion('FOO', None)
+        n2 = pymake.data.StringExpansion('FOO', None)
+
+        v1 = pymake.functions.VariableRef(None, n1)
+        v2 = pymake.functions.VariableRef(None, n2)
+
+        s1.appendfunc(v1)
+        s2.appendfunc(v2)
+
+        self.assertEqual(s1, s2)
+
 if __name__ == '__main__':
     unittest.main()
new file mode 100644
--- /dev/null
+++ b/tests/formattingtests.py
@@ -0,0 +1,289 @@
+# This file contains test code for the formatting of parsed statements back to
+# make file "source." It essentially verifies the to_source() functions
+# scattered across the tree.
+
+import glob
+import logging
+import os.path
+import unittest
+
+from pymake.data import Expansion
+from pymake.data import StringExpansion
+from pymake.functions import BasenameFunction
+from pymake.functions import SubstitutionRef
+from pymake.functions import VariableRef
+from pymake.functions import WordlistFunction
+from pymake.parserdata import Include
+from pymake.parserdata import SetVariable
+from pymake.parser import parsestring
+from pymake.parser import SyntaxError
+
+class TestBase(unittest.TestCase):
+    pass
+
+class VariableRefTest(TestBase):
+    def test_string_name(self):
+        e = StringExpansion('foo', None)
+        v = VariableRef(None, e)
+
+        self.assertEqual(v.to_source(), '$(foo)')
+
+    def test_special_variable(self):
+        e = StringExpansion('<', None)
+        v = VariableRef(None, e)
+
+        self.assertEqual(v.to_source(), '$<')
+
+    def test_expansion_simple(self):
+        e = Expansion()
+        e.appendstr('foo')
+        e.appendstr('bar')
+
+        v = VariableRef(None, e)
+
+        self.assertEqual(v.to_source(), '$(foobar)')
+
+class StandardFunctionTest(TestBase):
+    def test_basename(self):
+        e1 = StringExpansion('foo', None)
+        v = VariableRef(None, e1)
+        e2 = Expansion(None)
+        e2.appendfunc(v)
+
+        b = BasenameFunction(None)
+        b.append(e2)
+
+        self.assertEqual(b.to_source(), '$(basename $(foo))')
+
+    def test_wordlist(self):
+        e1 = StringExpansion('foo', None)
+        e2 = StringExpansion('bar ', None)
+        e3 = StringExpansion(' baz', None)
+
+        w = WordlistFunction(None)
+        w.append(e1)
+        w.append(e2)
+        w.append(e3)
+
+        self.assertEqual(w.to_source(), '$(wordlist foo,bar , baz)')
+
+    def test_curly_brackets(self):
+        e1 = Expansion(None)
+        e1.appendstr('foo')
+
+        e2 = Expansion(None)
+        e2.appendstr('foo ( bar')
+
+        f = WordlistFunction(None)
+        f.append(e1)
+        f.append(e2)
+
+        self.assertEqual(f.to_source(), '${wordlist foo,foo ( bar}')
+
+class StringExpansionTest(TestBase):
+    def test_simple(self):
+        e = StringExpansion('foobar', None)
+        self.assertEqual(e.to_source(), 'foobar')
+
+        e = StringExpansion('$var', None)
+        self.assertEqual(e.to_source(), '$var')
+
+    def test_escaping(self):
+        e = StringExpansion('$var', None)
+        self.assertEqual(e.to_source(escape_variables=True), '$$var')
+
+        e = StringExpansion('this is # not a comment', None)
+        self.assertEqual(e.to_source(escape_comments=True),
+                         'this is \# not a comment')
+
+    def test_empty(self):
+        e = StringExpansion('', None)
+        self.assertEqual(e.to_source(), '')
+
+        e = StringExpansion(' ', None)
+        self.assertEqual(e.to_source(), ' ')
+
+class ExpansionTest(TestBase):
+    def test_single_string(self):
+        e = Expansion()
+        e.appendstr('foo')
+
+        self.assertEqual(e.to_source(), 'foo')
+
+    def test_multiple_strings(self):
+        e = Expansion()
+        e.appendstr('hello')
+        e.appendstr('world')
+
+        self.assertEqual(e.to_source(), 'helloworld')
+
+    def test_string_escape(self):
+        e = Expansion()
+        e.appendstr('$var')
+        self.assertEqual(e.to_source(), '$var')
+        self.assertEqual(e.to_source(escape_variables=True), '$$var')
+
+        e = Expansion()
+        e.appendstr('foo')
+        e.appendstr(' $bar')
+        self.assertEqual(e.to_source(escape_variables=True), 'foo $$bar')
+
+class SubstitutionRefTest(TestBase):
+    def test_simple(self):
+        name = StringExpansion('foo', None)
+        c = StringExpansion('%.c', None)
+        o = StringExpansion('%.o', None)
+        s = SubstitutionRef(None, name, c, o)
+
+        self.assertEqual(s.to_source(), '$(foo:%.c=%.o)')
+
+class SetVariableTest(TestBase):
+    def test_simple(self):
+        v = SetVariable(StringExpansion('foo', None), '=', 'bar', None, None)
+        self.assertEqual(v.to_source(), 'foo = bar')
+
+    def test_multiline(self):
+        s = 'hello\nworld'
+        foo = StringExpansion('FOO', None)
+
+        v = SetVariable(foo, '=', s, None, None)
+
+        self.assertEqual(v.to_source(), 'define FOO\nhello\nworld\nendef')
+
+    def test_multiline_immediate(self):
+        source = 'define FOO :=\nhello\nworld\nendef'
+
+        statements = parsestring(source, 'foo.mk')
+        self.assertEqual(statements.to_source(), source)
+
+    def test_target_specific(self):
+        foo = StringExpansion('FOO', None)
+        bar = StringExpansion('BAR', None)
+
+        v = SetVariable(foo, '+=', 'value', None, bar)
+
+        self.assertEqual(v.to_source(), 'BAR: FOO += value')
+
+class IncludeTest(TestBase):
+    def test_include(self):
+        e = StringExpansion('rules.mk', None)
+        i = Include(e, True, False)
+        self.assertEqual(i.to_source(), 'include rules.mk')
+
+        i = Include(e, False, False)
+        self.assertEqual(i.to_source(), '-include rules.mk')
+
+class IfdefTest(TestBase):
+    def test_simple(self):
+        source = 'ifdef FOO\nbar := $(value)\nendif'
+
+        statements = parsestring(source, 'foo.mk')
+        self.assertEqual(statements[0].to_source(), source)
+
+    def test_nested(self):
+        source = 'ifdef FOO\nifdef BAR\nhello = world\nendif\nendif'
+
+        statements = parsestring(source, 'foo.mk')
+        self.assertEqual(statements[0].to_source(), source)
+
+    def test_negation(self):
+        source = 'ifndef FOO\nbar += value\nendif'
+
+        statements = parsestring(source, 'foo.mk')
+        self.assertEqual(statements[0].to_source(), source)
+
+class IfeqTest(TestBase):
+    def test_simple(self):
+        source = 'ifeq ($(foo),bar)\nhello = $(world)\nendif'
+
+        statements = parsestring(source, 'foo.mk')
+        self.assertEqual(statements[0].to_source(), source)
+
+    def test_negation(self):
+        source = 'ifneq (foo,bar)\nhello = world\nendif'
+
+        statements = parsestring(source, 'foo.mk')
+        self.assertEqual(statements.to_source(), source)
+
+class ConditionBlocksTest(TestBase):
+    def test_mixed_conditions(self):
+        source = 'ifdef FOO\nifeq ($(FOO),bar)\nvar += $(value)\nendif\nendif'
+
+        statements = parsestring(source, 'foo.mk')
+        self.assertEqual(statements.to_source(), source)
+
+    def test_extra_statements(self):
+        source = 'ifdef FOO\nF := 1\nifdef BAR\nB += 1\nendif\nC = 1\nendif'
+
+        statements = parsestring(source, 'foo.mk')
+        self.assertEqual(statements.to_source(), source)
+
+    def test_whitespace_preservation(self):
+        source = "ifeq ' x' 'x '\n$(error stripping)\nendif"
+
+        statements = parsestring(source, 'foo.mk')
+        self.assertEqual(statements.to_source(), source)
+
+        source = 'ifneq (x , x)\n$(error stripping)\nendif'
+        statements = parsestring(source, 'foo.mk')
+        self.assertEqual(statements.to_source(),
+                'ifneq (x,x)\n$(error stripping)\nendif')
+
+class MakefileCorupusTest(TestBase):
+    """Runs the make files from the pymake corpus through the formatter.
+
+    All the above tests are child's play compared to this.
+    """
+
+    # Our reformatting isn't perfect. We ignore files with known failures until
+    # we make them work.
+    # TODO Address these formatting corner cases.
+    _IGNORE_FILES = [
+        # We are thrown off by backslashes at end of lines.
+        'comment-parsing.mk',
+        'escape-chars.mk',
+        'include-notfound.mk',
+    ]
+
+    def _get_test_files(self):
+        ourdir = os.path.dirname(os.path.abspath(__file__))
+
+        for makefile in glob.glob(os.path.join(ourdir, '*.mk')):
+            if os.path.basename(makefile) in self._IGNORE_FILES:
+                continue
+
+            source = None
+            with open(makefile, 'rU') as fh:
+                source = fh.read()
+
+            try:
+                yield (makefile, source, parsestring(source, makefile))
+            except SyntaxError:
+                continue
+
+    def test_reparse_consistency(self):
+        for filename, source, statements in self._get_test_files():
+            reformatted = statements.to_source()
+
+            # We should be able to parse the reformatted source fine.
+            new_statements = parsestring(reformatted, filename)
+
+            # If we do the formatting again, the representation shouldn't
+            # change, i.e. the only lossy step should be the first reformat
+            # (whitespace and some semantics aren't preserved).
+            reformatted_again = new_statements.to_source()
+            self.assertEqual(reformatted, reformatted_again,
+                '%s has lossless reformat.' % filename)
+
+            self.assertEqual(len(statements), len(new_statements))
+
+            for i in xrange(0, len(statements)):
+                original = statements[i]
+                formatted = new_statements[i]
+
+                self.assertEqual(original, formatted, '%s %d: %s != %s' % (filename,
+                    i, original, formatted))
+
+if __name__ == '__main__':
+    logging.basicConfig(level=logging.DEBUG)
+    unittest.main()