Bug 1071012 - Extract Sphinx variables via AST reading; r=glandium
author: Gregory Szorc <gps@mozilla.com>
Tue, 07 Oct 2014 10:36:27 -0700
changeset 209363 e93c40d4344fcc7a2ce82bef47fc15da770d7786
parent 209362 9c8f4a869327122f7be19fccefbdb0eb992c40d2
child 209364 53a14a9129608b450da3e2b45c863f2e5ad79be6
push id: 1
push user: root
push date: Mon, 20 Oct 2014 17:29:22 +0000
reviewers: glandium
bugs: 1071012, 1041941, 1058359
milestone: 35.0a1
Bug 1071012 - Extract Sphinx variables via AST reading; r=glandium

The in-tree Sphinx docs have been broken since bug 1041941 because processing moz.build files outside their context doesn't work. Specifically, templates aren't loaded (because this information usually comes from a parent moz.build file). A new execution mode is needed.

I tried to implement a proper execution mode. However, I kept running into walls. While we should strive for a proper execution mode, this can be a follow-up, tracked in bug 1058359.

This patch implements extraction of Sphinx variables from ast walking. It is extremely low-level and definitely a one-off. But it solves the problem at hand: |mach build-docs| will work after this patch is applied.
python/mozbuild/mozbuild/frontend/reader.py
tools/docs/mach_commands.py
--- a/python/mozbuild/mozbuild/frontend/reader.py
+++ b/python/mozbuild/mozbuild/frontend/reader.py
@@ -13,16 +13,17 @@ this file, which is represented by the S
 to fill a Context, representing the output of an individual mozbuild file. The
 
 The BuildReader contains basic logic for traversing a tree of mozbuild files.
 It does this by examining specific variables populated during execution.
 """
 
 from __future__ import print_function, unicode_literals
 
+import ast
 import inspect
 import logging
 import os
 import sys
 import textwrap
 import time
 import tokenize
 import traceback
@@ -731,44 +732,149 @@ class BuildReader(object):
         all linked moz.build files until all relevant files have been evaluated.
 
         This is a generator of Context instances. As each moz.build file is
         read, a new Context is created and emitted.
         """
         path = mozpath.join(self.config.topsrcdir, 'moz.build')
         return self.read_mozbuild(path, self.config, read_tiers=True)
 
-    def walk_topsrcdir(self):
-        """Read all moz.build files in the source tree.
+    def all_mozbuild_paths(self):
+        """Iterator over all available moz.build files.
 
-        This is different from read_topsrcdir() in that this version performs a
-        filesystem walk to discover every moz.build file rather than relying on
-        data from executed moz.build files to drive traversal.
-
-        This is a generator of Context instances.
+        This method has little to do with the reader. It should arguably belong
+        elsewhere.
         """
         # In the future, we may traverse moz.build files by looking
         # for DIRS references in the AST, even if a directory is added behind
         # a conditional. For now, just walk the filesystem.
         ignore = {
             # Ignore fake moz.build files used for testing moz.build.
             'python/mozbuild/mozbuild/test',
 
             # Ignore object directories.
             'obj*',
         }
 
         finder = FileFinder(self.config.topsrcdir, find_executables=False,
             ignore=ignore)
 
+        # The root doesn't get picked up by FileFinder.
+        yield 'moz.build'
+
         for path, f in finder.find('**/moz.build'):
-            path = os.path.join(self.config.topsrcdir, path)
-            for s in self.read_mozbuild(path, self.config, descend=False,
-                read_tiers=True):
-                yield s
+            yield path
+
+    def find_sphinx_variables(self):
+        """This function finds all assignments of Sphinx documentation variables.
+
+        This is a generator of tuples of (moz.build path, var, key, value). For
+        variables that assign to keys in objects, key will be defined.
+
+        With a little work, this function could be made more generic. But if we
+        end up writing a lot of ast code, it might be best to import a
+        high-level AST manipulation library into the tree.
+        """
+        # This function looks for assignments to SPHINX_TREES and
+        # SPHINX_PYTHON_PACKAGE_DIRS variables.
+        #
+        # SPHINX_TREES is a dict. Keys and values should both be strings. The
+        # target of the assignment should be a Subscript node. The value
+        # assigned should be a Str node. e.g.
+        #
+        #  SPHINX_TREES['foo'] = 'bar'
+        #
+        # This is an Assign node with a Subscript target. The Subscript's value
+        # is a Name node with id "SPHINX_TREES." The slice of this target
+        # is an Index node and its value is a Str with value "foo."
+        #
+        # SPHINX_PYTHON_PACKAGE_DIRS is a simple list. The target of the
+        # assignment should be a Name node. Values should be a List node, whose
+        # elements are Str nodes. e.g.
+        #
+        #  SPHINX_PYTHON_PACKAGE_DIRS += ['foo']
+        #
+        # This is an AugAssign node with a Name target with id
+        # "SPHINX_PYTHON_PACKAGE_DIRS." The value is a List node containing 1
+        # Str elt whose value is "foo."
+        relevant = [
+            'SPHINX_TREES',
+            'SPHINX_PYTHON_PACKAGE_DIRS',
+        ]
+
+        def assigned_variable(node):
+            # This is not correct, but we don't care yet.
+            if hasattr(node, 'targets'):
+                # Nothing in moz.build does multi-assignment (yet). So error if
+                # we see it.
+                assert len(node.targets) == 1
+
+                target = node.targets[0]
+            else:
+                target = node.target
+
+            if isinstance(target, ast.Subscript):
+                if not isinstance(target.value, ast.Name):
+                    return None, None
+                name = target.value.id
+            elif isinstance(target, ast.Name):
+                name = target.id
+            else:
+                return None, None
+
+            if name not in relevant:
+                return None, None
+
+            key = None
+            if isinstance(target, ast.Subscript):
+                assert isinstance(target.slice, ast.Index)
+                assert isinstance(target.slice.value, ast.Str)
+                key = target.slice.value.s
+
+            return name, key
+
+        def assigned_values(node):
+            value = node.value
+            if isinstance(value, ast.List):
+                for v in value.elts:
+                    assert isinstance(v, ast.Str)
+                    yield v.s
+            else:
+                assert isinstance(value, ast.Str)
+                yield value.s
+
+        assignments = []
+
+        class Visitor(ast.NodeVisitor):
+            def helper(self, node):
+                name, key = assigned_variable(node)
+                if not name:
+                    return
+
+                for v in assigned_values(node):
+                    assignments.append((name, key, v))
+
+            def visit_Assign(self, node):
+                self.helper(node)
+
+            def visit_AugAssign(self, node):
+                self.helper(node)
+
+        for p in self.all_mozbuild_paths():
+            assignments[:] = []
+            full = os.path.join(self.config.topsrcdir, p)
+
+            with open(full, 'rb') as fh:
+                source = fh.read()
+
+            tree = ast.parse(source, full)
+            Visitor().visit(tree)
+
+            for name, key, value in assignments:
+                yield p, name, key, value
 
     def read_mozbuild(self, path, config, read_tiers=False, descend=True,
             metadata={}):
         """Read and process a mozbuild file, descending into children.
 
         This starts with a single mozbuild file, executes it, and descends into
         other referenced files per our traversal logic.
 
--- a/tools/docs/mach_commands.py
+++ b/tools/docs/mach_commands.py
@@ -41,18 +41,20 @@ class Documentation(MachCommandBase):
         # We don't care about GYP projects, so don't process them. This makes
         # scanning faster and may even prevent an exception.
         def remove_gyp_dirs(context):
             context['GYP_DIRS'][:] = []
 
         reader = BuildReader(self.config_environment,
             sandbox_post_eval_cb=remove_gyp_dirs)
 
-        for context in reader.walk_topsrcdir():
-            for dest_dir, source_dir in context['SPHINX_TREES'].items():
-                manager.add_tree(os.path.join(context.relsrcdir,
-                    source_dir), dest_dir)
+        for path, name, key, value in reader.find_sphinx_variables():
+            reldir = os.path.dirname(path)
 
-            for entry in context['SPHINX_PYTHON_PACKAGE_DIRS']:
-                manager.add_python_package_dir(os.path.join(context.relsrcdir,
-                    entry))
+            if name == 'SPHINX_TREES':
+                assert key
+                manager.add_tree(os.path.join(reldir, value),
+                        os.path.join(reldir, key))
+
+            if name == 'SPHINX_PYTHON_PACKAGE_DIRS':
+                manager.add_python_package_dir(os.path.join(reldir, value))
 
         return manager.generate_docs(format)