Bug 458755 - Automated documentation of string classes on MDC using static analysis, r=taras
author Benjamin Smedberg <benjamin@smedbergs.us>
Thu, 09 Oct 2008 12:34:33 -0400
changeset 20209 068446ee54932617558b464072e00102c00f5686
parent 20208 765855ec09b08c76075d130f8c72c5655aa42b9a
child 20210 e96a238b497b2140b48cbeabf4ddbe140b51f0bb
push id unknown
push user unknown
push date unknown
reviewers taras
bugs 458755
milestone 1.9.1b2pre
Bug 458755 - Automated documentation of string classes on MDC using static analysis, r=taras
xpcom/analysis/MDC-upload.py
xpcom/analysis/Makefile.in
xpcom/analysis/deki.py
xpcom/analysis/fix-srcrefs.py
xpcom/analysis/type-printer.cpp
xpcom/analysis/type-printer.js
new file mode 100644
--- /dev/null
+++ b/xpcom/analysis/MDC-upload.py
@@ -0,0 +1,18 @@
+#!/usr/bin/env python
+
+"""
+Upload a file to MDC
+
+Usage: python MDC-upload.py <file> <MDC-path>
+Please set MDC_USER and MDC_PASSWORD in the environment
+"""
+
+import os, sys, urllib, urllib2, deki
+
+wikiuser = os.environ['MDC_USER']
+wikipw = os.environ['MDC_PASSWORD']
+
+(file, wikipath) = sys.argv[1:]
+
+wiki = deki.Deki("http://developer.mozilla.org/@api/deki/", wikiuser, wikipw)
+wiki.create_page(wikipath, open(file).read(), overwrite=True)
new file mode 100644
--- /dev/null
+++ b/xpcom/analysis/Makefile.in
@@ -0,0 +1,46 @@
+DEPTH = ../..
+topsrcdir = @top_srcdir@
+srcdir = @srcdir@
+VPATH = @srcdir@
+
+include $(DEPTH)/config/autoconf.mk
+
+MOZILLA_INTERNAL_API = 1
+
+REQUIRES = \
+  string \
+  xpcom \
+  $(NULL)
+
+include $(topsrcdir)/config/rules.mk
+
+DUMP_CLASSES = \
+  nsAString_internal \
+  nsACString_internal \
+  nsString \
+  nsCString \
+  nsAutoString \
+  nsCAutoString \
+  nsXPIDLString \
+  nsXPIDLCString \
+  $(NULL)
+
+SPACE = $(NULL) $(NULL)
+COMMA = ,
+
+HGREV = $(shell hg -R $(topsrcdir) id -i)
+
+classapi: DEHYDRA_MODULES = $(srcdir)/type-printer.js
+classapi: TREEHYDRA_MODULES =
+classapi: DEHYDRA_ARGS += --dump-types=$(subst $(SPACE),$(COMMA),$(strip $(DUMP_CLASSES))) --rev=$(HGREV)
+classapi:
+	$(CCC) $(OUTOPTION)/dev/null -c $(COMPILE_CXXFLAGS) $(srcdir)/type-printer.cpp
+	$(EXIT_ON_ERROR) \
+	for class in $(DUMP_CLASSES); do \
+	  $(PYTHON) $(srcdir)/fix-srcrefs.py $(topsrcdir) < $${class}.html > $${class}-fixed.html; \
+	done
+
+upload_classapi:
+	for class in $(DUMP_CLASSES); do \
+	  $(PYTHON) $(srcdir)/MDC-upload.py $${class}-fixed.html en/$${class}; \
+	done
new file mode 100644
--- /dev/null
+++ b/xpcom/analysis/deki.py
@@ -0,0 +1,307 @@
+""" deki.py - Access the wiki pages on a MindTouch Deki server via the API.
+
+Here's what this code can do:
+
+  wiki = deki.Deki("http://developer.mozilla.org/@api/deki/", username, password)
+  page = wiki.get_page("Sheep")
+  print page.title
+  print page.doc.toxml()
+
+  page.title = "Bananas"
+  page.save()
+
+There are also some additional methods:
+  wiki.create_page(path, content, title=, overwrite=)
+  wiki.move_page(old, new)
+  wiki.get_subpages(page)
+
+This module does not try to mimic the MindTouch "Plug" API.  It's meant to be
+higher-level than that.
+"""
+
+import sys
+import urllib2, cookielib
+import xml.dom.minidom as dom
+from urllib import quote as _urllib_quote
+from urllib import urlencode as _urlencode
+import urlparse
+from datetime import datetime
+import re
+
+__all__ = ['Deki']
+
+
+# === Utils
+
+def _check(fact):
+    if not fact:
+        raise AssertionError('check failed')
+
+def _urlquote(s, *args):
+    return _urllib_quote(s.encode('utf-8'), *args)
+
+def _make_url(*dirs, **params):
+    """ dirs must already be url-encoded, params must not """
+    url = '/'.join(dirs)
+    if params:
+        url += '?' + _urlencode(params)
+    return url
+
+# === Dream framework client code
+
+# This handler causes python to "always be logged in" when it's talking to the
+# server.  If you're just accessing public pages, it generates more requests
+# than are strictly needed, but this is the behavior you want for a bot.
+#
+# The users/authenticate request is sent twice: once without any basic auth and
+# once with.  Dumb.  Feel free to fix.
+#
+class _LoginHandler(urllib2.HTTPCookieProcessor):
+    def __init__(self, server):
+        policy = cookielib.DefaultCookiePolicy(rfc2965=True)
+        cookiejar = cookielib.CookieJar(policy)
+        urllib2.HTTPCookieProcessor.__init__(self, cookiejar)
+        self.server = server
+
+    def http_request(self, req):
+        #print "DEBUG- Requesting " + req.get_full_url()
+        s = self.server
+        req = urllib2.HTTPCookieProcessor.http_request(self, req)
+        if ('Cookie' not in req.unredirected_hdrs
+              and req.get_full_url() != s.base + 'users/authenticate'):
+            s.login()
+            # Retry - should have a new cookie.
+            req = urllib2.HTTPCookieProcessor.http_request(self, req)
+            _check('Cookie' in req.unredirected_hdrs)
+        return req
+
+class DreamClient:
+    def __init__(self, base, user, password):
+        """ 
+        base - The base URI of the Deki API, with trailing slash.
+               Typically, 'http://wiki.example.org/@api/deki/'.
+        user, password - Your Deki login information.
+        """
+        self.base = base
+        pm = urllib2.HTTPPasswordMgrWithDefaultRealm()
+        pm.add_password(None, self.base, user, password)
+        ah = urllib2.HTTPBasicAuthHandler(pm)
+        lh = _LoginHandler(self)
+        self._opener = urllib2.build_opener(ah, lh)
+
+    def login(self):
+        response = self._opener.open(self.base + 'users/authenticate')
+        response.close()
+
+    def open(self, url):
+        return self._opener.open(self.base + url)
+
+    def post(self, url, data, type):
+        #print "DEBUG- posting to:", self.base + url
+        req = urllib2.Request(self.base + url, data, {'Content-Type': type})
+        resp = self._opener.open(req)
+        try:
+            ct = resp.headers.get('Content-Type', '(none)')
+            if '/xml' in ct or '+xml' in ct:
+                return dom.parse(resp)
+            else:
+                #print "DEBUG- Content-Type:", ct
+                crud = resp.read()
+                #print 'DEBUG- crud:\n---\n%s\n---' % re.sub(r'(?m)^', '    ', crud)
+                return None
+        finally:
+            resp.close()
+
+    def get_xml(self, url):
+        resp = self.open(url)
+        try:
+            return dom.parse(resp)
+        finally:
+            resp.close()
+
+
+# === DOM
+
+def _text_of(node):
+    """ Return the text beneath node: text nodes joined in document order. """
+    if node.nodeType == node.TEXT_NODE:
+        return node.nodeValue
+    if node.nodeType != node.ELEMENT_NODE:
+        return u''
+    return u''.join([_text_of(child) for child in node.childNodes])
+
+def _the_element_by_name(doc, tagName):
+    elts = doc.getElementsByTagName(tagName)
+    if len(elts) != 1:
+        raise ValueError("Expected exactly one <%s> tag, got %d." % (tagName, len(elts)))
+    return elts[0]
+
+def _first_element(node):
+    """ Return the first child of node that is an element, or None. """
+    n = node.firstChild
+    while n is not None and n.nodeType != n.ELEMENT_NODE:
+        # Step along the child list; stepping from `node` itself never advances.
+        n = n.nextSibling
+    return n
+
+def _find_elements(node, path):
+    if u'/' in path:
+        [first, rest] = path.split(u'/', 1)
+        for child in _find_elements(node, first):
+            for desc in _find_elements(child, rest):
+                yield desc
+    else:
+        for n in node.childNodes:
+            if n.nodeType == node.ELEMENT_NODE and n.nodeName == path:
+                yield n
+
+
+# === Deki
+
+def _format_page_id(id):
+    if isinstance(id, int):
+        return str(id)
+    elif id is Deki.HOME:
+        return 'home'
+    elif isinstance(id, basestring):
+        # Double-encoded, per the Deki API reference.
+        return '=' + _urlquote(_urlquote(id, ''))
+
+class Deki(DreamClient):
+    HOME = object()
+
+    def get_page(self, page_id):
+        """ Get the content of a page from the wiki.
+
+        The page_id argument must be one of:
+          an int - The page id (an arbitrary number assigned by Deki)
+          a str - The page name (not the title, the full path that shows up in the URL)
+          Deki.HOME - Refers to the main page of the wiki.
+
+        Returns a Page object.
+        """
+        p = Page(self)
+        p._load(page_id)
+        return p
+
+    def create_page(self, path, content, title=None, overwrite=False):
+        """ Create a new wiki page.
+
+        Parameters:
+          path - str - The page id.
+          content - str - The XML content to put in the new page.
+            The document element must be a <body>.
+          title - str - The page title.  Keyword argument only.
+            Defaults to the last path-segment of path.
+          overwrite - bool - Whether to overwrite an existing page. If false,
+            and the page already exists, the method will throw an error.
+        """
+        if title is None:
+            title = path.split('/')[-1]
+        doc = dom.parseString(content)
+        _check(doc.documentElement.tagName == 'body')
+        p = Page(self)
+        p._create(path, title, doc, overwrite)
+
+    def get_subpages(self, page_id):
+        """ Return the ids of all subpages of the given page. """
+        doc = self.get_xml(_make_url("pages", _format_page_id(page_id),
+                                     "files,subpages"))
+        for elt in _find_elements(doc, u'page/subpages/page.subpage/path'):
+            yield _text_of(elt)
+
+    def move_page(self, page_id, new_title, redirects=True):
+        """ Move an existing page to a new location.
+
+        A page cannot be moved to a destination that already exists, is a
+        descendant, or has a protected title (ex.  Special:xxx, User:,
+        Template:).
+
+        When a page is moved, subpages under the specified page are also moved.
+        For each moved page, the system automatically creates an alias page
+        that redirects from the old to the new destination.
+        """
+        self.post(_make_url("pages", _format_page_id(page_id), "move",
+                            to=new_title,
+                            redirects=redirects and "1" or "0"),
+                  "", "text/plain")
+
+class Page:
+    """ A Deki wiki page.
+
+    To obtain a page, call wiki.get_page(id).
+    Attributes:
+        title : unicode - The page title.
+        doc : Document - The content of the page as a DOM Document.
+          The root element of this document is a <body>.
+        path : unicode - The path.  Use this to detect redirects, as otherwise
+          page.save() will overwrite the redirect with a copy of the content!
+        deki : Deki - The Deki object from which the page was loaded.
+        page_id : str/int/Deki.HOME - The page id used to load the page.
+        load_time : datetime - The time the page was loaded,
+          according to the clock on the client machine.
+    Methods:
+        save() - Save the modified document back to the server.
+          Only the page.title and the contents of page.doc are saved.
+    """
+
+    def __init__(self, deki):
+        self.deki = deki
+
+    def _create(self, path, title, doc, overwrite):
+        self.title = title
+        self.doc = doc
+        self.page_id = path
+        if overwrite:
+            self.load_time = datetime(2500, 1, 1)
+        else:
+            self.load_time = datetime(1900, 1, 1)
+        self.path = path
+        self.save()
+
+    def _load(self, page_id):
+        """ page_id - See the docstring of Deki.get_page for accepted values. """
+        load_time = datetime.utcnow()
+
+        # Getting the title is a whole separate query!
+        url = 'pages/%s/info' % _format_page_id(page_id)
+        doc = self.deki.get_xml(url)
+        title = _text_of(_the_element_by_name(doc, 'title'))
+        path = _text_of(_the_element_by_name(doc, 'path'))
+
+        # If you prefer to sling regexes, you can request format=raw instead.
+        # The result is an XML document with one big fat text node in the body.
+        url = _make_url('pages', _format_page_id(page_id), 'contents',
+                        format='xhtml', mode='edit')
+        doc = self.deki.get_xml(url)
+
+        content = doc.documentElement
+        _check(content.tagName == u'content')
+        body = _first_element(content)
+        _check(body is not None)
+        _check(body.tagName == u'body')
+
+        doc.removeChild(content)
+        doc.appendChild(body)
+
+        self.page_id = page_id
+        self.load_time = load_time
+        self.title = title
+        self.path = path
+        self.doc = doc
+
+    def save(self):
+        p = {'edittime': _urlquote(self.load_time.strftime('%Y%m%d%H%M%S')),
+             'abort': 'modified'}
+
+        if self.title is not None:
+            p['title'] = _urlquote(self.title)
+
+        url = _make_url('pages', _format_page_id(self.page_id), 'contents', **p)
+
+        body = self.doc.documentElement
+        bodyInnerXML = ''.join(n.toxml('utf-8') for n in body.childNodes)
+
+        reply = self.deki.post(url, bodyInnerXML, 'text/plain; charset=utf-8')
+        _check(reply.documentElement.nodeName == u'edit')
+        _check(reply.documentElement.getAttribute(u'status') == u'success')
new file mode 100644
--- /dev/null
+++ b/xpcom/analysis/fix-srcrefs.py
@@ -0,0 +1,27 @@
+#!/usr/bin/env python
+
+"""
+Fix references to source files of the form [LOCpath]
+so that they are relative to a given source directory.
+"""
+
+import os, sys, re
+
+(srcdir, ) = sys.argv[1:]
+srcdir = os.path.realpath(srcdir)
+
+f = re.compile(r'\[LOC(.*?)\]')
+
+def replacer(m):
+    """ Return the path captured from a [LOCpath] marker, relative to srcdir. """
+    path = os.path.realpath(m.group(1))
+    # The trailing separator rejects sibling dirs such as "<srcdir>-old".
+    prefix = os.path.join(srcdir, '')
+    if not path.startswith(prefix):
+        raise Exception("File %s doesn't start with %s" % (path, srcdir))
+    return path[len(prefix):]
+
+for line in sys.stdin:
+    line = f.sub(replacer, line)
+    sys.stdout.write(line)
+
new file mode 100644
--- /dev/null
+++ b/xpcom/analysis/type-printer.cpp
@@ -0,0 +1,3 @@
+#include "nsString.h"
+
+/* do nothing else */
new file mode 100644
--- /dev/null
+++ b/xpcom/analysis/type-printer.js
@@ -0,0 +1,359 @@
+let dumpTypes = options['dump-types'].split(',');
+function interestingType(name) dumpTypes.some(function(n) n == name);
+
+let typelist = {};
+
+function addSubtype(t, subt)
+{
+  if (subt.typedef === undefined &&
+      subt.kind === undefined)
+    throw Error("Unexpected subtype: not class or typedef: " + subt);
+
+  if (t.subtypes === undefined)
+    t.subtypes = [];
+  
+  t.subtypes.push(subt);
+}
+
+function process_type(t)
+{
+  let name = t.name;
+  
+  if (interestingType(t.name))
+    typelist[t.name] = t;
+  
+  if (t.memberOf)
+    addSubtype(t.memberOf, t);
+}
+
+function process_decl(d)
+{
+  if (d.typedef !== undefined && d.memberOf)
+    addSubtype(d.memberOf, d);
+}
+
+function publicBases(t)
+{
+  yield t;
+
+  for each (let base in t.bases)
+    if (base.access == "public")
+      for each (let gbase in publicBases(base.type))
+        yield gbase;
+}
+
+function publicMembers(t)
+{
+  for each (let base in publicBases(t)) {
+    for each (let member in base.members) {
+      if (member.access === undefined)
+        throw Error("Harumph: member without access? " + member);
+
+      if (member.access != "public")
+        continue;
+      
+      yield member;
+    }
+  }
+}
+
+function signaturesMatch(m1, m2)
+{
+  let p1 = m1.type.parameters;
+  let p2 = m2.type.parameters;
+  
+  if (p1.length != p2.length)
+    return false;
+  
+  for (let i = 0; i < p1.length; ++i)
+    if (p1[i] !== p2[i])
+      return false;
+  
+  return true;
+}
+
+/**
+ * Get the short name of a decl name. E.g. turn
+ * "MyNamespace::MyClass::Method(int j) const" into
+ * "Method"
+ */
+function getShortName(decl)
+{
+  let name = decl.name;
+  let lp = name.lastIndexOf('(');
+  if (lp != -1)
+    name = name.slice(0, lp);
+  
+  lp = name.lastIndexOf('::');
+  if (lp != -1)
+    name = name.slice(lp + 2);
+
+  return name;
+}
+
+/**
+ * Remove functions in a base class which were overridden in a derived
+ * class.
+ *
+ * Although really, we should perhaps do this the other way around, or even
+ * group the two together, but that can come later.
+ */ 
+function removeOverrides(members)
+{
+  let overrideMap = {};
+  for (let i = members.length - 1; i >= 0; --i) {
+    let m = members[i];
+    if (!m.isFunction)
+      continue;
+
+    let shortName = getShortName(m);
+
+    let overrides = overrideMap[shortName];
+    if (overrides === undefined) {
+      overrideMap[shortName] = [m];
+      continue;
+    }
+
+    let found = false;
+    for each (let override in overrides) {
+      if (signaturesMatch(override, m)) {
+        // remove members[i], it was overridden
+        members.splice(i, 1);
+        found = true;
+      }
+    }
+    if (found)
+      continue;
+         
+    overrides.push(m);
+  }
+}
+
+/**
+ * Generates the starting position of lines within a file.
+ */
+function getLineLocations(fdata)
+{
+  // The first line always starts at offset 0.
+  yield 0;
+
+  // Each later line starts one character past a newline.  indexOf returns
+  // -1 once no newline remains, which the "+ 1" turns into the 0 sentinel.
+  let pos = 0;
+  for (;;) {
+    pos = fdata.indexOf('\n', pos) + 1;
+    if (pos == 0)
+      break;
+
+    yield pos;
+  }
+}
+    
+/**
+ * Find and return the doxygen comment immediately prior to the location
+ * object that was passed in.
+ * 
+ * @todo: parse doccomment data such as @param, @returns
+ * @todo: parse comments for markup
+ */
+function getDocComment(loc)
+{
+  let fdata = read_file(loc.file);
+  let linemap = [l for (l in getLineLocations(fdata))];
+  
+  if (loc.line >= linemap.length) {
+    warning("Location larger than actual header: " + loc);
+    return <></>;
+  }
+  
+  let endpos = linemap[loc.line - 1] + loc.column - 1;
+  let semipos = fdata.lastIndexOf(';', endpos);
+  let bracepos = fdata.lastIndexOf('}', endpos);
+  let searchslice = fdata.slice(Math.max(semipos, bracepos) + 1, endpos);
+
+  let m = searchslice.match(/\/\*\*[\s\S]*?\*\//gm);
+  if (m === null)
+    return <></>;
+  
+  let dc = m[m.length - 1].slice(3, -2);
+  dc = dc.replace(/^\s*(\*+[ \t]*)?/gm, "");
+
+  return <pre class="doccomment">{dc}</pre>;
+}
+
+function typeName(t)
+{
+  if (t.name !== undefined)
+    return t.name;
+
+  if (t.isPointer)
+    return "%s%s*".format(t.isConst ? "const " : "", typeName(t.type));
+  
+  if (t.isReference)
+    return "%s%s&".format(t.isConst ? "const " : "", typeName(t.type));
+
+  return t.toString();
+}
+
+function publicBaseList(t)
+{
+  let l = <ul/>;
+  for each (let b in t.bases) {
+    if (b.access == 'public')
+      l.* += <li><a href={"/en/%s".format(b.type.name)}>{b.type.name}</a></li>;
+  }
+
+  if (l.*.length() == 0)
+    return <></>;
+  
+  return <>
+    <h2>Base Classes</h2>
+    {l}
+  </>;
+}
+
+/**
+ * Get a source-link for a given location.
+ */
+function getLocLink(loc, text)
+{
+  return <a class="loc"
+            href={"http://hg.mozilla.org/mozilla-central/file/%s/[LOC%s]#l%i".format(options.rev, loc.file, loc.line)}>{text}</a>;
+}
+
+function dumpType(t)
+{
+  let methodOverview = <tbody />;
+  let methodList = <div/>;
+  let memberList = <></>;
+
+  let shortNameMap = {};
+
+  let members = [m for (m in publicMembers(t))];
+  
+  removeOverrides(members);
+
+  for each (let m in members) {
+    let qname = m.memberOf.name + '::';
+
+    // we don't inherit constructors from base classes
+    if (m.isConstructor && m.memberOf !== t)
+      continue;
+    
+    if (m.name.indexOf(qname) != 0)
+      throw Error("Member name not qualified?");
+    
+    let name = m.name.slice(qname.length);
+    
+    if (name.indexOf('~') == 0)
+      continue;
+
+    if (m.isFunction) {
+      let innerList;
+
+      let shortName = getShortName(m);
+      if (m.isConstructor)
+        shortName = 'Constructors';
+
+      if (shortNameMap.hasOwnProperty(shortName)) {
+        innerList = shortNameMap[shortName];
+      }
+      else {
+        let overview = 
+          <tr><td>
+            <a href={'#%s'.format(escape(shortName))}>{shortName}</a>
+          </td></tr>;
+
+        if (m.isConstructor)
+          methodOverview.insertChildAfter(null, overview);
+        else
+          methodOverview.appendChild(overview);
+        
+        let shortMarkup =
+          <div>
+            <h3 id={shortName}>{shortName}</h3>
+            <dl/>
+          </div>;
+
+        
+        if (m.isConstructor)
+          methodList.insertChildAfter(null, shortMarkup);
+        else
+          methodList.appendChild(shortMarkup);
+
+        innerList = shortMarkup.dl;
+        shortNameMap[shortName] = innerList;
+      }
+      
+      let parameters = <ul/>;
+      for each (p in m.parameters) {
+        let name = p.name;
+        if (name == 'this')
+          continue;
+        
+        if (/^D_\d+$/.test(name))
+          name = '<anonymous>';
+        
+        parameters.* += <li>{typeName(p.type)} {name}</li>;
+      }
+
+      innerList.* +=
+        <>
+          <dt id={name} class="methodName">
+            <code>{typeName(m.type.type)} {name}</code> - {getLocLink(m.loc, "source")}
+          </dt>
+          <dd>
+            {getDocComment(m.loc)}
+            {parameters.*.length() > 0 ?
+             <>
+               <h4>Parameters</h4>
+               {parameters}
+             </> : <></>}
+          </dd>
+        </>;
+    }
+    else {
+      memberList += <li class="member">{name}</li>;
+    }
+  }
+
+  let r =
+    <body>
+      <p>{getLocLink(t.loc, "Class Declaration")}</p>
+  
+      {getDocComment(t.loc)}
+  
+      {methodOverview.*.length() > 0 ?
+         <>
+           <h2>Method Overview</h2>
+           <table class="standard-table">{methodOverview}</table>
+         </> :
+         ""
+      }
+
+      {publicBaseList(t)}
+  
+      <h2>Data Members</h2>
+
+      {memberList.*.length() > 0 ?
+         memberList :
+         <p><em>No public members.</em></p>
+      }
+
+      <h2>Methods</h2>
+  
+      {methodList.*.length() > 0 ?
+         methodList :
+         <p><em>No public methods.</em></p>
+      }
+  
+    </body>;
+
+  write_file(t.name + ".html", r.toXMLString());
+}
+
+function input_end()
+{
+  for (let p in typelist)
+    dumpType(typelist[p]);
+}