Bug 1139066 part 2: Add support to download configs from git remotes.
author Wander Lairson Costa <wcosta@mozilla.com>
Wed, 11 Mar 2015 13:40:52 -0300 (2015-03-11)
changeset 3742 14efdd6647b528075dc7cf8ea9366223238da47d
parent 3741 6b7d8d086e8a54c9be63de8e5922c6bfbf8e3a92
child 3743 8dc154c5a136771ba3c1b2f4ddc2779399740161
child 3744 27b7e35e1995ac70c3853b4ac88fd81792923d58
push id 2944
push user sfink@mozilla.com
push date Wed, 11 Mar 2015 17:12:54 +0000 (2015-03-11)
bugs 1139066
Bug 1139066 part 2: Add support to download configs from git remotes. Currently, we are able to download configs for phones only from mercurial remotes. This is awkward because it prevents us from testing patches in our private github repositories. We now detect if the remote is a git or mercurial repository. Bear in mind that git does not support downloads of individual files, so each git provider has its own method for doing that. We currently support git.mozilla.org and github.com.
external_tools/__init__.py
external_tools/detect_repo.py
mozharness/mozilla/building/buildb2gbase.py
new file mode 100644
new file mode 100644
--- /dev/null
+++ b/external_tools/detect_repo.py
@@ -0,0 +1,52 @@
+#!/usr/bin/env python
+# Stolen from taskcluster-vcs
+# https://github.com/taskcluster/taskcluster-vcs/blob/master/src/vcs/detect_remote.js
+
+from urllib2 import Request, urlopen
+from urlparse import urlsplit, urlunsplit
+from os.path import exists, join
+
+def first(seq):
+    return next(iter(filter(lambda x: x, seq)), '')
+
+def all_first(*sequences):
+    return map(lambda x: first(x), sequences)
+
+# http://codereview.stackexchange.com/questions/13027/joining-url-path-components-intelligently
+# I wonder why this is not a builtin feature in Python
+def urljoin(*parts):
+    schemes, netlocs, paths, queries, fragments = zip(*(urlsplit(part) for part in parts))
+    scheme, netloc, query, fragment = all_first(schemes, netlocs, queries, fragments)
+    path = '/'.join(p.strip('/') for p in paths if p)
+    return urlunsplit((scheme, netloc, path, query, fragment))
+
+def _detect_remote(url, content):
+    try:
+        response = urlopen(url)
+    except Exception:
+        return False
+
+    if response.getcode() != 200:
+        return False
+
+    content_type = response.headers.get('content-type', '')
+    return True if content in content_type else False
+
+def detect_git(url):
+    location = urljoin(url, '/info/refs?service=git-upload-pack')
+    req = Request(location, headers={'User-Agent':'git/2.0.1'})
+    return _detect_remote(req, 'x-git')
+
+def detect_hg(url):
+    location = urljoin(url, '?cmd=lookup&key=0')
+    return _detect_remote(location, 'mercurial')
+
+def detect_local(url):
+    if exists(join(url, '.git')):
+        return 'git'
+
+    if exists(join(url, '.hg')):
+        return 'hg'
+
+    return ''
+
--- a/mozharness/mozilla/building/buildb2gbase.py
+++ b/mozharness/mozilla/building/buildb2gbase.py
@@ -9,16 +9,18 @@
 provides a base class for b2g builds
 
 """
 import os
 import functools
 import time
 import random
 import urlparse
+import os.path
+from external_tools.detect_repo import detect_git, detect_hg, detect_local
 
 try:
     import simplejson as json
     assert json
 except ImportError:
     import json
 
 from mozharness.base.errors import MakefileErrorList
@@ -160,52 +162,67 @@ class B2GBuildBaseScript(BuildbotMixin, 
             return self.config['repo']
 
     def query_revision(self):
         if 'revision' in self.buildbot_properties:
             revision = self.buildbot_properties['revision']
         elif self.buildbot_config and 'sourcestamp' in self.buildbot_config:
             revision = self.buildbot_config['sourcestamp']['revision']
         else:
+            dirs = self.query_abs_dirs()
+            repo = dirs['gecko_src']
+            repo_type = detect_local(repo)
             # Look at what we have checked out
-            dirs = self.query_abs_dirs()
-            hg = self.query_exe('hg', return_type='list')
-            revision = self.get_output_from_command(
-                hg + ['parent', '--template', '{node|short}'], cwd=dirs['gecko_src']
-            )
-
+            if repo_type == 'hg':
+                hg = self.query_exe('hg', return_type='list')
+                revision = self.get_output_from_command(
+                    hg + ['parent', '--template', '{node|short}'], cwd=repo
+                )
+            elif repo_type == 'git':
+                git = self.query_exe('git', return_type='list')
+                revision = self.get_output_from_command(
+                    git + ['rev-parse', 'HEAD'], cwd=repo
+                )
+            else:
+                return None
         return revision[0:12] if revision else None
 
     def query_gecko_config_path(self):
         conf_file = self.config.get('gecko_config')
         if conf_file is None:
             conf_file = os.path.join(
                 'b2g', 'config',
                 self.config.get('b2g_config_dir', self.config['target']),
                 'config.json'
             )
         return conf_file
 
     def query_remote_gecko_config(self):
         repo = self.query_repo()
-        # TODO: Hardcoding this sucks
-        if 'hg.mozilla.org' in repo:
+        if os.path.exists(repo):
+            config_path = self.query_gecko_config_path()
+            config_path = "{repo}/{config_path}".format(repo=repo, config_path=config_path)
+            return json.load(open(config_path, "r"))
+        elif detect_hg(repo):
             rev = self.query_revision()
             if rev is None:
                 rev = 'default'
 
             config_path = self.query_gecko_config_path()
             # Handle local files vs. in-repo files
             url = self.query_hgweb_url(repo, rev, config_path)
             return self.retry(self.load_json_from_url, args=(url,))
-        else:
-            # assume it is a local path
+        elif detect_git(repo):
+            rev = self.query_revision()
+            if rev is None:
+                rev = 'HEAD'
+
             config_path = self.query_gecko_config_path()
-            config_path = "{repo}/{config_path}".format(repo=repo, config_path=config_path)
-            return json.load(open(config_path, "r"))
+            url = self.query_gitweb_url(repo, rev, config_path)
+            return self.retry(self.load_json_from_url, args=(url,))
 
     def load_gecko_config(self):
         if self.gecko_config:
             return self.gecko_config
 
         gecko_config = self._load_gecko_config()
 
         # Set up mock immediately so any later run_command_m doesn't end up
@@ -268,33 +285,47 @@ class B2GBuildBaseScript(BuildbotMixin, 
                 filename=filename)
         else:
             url = "{baseurl}/rev/{rev}".format(
                 baseurl=repo,
                 rev=rev)
         return url
 
     def query_gitweb_url(self, repo, rev, filename=None):
-        bits = urlparse.urlparse(repo)
-        repo = bits.path.lstrip('/')
-        if filename:
-            url = "{scheme}://{host}/?p={repo};a=blob;f={filename};h={rev}".format(
-                scheme=bits.scheme,
-                host=bits.netloc,
-                repo=repo,
-                filename=filename,
-                rev=rev)
+        # Git does not support raw files download, so each git
+        # provider has its own way to make that possible
+        if 'github.com' in repo:
+            if filename:
+                url = '{repo}/raw/{rev}/{filename}'.format(
+                        repo=repo,
+                        rev=rev,
+                        filename=filename)
+            else:
+                url = '{repo}/raw/{rev}'.format(
+                        repo=repo,
+                        rev=rev)
         else:
-            url = "{scheme}://{host}/?p={repo};a=tree;h={rev}".format(
-                scheme=bits.scheme,
-                host=bits.netloc,
-                repo=repo,
-                rev=rev)
+            bits = urlparse.urlparse(repo)
+            repo = bits.path.lstrip('/')
+            if filename:
+                url = "{scheme}://{host}/?p={repo};a=blob_plain;f={filename};hb={rev}".format(
+                    scheme=bits.scheme,
+                    host=bits.netloc,
+                    repo=repo,
+                    filename=filename,
+                    rev=rev)
+            else:
+                url = "{scheme}://{host}/?p={repo};a=tree;h={rev}".format(
+                    scheme=bits.scheme,
+                    host=bits.netloc,
+                    repo=repo,
+                    rev=rev)
         return url
 
+
     # Actions {{{2
     def checkout_tools(self):
         dirs = self.query_abs_dirs()
 
         # We need hg.m.o/build/tools checked out
         self.info("Checking out tools")
         repos = [{
             'repo': self.config['tools_repo'],