Bug 1296503 - Switch config.status to unicode literals. r=ted
author Mike Hommey <mh+mozilla@glandium.org>
Thu, 18 Aug 2016 18:27:39 +0900
changeset 310822 ef5d7142aed05c266b316383c771270ab023bfeb
parent 310821 2dd83ba657140b571e95e7f3768a3f3a0887237a
child 310823 95b9f4f55e5b05ce5ed4bec69e6bf42b2ec0e4f1
push id 80972
push user mh@glandium.org
push date Tue, 23 Aug 2016 22:33:36 +0000
treeherder mozilla-inbound@95b9f4f55e5b [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers ted
bugs 1296503
milestone 51.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1296503 - Switch config.status to unicode literals. r=ted

Ironically, the first thing we do with those unicode literals is convert them to byte strings because the build backends don't like them yet.
configure.py
python/mozbuild/mozbuild/util.py
--- a/configure.py
+++ b/configure.py
@@ -1,26 +1,28 @@
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 from __future__ import print_function, unicode_literals
 
 import codecs
-import json
 import os
 import subprocess
 import sys
-
-from collections import Iterable
+import textwrap
 
 
 base_dir = os.path.abspath(os.path.dirname(__file__))
 sys.path.insert(0, os.path.join(base_dir, 'python', 'mozbuild'))
 from mozbuild.configure import ConfigureSandbox
+from mozbuild.util import (
+    indented_repr,
+    encode,
+)
 
 
 def main(argv):
     config = {}
     sandbox = ConfigureSandbox(config, os.environ, argv)
     sandbox.run(os.path.join(os.path.dirname(__file__), 'moz.configure'))
 
     if sandbox._help:
@@ -55,64 +57,51 @@ def config_status(config):
     sanitized_config['mozconfig'] = config.get('MOZCONFIG')
 
     # Create config.status. Eventually, we'll want to just do the work it does
     # here, when we're able to skip configure tests/use cached results/not rely
     # on autoconf.
     print("Creating config.status", file=sys.stderr)
     encoding = 'mbcs' if sys.platform == 'win32' else 'utf-8'
     with codecs.open('config.status', 'w', encoding) as fh:
-        fh.write('#!%s\n' % config['PYTHON'])
-        fh.write('# coding=%s\n' % encoding)
-        # Because we're serializing as JSON but reading as python, the values
-        # for True, False and None are true, false and null, which don't exist.
-        # Define them.
-        fh.write('true, false, null = True, False, None\n')
+        fh.write(textwrap.dedent('''\
+            #!%(python)s
+            # coding=%(encoding)s
+            from __future__ import unicode_literals
+            from mozbuild.util import encode
+            encoding = '%(encoding)s'
+        ''') % {'python': config['PYTHON'], 'encoding': encoding})
+        # A lot of the build backend code is currently expecting byte
+        # strings and breaks in subtle ways with unicode strings. (bug 1296508)
         for k, v in sanitized_config.iteritems():
-            fh.write('%s = ' % k)
-            json.dump(v, fh, sort_keys=True, indent=4, ensure_ascii=False)
-            fh.write('\n')
+            fh.write('%s = encode(%s, encoding)\n' % (k, indented_repr(v)))
         fh.write("__all__ = ['topobjdir', 'topsrcdir', 'defines', "
                  "'non_global_defines', 'substs', 'mozconfig']")
 
         if config.get('MOZ_BUILD_APP') != 'js' or config.get('JS_STANDALONE'):
-            fh.write('''
-if __name__ == '__main__':
-    args = dict([(name, globals()[name]) for name in __all__])
-    from mozbuild.config_status import config_status
-    config_status(**args)
-''')
-
-    # Running config.status standalone uses byte literals for all the config,
-    # instead of the unicode literals we have in sanitized_config right now.
-    # Some values in sanitized_config also have more complex types, such as
-    # EnumString, which using when calling config_status would currently break
-    # the build, as well as making it inconsistent with re-running
-    # config.status. Fortunately, EnumString derives from unicode, so it's
-    # covered by converting unicode strings.
-    # Moreover, a lot of the build backend code is currently expecting byte
-    # strings and breaks in subtle ways with unicode strings.
-    def encode(v):
-        if isinstance(v, dict):
-            return {
-                encode(k): encode(val)
-                for k, val in v.iteritems()
-            }
-        if isinstance(v, str):
-            return v
-        if isinstance(v, unicode):
-            return v.encode(encoding)
-        if isinstance(v, Iterable):
-            return [encode(i) for i in v]
-        return v
+            fh.write(textwrap.dedent('''
+                if __name__ == '__main__':
+                    from mozbuild.config_status import config_status
+                    args = dict([(name, globals()[name]) for name in __all__])
+                    config_status(**args)
+            '''))
 
     # Other things than us are going to run this file, so we need to give it
     # executable permissions.
     os.chmod('config.status', 0o755)
     if config.get('MOZ_BUILD_APP') != 'js' or config.get('JS_STANDALONE'):
         os.environ[b'WRITE_MOZINFO'] = b'1'
         from mozbuild.config_status import config_status
-        return config_status(args=[], **encode(sanitized_config))
+
+        # Some values in sanitized_config also have more complex types, such as
+        # EnumString, which using when calling config_status would currently
+        # break the build, as well as making it inconsistent with re-running
+        # config.status. Fortunately, EnumString derives from unicode, so it's
+        # covered by converting unicode strings.
+
+        # A lot of the build backend code is currently expecting byte strings
+        # and breaks in subtle ways with unicode strings.
+        return config_status(args=[], **encode(sanitized_config, encoding))
     return 0
 
 
 if __name__ == '__main__':
     sys.exit(main(sys.argv))
--- a/python/mozbuild/mozbuild/util.py
+++ b/python/mozbuild/mozbuild/util.py
@@ -19,16 +19,17 @@ import os
 import re
 import stat
 import sys
 import time
 import types
 
 from collections import (
     defaultdict,
+    Iterable,
     OrderedDict,
 )
 from io import (
     StringIO,
     BytesIO,
 )
 
 
@@ -1239,8 +1240,24 @@ def indented_repr(o, indent=4):
                 for d in recurse_indented_repr(i, level + 1):
                     yield d
                 yield ',\n'
             yield one_indent * level
             yield ']'
         else:
             yield repr(o)
     return ''.join(recurse_indented_repr(o, 0))
+
+
+def encode(obj, encoding='utf-8'):
+    '''Recursively encode unicode strings with the given encoding.'''
+    if isinstance(obj, dict):
+        return {
+            encode(k, encoding): encode(v, encoding)
+            for k, v in obj.iteritems()
+        }
+    if isinstance(obj, bytes):
+        return obj
+    if isinstance(obj, unicode):
+        return obj.encode(encoding)
+    if isinstance(obj, Iterable):
+        return [encode(i, encoding) for i in obj]
+    return obj