Bug 1296503 - Switch config.status to unicode literals. r?ted draft
author Mike Hommey <mh+mozilla@glandium.org>
Thu, 18 Aug 2016 18:27:39 +0900
changeset 403122 702decef49d67294410e42fb4952b31f23692633
parent 403121 a96ad441517cc6502e6f6fa06814521cab3b48a8
child 528832 bc7cf25cb14fe97356004e64e71c67737f244850
push id 26832
push user bmo:mh+mozilla@glandium.org
push date Fri, 19 Aug 2016 05:18:13 +0000
reviewers ted
bugs 1296503
milestone 51.0a1
Bug 1296503 - Switch config.status to unicode literals. r?ted
configure.py
python/mozbuild/mozbuild/util.py
--- a/configure.py
+++ b/configure.py
@@ -1,24 +1,28 @@
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 from __future__ import print_function, unicode_literals
 
 import codecs
-import json
 import os
 import subprocess
 import sys
+import textwrap
 
 
 base_dir = os.path.abspath(os.path.dirname(__file__))
 sys.path.insert(0, os.path.join(base_dir, 'python', 'mozbuild'))
 from mozbuild.configure import ConfigureSandbox
+from mozbuild.util import (
+    indented_repr,
+    encode,
+)
 
 
 def main(argv):
     config = {}
     sandbox = ConfigureSandbox(config, os.environ, argv)
     sandbox.run(os.path.join(os.path.dirname(__file__), 'moz.configure'))
 
     if sandbox._help:
@@ -55,62 +59,49 @@ def config_status(config):
     # Create config.status. Eventually, we'll want to just do the work it does
     # here, when we're able to skip configure tests/use cached results/not rely
     # on autoconf.
     print("Creating config.status", file=sys.stderr)
     encoding = 'mbcs' if sys.platform == 'win32' else 'utf-8'
     with codecs.open('config.status', 'w', encoding) as fh:
         fh.write('#!%s\n' % config['PYTHON'])
         fh.write('# coding=%s\n' % encoding)
-        # Because we're serializing as JSON but reading as python, the values
-        # for True, False and None are true, false and null, which don't exist.
-        # Define them.
-        fh.write('true, false, null = True, False, None\n')
+        fh.write('from __future__ import unicode_literals\n')
         for k, v in sanitized_config.iteritems():
-            fh.write('%s = ' % k)
-            json.dump(v, fh, sort_keys=True, indent=4, ensure_ascii=False)
-            fh.write('\n')
+            fh.write('%s = %s\n' % (k, indented_repr(v)))
         fh.write("__all__ = ['topobjdir', 'topsrcdir', 'defines', "
                  "'non_global_defines', 'substs', 'mozconfig']")
 
         if config.get('MOZ_BUILD_APP') != 'js' or config.get('JS_STANDALONE'):
-            fh.write('''
-if __name__ == '__main__':
-    args = dict([(name, globals()[name]) for name in __all__])
-    from mozbuild.config_status import config_status
-    config_status(**args)
-''')
+            fh.write(textwrap.dedent('''
+                # A lot of the build backend code is currently expecting byte
+                # strings and breaks in subtle ways with unicode strings.
+                encoding = '%s'
 
-    # Running config.status standalone uses byte literals for all the config,
-    # instead of the unicode literals we have in sanitized_config right now.
-    # Some values in sanitized_config also have more complex types, such as
-    # EnumString, which using when calling config_status would currently break
-    # the build, as well as making it inconsistent with re-running
-    # config.status. Fortunately, EnumString derives from unicode, so it's
-    # covered by converting unicode strings.
-    # Moreover, a lot of the build backend code is currently expecting byte
-    # strings and breaks in subtle ways with unicode strings.
-    def encode(v):
-        if isinstance(v, dict):
-            return {
-                encode(k): encode(val)
-                for k, val in v.iteritems()
-            }
-        if isinstance(v, str):
-            return v
-        if isinstance(v, unicode):
-            return v.encode(encoding)
-        if hasattr(v, '__iter__'):
-            return [encode(i) for i in v]
-        return v
+                if __name__ == '__main__':
+                    args = dict([(name, encode(globals()[name], encoding))
+                                for name in __all__])
+                    from mozbuild.config_status import config_status
+                    config_status(**args)
+            ''' % encoding))
+
 
     # Other things than us are going to run this file, so we need to give it
     # executable permissions.
     os.chmod('config.status', 0o755)
     if config.get('MOZ_BUILD_APP') != 'js' or config.get('JS_STANDALONE'):
         os.environ[b'WRITE_MOZINFO'] = b'1'
         from mozbuild.config_status import config_status
-        return config_status(args=[], **encode(sanitized_config))
+
+        # Some values in sanitized_config also have more complex types, such as
+        # EnumString. Passing them as-is to config_status would currently break
+        # the build, and would be inconsistent with re-running config.status.
+        # Fortunately, EnumString derives from unicode, so it's covered by
+        # converting unicode strings.
+
+        # A lot of the build backend code is currently expecting byte strings
+        # and breaks in subtle ways with unicode strings.
+        return config_status(args=[], **encode(sanitized_config, encoding))
     return 0
 
 
 if __name__ == '__main__':
     sys.exit(main(sys.argv))
--- a/python/mozbuild/mozbuild/util.py
+++ b/python/mozbuild/mozbuild/util.py
@@ -1239,8 +1239,24 @@ def indented_repr(o, indent=4):
                 for d in recurse_indented_repr(i, level + 1):
                     yield d
                 yield ',\n'
             yield one_indent * level
             yield ']'
         else:
             yield repr(o)
     return ''.join(recurse_indented_repr(o, 0))
+
+
+def encode(obj, encoding='utf-8'):
+    '''Recursively encode unicode strings with the given encoding.'''
+    if isinstance(obj, dict):
+        return {
+            encode(k, encoding): encode(v, encoding)  # keys and values alike
+            for k, v in obj.iteritems()
+        }
+    if isinstance(obj, str):  # Python 2 byte string: returned unchanged
+        return obj
+    if isinstance(obj, unicode):  # also matches unicode subclasses
+        return obj.encode(encoding)
+    if hasattr(obj, '__iter__'):  # anything else with __iter__ becomes a list
+        return [encode(i, encoding) for i in obj]
+    return obj