servo: Merge #12781 - Improve tidy's license validation logic (from UK992:tidycheck-rebased); r=Wafflespeanut
author UK992 <urbankrajnc92@gmail.com>
Sun, 14 Aug 2016 09:17:34 -0500
changeset 339493 52b556a50b68405d0aef78c35039d23bffe35b42
parent 339492 94f43f3cbfed1b54c82daefbff718dfc48052735
child 339494 52d83fdbed019a820f1be570f2b0fda3aab53817
push id 31307
push user gszorc@mozilla.com
push date Sat, 04 Feb 2017 00:59:06 +0000
treeherder mozilla-central@94079d43835f [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers Wafflespeanut
servo: Merge #12781 - Improve tidy's license validation logic (from UK992:tidycheck-rebased); r=Wafflespeanut

Rebased and fixed https://github.com/servo/servo/pull/10721, which is inactive for months.
Fixes https://github.com/servo/servo/issues/10716

r? @larsbergstrom or @edunham

Source-Repo: https://github.com/servo/servo
Source-Revision: 8419f96dc0cb79c83922b62166390f6688f2aae0
servo/python/requirements.txt
servo/python/tidy/HISTORY.rst
servo/python/tidy/servo_tidy/licenseck.py
servo/python/tidy/servo_tidy/tidy.py
servo/python/tidy/servo_tidy_tests/apache2_license.rs
servo/python/tidy/servo_tidy_tests/shebang_license.py
servo/python/tidy/servo_tidy_tests/test_tidy.py
servo/python/tidy/setup.py
servo/support/rust-task_info/Cargo.toml
--- a/servo/python/requirements.txt
+++ b/servo/python/requirements.txt
@@ -9,9 +9,12 @@ toml == 0.9.1
 # For Python linting
 flake8 == 2.4.1
 pep8 == 1.5.7
 pyflakes == 0.8.1
 
 # For test-webidl
 ply == 3.8
 
+# For Cross-platform colored terminal text
+colorama == 0.3.7
+
 -e python/tidy
--- a/servo/python/tidy/HISTORY.rst
+++ b/servo/python/tidy/HISTORY.rst
@@ -1,10 +1,21 @@
 Release History
 ---------------
 
+0.1.0 (2016-08-09)
+++++++++++++++++++
+
+- Improve license checking to disregard comments and line breaks
+- License checking verifies that COPYRIGHT is specified when apache2 is used
+
+0.0.3 (2016-04-19)
+++++++++++++++++++
+
+- Add alternate wording of apache2 license
+
 0.0.2 (2016-04-17)
 ++++++++++++++++++
 - Cleanup Tidy to work on external deps
 
 0.0.1 (2016-04-12)
 ++++++++++++++++++
 - Package Tidy
--- a/servo/python/tidy/servo_tidy/licenseck.py
+++ b/servo/python/tidy/servo_tidy/licenseck.py
@@ -2,107 +2,34 @@
 # file at the top-level directory of this distribution.
 #
 # Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
 # http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
 # <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 # option. This file may not be copied, modified, or distributed
 # except according to those terms.
 
-
-# These licenses are valid for use in Servo
-licenses = [
-
-"""\
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-""",
-
-"""\
-# This Source Code Form is subject to the terms of the Mozilla Public
-# License, v. 2.0. If a copy of the MPL was not distributed with this
-# file, You can obtain one at http://mozilla.org/MPL/2.0/.
-""",
-
-"""\
-#!/usr/bin/env bash
-
-# This Source Code Form is subject to the terms of the Mozilla Public
-# License, v. 2.0. If a copy of the MPL was not distributed with this
-# file, You can obtain one at http://mozilla.org/MPL/2.0/.
-""",
-
-"""\
-#!/usr/bin/env python
-
-# This Source Code Form is subject to the terms of the Mozilla Public
-# License, v. 2.0. If a copy of the MPL was not distributed with this
-# file, You can obtain one at http://mozilla.org/MPL/2.0/.
-""",
-
-"""\
-#!/usr/bin/env python3
-
-# This Source Code Form is subject to the terms of the Mozilla Public
-# License, v. 2.0. If a copy of the MPL was not distributed with this
-# file, You can obtain one at http://mozilla.org/MPL/2.0/.
-""",
-
-"""\
-// This Source Code Form is subject to the terms of the Mozilla Public
-// License, v. 2.0. If a copy of the MPL was not distributed with this
-// file, You can obtain one at http://mozilla.org/MPL/2.0/.
-""",
+MPL = """\
+This Source Code Form is subject to the terms of the Mozilla Public \
+License, v. 2.0. If a copy of the MPL was not distributed with this \
+file, You can obtain one at http://mozilla.org/MPL/2.0/.\
+"""
 
-"""\
-// Copyright 2013 The Servo Project Developers. See the COPYRIGHT
-// file at the top-level directory of this distribution.
-//
-// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
-// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
-// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
-// option. This file may not be copied, modified, or distributed
-// except according to those terms.
-""",
-
-"""\
-# Copyright 2013 The Servo Project Developers. See the COPYRIGHT
-# file at the top-level directory of this distribution.
-#
-# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
-# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
-# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
-# option. This file may not be copied, modified, or distributed
-# except according to those terms.
-""",
+APACHE = """\
+Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or \
+http://www.apache.org/licenses/LICENSE-2.0> or the MIT license \
+<LICENSE-MIT or http://opensource.org/licenses/MIT>, at your \
+option. This file may not be copied, modified, or distributed \
+except according to those terms.\
+"""
 
-"""\
-// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
-// file at the top-level directory of this distribution and at
-// http://rust-lang.org/COPYRIGHT.
-//
-// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
-// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
-// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
-// option. This file may not be copied, modified, or distributed
-// except according to those terms.
-""",
-
-"""\
-// Copyright 2012-2014 The Rust Project Developers.
-// See http://rust-lang.org/COPYRIGHT.
-//
-// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
-// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
-// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
-// option. This file may not be copied, modified, or distributed
-// except according to those terms.
-""",
-]  # noqa: Indicate to flake8 that we do not want to check indentation here
+COPYRIGHT = [
+    "See the COPYRIGHT file at the top-level directory of this distribution",
+    "See http://rust-lang.org/COPYRIGHT",
+]
 
 # The valid licenses, in the form we'd expect to see them in a Cargo.toml file.
 licenses_toml = [
     'license = "MPL-2.0"',
     'license = "MIT/Apache-2.0"',
 ]
 
 # The valid dependency licenses, in the form we'd expect to see them in a Cargo.toml file.
--- a/servo/python/tidy/servo_tidy/tidy.py
+++ b/servo/python/tidy/servo_tidy/tidy.py
@@ -12,22 +12,20 @@ import fnmatch
 import itertools
 import json
 import os
 import re
 import site
 import StringIO
 import subprocess
 import sys
-from licenseck import licenses, licenses_toml, licenses_dep_toml
+from licenseck import MPL, APACHE, COPYRIGHT, licenses_toml, licenses_dep_toml
+import colorama
 
-# License and header checks
-EMACS_HEADER = "/* -*- Mode:"
-VIM_HEADER = "/* vim:"
-MAX_LICENSE_LINESPAN = max(len(license.splitlines()) for license in licenses)
+COMMENTS = ["// ", "# ", " *", "/* "]
 
 # File patterns to include in the non-WPT tidy check.
 FILE_PATTERNS_TO_CHECK = ["*.rs", "*.rc", "*.cpp", "*.c",
                           "*.h", "Cargo.lock", "*.py", "*.sh",
                           "*.toml", "*.webidl", "*.json", "*.html"]
 
 # File patterns that are ignored for all tidy and lint checks.
 FILE_PATTERNS_TO_IGNORE = ["*.#*", "*.pyc"]
@@ -55,17 +53,16 @@ IGNORED_FILES = [
     # Hidden files
     os.path.join(".", "."),
 ]
 
 # Directories that are ignored for the non-WPT tidy check.
 IGNORED_DIRS = [
     # Upstream
     os.path.join(".", "support", "android", "apk"),
-    os.path.join(".", "support", "rust-task_info"),
     os.path.join(".", "tests", "wpt", "css-tests"),
     os.path.join(".", "tests", "wpt", "harness"),
     os.path.join(".", "tests", "wpt", "update"),
     os.path.join(".", "tests", "wpt", "web-platform-tests"),
     os.path.join(".", "tests", "wpt", "mozilla", "tests", "mozilla", "referrer-policy"),
     os.path.join(".", "tests", "wpt", "sync"),
     os.path.join(".", "tests", "wpt", "sync_css"),
     os.path.join(".", "python", "mach"),
@@ -147,23 +144,55 @@ def filter_files(start_dir, only_changed
         base_name = os.path.basename(file_name)
         if not any(fnmatch.fnmatch(base_name, pattern) for pattern in FILE_PATTERNS_TO_CHECK):
             continue
         if not filter_file(file_name):
             continue
         yield file_name
 
 
+def uncomment(line):
+    for c in COMMENTS:
+        if line.startswith(c):
+            if line.endswith("*/"):
+                return line[len(c):(len(line) - 3)].strip()
+            return line[len(c):].strip()
+
+
+def licensed_mpl(header):
+    return MPL in header
+
+
+def licensed_apache(header):
+    if APACHE in header:
+        return any(c in header for c in COPYRIGHT)
+
+
 def check_license(file_name, lines):
     if any(file_name.endswith(ext) for ext in (".toml", ".lock", ".json", ".html")):
         raise StopIteration
-    while lines and (lines[0].startswith(EMACS_HEADER) or lines[0].startswith(VIM_HEADER)):
-        lines = lines[1:]
-    contents = "".join(lines[:MAX_LICENSE_LINESPAN])
-    valid_license = any(contents.startswith(license) for license in licenses)
+
+    if lines[0].startswith("#!") and lines[1].strip():
+        yield (1, "missing blank line after shebang")
+
+    blank_lines = 0
+    max_blank_lines = 2 if lines[0].startswith("#!") else 1
+    license_block = []
+
+    for l in lines:
+        l = l.rstrip('\n')
+        if not l.strip():
+            blank_lines += 1
+        if blank_lines >= max_blank_lines:
+            break
+        line = uncomment(l)
+        if line is not None:
+            license_block.append(line)
+    contents = " ".join(license_block)
+    valid_license = licensed_mpl(contents) or licensed_apache(contents)
     acknowledged_bad_license = "xfail-license" in contents
     if not (valid_license or acknowledged_bad_license):
         yield (1, "incorrect license")
 
 
 def check_modeline(file_name, lines):
     for idx, line in enumerate(lines[:5]):
         if re.search('^.*[ \t](vi:|vim:|ex:)[ \t]', line):
@@ -305,18 +334,18 @@ duplicate versions for package "{package
 
 def check_toml(file_name, lines):
     if not file_name.endswith(".toml"):
         raise StopIteration
     ok_licensed = False
     for idx, line in enumerate(lines):
         if line.find("*") != -1:
             yield (idx + 1, "found asterisk instead of minimum version number")
-        for license in licenses_toml:
-            ok_licensed |= (license in line)
+        for license_line in licenses_toml:
+            ok_licensed |= (license_line in line)
     if not ok_licensed:
         yield (0, ".toml file should contain a valid license.")
 
 
 def check_shell(file_name, lines):
     if not file_name.endswith(".sh"):
         raise StopIteration
 
@@ -757,13 +786,15 @@ def scan(only_changed_files=False, progr
     # check dependecy licenses
     dep_license_errors = check_dep_license_errors(get_dep_toml_files(only_changed_files), progress)
     # wpt lint checks
     wpt_lint_errors = check_wpt_lint_errors(get_wpt_files(only_changed_files, progress))
     # collect errors
     errors = itertools.chain(errors, dep_license_errors, wpt_lint_errors)
     error = None
     for error in errors:
+        colorama.init()
         print "\r\033[94m{}\033[0m:\033[93m{}\033[0m: \033[91m{}\033[0m".format(*error)
     print
     if error is None:
+        colorama.init()
         print "\033[92mtidy reported no errors.\033[0m"
     return int(error is not None)
new file mode 100644
--- /dev/null
+++ b/servo/python/tidy/servo_tidy_tests/apache2_license.rs
@@ -0,0 +1,5 @@
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
new file mode 100644
--- /dev/null
+++ b/servo/python/tidy/servo_tidy_tests/shebang_license.py
@@ -0,0 +1,4 @@
+#!/usr/bin/env python
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
--- a/servo/python/tidy/servo_tidy_tests/test_tidy.py
+++ b/servo/python/tidy/servo_tidy_tests/test_tidy.py
@@ -43,26 +43,35 @@ class CheckTidiness(unittest.TestCase):
         self.assertTrue('links to WHATWG single-page url, change to multi page:' in errors.next()[2])
         self.assertNoMoreErrors(errors)
 
     def test_licence(self):
         errors = tidy.collect_errors_for_files(iterFile('incorrect_license.rs'), [], [tidy.check_license], print_text=False)
         self.assertEqual('incorrect license', errors.next()[2])
         self.assertNoMoreErrors(errors)
 
+    def test_shebang_license(self):
+        errors = tidy.collect_errors_for_files(iterFile('shebang_license.py'), [], [tidy.check_license], print_text=False)
+        self.assertEqual('missing blank line after shebang', errors.next()[2])
+        self.assertNoMoreErrors(errors)
+
     def test_shell(self):
         errors = tidy.collect_errors_for_files(iterFile('shell_tidy.sh'), [], [tidy.check_shell], print_text=False)
         self.assertEqual('script does not have shebang "#!/usr/bin/env bash"', errors.next()[2])
         self.assertEqual('script is missing options "set -o errexit", "set -o pipefail"', errors.next()[2])
         self.assertEqual('script should not use backticks for command substitution', errors.next()[2])
         self.assertEqual('variable substitutions should use the full \"${VAR}\" form', errors.next()[2])
         self.assertEqual('script should use `[[` instead of `[` for conditional testing', errors.next()[2])
         self.assertEqual('script should use `[[` instead of `[` for conditional testing', errors.next()[2])
         self.assertNoMoreErrors(errors)
 
+    def test_apache2_incomplete(self):
+        errors = tidy.collect_errors_for_files(iterFile('apache2_license.rs'), [], [tidy.check_license])
+        self.assertEqual('incorrect license', errors.next()[2])
+
     def test_rust(self):
         errors = tidy.collect_errors_for_files(iterFile('rust_tidy.rs'), [], [tidy.check_rust], print_text=False)
         self.assertEqual('use statement spans multiple lines', errors.next()[2])
         self.assertEqual('missing space before }', errors.next()[2])
         self.assertTrue('use statement is not in alphabetical order' in errors.next()[2])
         self.assertEqual('use statement contains braces for single import', errors.next()[2])
         self.assertEqual('encountered whitespace following a use statement', errors.next()[2])
         self.assertTrue('mod declaration is not in alphabetical order' in errors.next()[2])
--- a/servo/python/tidy/setup.py
+++ b/servo/python/tidy/setup.py
@@ -6,21 +6,22 @@
 # <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 # option. This file may not be copied, modified, or distributed
 # except according to those terms.
 
 import os
 from setuptools import setup, find_packages
 
 
-VERSION = '0.0.3'
+VERSION = '0.1.0'
 
 install_requires = [
     "flake8==2.4.1",
     "toml==0.9.1",
+    "colorama==0.3.7",
 ]
 
 here = os.path.dirname(os.path.abspath(__file__))
 # get documentation from the README and HISTORY
 try:
     with open(os.path.join(here, 'README.rst')) as doc:
         readme = doc.read()
 except:
@@ -46,12 +47,12 @@ if __name__ == '__main__':
         author_email='dev-servo@lists.mozilla.org',
         url='https://github.com/servo/servo',
         packages=find_packages(exclude=['ez_setup', 'examples', 'tests']),
         package_data={},
         install_requires=install_requires,
         zip_safe=False,
         entry_points={
             'console_scripts': [
-                'servo-tidy=servo_tidy.tidy:scan'
+                'servo-tidy=servo_tidy.tidy:scan',
             ],
         },
     )
--- a/servo/support/rust-task_info/Cargo.toml
+++ b/servo/support/rust-task_info/Cargo.toml
@@ -1,10 +1,11 @@
 [package]
 
 name = "task_info"
 version = "0.0.1"
 authors = ["The Servo Project Developers"]
+license = "MPL-2.0"
 
 build = "build.rs"
 
 [build-dependencies]
 gcc = "0.3.4"