Bug 1433873 - Fix race condition in Marionette for quit and restart. draft
authorHenrik Skupin <mail@hskupin.info>
Mon, 29 Jan 2018 15:11:18 +0100
changeset 789173 ce0932135585b08cb2dee9f10182257a21d9e246
parent 788735 63a0e2f626febb98d87d2543955ab99a653654ff
child 789174 3731da8f5df18648885f00b8d80e541b9babe835
push id108209
push userbmo:hskupin@gmail.com
push dateFri, 27 Apr 2018 18:45:26 +0000
bugs1433873
milestone61.0a1
Bug 1433873 - Fix race condition in Marionette for quit and restart. MozReview-Commit-ID: CvMkGW3gyXe
testing/marionette/client/marionette_driver/decorators.py
testing/marionette/client/marionette_driver/marionette.py
testing/marionette/harness/marionette_harness/tests/unit/test_quit_restart.py
--- a/testing/marionette/client/marionette_driver/decorators.py
+++ b/testing/marionette/client/marionette_driver/decorators.py
@@ -20,19 +20,24 @@ def _find_marionette_in_args(*args, **kw
 
 def do_process_check(func):
     """Decorator which checks the process status after the function has run."""
     @wraps(func)
     def _(*args, **kwargs):
         try:
             return func(*args, **kwargs)
         except (socket.error, socket.timeout):
+            m = _find_marionette_in_args(*args, **kwargs)
+
             # In case of socket failures which will also include crashes of the
-            # application, make sure to handle those correctly.
-            m = _find_marionette_in_args(*args, **kwargs)
+            # application, make sure to handle those correctly. In case of an
+            # active shutdown just let it bubble up.
+            if m.is_shutting_down:
+                raise
+
             m._handle_socket_failure()
 
     return _
 
 
 def uses_marionette(func):
     """Decorator which creates a marionette session and deletes it
     afterwards if one doesn't already exist.
--- a/testing/marionette/client/marionette_driver/marionette.py
+++ b/testing/marionette/client/marionette_driver/marionette.py
@@ -1075,40 +1075,49 @@ class Marionette(object):
                          be used to trigger the shutdown.
         """
         if not self.instance:
             raise errors.MarionetteException("quit() can only be called "
                                              "on Gecko instances launched by Marionette")
 
         cause = None
         if in_app:
-            if callback is not None:
-                if not callable(callback):
-                    raise ValueError("Specified callback '{}' is not callable".format(callback))
+            if callback and not callable(callback):
+                raise ValueError("Specified callback '{}' is not callable".format(callback))
+
+            # Block Marionette from accepting new connections
+            self._send_message("Marionette:AcceptConnections",
+                               {"value": False})
+
+            try:
+                self.is_shutting_down = True
+                if callback is not None:
+                    callback()
+                else:
+                    cause = self._request_in_app_shutdown()
 
-                self._send_message("Marionette:AcceptConnections",
-                                   {"value": False})
-                callback()
-            else:
-                cause = self._request_in_app_shutdown()
+            except IOError:
+                # A possible IOError should be ignored at this point, given that
+                # quit() could have been called inside of `using_context`,
+                # which wants to reset the context but fails sending the message.
+                pass
 
+            returncode = self.instance.runner.wait(timeout=self.DEFAULT_SHUTDOWN_TIMEOUT)
+            if returncode is None:
+                # The process did not shutdown itself, so force-closing it.
+                self.cleanup()
+
+                message = "Process still running after quit request"
+                raise IOError(message.format(self.DEFAULT_SHUTDOWN_TIMEOUT))
+
+            self.is_shutting_down = False
             self.delete_session(send_request=False)
 
-            # Give the application some time to shutdown
-            returncode = self.instance.runner.wait(timeout=self.DEFAULT_SHUTDOWN_TIMEOUT)
-            if returncode is None:
-                # This will force-close the application without sending any other message.
-                self.cleanup()
-
-                message = ("Process killed because a requested application quit did not happen "
-                           "within {}s. Check gecko.log for errors.")
-                raise IOError(message.format(self.DEFAULT_SHUTDOWN_TIMEOUT))
-
         else:
-            self.delete_session()
+            self.delete_session(send_request=False)
             self.instance.close(clean=clean)
 
         if cause not in (None, "shutdown"):
             raise errors.MarionetteException("Unexpected shutdown reason '{}' for "
                                              "quitting the process.".format(cause))
 
     @do_process_check
     def restart(self, clean=False, in_app=False, callback=None):
@@ -1131,36 +1140,64 @@ class Marionette(object):
         context = self._send_message("Marionette:GetContext",
                                      key="value")
 
         cause = None
         if in_app:
             if clean:
                 raise ValueError("An in_app restart cannot be triggered with the clean flag set")
 
-            if callback is not None:
-                if not callable(callback):
-                    raise ValueError("Specified callback '{}' is not callable".format(callback))
+            if callback and not callable(callback):
+                raise ValueError("Specified callback '{}' is not callable".format(callback))
 
-                self._send_message("Marionette:AcceptConnections",
-                                   {"value": False})
-                callback()
-            else:
-                cause = self._request_in_app_shutdown("eRestart")
-
-            self.delete_session(send_request=False)
+            # Block Marionette from accepting new connections
+            self._send_message("Marionette:AcceptConnections",
+                               {"value": False})
 
             try:
+                self.is_shutting_down = True
+                if callback is not None:
+                    callback()
+                else:
+                    cause = self._request_in_app_shutdown("eRestart")
+
+            except IOError:
+                # A possible IOError should be ignored at this point, given that
+                # restart() could have been called inside of `using_context`,
+                # which wants to reset the context but fails sending the message.
+                pass
+
+            try:
+                # Wait for a possible new connection to appear
                 timeout = self.DEFAULT_SHUTDOWN_TIMEOUT + self.DEFAULT_STARTUP_TIMEOUT
                 self.raise_for_port(timeout=timeout)
             except socket.timeout:
-                if self.instance.runner.returncode is not None:
-                    exc, val, tb = sys.exc_info()
+                exc, val, tb = sys.exc_info()
+
+                # Bug 1433905 - Using returncode or poll() via mozrunner currently
+                # always returns None even with the process ended.
+                returncode = self.instance.runner.wait(timeout=0)
+                if returncode is None:
+                    # The process is still running, which means the shutdown
+                    # request was not correct or the application ignored it.
+                    # Allow Marionette to accept connections again.
+                    self._send_message("acceptConnections", {"value": True})
+
+                    reraise(exc, "Process still running after restart request", tb)
+
+                else:
+                    # The process shutdown but didn't start again.
                     self.cleanup()
-                    reraise(exc, "Requested restart of the application was aborted", tb)
+                    msg = "Process unexpectedly quit without restarting (exit code: {})"
+                    reraise(exc, msg.format(self.instance.runner.returncode), tb)
+
+            finally:
+                self.is_shutting_down = False
+
+            self.delete_session(send_request=False)
 
         else:
             self.delete_session()
             self.instance.restart(clean=clean)
             self.raise_for_port(timeout=self.DEFAULT_STARTUP_TIMEOUT)
 
         if cause not in (None, "restart"):
             raise errors.MarionetteException("Unexpected shutdown reason '{}' for "
--- a/testing/marionette/harness/marionette_harness/tests/unit/test_quit_restart.py
+++ b/testing/marionette/harness/marionette_harness/tests/unit/test_quit_restart.py
@@ -1,16 +1,16 @@
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 from __future__ import absolute_import, print_function
 
 from marionette_driver import errors
-from marionette_harness import MarionetteTestCase, skip
+from marionette_harness import MarionetteTestCase
 
 
 class TestServerQuitApplication(MarionetteTestCase):
 
     def tearDown(self):
         if self.marionette.session is None:
             self.marionette.start_session()
 
@@ -103,107 +103,117 @@ class TestQuitRestart(MarionetteTestCase
             Components.utils.import("resource://gre/modules/Services.jsm");
             let flags = Ci.nsIAppStartup.eAttemptQuit;
             if (arguments[0]) {
               flags |= Ci.nsIAppStartup.eRestart;
             }
             Services.startup.quit(flags);
         """, script_args=(restart,))
 
-    def test_force_clean_restart(self):
-        self.marionette.restart(clean=True)
-        self.assertNotEqual(self.marionette.profile, self.profile)
-        self.assertNotEqual(self.marionette.session_id, self.session_id)
-        # A forced restart will cause a new process id
-        self.assertNotEqual(self.marionette.process_id, self.pid)
-        self.assertNotEqual(self.marionette.get_pref("startup.homepage_welcome_url"),
-                            "about:about")
-
     def test_force_restart(self):
         self.marionette.restart()
         self.assertEqual(self.marionette.profile, self.profile)
         self.assertNotEqual(self.marionette.session_id, self.session_id)
         # A forced restart will cause a new process id
         self.assertNotEqual(self.marionette.process_id, self.pid)
         self.assertNotEqual(self.marionette.get_pref("startup.homepage_welcome_url"),
                             "about:about")
 
-    def test_force_clean_quit(self):
-        self.marionette.quit(clean=True)
-
-        self.assertEqual(self.marionette.session, None)
-        with self.assertRaisesRegexp(errors.MarionetteException, "Please start a session"):
-            self.marionette.get_url()
-
-        self.marionette.start_session()
+    def test_force_clean_restart(self):
+        self.marionette.restart(clean=True)
         self.assertNotEqual(self.marionette.profile, self.profile)
         self.assertNotEqual(self.marionette.session_id, self.session_id)
+        # A forced restart will cause a new process id
+        self.assertNotEqual(self.marionette.process_id, self.pid)
         self.assertNotEqual(self.marionette.get_pref("startup.homepage_welcome_url"),
                             "about:about")
 
     def test_force_quit(self):
         self.marionette.quit()
 
         self.assertEqual(self.marionette.session, None)
         with self.assertRaisesRegexp(errors.MarionetteException, "Please start a session"):
             self.marionette.get_url()
 
         self.marionette.start_session()
         self.assertEqual(self.marionette.profile, self.profile)
         self.assertNotEqual(self.marionette.session_id, self.session_id)
         self.assertNotEqual(self.marionette.get_pref("startup.homepage_welcome_url"),
                             "about:about")
 
-    @skip("Bug 1363368 - Wrong window handles after in_app restarts")
-    def test_no_in_app_clean_restart(self):
-        # Test that in_app and clean cannot be used in combination
-        with self.assertRaises(ValueError):
-            self.marionette.restart(in_app=True, clean=True)
+    def test_force_clean_quit(self):
+        self.marionette.quit(clean=True)
+
+        self.assertEqual(self.marionette.session, None)
+        with self.assertRaisesRegexp(errors.MarionetteException, "Please start a session"):
+            self.marionette.get_url()
 
-    @skip("Bug 1363368 - Wrong window handles after in_app restarts")
+        self.marionette.start_session()
+        self.assertNotEqual(self.marionette.profile, self.profile)
+        self.assertNotEqual(self.marionette.session_id, self.session_id)
+        self.assertNotEqual(self.marionette.get_pref("startup.homepage_welcome_url"),
+                            "about:about")
+
     def test_in_app_restart(self):
-        if self.marionette.session_capabilities["platformName"] != "windows_nt":
-            skip("Bug 1363368 - Wrong window handles after in_app restarts")
-
         self.marionette.restart(in_app=True)
         self.assertEqual(self.marionette.profile, self.profile)
-        self.assertEqual(self.marionette.session_id, self.session_id)
+        self.assertNotEqual(self.marionette.session_id, self.session_id)
 
         # An in-app restart will keep the same process id only on Linux
         if self.marionette.session_capabilities["platformName"] == "linux":
             self.assertEqual(self.marionette.process_id, self.pid)
         else:
             self.assertNotEqual(self.marionette.process_id, self.pid)
 
         self.assertNotEqual(self.marionette.get_pref("startup.homepage_welcome_url"),
                             "about:about")
 
-    @skip("Bug 1363368 - Wrong window handles after in_app restarts")
+    def test_in_app_restart_clean_not_allowed(self):
+        # Test that in_app and clean cannot be used in combination
+        with self.assertRaisesRegexp(ValueError, "cannot be triggered with the clean flag set"):
+            self.marionette.restart(in_app=True, clean=True)
+
     def test_in_app_restart_with_callback(self):
-        if self.marionette.session_capabilities["platformName"] != "windows_nt":
-            skip("Bug 1363368 - Wrong window handles after in_app restarts")
-
         self.marionette.restart(in_app=True,
                                 callback=lambda: self.shutdown(restart=True))
 
         self.assertEqual(self.marionette.profile, self.profile)
-        self.assertEqual(self.marionette.session_id, self.session_id)
+        self.assertNotEqual(self.marionette.session_id, self.session_id)
 
         # An in-app restart will keep the same process id only on Linux
         if self.marionette.session_capabilities["platformName"] == "linux":
             self.assertEqual(self.marionette.process_id, self.pid)
         else:
             self.assertNotEqual(self.marionette.process_id, self.pid)
 
         self.assertNotEqual(self.marionette.get_pref("startup.homepage_welcome_url"),
                             "about:about")
 
-    @skip("Bug 1397612 - Hang of Marionette client after the restart")
+    def test_in_app_restart_with_callback_not_callable(self):
+        with self.assertRaisesRegexp(ValueError, "is not callable"):
+            self.marionette.restart(in_app=True, callback=4)
+
+    def test_in_app_restart_with_callback_but_no_shutdown(self):
+        timeout_startup = self.marionette.DEFAULT_STARTUP_TIMEOUT
+        timeout_shutdown = self.marionette.DEFAULT_SHUTDOWN_TIMEOUT
+        self.marionette.DEFAULT_SHUTDOWN_TIMEOUT = 0
+        self.marionette.DEFAULT_STARTUP_TIMEOUT = 0
+
+        try:
+            with self.assertRaisesRegexp(IOError, "Process still running after restart request"):
+                self.marionette.restart(in_app=True, callback=lambda: False)
+        finally:
+            self.marionette.DEFAULT_STARTUP_TIMEOUT = timeout_startup
+            self.marionette.DEFAULT_SHUTDOWN_TIMEOUT = timeout_shutdown
+
+    def test_in_app_restart_with_callback_but_process_quit(self):
+        with self.assertRaisesRegexp(IOError, "Process unexpectedly quit without restarting"):
+            self.marionette.restart(in_app=True, callback=lambda: self.shutdown(restart=False))
+
     def test_in_app_restart_safe_mode(self):
-
         def restart_in_safe_mode():
             with self.marionette.using_context("chrome"):
                 self.marionette.execute_script("""
                   Components.utils.import("resource://gre/modules/Services.jsm");
 
                   let cancelQuit = Cc["@mozilla.org/supports-PRBool;1"]
                                      .createInstance(Ci.nsISupportsPRBool);
                   Services.obs.notifyObservers(cancelQuit,
@@ -214,121 +224,86 @@ class TestQuitRestart(MarionetteTestCase
                   }
                 """)
 
         try:
             self.assertFalse(self.is_safe_mode, "Safe Mode is unexpectedly enabled")
             self.marionette.restart(in_app=True, callback=restart_in_safe_mode)
             self.assertTrue(self.is_safe_mode, "Safe Mode is not enabled")
         finally:
+            if self.marionette.session is None:
+                self.marionette.start_session()
             self.marionette.quit(clean=True)
 
-    def test_in_app_restart_with_callback_not_callable(self):
-        with self.assertRaisesRegexp(ValueError, "is not callable"):
-            self.marionette.restart(in_app=True, callback=4)
-
-    def test_in_app_restart_with_callback_missing_shutdown(self):
-        try:
-            timeout_startup = self.marionette.DEFAULT_STARTUP_TIMEOUT
-            timeout_shutdown = self.marionette.DEFAULT_SHUTDOWN_TIMEOUT
-            self.marionette.DEFAULT_SHUTDOWN_TIMEOUT = 5
-            self.marionette.DEFAULT_STARTUP_TIMEOUT = 5
-
-            with self.assertRaisesRegexp(IOError, "the connection to Marionette server is lost"):
-                self.marionette.restart(in_app=True, callback=lambda: False)
-        finally:
-            self.marionette.DEFAULT_STARTUP_TIMEOUT = timeout_startup
-            self.marionette.DEFAULT_SHUTDOWN_TIMEOUT = timeout_shutdown
-
-    @skip("Bug 1363368 - Wrong window handles after in_app restarts")
     def test_in_app_quit(self):
-        if self.marionette.session_capabilities["platformName"] != "windows_nt":
-            skip("Bug 1363368 - Wrong window handles after in_app restarts")
-
         self.marionette.quit(in_app=True)
 
         self.assertEqual(self.marionette.session, None)
         with self.assertRaisesRegexp(errors.MarionetteException, "Please start a session"):
             self.marionette.get_url()
 
         self.marionette.start_session()
         self.assertEqual(self.marionette.profile, self.profile)
         self.assertNotEqual(self.marionette.session_id, self.session_id)
         self.assertNotEqual(self.marionette.get_pref("startup.homepage_welcome_url"),
                             "about:about")
 
-    @skip("Bug 1363368 - Wrong window handles after in_app restarts")
     def test_in_app_quit_with_callback(self):
-        if self.marionette.session_capabilities["platformName"] != "windows_nt":
-            skip("Bug 1363368 - Wrong window handles after in_app restarts")
-
         self.marionette.quit(in_app=True, callback=self.shutdown)
         self.assertEqual(self.marionette.session, None)
         with self.assertRaisesRegexp(errors.MarionetteException, "Please start a session"):
             self.marionette.get_url()
 
         self.marionette.start_session()
         self.assertEqual(self.marionette.profile, self.profile)
         self.assertNotEqual(self.marionette.session_id, self.session_id)
         self.assertNotEqual(self.marionette.get_pref("startup.homepage_welcome_url"),
                             "about:about")
 
     def test_in_app_quit_with_callback_missing_shutdown(self):
         try:
             timeout = self.marionette.DEFAULT_SHUTDOWN_TIMEOUT
-            self.marionette.DEFAULT_SHUTDOWN_TIMEOUT = 10
+            self.marionette.DEFAULT_SHUTDOWN_TIMEOUT = 0
 
-            with self.assertRaisesRegexp(IOError, "a requested application quit did not happen"):
+            with self.assertRaisesRegexp(IOError, "Process still running after quit request"):
                 self.marionette.quit(in_app=True, callback=lambda: False)
         finally:
             self.marionette.DEFAULT_SHUTDOWN_TIMEOUT = timeout
 
     def test_in_app_quit_with_callback_not_callable(self):
         with self.assertRaisesRegexp(ValueError, "is not callable"):
             self.marionette.restart(in_app=True, callback=4)
 
-    @skip("Bug 1363368 - Wrong window handles after in_app restarts")
     def test_reset_context_after_quit_by_set_context(self):
-        if self.marionette.session_capabilities["platformName"] != "windows_nt":
-            skip("Bug 1363368 - Wrong window handles after in_app restarts")
-
         # Check that we are in content context which is used by default in
         # Marionette
         self.assertNotIn("chrome://", self.marionette.get_url(),
                          "Context does not default to content")
 
         self.marionette.set_context("chrome")
         self.marionette.quit(in_app=True)
         self.assertEqual(self.marionette.session, None)
         self.marionette.start_session()
         self.assertNotIn("chrome://", self.marionette.get_url(),
                          "Not in content context after quit with using_context")
 
-    @skip("Bug 1363368 - Wrong window handles after in_app restarts")
     def test_reset_context_after_quit_by_using_context(self):
-        if self.marionette.session_capabilities["platformName"] != "windows_nt":
-            skip("Bug 1363368 - Wrong window handles after in_app restarts")
-
         # Check that we are in content context which is used by default in
         # Marionette
         self.assertNotIn("chrome://", self.marionette.get_url(),
                          "Context does not default to content")
 
         with self.marionette.using_context("chrome"):
             self.marionette.quit(in_app=True)
             self.assertEqual(self.marionette.session, None)
             self.marionette.start_session()
             self.assertNotIn("chrome://", self.marionette.get_url(),
                              "Not in content context after quit with using_context")
 
-    @skip("Bug 1363368 - Wrong window handles after in_app restarts")
     def test_keep_context_after_restart_by_set_context(self):
-        if self.marionette.session_capabilities["platformName"] != "windows_nt":
-            skip("Bug 1363368 - Wrong window handles after in_app restarts")
-
         # Check that we are in content context which is used by default in
         # Marionette
         self.assertNotIn("chrome://", self.marionette.get_url(),
                          "Context doesn't default to content")
 
         # restart while we are in chrome context
         self.marionette.set_context("chrome")
         self.marionette.restart(in_app=True)
@@ -337,21 +312,17 @@ class TestQuitRestart(MarionetteTestCase
         if self.marionette.session_capabilities["platformName"] == "linux":
             self.assertEqual(self.marionette.process_id, self.pid)
         else:
             self.assertNotEqual(self.marionette.process_id, self.pid)
 
         self.assertIn("chrome://", self.marionette.get_url(),
                       "Not in chrome context after a restart with set_context")
 
-    @skip("Bug 1363368 - Wrong window handles after in_app restarts")
     def test_keep_context_after_restart_by_using_context(self):
-        if self.marionette.session_capabilities["platformName"] != "windows_nt":
-            skip("Bug 1363368 - Wrong window handles after in_app restarts")
-
         # Check that we are in content context which is used by default in
         # Marionette
         self.assertNotIn("chrome://", self.marionette.get_url(),
                          "Context does not default to content")
 
         # restart while we are in chrome context
         with self.marionette.using_context('chrome'):
             self.marionette.restart(in_app=True)