Bug 1166033 - Notice when the process has died, and restart it, r=dburns
author Jonathan Griffin <jgriffin@mozilla.com>
Thu, 18 Jun 2015 14:26:53 -0700
changeset 280447 4c3af36331e06dbb56da0b5d37feb5b97c07db61
parent 280446 9c891ea3949c420f2c89101e51777f09c185e43e
child 280448 9bee343c34ec0bf144f95c277680f7e5263dd24d
push id 4932
push user jlund@mozilla.com
push date Mon, 10 Aug 2015 18:23:06 +0000
treeherder mozilla-beta@6dd5a4f5f745 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers dburns
bugs 1166033
milestone 41.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1166033 - Notice when the process has died, and restart it, r=dburns
testing/marionette/driver/marionette_driver/marionette.py
testing/marionette/transport/marionette_transport/transport.py
--- a/testing/marionette/driver/marionette_driver/marionette.py
+++ b/testing/marionette/driver/marionette_driver/marionette.py
@@ -623,17 +623,21 @@ class Marionette(object):
             self.runner = B2GEmulatorRunner(b2g_home=homedir,
                                             logdir=logdir,
                                             process_args=process_args)
             self.emulator = self.runner.device
             self.emulator.connect()
             self.port = self.emulator.setup_port_forwarding(remote_port=self.port)
             assert(self.emulator.wait_for_port(self.port)), "Timed out waiting for port!"
 
-        self.client = MarionetteTransport(self.host, self.port, self.socket_timeout)
+        self.client = MarionetteTransport(
+            self.host,
+            self.port,
+            self.socket_timeout,
+            instance=self.instance)
 
         if emulator:
             if busybox:
                 self.emulator.install_busybox(busybox=busybox)
             self.emulator.wait_for_system_message(self)
 
     def cleanup(self):
         if self.session:
@@ -967,16 +971,19 @@ class Marionette(object):
             # Values here correspond to constants in nsIAppStartup.
             # See https://developer.mozilla.org/en-US/docs/Mozilla/Tech/XPCOM/Reference/Interface/nsIAppStartup
             restart_flags = [
                 "eForceQuit",
                 "eRestart",
             ]
             self._send_message('quitApplication', flags=restart_flags)
             self.client.close()
+            # The instance is restarting itself; we will no longer be able to
+            # track it by pid, so mark it as 'detached'.
+            self.instance.detached = True
         else:
             self.delete_session()
             self.instance.restart(clean=clean)
         assert(self.wait_for_port()), "Timed out waiting for port!"
         self.start_session(session_id=self.session_id)
         self._reset_timeouts()
 
     def absolute_url(self, relative_url):
@@ -994,16 +1001,21 @@ class Marionette(object):
 
         :param desired_capabilities: An optional dict of desired
             capabilities.  This is currently ignored.
         :param timeout: Timeout in seconds for the server to be ready.
         :param session_id: unique identifier for the session. If no session id is
             passed in then one will be generated by the marionette server.
 
         :returns: A dict of the capabilities offered."""
+        if self.instance:
+            returncode = self.instance.runner.process_handler.proc.returncode
+            if returncode is not None:
+                # We're managing a binary which has terminated, so restart it.
+                self.instance.restart()
         self.wait_for_port(timeout=timeout)
         self.session = self._send_message('newSession', 'value', capabilities=desired_capabilities, sessionId=session_id)
         self.b2g = 'b2g' in self.session
         return self.session
 
     @property
     def test_name(self):
         return self._test_name
--- a/testing/marionette/transport/marionette_transport/transport.py
+++ b/testing/marionette/transport/marionette_transport/transport.py
@@ -15,24 +15,25 @@ class MarionetteTransport(object):
         always preceded by the message length and a colon, e.g.,
 
         20:{'command': 'test'}
     """
 
     max_packet_length = 4096
     connection_lost_msg = "Connection to Marionette server is lost. Check gecko.log (desktop firefox) or logcat (b2g) for errors."
 
-    def __init__(self, addr, port, socket_timeout=360.0):
+    def __init__(self, addr, port, socket_timeout=360.0, instance=None):
         self.addr = addr
         self.port = port
         self.socket_timeout = socket_timeout
         self.sock = None
         self.traits = None
         self.applicationType = None
         self.actor = 'root'
+        self.instance = instance
 
     def _recv_n_bytes(self, n):
         """ Convenience method for receiving exactly n bytes from
             self.sock (assuming it's open and connected).
         """
         data = ''
         while len(data) < n:
             chunk = self.sock.recv(n - len(data))
@@ -42,41 +43,54 @@ class MarionetteTransport(object):
         return data
 
     def receive(self):
         """ Receive the next complete response from the server, and return
             it as a dict.  Each response from the server is prepended by
             len(message) + ':'.
         """
         assert(self.sock)
-        response = self.sock.recv(10)
-        initial_size = len(response)
-        sep = response.find(':')
-        length = response[0:sep]
-        if length != '':
-            response = response[sep + 1:]
-            remaining_size = int(length) + 1 + len(length) - initial_size
-            response += self._recv_n_bytes(remaining_size)
-            return json.loads(response)
-        else:
-            raise IOError(self.connection_lost_msg)
+        now = time.time()
+        response = ''
+        bytes_to_recv = 10
+        while time.time() - now < self.socket_timeout:
+            try:
+                response += self.sock.recv(bytes_to_recv)
+            except socket.timeout:
+                pass
+            if self.instance and not hasattr(self.instance, 'detached'):
+                # If we've launched the binary we've connected to, make
+                # sure it hasn't died.
+                poll = self.instance.runner.process_handler.proc.poll()
+                if poll is not None:
+                    # process isn't alive
+                    raise IOError("process has died with return code %d" % poll)
+            sep = response.find(':')
+            if sep > -1:
+                length = response[0:sep]
+                remaining = response[sep + 1:]
+                if len(remaining) == int(length):
+                    return json.loads(remaining)
+                bytes_to_recv = int(length) - len(remaining)
+        raise IOError(self.connection_lost_msg)
 
     def connect(self):
         """ Connect to the server and process the hello message we expect
             to receive in response.
         """
         self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
         self.sock.settimeout(self.socket_timeout)
         try:
             self.sock.connect((self.addr, self.port))
         except:
             # Unset self.sock so that the next attempt to send will cause
             # another connection attempt.
             self.sock = None
             raise
+        self.sock.settimeout(2.0)
         hello = self.receive()
         self.traits = hello.get('traits')
         self.applicationType = hello.get('applicationType')
 
         # get the marionette actor id
         response = self.send({'to': 'root', 'name': 'getMarionetteID'})
         self.actor = response['id']