Rollback 2724339a1491 and ade48470874e; r=dustin slaves
authorMorgan Phillips <winter2718@gmail.com>
Mon, 05 Oct 2015 11:18:06 -0700
branchslaves
changeset 1456 eff229ee62abb9c4d6039b9cefa3a5d69947a911
parent 1435 2724339a14919de4f45ca76870f46eb86dca0c7a
child 1477 d5df719e0f992e5df5f096119276d92799b52b6a
push id1219
push usermphillips@mozilla.com
push dateMon, 12 Oct 2015 19:39:55 +0000
reviewersdustin
Rollback 2724339a1491 and ade48470874e; r=dustin These changesets depended on environment variables for configuration/proper usage, unfortunately the buildbot.tac based deployment of idleizer is not a good fit for this.
slave/buildslave/idleizer.py
slave/buildslave/test/test_idleizer.py
--- a/slave/buildslave/idleizer.py
+++ b/slave/buildslave/idleizer.py
@@ -6,18 +6,16 @@ from twisted.internet import reactor
 from buildslave import bot
 
 # states for use below (using symbols prevents typos)
 
 STOPPED = "stopped"
 DISCONNECTED = "disconnected"
 CONNECTED = "connected"
 
-IDLE_MESSAGE = "Idle State Detected: Exiting"
-
 
 class Idleizer(service.Service):
     """
     A service that will monitor a buildslave instance for idleness and
     disconnectedness, and gracefully restart it if either of those goes on for
     longer than a configured time.
 
     This is intended to be instantiated directly in buildbot.tac.
@@ -37,16 +35,18 @@ class Idleizer(service.Service):
         """
         self.buildslave_svc = buildslave_svc
         self.max_idle_time = max_idle_time
         self.max_disconnected_time = max_disconnected_time
 
         self._timer = None
         self._state = STOPPED
 
+        # don't reboot unless this class requested the stop
+        self._reboot_on_shutdown = False
         self._remote_shutdown_requested = False
 
         self._setUpMonkeyPatches()
 
     def startService(self):
         self._monkey.patch()
         self.changeState(DISCONNECTED)
 
@@ -59,61 +59,62 @@ class Idleizer(service.Service):
             self._timer.cancel()
         self._timer = None
 
     def _setTimer(self, when):
         self._clearTimer()
 
         def fired():
             self._timer = None
-            self.doStopClient()
+            self.doRestart()
         self._timer = reactor.callLater(when, fired)
 
     def callRemoteShutdown(self):
-        log.msg(IDLE_MESSAGE)
-        # The graceful shutdown method is nice in buildbot, because it will
-        # only exit the client after no work is being done. In the rare case
-        # that we detect an idle state, but then pick up a job just after
-        # using it will save us from killing a the job.
+        log.msg("Telling the master we want to shutdown after any running builds are finished")
         self._remote_shutdown_requested = True
         d = self.buildslave_svc.bf.perspective.callRemote("shutdown")
 
         def _shutdownfailed(err):
             log.msg('callRemote("shutdown") failed')
             log.err(err)
-            log.msg("stopping client NOW, since the master won't talk to us")
-            # Unset the requested flag for good measure, in case stopping
-            # the reactor fails
+            log.msg("rebooting NOW, since the master won't talk to us")
+            # Unset the requested flag for good measure, in case no reboot
+            # happens
             self._remote_shutdown_requested = False
-            self.stopClient()
+            self.reboot()
         d.addErrback(_shutdownfailed)
         # if this deferred succeeds, then we'll get a call to remote_shutdown,
-        # which will call self.stopClient.
+        # which will call self.reboot.
 
     def changeState(self, new_state):
         if new_state == self._state:
             return
 
         self._clearTimer()
 
         self._state = new_state
         if new_state == CONNECTED:
             self._setTimer(self.max_idle_time)
         elif new_state == DISCONNECTED:
             self._setTimer(self.max_disconnected_time)
 
     def registerActivity(self):
         if self._state == CONNECTED:
-            if sys.platform in ('darwin', 'linux2') and not self._remote_shutdown_requested:
+            if sys.platform == ('darwin', 'linux2') and not self._remote_shutdown_requested:
                 # Force buildbot to shutdown after a single job runs
                 self.callRemoteShutdown()
             self._setTimer(self.max_idle_time)
         else:
             log.msg("Idleizer: activity while not connected??")
 
+    def maybeReboot(self):
+        if self._reboot_on_shutdown:
+            self.reboot()
+        # note that the reactor.stop() will continue when this method returns
+
     def _setUpMonkeyPatches(self):
         self._monkey = monkey.MonkeyPatcher()
 
         def changeStateOn(obj, name, state):
             old_fn = getattr(obj, name)
 
             def new_fn(*args, **kwargs):
                 self.changeState(state)
@@ -135,59 +136,55 @@ class Idleizer(service.Service):
 
             def new_fn(*args, **kwargs):
                 self.registerActivity()
                 return old_fn(*args, **kwargs)
             self._monkey.addPatch(obj, name, new_fn)
 
         registerActivityOn(bot.SlaveBuilder, 'activity')
 
-        def stopClientOn(obj, name):
+        def maybeRebootOn(obj, name):
             old_fn = getattr(obj, name)
 
             def new_fn(*args, **kwargs):
-                self.stopClient()
+                self.maybeReboot()
                 return old_fn(*args, **kwargs)
             self._monkey.addPatch(obj, name, new_fn)
 
-        stopClientOn(bot.SlaveBuilder, 'remote_shutdown')
-        stopClientOn(bot.Bot, 'remote_shutdown')
+        maybeRebootOn(bot.SlaveBuilder, 'remote_shutdown')
+        maybeRebootOn(bot.Bot, 'remote_shutdown')
 
-    def doStopClient(self):
+    def doRestart(self):
+        log.msg("I feel very idle and was thinking of rebooting as soon as "
+                "the buildmaster says it's OK")
+        self._reboot_on_shutdown = True
+
         # this is a re-implementation of the gracefulShutdown method from
         # bot.py, with a few tweaks:
         # - if no perspective, reboot rather than calling reactor.stop
         # - if the callRemote fails, reboot immediately (this will always fail
         #   until we upgrade the masters to 0.8.3 or higher)
 
         if not self.buildslave_svc.bf.perspective:
-            log.msg("No active connection, stopping client NOW")
-            self.stopClient()
+            log.msg("No active connection, rebooting NOW")
+            self.reboot()
             return
 
         self.callRemoteShutdown()
 
-    def reboot_or_halt(self, halt=False):
-        log.msg("Invoking platform-specific reboot/shutdown command")
+    def reboot(self):
+        log.msg("Invoking platform-specific reboot command")
         if sys.platform in ('darwin', 'linux2'):
             # -S means to accept password from stdin, which we then redirect from
             # /dev/null
             # This results in sudo not waiting forever for a password.  If sudoers
             # isn't set up properly, this will fail immediately
-            if not halt:
-                os.system("sudo -S reboot < /dev/null")
-            else:
-                os.system("sudo -S shutdown -h now < /dev/null")
+            os.system("sudo -S reboot < /dev/null")
 
         # Windows
         elif sys.platform == "win32":
-            if not halt:
-                os.system("shutdown -f -r -t 0 -c 'idleizer restart' -d 'p:4:1'")
-            else:
-                os.system("shutdown -f -s -t 0 -c 'idleizer shutdown' -d 'p:4:1'")
+            os.system("shutdown -f -r -t 0")
         else:
             log.msg("unknown platform " + sys.platform)
 
-    def stopClient(self):
-        log.msg(IDLE_MESSAGE)
-        if not os.environ.get('IDLEIZER_DISABLE_SHUTDOWN'):
-            self.reboot_or_halt(os.environ.get('IDLEIZER_HALT_ON_IDLE'))
+        # After starting the shutdown, we just die.  If the shutdown fails, then
+        # this should trigger some extra monitoring alerts
         reactor.stop()
--- a/slave/buildslave/test/test_idleizer.py
+++ b/slave/buildslave/test/test_idleizer.py
@@ -1,105 +1,100 @@
 import mock
-import twisted
 from buildslave import idleizer, bot
 from twisted.trial import unittest
 from twisted.internet import reactor
 from twisted.internet import task
 
-
 class Idleizer(unittest.TestCase):
 
     def setUp(self):
         self.buildslave_svc = mock.Mock()
         self.idl = idleizer.Idleizer(self.buildslave_svc,
-                                     max_idle_time=100, max_disconnected_time=20)
-        self.client_stop_at = None
+                max_idle_time=100, max_disconnected_time=20)
 
-        def fakeClientStop():
-            self.client_stop_at = reactor.seconds()
-        self.patch(self.idl, 'doStopClient', fakeClientStop)
+        self.restart_at = None
+        def fakeRestart():
+            self.restart_at = reactor.seconds()
+        self.patch(self.idl, 'doRestart', fakeRestart)
 
         self.clock = task.Clock()
         self.patch(reactor, 'callLater', self.clock.callLater)
         self.patch(reactor, 'seconds', self.clock.seconds)
 
     def test_initially_disconnected(self):
         self.idl.changeState("disconnected")
         self.clock.advance(10)
         # repeating the same state doesn't hurt
         self.idl.changeState("disconnected")
         self.clock.advance(10)
-        self.assertEqual(self.client_stop_at, 20)
+        self.assertEqual(self.restart_at, 20)
 
     def test_later_disconnected(self):
         self.idl.changeState("connected")
         self.clock.advance(17)
         self.idl.changeState("disconnected")
         self.clock.advance(10)
         self.clock.advance(10)
-        self.assertEqual(self.client_stop_at, 37)
+        self.assertEqual(self.restart_at, 37)
 
     def test_connected(self):
         self.idl.changeState("connected")
         self.clock.advance(10)
         self.idl.changeState("disconnected")
         self.clock.advance(10)
         # should start timing from here, at 20s
         self.idl.changeState("connected")
         self.clock.advance(10)
         # and a second "connected" should not reset the timer
         self.idl.changeState("connected")
         self.clock.pump(10 for _ in range(15))
-        self.assertEqual(self.client_stop_at, 120)
+        self.assertEqual(self.restart_at, 120)
 
     def test_activity(self):
         self.idl.changeState("connected")
         self.clock.advance(10)
         # should start timing from here, at 10s
         self.idl.registerActivity()
         self.clock.advance(10)
         # and a second "connected" should not reset the timer
         self.idl.changeState("connected")
         self.clock.pump(10 for _ in range(15))
-        self.assertEqual(self.client_stop_at, 110)
+        self.assertEqual(self.restart_at, 110)
 
     def test_monkey_gotPerspective(self):
         self.idl.startService()
-        self.buildslave_svc.bf.gotPerspective()  # mark as connected
+        self.buildslave_svc.bf.gotPerspective() # mark as connected
         self.clock.pump(10 for _ in range(15))
-        self.assertEqual(self.client_stop_at, 100)
+        self.assertEqual(self.restart_at, 100)
         return self.idl.stopService()
 
     def test_monkey_startedConnecting(self):
         self.idl.startService()
-        self.buildslave_svc.bf.startedConnecting()  # mark as disconnected
+        self.buildslave_svc.bf.startedConnecting() # mark as disconnected
         self.clock.pump(10 for _ in range(3))
-        self.assertEqual(self.client_stop_at, 20)
+        self.assertEqual(self.restart_at, 20)
         return self.idl.stopService()
 
     def test_monkey_clientConnectionFailed(self):
         self.idl.startService()
-        self.buildslave_svc.bf.clientConnectionFailed()  # mark as disconnected
+        self.buildslave_svc.bf.clientConnectionFailed() # mark as disconnected
         self.clock.pump(10 for _ in range(3))
-        self.assertEqual(self.client_stop_at, 20)
+        self.assertEqual(self.restart_at, 20)
         return self.idl.stopService()
 
     def test_monkey_clientConnectionLost(self):
         self.idl.startService()
-        self.buildslave_svc.bf.clientConnectionLost()  # mark as disconnected
+        self.buildslave_svc.bf.clientConnectionLost() # mark as disconnected
         self.clock.pump(10 for _ in range(3))
-        self.assertEqual(self.client_stop_at, 20)
+        self.assertEqual(self.restart_at, 20)
         return self.idl.stopService()
 
-    @mock.patch('twisted.python.log.msg')
-    def test_monkey_activity(self, args, **kwargs):
+    def test_monkey_activity(self):
         sb = bot.SlaveBuilder('fred')
         self.idl.startService()
         self.idl.changeState("connected")
         self.clock.advance(10)
         # inject some activity now, at 10s
         sb.activity()
         self.clock.pump(10 for _ in range(11))
-        self.assertEqual(self.client_stop_at, 110)
-        # verify that our log message shows up for graceful shutdowns
-        twisted.python.log.msg.assert_called_once_with(idleizer.IDLE_MESSAGE)
+        self.assertEqual(self.restart_at, 110)
         return self.idl.stopService()