Bug 1531611 - Check for ANR and tombstone files in all android tests; r=bc
author Geoff Brown <gbrown@mozilla.com>
Tue, 05 Mar 2019 14:22:45 +0000
changeset 520256 6da1179f8eff22e45ead3b9b579de6b8a89e7374
parent 520255 84c79296c4ac7442815d6b5841910a3c51b74bd1
child 520257 dd398512a953090eb3055a3008e7bdedd8b692eb
push id 10862
push user ffxbld-merge
push date Mon, 11 Mar 2019 13:01:11 +0000
treeherder mozilla-beta@a2e7f5c935da [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers bc
bugs 1531611
milestone 67.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1531611 - Check for ANR and tombstone files in all Android tests; r=bc. The existing ANR and tombstone checks in remoteautomation are used by mochitests and reftests, but they are awkward for other harnesses, such as cppunit, that do not use remoteautomation, and they are easily missed. This patch moves that code to the mozharness AndroidMixin, treating ANRs and tombstones like the logcat: any old logs are deleted when the mozharness script starts, and any logs found at the end of the run are moved to the upload directory. The try push at https://treeherder.mozilla.org/#/jobs?repo=try&tier=1%2C2%2C3&revision=f3de1e9836da2b0c9232f5d92c751b979459e19b demonstrates tombstone artifacts for the Android 7.0 cppunit tests. Differential Revision: https://phabricator.services.mozilla.com/D21774
build/mobile/remoteautomation.py
layout/tools/reftest/remotereftest.py
testing/mochitest/runrobocop.py
testing/mochitest/runtestsremote.py
testing/mozharness/mozharness/mozilla/testing/android.py
--- a/build/mobile/remoteautomation.py
+++ b/build/mobile/remoteautomation.py
@@ -1,21 +1,18 @@
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 import datetime
-import glob
 import time
 import re
-import os
 import posixpath
 import tempfile
 import shutil
-import sys
 
 from automation import Automation
 from mozdevice import ADBTimeoutError
 from mozlog import get_default_logger
 from mozscreenshot import dump_screen, dump_device_screen
 import mozcrash
 
 # signatures for logcat messages that we don't care about much
@@ -136,80 +133,17 @@ class RemoteAutomation(Automation):
                                % self.lastTestSeen)
         if status == 2:
             self.log.error("TEST-UNEXPECTED-FAIL | %s | "
                            "application timed out after %d seconds with no output"
                            % (self.lastTestSeen, int(timeout)))
 
         return status
 
-    def deleteANRs(self):
-        # Remove files from the dalvik stack-trace directory.
-        if not self.device.is_dir(self.device.stack_trace_dir, root=True):
-            return
-        try:
-            for trace_file in self.device.ls(self.device.stack_trace_dir, root=True):
-                trace_path = posixpath.join(self.device.stack_trace_dir, trace_file)
-                self.device.chmod(trace_path, root=True)
-                self.device.rm(trace_path, root=True)
-        except Exception as e:
-            print("Error deleting %s: %s" % (self.device.stack_trace_dir, str(e)))
-
-    def checkForANRs(self):
-        if not self.device.is_dir(self.device.stack_trace_dir):
-            print("%s not found" % self.device.stack_trace_dir)
-            return
-        try:
-            for trace_file in self.device.ls(self.device.stack_trace_dir, root=True):
-                trace_path = posixpath.join(self.device.stack_trace_dir, trace_file)
-                t = self.device.get_file(trace_path)
-                if t:
-                    stripped = t.strip()
-                    if len(stripped) > 0:
-                        print("Contents of %s:" % trace_path)
-                        print(t)
-            # Once reported, delete traces
-            self.deleteANRs()
-        except Exception as e:
-            print("Error pulling %s: %s" % (self.device.stack_trace_dir, str(e)))
-
-    def deleteTombstones(self):
-        # delete any tombstone files from device
-        self.device.rm("/data/tombstones", force=True, recursive=True, root=True)
-
-    def checkForTombstones(self):
-        # pull any tombstones from device and move to MOZ_UPLOAD_DIR
-        remoteDir = "/data/tombstones"
-        uploadDir = os.environ.get('MOZ_UPLOAD_DIR', None)
-        if uploadDir:
-            if not os.path.exists(uploadDir):
-                os.mkdir(uploadDir)
-            if self.device.is_dir(remoteDir):
-                # copy tombstone files from device to local upload directory
-                self.device.chmod(remoteDir, recursive=True, root=True)
-                self.device.pull(remoteDir, uploadDir)
-                self.deleteTombstones()
-                for f in glob.glob(os.path.join(uploadDir, "tombstone_??")):
-                    # add a unique integer to the file name, in case there are
-                    # multiple tombstones generated with the same name, for
-                    # instance, after multiple robocop tests
-                    for i in xrange(1, sys.maxint):
-                        newname = "%s.%d.txt" % (f, i)
-                        if not os.path.exists(newname):
-                            os.rename(f, newname)
-                            break
-            else:
-                print("%s does not exist; tombstone check skipped" % remoteDir)
-        else:
-            print("MOZ_UPLOAD_DIR not defined; tombstone check skipped")
-
     def checkForCrashes(self, symbolsPath):
-        self.checkForANRs()
-        self.checkForTombstones()
-
         logcat = self.device.get_logcat(
             filter_out_regexps=fennecLogcatFilters)
 
         javaException = mozcrash.check_for_java_exception(
             logcat, test_name=self.lastTestSeen)
         if javaException:
             return True
 
--- a/layout/tools/reftest/remotereftest.py
+++ b/layout/tools/reftest/remotereftest.py
@@ -191,18 +191,16 @@ class RemoteReftest(RefTest):
 
         self.remoteCache = os.path.join(options.remoteTestRoot, "cache/")
 
         # Check that Firefox is installed
         expected = options.app.split('/')[-1]
         if not self.device.is_app_installed(expected):
             raise Exception("%s is not installed on this device" % expected)
 
-        self.automation.deleteANRs()
-        self.automation.deleteTombstones()
         self.device.clear_logcat()
 
         self.device.rm(self.remoteCache, force=True, recursive=True)
 
         procName = options.app.split('/')[-1]
         self.device.stop_application(procName)
         if self.device.process_exist(procName):
             self.log.error("unable to kill %s before starting tests!" % procName)
--- a/testing/mochitest/runrobocop.py
+++ b/testing/mochitest/runrobocop.py
@@ -91,18 +91,16 @@ class RobocopTestRunner(MochitestDesktop
         """
         # Despite our efforts to clean up servers started by this script, in practice
         # we still see infrequent cases where a process is orphaned and interferes
         # with future tests, typically because the old server is keeping the port in use.
         # Try to avoid those failures by checking for and killing servers before
         # trying to start new ones.
         self.killNamedProc('ssltunnel')
         self.killNamedProc('xpcshell')
-        self.auto.deleteANRs()
-        self.auto.deleteTombstones()
         procName = self.options.app.split('/')[-1]
         self.device.stop_application(procName)
         if self.device.process_exist(procName):
             self.log.warning("unable to kill %s before running tests!" % procName)
         self.device.rm(self.remoteScreenshots, force=True, recursive=True)
         self.device.rm(self.remoteMozLog, force=True, recursive=True)
         self.device.mkdir(self.remoteMozLog)
         logParent = posixpath.dirname(self.remoteLogFile)
--- a/testing/mochitest/runtestsremote.py
+++ b/testing/mochitest/runtestsremote.py
@@ -64,18 +64,16 @@ class MochiRemote(MochitestDesktop):
                                            self.remoteLogFile, processArgs=process_args)
         self.environment = self.automation.environment
 
         # Check that Firefox is installed
         expected = options.app.split('/')[-1]
         if not self.device.is_app_installed(expected):
             raise Exception("%s is not installed on this device" % expected)
 
-        self.automation.deleteANRs()
-        self.automation.deleteTombstones()
         self.device.clear_logcat()
 
         self.remoteModulesDir = posixpath.join(options.remoteTestRoot, "modules/")
 
         self.remoteCache = posixpath.join(options.remoteTestRoot, "cache/")
         self.device.rm(self.remoteCache, force=True, recursive=True)
 
         # move necko cache to a location that can be cleaned up
--- a/testing/mozharness/mozharness/mozilla/testing/android.py
+++ b/testing/mozharness/mozharness/mozilla/testing/android.py
@@ -3,16 +3,17 @@
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this file,
 # You can obtain one at http://mozilla.org/MPL/2.0/.
 # ***** END LICENSE BLOCK *****
 
 import datetime
 import glob
 import os
+import posixpath
 import re
 import signal
 import subprocess
 import time
 import tempfile
 from threading import Timer
 from mozharness.mozilla.automation import TBPL_RETRY, EXIT_STATUS_DICT
 from mozharness.base.script import PreScriptAction, PostScriptAction
@@ -410,16 +411,80 @@ class AndroidMixin(object):
         self.info("Killing every process called %s" % process_name)
         out = subprocess.check_output(['ps', '-A'])
         for line in out.splitlines():
             if process_name in line:
                 pid = int(line.split(None, 1)[0])
                 self.info("Killing pid %d." % pid)
                 os.kill(pid, signal.SIGKILL)
 
+    def delete_ANRs(self):  # Best-effort removal of old ANR traces from the device.
+        remote_dir = self.device.stack_trace_dir
+        try:
+            if not self.device.is_dir(remote_dir, root=True):  # nothing to delete yet
+                self.device.mkdir(remote_dir, root=True)  # device-side, like is_dir/ls/rm
+                self.device.chmod(remote_dir, root=True)  # device-side as well
+                self.info("%s created" % remote_dir)
+                return
+            for trace_file in self.device.ls(remote_dir, root=True):
+                trace_path = posixpath.join(remote_dir, trace_file)
+                self.device.chmod(trace_path, root=True)
+                self.device.rm(trace_path, root=True)
+                self.info("%s deleted" % trace_path)
+        except Exception as e:  # log the failure and carry on rather than raise
+            self.info("failed to delete %s: %s %s" % (remote_dir, type(e).__name__, str(e)))
+
+    def check_for_ANRs(self):
+        """
+        Pull the device's ANR (stack trace) directory into the blob upload directory.
+        """
+        abs_dirs = self.query_abs_dirs()
+        anr_dir = self.device.stack_trace_dir
+        try:
+            if self.device.is_dir(anr_dir):  # NOTE(review): no root=True, unlike delete_ANRs
+                self.device.chmod(anr_dir, recursive=True, root=True)
+                self.device.pull(anr_dir, abs_dirs['abs_blob_upload_dir'])
+                self.delete_ANRs()  # clear the traces once they have been pulled
+            else:
+                self.info("%s not found; ANR check skipped" % anr_dir)
+        except Exception as err:
+            self.info("failed to pull %s: %s %s" % (anr_dir, type(err).__name__, str(err)))
+
+    def delete_tombstones(self):  # Best-effort removal of old tombstones from the device.
+        remote_dir = "/data/tombstones"
+        try:
+            if not self.device.is_dir(remote_dir, root=True):  # nothing to delete yet
+                self.device.mkdir(remote_dir, root=True)  # device-side, like is_dir/ls/rm
+                self.device.chmod(remote_dir, root=True)  # device-side as well
+                self.info("%s created" % remote_dir)
+                return
+            for tombstone_file in self.device.ls(remote_dir, root=True):
+                tombstone_path = posixpath.join(remote_dir, tombstone_file)
+                self.device.chmod(tombstone_path, root=True)
+                self.device.rm(tombstone_path, root=True)
+                self.info("%s deleted" % tombstone_path)
+        except Exception as e:  # log the failure and carry on rather than raise
+            self.info("failed to delete %s: %s %s" % (remote_dir, type(e).__name__, str(e)))
+
+    def check_for_tombstones(self):
+        """
+        Pull the device's /data/tombstones directory into the blob upload directory.
+        """
+        abs_dirs = self.query_abs_dirs()
+        tombstone_dir = "/data/tombstones"
+        try:
+            if self.device.is_dir(tombstone_dir):  # NOTE(review): no root=True; confirm
+                self.device.chmod(tombstone_dir, recursive=True, root=True)
+                self.device.pull(tombstone_dir, abs_dirs['abs_blob_upload_dir'])
+                self.delete_tombstones()  # clear the files once they have been pulled
+            else:
+                self.info("%s not found; tombstone check skipped" % tombstone_dir)
+        except Exception as err:
+            self.info("failed to pull %s: %s %s" % (tombstone_dir, type(err).__name__, str(err)))
+
     # Script actions
 
     def setup_avds(self):
         """
         If tooltool cache mechanism is enabled, the cached version is used by
         the fetch command. If the manifest includes an "unpack" field, tooltool
         will unpack all compressed archives mentioned in the manifest.
         """
@@ -495,16 +560,18 @@ class AndroidMixin(object):
                                       "Check emulator")
             if not emulator_ok:
                 self.fatal('INFRA-ERROR: Unable to start emulator after %d attempts'
                            % max_restarts, EXIT_STATUS_DICT[TBPL_RETRY])
 
         self.mkdir_p(self.query_abs_dirs()['abs_blob_upload_dir'])
         self.dump_perf_info()
         self.logcat_start()
+        self.delete_ANRs()
+        self.delete_tombstones()
         # Get a post-boot device process list for diagnostics
         self.info(self.shell_output('ps'))
 
     @PreScriptAction('run-tests')
     def timed_screenshots(self, action, success=None):
         """
         If configured, start screenshot timers.
         """
@@ -527,11 +594,13 @@ class AndroidMixin(object):
         """
         Stop logcat and kill the emulator, if necessary.
         """
         if not self.is_android:
             return
 
         for t in self.timers:
             t.cancel()
+        self.check_for_ANRs()
+        self.check_for_tombstones()
         self.logcat_stop()
         if self.is_emulator:
             self.kill_processes(self.config["emulator_process_name"])