php/inc/AnnotatedSummaryGenerator.php
author Ed Morley <emorley@mozilla.com>
Wed, 01 Apr 2015 14:04:18 +0400
changeset 1519 134bdfcf7e2d4a46ef468d5d4f687b257504c19a
parent 1511 ec1850f9e430773b6036a65e41d95382ae12ba3b
permissions -rw-r--r--
Bug 1054977 - Revert EOL changes to make future local use easier. TBPL has now been switched off in production, so the src repo is no longer being used. However, in case anyone in the future wants to try using TBPL locally (eg for a side by side comparison when debugging a Treeherder issue), let's revert a bunch of the EOL changes we made, so it's not quite so limited in functionality.

<?php
/* -*- Mode: PHP; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set sw=2 ts=2 et tw=80 : */

require_once 'inc/ParallelLogGenerating.php';
require_once 'inc/GzipUtils.php';
require_once 'inc/Debug.php';
require_once 'inc/JSON.php';

/**
 * AnnotatedSummaryGenerator
 *
 * Transforms a plain text error summary into an HTML one where every failure
 * line is followed by "orange" bug suggestions. Suggestions are looked up in
 * the bugscache table first and, on a cache miss, fetched from the Bugzilla
 * REST API and stored in that table.
 */
class AnnotatedSummaryGenerator implements LogGenerator {
  // These used to be created dynamically in the constructor (and were thus
  // public). They are declared explicitly, with the same visibility, so that
  // the class does not rely on dynamic properties.
  public $rawSummary;
  public $logDescription;
  // Set to true by processLine() as soon as one leak failure line is seen.
  public $hasLeak = false;

  /**
   * @param mixed  $rawSummary     Log record of the unannotated summary; it is
   *                               read via GzipUtils::getLines() and its '_id'
   *                               entry identifies the annotated summary.
   * @param string $logDescription Description of the run, emitted as the
   *                               data-signature attribute of each suggestion.
   */
  public function __construct($rawSummary, $logDescription) {
    $this->rawSummary = $rawSummary;
    $this->logDescription = $logDescription;
    $this->hasLeak = false;
  }

  /**
   * Builds the annotated summary and writes it to the database.
   *
   * @param mixed $log Log record that GzipUtils::writeToDb() stores the
   *                   generated HTML under.
   */
  public function generate($log) {
    $maxAnnotatedFailures = 20;
    $logId = $this->getUrlEncodedLogId();
    $rawSummaryLines = GzipUtils::getLines($this->rawSummary);
    $annotatedLines = array();
    foreach ($rawSummaryLines as $i => $line) {
      // Only display the first N failures from the unannotated summary
      if ($i >= $maxAnnotatedFailures) {
        $totalParsedFailures = count($rawSummaryLines);
        // In extreme cases we will have truncated the raw log too (in LogParser.php),
        // so cannot state the actual number of failures. In which case, the truncation
        // message makes $totalParsedFailures exceed $maxParsedFailures. $maxParsedFailures
        // here should be kept in sync with LogParser.php's $maxParsedFailures.
        // TODO: Move these prefs to config.php so we don't have to keep them in sync
        // (once we have an in-repo config.php with passwords separated out, that isn't
        // a PITA to update).
        $maxParsedFailures = 100;
        if ($totalParsedFailures > $maxParsedFailures)
          $totalParsedFailures = $maxParsedFailures . '+';
        $annotatedLines[] = "Only displaying first $maxAnnotatedFailures of $totalParsedFailures" .
                            " failures - <a href=\"php/getParsedLog.php?id=" . $logId .
                            "\" target=\"_blank\">View log</a>.";
        break;
      }
      $this->processLine($annotatedLines, $line);
    }
    if ($this->hasLeak) {
      $annotatedLines[] = "<a href=\"php/getLeakAnalysis.php?id=" . $logId . "&ignore_deprecation_notice=1" .
        "\" target=\"_blank\">Analyze the leak.</a>";
    }
    GzipUtils::writeToDb($log, implode("", $annotatedLines));
  }

  /**
   * Makes sure the annotated summary for this raw summary has been generated.
   *
   * @return array The log record ("_id" and "type") of the annotated summary.
   */
  public function ensureAnnotatedSummaryExists() {
    $log = array("_id" => $this->rawSummary['_id'], "type" => "annotatedsummary");
    ParallelLogGenerating::ensureLogExists($log, $this);
    return $log;
  }

  /**
   * Returns the "id" query string parameter in a form that is safe to embed
   * in the links this class generates.
   *
   * The value is supplied by the client, so it must never be echoed verbatim.
   * urlencode() leaves only letters, digits and "-_.+%" in its output, which
   * is therefore safe both as a query string value and inside a double-quoted
   * HTML attribute.
   *
   * @return string The encoded id, or '' if none (or a non-scalar) was passed.
   */
  protected function getUrlEncodedLogId() {
    if (!isset($_GET['id']) || !is_scalar($_GET['id'])) {
      return '';
    }
    return urlencode((string) $_GET['id']);
  }

  /**
   * Renders one bug suggestion as an empty <span> carrying data-* attributes.
   *
   * @param object $bug  Bug with id, summary, status and resolution properties.
   *                     It is not modified.
   * @param string $line The raw failure line the suggestion belongs to.
   * @return string HTML fragment, terminated by a newline.
   */
  protected function generateSuggestion($bug, $line) {
    // Escape into locals rather than writing back to $bug, so that rendering
    // the same bug object more than once cannot double-escape its summary.
    $bugId = htmlspecialchars((string) $bug->id);
    $summary = htmlspecialchars($bug->summary);
    // double_encode is off here in case the caller hands in a description
    // that already contains HTML entities.
    $signature = htmlspecialchars((string) $this->logDescription, ENT_COMPAT, 'UTF-8', false);
    $logLine = htmlspecialchars($line);
    $status = htmlspecialchars("$bug->status $bug->resolution");
    return "<span data-bugid=\"$bugId\" " .
                 "data-summary=\"$summary\" " .
                 "data-signature=\"$signature\" " .
                 "data-logline=\"$logLine\" " .
                 "data-status=\"$status\"" .
           "></span>\n";
  }

  /**
   * Appends the escaped failure line, followed by its bug suggestions (or an
   * error marker if there are too many of them), to $annotatedLines.
   *
   * @param array  $annotatedLines Output lines; appended to in place.
   * @param string $line           One line of the raw failure summary.
   */
  protected function processLine(&$annotatedLines, $line) {
    $bugs = array();
    $annotatedLines[] = htmlspecialchars($line);

    // On windows, $line can have spurious whitespace/line-endings appended
    $trimmedLine = trim($line);

    // Our standard logging format uses the pipe symbol as a delimiter.
    $lineParts = preg_split("#\s\|\s#", $trimmedLine);
    if (count($lineParts) >= 3) {
      // Is in the "FAILURE-TYPE | testNameOrFilePath | message" type format.
      // testNameOrFilePath can either be a test filepath, or an arbitrary
      // step/test name (eg 'Shutdown' or 'leakcheck').
      $testNameOrFilePath = $lineParts[1];
      $message = $lineParts[2];
      // Leak failure messages are of the form:
      // leakcheck | .*\d+ bytes leaked (Object-1, Object-2, Object-3, ...)
      // in which case we search for the list of leaked objects.
      if (preg_match('/\d+ bytes leaked \((.+)\)$/', $message, $matches)) {
        $this->hasLeak = true;
        $searchTerm = $matches[1];
      } else {
        // Split the potential filepath on both forward and backward slashes.
        $filePathParts = preg_split("#[/\\\\]#", $testNameOrFilePath);
        // Remove the path from the search term if present.
        $searchTerm = end($filePathParts);
      }
    }

    // If the failure line was not in the pipe symbol delimited format or the search term
    // will likely return too many (or irrelevant) results (eg: too short or matches terms
    // on the blacklist), then we fall back to searching for the entire failure line.
    if ((!isset($searchTerm) || !$this->isHelpfulSearchTerm($searchTerm)) &&
        $this->isHelpfulSearchTerm($trimmedLine)) {
      $searchTerm = $trimmedLine;
    }

    if (isset($searchTerm)) {
      $bugs = $this->getBugsForFailure($searchTerm);
    }

    // For crashes with no suggestions, search for the top frame as a fallback.
    // Catches shutdown crashes/anything else that is blacklisted, as well as GC
    // crashes that occur in too many tests to put all of them in bug summaries.
    if (empty($bugs) &&
        preg_match('/application crashed \[@ (.+)\]$/', $trimmedLine, $matches) &&
        $this->isHelpfulSearchTerm($matches[1])) {
      $bugs = $this->getBugsForFailure($matches[1]);
    }

    $numSuggestions = count($bugs);
    if ($numSuggestions > 0) {
      Debug::output('  Bugs returned: '.$numSuggestions);
      // If we had more than 50 bugs suggested, give up
      // TODO: Consider checking how many have resolution==''
      // and showing just those.
      if ($numSuggestions > 50) {
        $annotatedLines[] = "<span class=\"suggestionError\">Exceeded max bug suggestions" .
                            " ($numSuggestions bugs found)</span>";
        return;
      }
    }

    foreach ($bugs as $bug) {
      $annotatedLines[] = $this->generateSuggestion($bug, $line);
    }
  }

  /**
   * Tells whether a search term is specific enough to yield useful suggestions.
   *
   * @param string $searchTerm Candidate search term; it is trimmed before checking.
   * @return bool True if the term is longer than 4 bytes and not blacklisted.
   */
  protected function isHelpfulSearchTerm($searchTerm) {
    // Search terms that will match too many bug summaries and so not result in useful suggestions.
    $blacklist = array(
      'automation.py',
      'remoteautomation.py',
      'Shutdown',
      'undefined',
      'Main app process exited normally',
      'Traceback (most recent call last):',
      'Return code: 0',
      'Return code: 1',
      'Return code: 2',
      'Return code: 9',
      'Return code: 10',
      'Exiting 1',
      'Exiting 9',
      'CrashingThread(void *)',
      'libSystem.B.dylib + 0xd7a',
      'linux-gate.so + 0x424',
      'TypeError: content is null',
      'leakcheck'
    );
    $searchTerm = trim($searchTerm);
    // Strict comparison: both sides are strings, so no type juggling is wanted.
    return (strlen($searchTerm) > 4 && !in_array($searchTerm, $blacklist, true));
  }

  /**
   * Returns the intermittent-failure bugs whose summary matches a search term.
   *
   * The bugscache table is consulted first (unless regenerate=1 was passed in
   * the query string, in which case the cached entry is deleted). On a miss
   * the Bugzilla REST API is queried and a well-formed answer is cached.
   *
   * @param string $searchTerm Term to look for; only its first 100 bytes are used.
   * @return array List of bug objects (id, summary, status, resolution); empty
   *               if nothing matched or Bugzilla could not be queried.
   */
  protected function getBugsForFailure($searchTerm) {
    // Searching for extremely long search terms is undesirable, since:
    // a) We end up spamming Bugzilla's REST API.
    // b) Bugzilla's max summary length is 256 characters, and once "Intermittent "
    // and platform/suite information is prefixed, there are even fewer characters
    // left for us to use for the failure string against which we need to match.
    // c) For long search terms, the additional length does little to prevent against
    // false positives, but means we're more susceptible to false negatives due to
    // run-to-run variances in the error messages (eg paths, process IDs).
    $searchTerm = trim(substr($searchTerm, 0, 100));
    Debug::output('* Search term: "'.$searchTerm.'"');

    global $db;
    $engine = new Services_JSON();

    $regenerate = isset($_GET['regenerate']) && $_GET['regenerate'] == '1';
    if ($regenerate) {
      Debug::output('  Removing cached bug suggestions');
      $stmt = $db->prepare("
        DELETE FROM bugscache
        WHERE filename=:filename");
      $stmt->execute(array(":filename" => $searchTerm));
    } else {
      $stmt = $db->prepare("
        SELECT json
        FROM bugscache
        WHERE filename=:filename");
      $stmt->execute(array(":filename" => $searchTerm));
      $result = $stmt->fetchColumn();
      if ($result) {
        $cachedBugs = $engine->decode($result);
        // Callers count() and iterate the result, so only hand back a cached
        // value that decoded to a list; otherwise fall through and refetch.
        if (is_array($cachedBugs)) {
          return $cachedBugs;
        }
        Debug::output('  Ignoring undecodable cached bug suggestions');
      }
    }

    // else: fetch it from bugzilla
    $debug = new Debug();
    $apiURL = "https://bugzilla.mozilla.org/rest/bug" .
              "?keywords=intermittent-failure" .
              "&keywords_type=allwords" .
              "&chfieldfrom=-3m" .
              "&chfieldto=Now" .
              "&short_desc=" . urlencode($searchTerm) .
              "&short_desc_type=allwordssubstr" .
              "&include_fields=id,summary,status,resolution" .
              "&order=bug_status,bug_id";
    $bugs_json = @file_get_contents($apiURL);
    $debug->outputDuration('  BzAPI query');
    if ($bugs_json === false) {
      Debug::output('  Bugzilla API request failed! URL: '.$apiURL);
      return array();
    }
    $response = $engine->decode($bugs_json);
    if (!is_object($response) || !isset($response->bugs) || !is_array($response->bugs)) {
      // Do not cache a malformed answer: that would hide all suggestions for
      // this search term until the cache entry is removed.
      Debug::output('  Bugzilla API returned an unexpected response! URL: '.$apiURL);
      return array();
    }
    $bugs = $response->bugs;

    // and save it in the database
    $stmt = $db->prepare("
      INSERT INTO bugscache (filename, json)
      VALUES (:filename, :json);");
    try {
      $stmt->execute(array(":filename" => $searchTerm, ":json" => $engine->encode($bugs)));
    } catch (Exception $e) {
      // another process was faster, nevermind
    }

    return $bugs;
  }
}