php/inc/AnnotatedSummaryGenerator.php
author Ed Morley <emorley@mozilla.com>
Sun, 16 Jun 2013 15:55:03 +0100
changeset 1236 bcd877b5c15182373fc2d858dca012e8bc6b7332
parent 1235 612c22f85fd0788c6152978b0b19a74655db2cb5
child 1237 0af2f5a0b555c032f51af805f5c53b88d30d4ec4
permissions -rw-r--r--
Bug 882670 - Part 5: Move the search-term sanity checking into a separate function; r=RyanVM Part 6 needs to perform the search-term sanity checking independently of getBugsForFailure(), so let's break it out into a separate function.

<?php
/* -*- Mode: PHP; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set sw=2 ts=2 et tw=80 : */

require_once 'inc/ParallelLogGenerating.php';
require_once 'inc/GzipUtils.php';
require_once 'inc/Debug.php';
require_once 'inc/JSON.php';

/**
 * AnnotatedSummaryGenerator
 *
 * Transforms a plain text error summary into an HTML one where every failure
 * is annotated with orange bug suggestions.
 */

class AnnotatedSummaryGenerator implements LogGenerator {
  /**
   * Raw (unannotated) summary record. This class reads its '_id' entry and
   * hands the whole value to GzipUtils::getLines().
   */
  public $rawSummary;

  /** Value emitted in the data-signature attribute of every bug suggestion. */
  public $logDescription;

  /** True once a "bytes leaked" failure line has been processed. */
  public $hasLeak = false;

  /**
   * @param mixed $rawSummary     Raw summary record (see $rawSummary).
   * @param mixed $logDescription Description used as the suggestions' signature.
   */
  public function __construct($rawSummary, $logDescription) {
    $this->rawSummary = $rawSummary;
    $this->logDescription = $logDescription;
    $this->hasLeak = false;
  }

  /**
   * Builds the annotated summary and stores it via GzipUtils::writeToDb().
   *
   * At most $maxAnnotatedFailures raw lines are annotated; when more exist, a
   * truncation notice linking to the full parsed log is appended instead. A
   * link to the leak analysis is appended if any processed line was a leak.
   *
   * @param mixed $log Log descriptor, passed through to GzipUtils::writeToDb().
   */
  public function generate($log) {
    $maxAnnotatedFailures = 20;
    $rawSummaryLines = GzipUtils::getLines($this->rawSummary);
    $annotatedLines = array();
    foreach ($rawSummaryLines as $i => $line) {
      // Only display the first N failures from the unannotated summary
      if ($i >= $maxAnnotatedFailures) {
        $totalParsedFailures = count($rawSummaryLines);
        // In extreme cases we will have truncated the raw log too (in LogParser.php),
        // so cannot state the actual number of failures. In which case, the truncation
        // message makes $totalParsedFailures exceed $maxParsedFailures.
        // $maxParsedFailures here should be kept in sync with LogParser.php's
        // $maxParsedFailures.
        // TODO: Move these prefs to config.php so we don't have to keep them in sync
        // (once we have an in-repo config.php with passwords separated out, that isn't
        // a PITA to update).
        $maxParsedFailures = 100;
        if ($totalParsedFailures > $maxParsedFailures)
          $totalParsedFailures = $maxParsedFailures . '+';
        $annotatedLines[] = "Only displaying first $maxAnnotatedFailures of $totalParsedFailures" .
                            " failures - <a href=\"php/getParsedLog.php?id=" . $this->escapedRequestId() .
                            "\" target=\"_blank\">View log</a>.";
        break;
      }
      $this->processLine($annotatedLines, $line);
    }
    if ($this->hasLeak) {
      $annotatedLines[] = "<a href=\"php/getLeakAnalysis.php?id=" . $this->escapedRequestId() .
        "\" target=\"_blank\">Analyze the leak.</a>";
    }
    GzipUtils::writeToDb($log, implode("", $annotatedLines));
  }

  /**
   * Builds the descriptor of the annotated summary for this raw summary and
   * passes it, together with this generator, to
   * ParallelLogGenerating::ensureLogExists().
   *
   * @return array The descriptor ("_id" and "type" => "annotatedsummary").
   */
  public function ensureAnnotatedSummaryExists() {
    $log = array("_id" => $this->rawSummary['_id'], "type" => "annotatedsummary");
    ParallelLogGenerating::ensureLogExists($log, $this);
    return $log;
  }

  /**
   * Returns the "id" request parameter made safe for use as a query-string
   * value inside a double-quoted HTML attribute.
   *
   * The value comes straight from the request, so it is URL-encoded and then
   * HTML-escaped. A missing or non-scalar (array) parameter yields "".
   *
   * @return string
   */
  protected function escapedRequestId() {
    $id = (isset($_GET["id"]) && is_scalar($_GET["id"])) ? (string) $_GET["id"] : "";
    return htmlspecialchars(urlencode($id));
  }

  /**
   * Renders one bug suggestion as an empty <span> carrying data-* attributes.
   *
   * Every interpolated value is HTML-escaped, and $bug itself is left
   * unmodified so that rendering the same bug twice cannot double-escape it.
   *
   * @param object $bug  Bug with id, summary, status and resolution properties.
   * @param string $line The failure line the suggestion belongs to.
   * @return string The <span> markup followed by a newline.
   */
  protected function generateSuggestion($bug, $line) {
    $bugId = htmlspecialchars((string) $bug->id);
    $summary = htmlspecialchars((string) $bug->summary);
    $signature = htmlspecialchars((string) $this->logDescription);
    $logLine = htmlspecialchars($line);
    $status = htmlspecialchars($bug->status . " " . $bug->resolution);
    return "<span data-bugid=\"$bugId\" " .
                 "data-summary=\"$summary\" " .
                 "data-signature=\"$signature\" " .
                 "data-logline=\"$logLine\" " .
                 "data-status=\"$status\"" .
           "></span>\n";
  }

  /**
   * Appends the escaped failure line, followed by its bug suggestions (or an
   * error marker when there are too many), to $annotatedLines.
   *
   * Also sets $this->hasLeak when the line is a leak failure.
   *
   * @param array  $annotatedLines Output accumulator, modified in place.
   * @param string $line           One line of the raw summary.
   */
  protected function processLine(&$annotatedLines, $line) {
    $annotatedLines[] = htmlspecialchars($line);

    // On windows, $line can have spurious whitespace/line-endings appended
    $trimmedLine = trim($line);

    // Our standard logging format uses the pipe symbol as a delimiter.
    $lineParts = preg_split("#\s\|\s#", $trimmedLine);
    if (count($lineParts) >= 3) {
      // Is in the "FAILURE-TYPE | testNameOrFilePath | message" type format.
      // testNameOrFilePath can either be a test filepath, or an arbitrary
      // step/test name (eg 'Shutdown' or 'leakcheck').
      $testNameOrFilePath = $lineParts[1];
      $message = $lineParts[2];
      // Leak failure messages are of the form:
      // leakcheck | .*\d+ bytes leaked (Object-1, Object-2, Object-3, ...)
      if (preg_match('/\d+ bytes leaked \((.+)\)$/', $message, $matches)) {
        $this->hasLeak = true;
        // Search for the list of leaked objects.
        $searchTerm = $matches[1];
      } else {
        // Split the potential filepath on both forward and backward slashes.
        $filePathParts = preg_split("#[/\\\\]#", $testNameOrFilePath);
        // Remove the path from the search term if present.
        $searchTerm = end($filePathParts);
      }
    } else {
      // Not in the delimited format, fall back to searching for the entire failure line.
      $searchTerm = $trimmedLine;
    }

    $bugs = $this->getBugsForFailure($searchTerm);

    // For crashes with no suggestions, search for the top frame as a fallback.
    // Catches shutdown crashes/anything else that is blacklisted, as well as GC
    // crashes that occur in too many tests to put all of them in bug summaries.
    if (empty($bugs) && preg_match('/application crashed \[@ (.+)\]$/', $trimmedLine, $matches)) {
      Debug::output('Trying topFrame fallback on "'.$matches[1].'"');
      $bugs = $this->getBugsForFailure($matches[1]);
    }

    // If we had more than 50 bugs suggested, give up
    // TODO: Consider checking how many have resolution==''
    // and showing just those.
    $numSuggestions = count($bugs);
    if ($numSuggestions > 50) {
      $annotatedLines[] = "<span class=\"suggestionError\">Exceeded max bug suggestions" .
                          " ($numSuggestions bugs found)</span>";
      return;
    }

    foreach ($bugs as $bug) {
      $annotatedLines[] = $this->generateSuggestion($bug, $line);
    }
  }

  /**
   * Decides whether a search term is specific enough to be worth looking up.
   *
   * The term is trimmed before both checks, so surrounding whitespace cannot
   * let a blacklisted term through.
   *
   * @param string $searchTerm
   * @return bool True if the trimmed term is longer than 4 characters and is
   *              not blacklisted.
   */
  protected function isHelpfulSearchTerm($searchTerm) {
    // Search terms that will match too many bug summaries and so not result in useful suggestions.
    $blacklist = array(
      'automation.py',
      'remoteautomation.py',
      'Shutdown',
      'Main app process exited normally',
      'leakcheck'
    );
    $trimmedTerm = trim((string) $searchTerm);
    return (strlen($trimmedTerm) > 4 && !in_array($trimmedTerm, $blacklist, true));
  }

  /**
   * Returns the bugs whose summary matches $searchTerm.
   *
   * Results are read from the bugscache table when present; otherwise they are
   * fetched from BzAPI and then stored in bugscache. When the request carries
   * regenerate=1 the cached row is deleted first and a fresh lookup is forced.
   *
   * @param string $searchTerm
   * @return array Decoded bug objects; empty for unhelpful terms or when the
   *               BzAPI request fails.
   */
  protected function getBugsForFailure($searchTerm) {
    if (!$this->isHelpfulSearchTerm($searchTerm)) {
      return array();
    }
    // Bugzilla's max summary length is 256 characters, so once "Intermittent "
    // and platform/suite information is prefixed, it often only leaves ~200
    // characters for the failure, so anything more will be truncated in the summary.
    // We limit the length here to (a) prevent crazy length bzapi requests, and (b)
    // in the hope that we might still match the truncated failure in the summary.
    $searchTerm = trim(substr($searchTerm, 0, 200));

    global $db;
    $engine = new Services_JSON();

    $regenerate = isset($_GET['regenerate']) && $_GET['regenerate'] == '1';
    if ($regenerate) {
      Debug::output('Removing cached bug suggestions for "'.$searchTerm.'"');
      $stmt = $db->prepare("
        DELETE FROM bugscache
        WHERE filename=:filename");
      $stmt->execute(array(":filename" => $searchTerm));
    } else {
      $debug = new Debug();
      $stmt = $db->prepare("
        SELECT json
        FROM bugscache
        WHERE filename=:filename");
      $stmt->execute(array(":filename" => $searchTerm));
      $result = $stmt->fetchColumn();
      $debug->outputDuration('Searching bugscache for "'.$searchTerm.'"');
      if ($result)
        return $engine->decode($result);
    }

    // else: fetch it from bugzilla
    $debug = new Debug();
    $apiURL = "https://api-dev.bugzilla.mozilla.org/latest/bug" .
              "?keywords=intermittent-failure" .
              "&include_fields=id,summary,status,resolution" .
              "&summary=" . urlencode($searchTerm);
    // Best effort: warnings are suppressed and a failed request just means no
    // suggestions. Nothing is cached in that case, so a later run can retry.
    $bugs_json = @file_get_contents($apiURL);
    $debug->outputDuration('Performing BzAPI lookup for "'.$searchTerm.'"');
    if ($bugs_json === false)
      return array();
    $bugs = $engine->decode($bugs_json);
    // A response without a "bugs" member is treated as "no bugs".
    $bugs = isset($bugs->bugs) ? $bugs->bugs : array();

    // and save it in the database
    $stmt = $db->prepare("
      INSERT INTO bugscache (filename, json)
      VALUES (:filename, :json);");
    try {
      $stmt->execute(array(":filename" => $searchTerm, ":json" => $engine->encode($bugs)));
    } catch (Exception $e) {
      // another process was faster, nevermind
    }

    return $bugs;
  }
}