Bug 1252361 - Produce UNEXPECTED-PASS results for fuzzy tests that are overfuzzed. r=dbaron
authorKartikaya Gupta <kgupta@mozilla.com>
Fri, 02 Jun 2017 09:28:33 -0400
changeset 410143 4d98a42a482f6c73e24b2e9535e4e93643284a31
parent 410142 bc72cc13c8f4f857d1123ee930abc4e050513e76
child 410144 99cc18bf162ac13654adbc515d999e6590cce6dc
push id7391
push usermtabara@mozilla.com
push dateMon, 12 Jun 2017 13:08:53 +0000
treeherdermozilla-beta@2191d7f87e2e [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersdbaron
bugs1252361
milestone55.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1252361 - Produce UNEXPECTED-PASS results for fuzzy tests that are overfuzzed. r=dbaron This catches a common problem where somebody adds a fuzzy annotation on a test to work around some minor differences. Later the differences go away, but since the test harness doesn't catch that, nobody is the wiser. Subsequently a "real" regression can reintroduce differences which are hidden by the stale fuzzy annotations. With this patch, if the annotations are set up properly, the test harness will flag tests as "UNEXPECTED-PASS" when the differences go away. This will require the patch author to reduce the allowed fuzziness parameters, and will make it easier to catch subsequent regressions. MozReview-Commit-ID: B3rGPFLXkCu
layout/reftests/reftest-sanity/reftest.list
layout/tools/reftest/reftest.jsm
--- a/layout/reftests/reftest-sanity/reftest.list
+++ b/layout/reftests/reftest-sanity/reftest.list
@@ -142,21 +142,31 @@ test-pref(font.size.variable.x-western,1
 test-pref(font.size.variable.x-western,16) != font-default.html font-size-24.html
 test-pref(font.size.variable.x-western,24) == font-default.html font-size-24.html
 test-pref(font.size.variable.x-western,24) != font-default.html font-size-16.html
 fails test-pref(font.size.variable.x-western,false) == font-default.html font-size-16.html
 fails test-pref(font.size.variable.x-western,"foo") == font-default.html font-size-16.html
 ref-pref(font.size.variable.x-western,16) test-pref(font.size.variable.x-western,24) skip-if(styloVsGecko) != font-default.html font-default.html
 ref-pref(font.size.variable.x-western,24) test-pref(font.size.variable.x-western,16) skip-if(styloVsGecko) != font-default.html font-default.html
 ref-pref(font.size.variable.x-western,24) test-pref(font.size.variable.x-western,24) skip-if(styloVsGecko) == font-default.html font-default.html
+
 # reftest syntax: fuzzy(maxPixelDifference,maxNumberDifferingPixels)
 fuzzy(1,250000) == fuzzy.html fuzzy-ref.html
 fuzzy(1,250000) != too-fuzzy.html fuzzy-ref.html
 fuzzy-if(true,1,250000) == fuzzy.html fuzzy-ref.html
 fuzzy-if(false,2,1) == fuzzy-ref.html fuzzy-ref.html
+# test some ranged fuzzes
+fuzzy(1-10,1-250000) fuzzy-if(false,5-10,250000) skip-if(styloVsGecko) == fuzzy.html fuzzy-ref.html
+fuzzy(0-0,250000) skip-if(styloVsGecko) != fuzzy.html fuzzy-ref.html
+fuzzy(1,0-2) skip-if(styloVsGecko) != fuzzy.html fuzzy-ref.html
+# If enabled, the following two should result in UNEXPECTED-PASS because
+# they are both overfuzzed
+# fuzzy(3-4,250000) == fuzzy.html fuzzy-ref.html
+# fuzzy(1,250001-250002) == fuzzy.html fuzzy-ref.html
+#
 # When using 565 fuzzy.html and fuzzy-ref.html will compare as equal
 fails-if(!styloVsGecko) fuzzy-if(false,2,1) random-if(Android) == fuzzy.html fuzzy-ref.html
 
 # Test that reftest-no-paint fails correctly. With WR enabled, it will generate color layers instead of updating color on the painted layer.
 fails skip-if(webrender) == reftest-no-paint.html reftest-no-paint-ref.html
 
 skip-if(!asyncPan||!browserIsRemote) == async-scroll-1a.html async-scroll-1-ref.html
 
--- a/layout/tools/reftest/reftest.jsm
+++ b/layout/tools/reftest/reftest.jsm
@@ -1633,17 +1633,18 @@ function RecordResult(testRunTime, error
     outputs[EXPECTED_FAIL] = {
         true:  {s: ["PASS", "FAIL"], n: "UnexpectedPass"},
         false: {s: ["FAIL", "FAIL"], n: "KnownFail"}
     };
     outputs[EXPECTED_RANDOM] = {
         true:  {s: ["PASS", "PASS"], n: "Random"},
         false: {s: ["FAIL", "FAIL"], n: "Random"}
     };
-    outputs[EXPECTED_FUZZY] = outputs[EXPECTED_PASS];
+    // for EXPECTED_FUZZY we need special handling because we can have
+    // Pass, UnexpectedPass, or UnexpectedFail
 
     var output;
     var extra;
 
     if (gURLs[0].type == TYPE_LOAD) {
         ++gTestResults.LoadOnly;
         logger.testEnd(gURLs[0].identifier, "PASS", "PASS", "(LOAD ONLY)");
         gCurrentCanvas = null;
@@ -1735,41 +1736,68 @@ function RecordResult(testRunTime, error
             // if the comparison result matches the expected result specified
             // in the manifest.
 
             // number of different pixels
             var differences;
             // whether the two renderings match:
             var equal;
             var maxDifference = {};
+            // whether the allowed fuzziness from the annotations is exceeded
+            // by the actual comparison results
+            var fuzz_exceeded = false;
 
             differences = gWindowUtils.compareCanvases(gCanvas1, gCanvas2, maxDifference);
             equal = (differences == 0);
 
+            if (maxDifference.value > 0 && equal) {
+                throw "Inconsistent result from compareCanvases.";
+            }
+
             // what is expected on this platform (PASS, FAIL, or RANDOM)
             var expected = gURLs[0].expected;
 
-            if (maxDifference.value > 0 &&
-                maxDifference.value >= gURLs[0].fuzzyMinDelta &&
-                maxDifference.value <= gURLs[0].fuzzyMaxDelta &&
-                differences >= gURLs[0].fuzzyMinPixels &&
-                differences <= gURLs[0].fuzzyMaxPixels) {
-                if (equal) {
-                    throw "Inconsistent result from compareCanvases.";
-                }
-                equal = expected == EXPECTED_FUZZY;
-                logger.info(`REFTEST fuzzy match (${maxDifference.value}, ${differences}) <= (${gURLs[0].fuzzyMaxDelta}, ${gURLs[0].fuzzyMaxPixels})`);
+            if (expected == EXPECTED_FUZZY) {
+                logger.info(`REFTEST fuzzy test ` +
+                            `(${gURLs[0].fuzzyMinDelta}, ${gURLs[0].fuzzyMinPixels}) <= ` +
+                            `(${maxDifference.value}, ${differences}) <= ` +
+                            `(${gURLs[0].fuzzyMaxDelta}, ${gURLs[0].fuzzyMaxPixels})`);
+                fuzz_exceeded = maxDifference.value > gURLs[0].fuzzyMaxDelta ||
+                                differences > gURLs[0].fuzzyMaxPixels;
+                equal = !fuzz_exceeded &&
+                        maxDifference.value >= gURLs[0].fuzzyMinDelta &&
+                        differences >= gURLs[0].fuzzyMinPixels;
             }
 
             var failedExtraCheck = gFailedNoPaint || gFailedOpaqueLayer || gFailedAssignedLayer;
 
             // whether the comparison result matches what is in the manifest
             var test_passed = (equal == (gURLs[0].type == TYPE_REFTEST_EQUAL)) && !failedExtraCheck;
 
-            output = outputs[expected][test_passed];
+            if (expected != EXPECTED_FUZZY) {
+                output = outputs[expected][test_passed];
+            } else if (test_passed) {
+                output = {s: ["PASS", "PASS"], n: "Pass"};
+            } else if (gURLs[0].type == TYPE_REFTEST_EQUAL &&
+                       !failedExtraCheck &&
+                       !fuzz_exceeded) {
+                // If we get here, that means we had an '==' type test where
+                // at least one of the actual difference values was below the
+                // allowed range, but nothing else was wrong. So let's produce
+                // UNEXPECTED-PASS in this scenario. Also, if we enter this
+                // branch, 'equal' must be false so let's assert that to guard
+                // against logic errors.
+                if (equal) {
+                    throw "Logic error in reftest.jsm fuzzy test handling!";
+                }
+                output = {s: ["PASS", "FAIL"], n: "UnexpectedPass"};
+            } else {
+                // In all other cases we fail the test
+                output = {s: ["FAIL", "PASS"], n: "UnexpectedFail"};
+            }
             extra = { status_msg: output.n };
 
             ++gTestResults[output.n];
 
             // It's possible that we failed both an "extra check" and the normal comparison, but we don't
             // have a way to annotate these separately, so just print an error for the extra check failures.
             if (failedExtraCheck) {
                 var failures = [];