bug 1495523 - disable TRR after max-fails number of failed requests r=valentin
author: Daniel Stenberg <daniel@haxx.se>
Wed, 03 Oct 2018 11:53:46 +0000
changeset 495093 f062d23be1811dce1fc21ef72f3772f9acdc5cdf
parent 495092 758cfb5e4ddc3e26584d67e81679ad6a9c3e914e
child 495094 fc316e011bb6647781db75fb01e2845410ca9e3a
push id: 9984
push user: ffxbld-merge
push date: Mon, 15 Oct 2018 21:07:35 +0000
treeherder: mozilla-beta@183d27ea8570 [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: valentin
bugs: 1495523
milestone: 64.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
bug 1495523 - disable TRR after max-fails number of failed requests r=valentin MozReview-Commit-ID: 2dSEY6DuP2A Differential Revision: https://phabricator.services.mozilla.com/D7587
modules/libpref/init/all.js
netwerk/dns/TRR.cpp
netwerk/dns/TRRService.cpp
netwerk/dns/TRRService.h
--- a/modules/libpref/init/all.js
+++ b/modules/libpref/init/all.js
@@ -5418,16 +5418,18 @@ pref("network.trr.bootstrapAddress", "")
 // Meant to survive basically a page load.
 pref("network.trr.blacklist-duration", 60);
 // Single TRR request timeout, in milliseconds
 pref("network.trr.request-timeout", 1500);
 // Allow AAAA entries to be used "early", before the A results are in
 pref("network.trr.early-AAAA", false);
 // Explicitly disable ECS (EDNS Client Subnet, RFC 7871)
 pref("network.trr.disable-ECS", true);
+// After this many failed TRR requests in a row, consider TRR borked
+pref("network.trr.max-fails", 5);
 
 pref("captivedetect.canonicalURL", "http://detectportal.firefox.com/success.txt");
 pref("captivedetect.canonicalContent", "success\n");
 pref("captivedetect.maxWaitingTime", 5000);
 pref("captivedetect.pollingTime", 3000);
 pref("captivedetect.maxRetryCount", 5);
 
 #ifdef RELEASE_OR_BETA
--- a/netwerk/dns/TRR.cpp
+++ b/netwerk/dns/TRR.cpp
@@ -993,16 +993,19 @@ TRR::OnStopRequest(nsIRequest *aRequest,
                    nsresult aStatusCode)
 {
   // The dtor will be run after the function returns
   LOG(("TRR:OnStopRequest %p %s %d failed=%d code=%X\n",
        this, mHost.get(), mType, mFailed, (unsigned int)aStatusCode));
   nsCOMPtr<nsIChannel> channel;
   channel.swap(mChannel);
 
+  // Bad content is still considered "okay" if the HTTP response is okay
+  gTRRService->TRRIsOkay(NS_SUCCEEDED(aStatusCode));
+
   // if status was "fine", parse the response and pass on the answer
   if (!mFailed && NS_SUCCEEDED(aStatusCode)) {
     nsCOMPtr<nsIHttpChannel> httpChannel = do_QueryInterface(aRequest);
     if (!httpChannel) {
       return NS_ERROR_UNEXPECTED;
     }
     nsresult rv = NS_OK;
     nsAutoCString contentType;
@@ -1129,16 +1132,17 @@ TRR::Cancel()
   if (!NS_IsMainThread()) {
     NS_DispatchToMainThread(new ProxyCancel(this));
     return;
   }
   if (mChannel) {
     LOG(("TRR: %p canceling Channel %p %s %d\n", this,
          mChannel.get(), mHost.get(), mType));
     mChannel->Cancel(NS_ERROR_ABORT);
+    gTRRService->TRRIsOkay(false);
   }
 }
 
 #undef LOG
 
 // namespace
 }
 }
--- a/netwerk/dns/TRRService.cpp
+++ b/netwerk/dns/TRRService.cpp
@@ -40,19 +40,21 @@ TRRService::TRRService()
   , mTRRTimeout(3000)
   , mLock("trrservice")
   , mConfirmationNS(NS_LITERAL_CSTRING("example.com"))
   , mWaitForCaptive(true)
   , mRfc1918(false)
   , mCaptiveIsPassed(false)
   , mUseGET(false)
   , mDisableECS(true)
+  , mDisableAfterFails(5)
   , mClearTRRBLStorage(false)
   , mConfirmationState(CONFIRM_INIT)
   , mRetryConfirmInterval(1000)
+  , mTRRFailures(0)
 {
   MOZ_ASSERT(NS_IsMainThread(), "wrong thread");
 }
 
 nsresult
 TRRService::Init()
 {
   MOZ_ASSERT(NS_IsMainThread(), "wrong thread");
@@ -260,16 +262,22 @@ TRRService::ReadPrefs(const char *name)
     }
   }
   if (!name || !strcmp(name, TRR_PREF("disable-ECS"))) {
     bool tmp;
     if (NS_SUCCEEDED(Preferences::GetBool(TRR_PREF("disable-ECS"), &tmp))) {
       mDisableECS = tmp;
     }
   }
+  if (!name || !strcmp(name, TRR_PREF("max-fails"))) {
+    uint32_t fails;
+    if (NS_SUCCEEDED(Preferences::GetUint(TRR_PREF("max-fails"), &fails))) {
+      mDisableAfterFails = fails;
+    }
+  }
 
   return NS_OK;
 }
 
 nsresult
 TRRService::GetURI(nsCString &result)
 {
   MutexAutoLock lock(mLock);
@@ -584,16 +592,36 @@ TRRService::Notify(nsITimer *aTimer)
   } else {
     MOZ_CRASH("Unknown timer");
   }
 
   return NS_OK;
 }
 
 
+void
+TRRService::TRRIsOkay(bool aWorks) // record the outcome of one TRR request
+{
+  if (aWorks) {
+    mTRRFailures = 0; // any success ends the current run of failures
+  } else if ((mMode == MODE_TRRFIRST) && (mConfirmationState == CONFIRM_OK)) {
+    // Failures only count in TRR-first mode while the confirmation state is OK
+    uint32_t fails = ++mTRRFailures;
+    if (fails >= mDisableAfterFails) { // limit comes from the max-fails pref
+      LOG(("TRRService goes FAILED after %u failures in a row\n", fails));
+      mConfirmationState = CONFIRM_FAILED;
+      // Arm a one-shot timer and start re-trying the NS domain again
+      NS_NewTimerWithCallback(getter_AddRefs(mRetryConfirmTimer),
+                              this, mRetryConfirmInterval,
+                              nsITimer::TYPE_ONE_SHOT);
+      mTRRFailures = 0; // restart the count for the next time state is OK
+    }
+  }
+}
+
 AHostResolver::LookupStatus
 TRRService::CompleteLookup(nsHostRecord *rec, nsresult status, AddrInfo *aNewRRSet, bool pb)
 {
   // this is an NS check for the TRR blacklist or confirmationNS check
 
   MOZ_ASSERT(NS_IsMainThread());
   MOZ_ASSERT(!rec);
 
@@ -602,18 +630,18 @@ TRRService::CompleteLookup(nsHostRecord 
 
   MOZ_ASSERT(!mConfirmer || (mConfirmationState == CONFIRM_TRYING));
   if (mConfirmationState == CONFIRM_TRYING) {
     MOZ_ASSERT(mConfirmer);
     mConfirmationState = NS_SUCCEEDED(status) ? CONFIRM_OK : CONFIRM_FAILED;
     LOG(("TRRService finishing confirmation test %s %d %X\n",
          mPrivateURI.get(), (int)mConfirmationState, (unsigned int)status));
     mConfirmer = nullptr;
-    if ((mConfirmationState == CONFIRM_FAILED) && (mMode == MODE_TRRONLY)) {
-      // in TRR-only mode; retry failed confirmations
+    if (mConfirmationState == CONFIRM_FAILED) {
+      // retry failed NS confirmation
       NS_NewTimerWithCallback(getter_AddRefs(mRetryConfirmTimer),
                               this, mRetryConfirmInterval,
                               nsITimer::TYPE_ONE_SHOT);
       if (mRetryConfirmInterval < 64000) {
         // double the interval up to this point
         mRetryConfirmInterval *= 2;
       }
     } else {
--- a/netwerk/dns/TRRService.h
+++ b/netwerk/dns/TRRService.h
@@ -44,16 +44,17 @@ public:
   uint32_t GetRequestTimeout() { return mTRRTimeout; }
 
   LookupStatus CompleteLookup(nsHostRecord *, nsresult, mozilla::net::AddrInfo *, bool pb) override;
   LookupStatus CompleteLookupByType(nsHostRecord *, nsresult, const nsTArray<nsCString> *, uint32_t, bool pb) override;
   void TRRBlacklist(const nsACString &host, bool privateBrowsing, bool aParentsToo);
   bool IsTRRBlacklisted(const nsACString &host, bool privateBrowsing, bool fullhost);
 
   bool MaybeBootstrap(const nsACString &possible, nsACString &result);
+  void TRRIsOkay(bool aWorks);
 
 private:
   virtual  ~TRRService();
   nsresult ReadPrefs(const char *name);
   void GetPrefBranch(nsIPrefBranch **result);
   void MaybeConfirm();
 
   bool                      mInitialized;
@@ -69,31 +70,33 @@ private:
 
   Atomic<bool, Relaxed> mWaitForCaptive; // wait for the captive portal to say OK before using TRR
   Atomic<bool, Relaxed> mRfc1918; // okay with local IP addresses in DOH responses?
   Atomic<bool, Relaxed> mCaptiveIsPassed; // set when captive portal check is passed
   Atomic<bool, Relaxed> mUseGET; // do DOH using GET requests (instead of POST)
   Atomic<bool, Relaxed> mEarlyAAAA; // allow use of AAAA results before A is in
   Atomic<bool, Relaxed> mDisableIPv6; // don't even try
   Atomic<bool, Relaxed> mDisableECS;  // disable EDNS Client Subnet in requests
+  Atomic<uint32_t, Relaxed> mDisableAfterFails;  // this many fails in a row means failed TRR service
 
   // TRR Blacklist storage
   RefPtr<DataStorage> mTRRBLStorage;
   Atomic<bool, Relaxed> mClearTRRBLStorage;
 
   enum ConfirmationState {
     CONFIRM_INIT = 0,
     CONFIRM_TRYING = 1,
     CONFIRM_OK = 2,
     CONFIRM_FAILED = 3
   };
   Atomic<ConfirmationState, Relaxed>  mConfirmationState;
   RefPtr<TRR> mConfirmer;
   nsCOMPtr<nsITimer> mRetryConfirmTimer;
   uint32_t mRetryConfirmInterval; // milliseconds until retry
+  Atomic<uint32_t, Relaxed> mTRRFailures;
 };
 
 extern TRRService *gTRRService;
 
 } // namespace net
 } // namespace mozilla
 
 #endif // TRRService_h_