Bug 641937 - Blacklist non-responding IP addresses in DNS r=bz
authorPatrick McManus <mcmanus@ducksong.com>
Thu, 21 Jul 2011 09:18:01 -0400
changeset 73152 a1487dacb9e783ffb3072c1f0a8910007951aedd
parent 73151 8670ac4f18b1679a49ca2b36c7b37f7bd5f99f92
child 73153 ed2b2c4b453a3787d03fad2e92f269576e3c30b8
push id689
push usermcmanus@ducksong.com
push dateThu, 21 Jul 2011 13:18:45 +0000
treeherdermozilla-inbound@a1487dacb9e7 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersbz
bugs641937
milestone8.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 641937 - Blacklist non-responding IP addresses in DNS r=bz Blacklist non-responding IP addresses for a hostname so the next time we access that hostname we don't have to wait for a timeout again
netwerk/base/src/nsSocketTransport2.cpp
netwerk/dns/nsDNSService2.cpp
netwerk/dns/nsHostResolver.cpp
netwerk/dns/nsHostResolver.h
netwerk/dns/nsIDNSRecord.idl
netwerk/socket/nsSOCKSIOLayer.cpp
--- a/netwerk/base/src/nsSocketTransport2.cpp
+++ b/netwerk/base/src/nsSocketTransport2.cpp
@@ -1268,16 +1268,18 @@ nsSocketTransport::RecoverFromError()
         mCondition != NS_ERROR_UNKNOWN_HOST &&
         mCondition != NS_ERROR_UNKNOWN_PROXY_HOST)
         return PR_FALSE;
 
     PRBool tryAgain = PR_FALSE;
 
     // try next ip address only if past the resolver stage...
     if (mState == STATE_CONNECTING && mDNSRecord) {
+        mDNSRecord->ReportUnusable(SocketPort());
+        
         nsresult rv = mDNSRecord->GetNextAddr(SocketPort(), &mNetAddr);
         if (NS_SUCCEEDED(rv)) {
             SOCKET_LOG(("  trying again with next ip address\n"));
             tryAgain = PR_TRUE;
         }
     }
 
 #if defined(XP_WIN) || defined(MOZ_PLATFORM_MAEMO)
--- a/netwerk/dns/nsDNSService2.cpp
+++ b/netwerk/dns/nsDNSService2.cpp
@@ -75,24 +75,27 @@ class nsDNSRecord : public nsIDNSRecord
 {
 public:
     NS_DECL_ISUPPORTS
     NS_DECL_NSIDNSRECORD
 
     nsDNSRecord(nsHostRecord *hostRecord)
         : mHostRecord(hostRecord)
         , mIter(nsnull)
+        , mLastIter(nsnull)
         , mIterGenCnt(-1)
         , mDone(PR_FALSE) {}
 
 private:
     virtual ~nsDNSRecord() {}
 
     nsRefPtr<nsHostRecord>  mHostRecord;
-    void                   *mIter;
+    void                   *mIter;       // enum ptr for PR_EnumerateAddrInfo
+    void                   *mLastIter;   // previous enum ptr, for use in
+                                         // getting addrinfo in ReportUnusable
     int                     mIterGenCnt; // the generation count of
                                          // mHostRecord->addr_info when we
                                          // start iterating
     PRBool                  mDone;
 };
 
 NS_IMPL_THREADSAFE_ISUPPORTS1(nsDNSRecord, nsIDNSRecord)
 
@@ -102,17 +105,17 @@ nsDNSRecord::GetCanonicalName(nsACString
     // this method should only be called if we have a CNAME
     NS_ENSURE_TRUE(mHostRecord->flags & nsHostResolver::RES_CANON_NAME,
                    NS_ERROR_NOT_AVAILABLE);
 
     // if the record is for an IP address literal, then the canonical
     // host name is the IP address literal.
     const char *cname;
     {
-        MutexAutoLock lock(*mHostRecord->addr_info_lock);
+        MutexAutoLock lock(mHostRecord->addr_info_lock);
         if (mHostRecord->addr_info)
             cname = PR_GetCanonNameFromAddrInfo(mHostRecord->addr_info);
         else
             cname = mHostRecord->host;
         result.Assign(cname);
     }
     return NS_OK;
 }
@@ -121,35 +124,55 @@ NS_IMETHODIMP
 nsDNSRecord::GetNextAddr(PRUint16 port, PRNetAddr *addr)
 {
     // not a programming error to poke the DNS record when it has no more
     // entries.  just fail without any debug warnings.  this enables consumers
     // to enumerate the DNS record without calling HasMore.
     if (mDone)
         return NS_ERROR_NOT_AVAILABLE;
 
-    mHostRecord->addr_info_lock->Lock();
+    mHostRecord->addr_info_lock.Lock();
+    PRBool startedFresh = !mIter;
+
     if (mHostRecord->addr_info) {
         if (!mIter)
             mIterGenCnt = mHostRecord->addr_info_gencnt;
         else if (mIterGenCnt != mHostRecord->addr_info_gencnt) {
             // mHostRecord->addr_info has changed, so mIter is invalid.
             // Restart the iteration.  Alternatively, we could just fail.
             mIter = nsnull;
             mIterGenCnt = mHostRecord->addr_info_gencnt;
+            startedFresh = PR_TRUE;
         }
-        mIter = PR_EnumerateAddrInfo(mIter, mHostRecord->addr_info, port, addr);
-        mHostRecord->addr_info_lock->Unlock();
+
+        do {
+            mLastIter = mIter;
+            mIter = PR_EnumerateAddrInfo(mIter, mHostRecord->addr_info,
+                                         port, addr);
+        }
+        while (mIter && mHostRecord->Blacklisted(addr));
+
+        if (startedFresh && !mIter) {
+            // if everything was blacklisted we want to reset the blacklist (and
+            // likely relearn it) and return the first address. That is better
+            // than nothing
+            mHostRecord->ResetBlacklist();
+            mLastIter = nsnull;
+            mIter = PR_EnumerateAddrInfo(nsnull, mHostRecord->addr_info,
+                                         port, addr);
+        }
+            
+        mHostRecord->addr_info_lock.Unlock();
         if (!mIter) {
             mDone = PR_TRUE;
             return NS_ERROR_NOT_AVAILABLE;
         }
     }
     else {
-        mHostRecord->addr_info_lock->Unlock();
+        mHostRecord->addr_info_lock.Unlock();
         if (!mHostRecord->addr) {
             // Both mHostRecord->addr_info and mHostRecord->addr are null.
             // This can happen if mHostRecord->addr_info expired and the
             // attempt to reresolve it failed.
             return NS_ERROR_NOT_AVAILABLE;
         }
         memcpy(addr, mHostRecord->addr, sizeof(PRNetAddr));
         // set given port
@@ -184,33 +207,60 @@ NS_IMETHODIMP
 nsDNSRecord::HasMore(PRBool *result)
 {
     if (mDone)
         *result = PR_FALSE;
     else {
         // unfortunately, NSPR does not provide a way for us to determine if
         // there is another address other than to simply get the next address.
         void *iterCopy = mIter;
+        void *iterLastCopy = mLastIter;
         PRNetAddr addr;
         *result = NS_SUCCEEDED(GetNextAddr(0, &addr));
         mIter = iterCopy; // backup iterator
+        mLastIter = iterLastCopy; // backup iterator
         mDone = PR_FALSE;
     }
     return NS_OK;
 }
 
 NS_IMETHODIMP
 nsDNSRecord::Rewind()
 {
     mIter = nsnull;
+    mLastIter = nsnull;
     mIterGenCnt = -1;
     mDone = PR_FALSE;
     return NS_OK;
 }
 
+NS_IMETHODIMP
+nsDNSRecord::ReportUnusable(PRUint16 aPort)
+{
+    // right now we don't use the port in the blacklist
+
+    mHostRecord->addr_info_lock.Lock();
+
+    // Check that we are using a real addr_info (as opposed to a single
+    // constant address), and that the generation count is valid. Otherwise,
+    // ignore the report.
+
+    if (mHostRecord->addr_info &&
+        mIterGenCnt == mHostRecord->addr_info_gencnt) {
+        PRNetAddr addr;
+        void *id = PR_EnumerateAddrInfo(mLastIter, mHostRecord->addr_info,
+                                        aPort, &addr);
+        if (id)
+            mHostRecord->ReportUnusable(&addr);
+    }
+    
+    mHostRecord->addr_info_lock.Unlock();
+    return NS_OK;
+}
+
 //-----------------------------------------------------------------------------
 
 class nsDNSAsyncRequest : public nsResolveHostCallback
                         , public nsICancelable
 {
 public:
     NS_DECL_ISUPPORTS
     NS_DECL_NSICANCELABLE
--- a/netwerk/dns/nsHostResolver.cpp
+++ b/netwerk/dns/nsHostResolver.cpp
@@ -173,54 +173,105 @@ private:
 
 //----------------------------------------------------------------------------
 
 // this macro filters out any flags that are not used when constructing the
 // host key.  the significant flags are those that would affect the resulting
 // host record (i.e., the flags that are passed down to PR_GetAddrInfoByName).
 #define RES_KEY_FLAGS(_f) ((_f) & nsHostResolver::RES_CANON_NAME)
 
+nsHostRecord::nsHostRecord(const nsHostKey *key)
+    : _refc(1)
+    , addr_info_lock("nsHostRecord.addr_info_lock")
+    , addr_info_gencnt(0)
+    , addr_info(nsnull)
+    , addr(nsnull)
+    , negative(PR_FALSE)
+    , resolving(PR_FALSE)
+    , onQueue(PR_FALSE)
+    , usingAnyThread(PR_FALSE)
+{
+    host = ((char *) this) + sizeof(nsHostRecord);
+    memcpy((char *) host, key->host, strlen(key->host) + 1);
+    flags = key->flags;
+    af = key->af;
+
+    NS_LOG_ADDREF(this, 1, "nsHostRecord", sizeof(nsHostRecord));
+    expiration = NowInMinutes();
+
+    PR_INIT_CLIST(this);
+    PR_INIT_CLIST(&callbacks);
+}
+
 nsresult
 nsHostRecord::Create(const nsHostKey *key, nsHostRecord **result)
 {
     size_t hostLen = strlen(key->host) + 1;
     size_t size = hostLen + sizeof(nsHostRecord);
 
-    nsHostRecord *rec = (nsHostRecord*) ::operator new(size);
-
-    rec->host = ((char *) rec) + sizeof(nsHostRecord);
-    rec->flags = key->flags;
-    rec->af = key->af;
-
-    rec->_refc = 1; // addref
-    NS_LOG_ADDREF(rec, 1, "nsHostRecord", sizeof(nsHostRecord));
-    rec->addr_info_lock = new Mutex("nsHostRecord.addr_info_lock");
-    rec->addr_info = nsnull;
-    rec->addr_info_gencnt = 0;
-    rec->addr = nsnull;
-    rec->expiration = NowInMinutes();
-    rec->resolving = PR_FALSE;
-    rec->onQueue = PR_FALSE;
-    rec->usingAnyThread = PR_FALSE;
-    PR_INIT_CLIST(rec);
-    PR_INIT_CLIST(&rec->callbacks);
-    rec->negative = PR_FALSE;
-    memcpy((char *) rec->host, key->host, hostLen);
-
-    *result = rec;
+    // Use placement new to create the object with room for the hostname
+    // allocated after it.
+    void *place = ::operator new(size);
+    *result = new(place) nsHostRecord(key);
     return NS_OK;
 }
 
 nsHostRecord::~nsHostRecord()
 {
-    delete addr_info_lock;
     if (addr)
         free(addr);
 }
 
+PRBool
+nsHostRecord::Blacklisted(PRNetAddr *aQuery)
+{
+    // must call locked
+    LOG(("nsHostRecord::Blacklisted() %p %s\n", this, host));
+
+    // skip the string conversion for the common case of no blacklist
+    if (!mBlacklistedItems.Length())
+        return PR_FALSE;
+    
+    char buf[64];
+    if (PR_NetAddrToString(aQuery, buf, sizeof(buf)) != PR_SUCCESS)
+        return PR_FALSE;
+
+    nsDependentCString strQuery(buf);
+    LOG(("nsHostRecord::Blacklisted() query %s\n", buf));
+    
+    for (PRUint32 i = 0; i < mBlacklistedItems.Length(); i++)
+        if (mBlacklistedItems.ElementAt(i).Equals(strQuery)) {
+            LOG(("nsHostRecord::Blacklisted() %s blacklist confirmed\n", buf));
+            return PR_TRUE;
+        }
+
+    return PR_FALSE;
+}
+
+void
+nsHostRecord::ReportUnusable(PRNetAddr *aAddress)
+{
+    // must call locked
+    LOG(("nsHostRecord::ReportUnusable() %p %s\n", this, host));
+
+    char buf[64];
+    if (PR_NetAddrToString(aAddress, buf, sizeof(buf)) == PR_SUCCESS) {
+        LOG(("nsHostrecord::ReportUnusable addr %s\n",buf));
+        mBlacklistedItems.AppendElement(nsCString(buf));
+    }
+}
+
+void
+nsHostRecord::ResetBlacklist()
+{
+    // must call locked
+    LOG(("nsHostRecord::ResetBlacklist() %p %s\n", this, host));
+    mBlacklistedItems.Clear();
+}
+
 //----------------------------------------------------------------------------
 
 struct nsHostDBEnt : PLDHashEntryHdr
 {
     nsHostRecord *rec;
 };
 
 static PLDHashNumber
@@ -782,17 +833,17 @@ nsHostResolver::OnLookupComplete(nsHostR
 
         // grab list of callbacks to notify
         MoveCList(rec->callbacks, cbs);
 
         // update record fields.  We might have a rec->addr_info already if a
         // previous lookup result expired and we're reresolving it..
         PRAddrInfo  *old_addr_info;
         {
-            MutexAutoLock lock(*rec->addr_info_lock);
+            MutexAutoLock lock(rec->addr_info_lock);
             old_addr_info = rec->addr_info;
             rec->addr_info = result;
             rec->addr_info_gencnt++;
         }
         if (old_addr_info)
             PR_FreeAddrInfo(old_addr_info);
         rec->expiration = NowInMinutes();
         if (result) {
--- a/netwerk/dns/nsHostResolver.h
+++ b/netwerk/dns/nsHostResolver.h
@@ -41,16 +41,18 @@
 #include "nscore.h"
 #include "nsAtomicRefcnt.h"
 #include "prclist.h"
 #include "prnetdb.h"
 #include "pldhash.h"
 #include "mozilla/CondVar.h"
 #include "mozilla/Mutex.h"
 #include "nsISupportsImpl.h"
+#include "nsString.h"
+#include "nsTArray.h"
 
 class nsHostResolver;
 class nsHostRecord;
 class nsResolveHostCallback;
 
 /* XXX move this someplace more generic */
 #define NS_DECL_REFCOUNTED_THREADSAFE(classname)                             \
   private:                                                                   \
@@ -106,42 +108,52 @@ public:
     /* the lock protects |addr_info| and |addr_info_gencnt| because they
      * are mutable and accessed by the resolver worker thread and the
      * nsDNSService2 class.  |addr| doesn't change after it has been
      * assigned a value.  only the resolver worker thread modifies
      * nsHostRecord (and only in nsHostResolver::OnLookupComplete);
      * the other threads just read it.  therefore the resolver worker
      * thread doesn't need to lock when reading |addr_info|.
      */
-    Mutex       *addr_info_lock;
+    Mutex        addr_info_lock;
     int          addr_info_gencnt; /* generation count of |addr_info| */
     PRAddrInfo  *addr_info;
     PRNetAddr   *addr;
     PRBool       negative;   /* True if this record is a cache of a failed lookup.
                                 Negative cache entries are valid just like any other
                                 (though never for more than 60 seconds), but a use
                                 of that negative entry forces an asynchronous refresh. */
 
     PRUint32     expiration; /* measured in minutes since epoch */
 
     PRBool HasResult() const { return addr_info || addr || negative; }
 
+    // hold addr_info_lock when calling the blacklist functions
+    PRBool Blacklisted(PRNetAddr *query);
+    void   ResetBlacklist();
+    void   ReportUnusable(PRNetAddr *addr);
+
 private:
     friend class nsHostResolver;
 
     PRCList callbacks; /* list of callbacks */
 
     PRBool  resolving; /* true if this record is being resolved, which means
                         * that it is either on the pending queue or owned by
                         * one of the worker threads. */ 
     
     PRBool  onQueue;  /* true if pending and on the queue (not yet given to getaddrinfo())*/
     PRBool  usingAnyThread; /* true if off queue and contributing to mActiveAnyThreadCount */
-    
 
+    // a list of addresses associated with this record that have been reported
+    // as unusable. the list is kept as a set of strings to make it independent
+    // of gencnt.
+    nsTArray<nsCString> mBlacklistedItems;
+
+    nsHostRecord(const nsHostKey *key);           /* use Create() instead */
    ~nsHostRecord();
 };
 
 /**
  * ResolveHost callback object.  It's PRCList members are used by
  * the nsHostResolver and should not be used by anything else.
  */
 class NS_NO_VTABLE nsResolveHostCallback : public PRCList
--- a/netwerk/dns/nsIDNSRecord.idl
+++ b/netwerk/dns/nsIDNSRecord.idl
@@ -41,17 +41,17 @@ native PRNetAddr(union PRNetAddr);
 /**
  * nsIDNSRecord
  *
  * this interface represents the result of a DNS lookup.  since a DNS
  * query may return more than one resolved IP address, the record acts
  * like an enumerator, allowing the caller to easily step through the
  * list of IP addresses.
  */
-[scriptable, uuid(31c9c52e-1100-457d-abac-d2729e43f506)]
+[scriptable, uuid(ead9e9d8-7eef-4dae-a7f0-a1edcfb20478)]
 interface nsIDNSRecord : nsISupports
 {
     /**
      * @return the canonical hostname for this record.  this value is empty if
      * the record was not fetched with the RESOLVE_CANONICAL_NAME flag.
      *
      * e.g., www.mozilla.org --> rheet.mozilla.org
      */
@@ -83,9 +83,19 @@ interface nsIDNSRecord : nsISupports
      */
     boolean hasMore();
 
     /**
      * this function resets the internal address iterator to the first
      * address in the record.
      */
     void rewind();
+
+    /**
+     * This function indicates that the last address obtained via getNextAddr*()
+     * was not usuable and should be skipped in future uses of this
+     * record if other addresses are available.
+     *
+     * @param aPort is the port number associated with the failure, if any.
+     *        It may be zero if not applicable.
+     */
+    void reportUnusable(in PRUint16 aPort);
 };
--- a/netwerk/socket/nsSOCKSIOLayer.cpp
+++ b/netwerk/socket/nsSOCKSIOLayer.cpp
@@ -271,17 +271,21 @@ nsSOCKSSocketInfo::ConnectToProxy(PRFile
         rv = dns->Resolve(mProxyHost, 0, getter_AddRefs(mDnsRec));
         if (NS_FAILED(rv)) {
             LOGERROR(("socks: DNS lookup for SOCKS proxy %s failed",
                      mProxyHost.get()));
             return PR_FAILURE;
         }
     }
 
+    PRInt32 addresses = 0;
     do {
+        if (addresses++)
+            mDnsRec->ReportUnusable(mProxyPort);
+        
         rv = mDnsRec->GetNextAddr(mProxyPort, &mInternalProxyAddr);
         // No more addresses to try? If so, we'll need to bail
         if (NS_FAILED(rv)) {
             LOGERROR(("socks: unable to connect to SOCKS proxy, %s",
                      mProxyHost.get()));
             return PR_FAILURE;
         }