Bug 1397223 - Gracefully handle failures in SurfaceCacheImpl::StartTracking. r=tnikkel
authorAndrew Osmond <aosmond@mozilla.com>
Wed, 06 Sep 2017 20:17:04 -0700
changeset 428898 1e6956da301f796a885e0810090d104eab13a097
parent 428897 354981c2ee1ff9102350393ac2c262691cf72c3d
child 428899 31ba8f169c5d1d4b118fdd7bd627a991544e7f5a
push id7761
push userjlund@mozilla.com
push dateFri, 15 Sep 2017 00:19:52 +0000
treeherdermozilla-beta@c38455951db4 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewerstnikkel
bugs1397223
milestone57.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1397223 - Gracefully handle failures in SurfaceCacheImpl::StartTracking. r=tnikkel When the surface cache starts tracking an unlocked surface, it must insert it into the expiration tracker, so that it can be freed later if it is remains unused. ExpirationTrackerImpl::AddObjectLocked can fail due to out-of-memory conditions or during shutdown, which we previously ignored, and could leave us in a state where we think the surface is in the tracker but is not. When we later try to mark the surface as used in the tracker, it will hit a release assert because it doesn't exist. Now we handle the insertion failure by discarding the surface. Marking the surface as used can itself encounter a similar issue, and we handle it the same way. MozReview-Commit-ID: Kv6l0znnG48
image/SurfaceCache.cpp
--- a/image/SurfaceCache.cpp
+++ b/image/SurfaceCache.cpp
@@ -724,17 +724,17 @@ public:
       return InsertOutcome::FAILURE;
     }
 
     // Remove elements in order of cost until we can fit this in the cache. Note
     // that locked surfaces aren't in mCosts, so we never remove them here.
     while (cost > mAvailableCost) {
       MOZ_ASSERT(!mCosts.IsEmpty(),
                  "Removed everything and it still won't fit");
-      Remove(mCosts.LastElement().Surface(), aAutoLock);
+      Remove(mCosts.LastElement().Surface(), /* aStopTracking */ true, aAutoLock);
     }
 
     // Locate the appropriate per-image cache. If there's not an existing cache
     // for this image, create it.
     RefPtr<ImageSurfaceCache> cache = GetImageCache(aProvider->GetImageKey());
     if (!cache) {
       cache = new ImageSurfaceCache;
       mImageCaches.Put(aProvider->GetImageKey(), cache);
@@ -763,86 +763,105 @@ public:
     MOZ_ASSERT(cost <= mAvailableCost, "Inserting despite too large a cost");
     if (!cache->Insert(surface)) {
       if (mustLock) {
         surface->SetLocked(false);
       }
       return InsertOutcome::FAILURE;
     }
 
-    StartTracking(surface, aAutoLock);
+    if (MOZ_UNLIKELY(!StartTracking(surface, aAutoLock))) {
+      MOZ_ASSERT(!mustLock);
+      Remove(surface, /* aStopTracking */ false, aAutoLock);
+      return InsertOutcome::FAILURE;
+    }
+
     return InsertOutcome::SUCCESS;
   }
 
   void Remove(NotNull<CachedSurface*> aSurface,
+              bool aStopTracking,
               const StaticMutexAutoLock& aAutoLock)
   {
     ImageKey imageKey = aSurface->GetImageKey();
 
     RefPtr<ImageSurfaceCache> cache = GetImageCache(imageKey);
     MOZ_ASSERT(cache, "Shouldn't try to remove a surface with no image cache");
 
     // If the surface was not a placeholder, tell its image that we discarded it.
     if (!aSurface->IsPlaceholder()) {
       static_cast<Image*>(imageKey)->OnSurfaceDiscarded(aSurface->GetSurfaceKey());
     }
 
-    StopTracking(aSurface, aAutoLock);
+    // If we failed during StartTracking, we can skip this step.
+    if (aStopTracking) {
+      StopTracking(aSurface, /* aIsTracked */ true, aAutoLock);
+    }
 
     // Individual surfaces must be freed outside the lock.
     mCachedSurfacesDiscard.AppendElement(cache->Remove(aSurface));
 
     // Remove the per-image cache if it's unneeded now. Keep it if the image is
     // locked, since the per-image cache is where we store that state. Note that
     // we don't push it into mImageCachesDiscard because all of its surfaces
     // have been removed, so it is safe to free while holding the lock.
     if (cache->IsEmpty() && !cache->IsLocked()) {
       mImageCaches.Remove(imageKey);
     }
   }
 
-  void StartTracking(NotNull<CachedSurface*> aSurface,
+  bool StartTracking(NotNull<CachedSurface*> aSurface,
                      const StaticMutexAutoLock& aAutoLock)
   {
     CostEntry costEntry = aSurface->GetCostEntry();
     MOZ_ASSERT(costEntry.GetCost() <= mAvailableCost,
                "Cost too large and the caller didn't catch it");
 
-    mAvailableCost -= costEntry.GetCost();
-
     if (aSurface->IsLocked()) {
       mLockedCost += costEntry.GetCost();
       MOZ_ASSERT(mLockedCost <= mMaxCost, "Locked more than we can hold?");
     } else {
-      mCosts.InsertElementSorted(costEntry);
+      if (NS_WARN_IF(!mCosts.InsertElementSorted(costEntry, fallible))) {
+        return false;
+      }
+
       // This may fail during XPCOM shutdown, so we need to ensure the object is
       // tracked before calling RemoveObject in StopTracking.
-      mExpirationTracker.AddObjectLocked(aSurface, aAutoLock);
+      nsresult rv = mExpirationTracker.AddObjectLocked(aSurface, aAutoLock);
+      if (NS_WARN_IF(NS_FAILED(rv))) {
+        DebugOnly<bool> foundInCosts = mCosts.RemoveElementSorted(costEntry);
+        MOZ_ASSERT(foundInCosts, "Lost track of costs for this surface");
+        return false;
+      }
     }
+
+    mAvailableCost -= costEntry.GetCost();
+    return true;
   }
 
   void StopTracking(NotNull<CachedSurface*> aSurface,
+                    bool aIsTracked,
                     const StaticMutexAutoLock& aAutoLock)
   {
     CostEntry costEntry = aSurface->GetCostEntry();
 
     if (aSurface->IsLocked()) {
       MOZ_ASSERT(mLockedCost >= costEntry.GetCost(), "Costs don't balance");
       mLockedCost -= costEntry.GetCost();
       // XXX(seth): It'd be nice to use an O(log n) lookup here. This is O(n).
       MOZ_ASSERT(!mCosts.Contains(costEntry),
                  "Shouldn't have a cost entry for a locked surface");
     } else {
       if (MOZ_LIKELY(aSurface->GetExpirationState()->IsTracked())) {
+        MOZ_ASSERT(aIsTracked, "Expiration-tracking a surface unexpectedly!");
         mExpirationTracker.RemoveObjectLocked(aSurface, aAutoLock);
       } else {
         // Our call to AddObject must have failed in StartTracking; most likely
         // we're in XPCOM shutdown right now.
-        NS_ASSERTION(ShutdownTracker::ShutdownHasStarted(),
-                     "Not expiration-tracking an unlocked surface!");
+        MOZ_ASSERT(!aIsTracked, "Not expiration-tracking an unlocked surface!");
       }
 
       DebugOnly<bool> foundInCosts = mCosts.RemoveElementSorted(costEntry);
       MOZ_ASSERT(foundInCosts, "Lost track of costs for this surface");
     }
 
     mAvailableCost += costEntry.GetCost();
     MOZ_ASSERT(mAvailableCost <= mMaxCost,
@@ -869,22 +888,24 @@ public:
     if (surface->IsPlaceholder()) {
       return LookupResult(MatchType::PENDING);
     }
 
     DrawableSurface drawableSurface = surface->GetDrawableSurface();
     if (!drawableSurface) {
       // The surface was released by the operating system. Remove the cache
       // entry as well.
-      Remove(WrapNotNull(surface), aAutoLock);
+      Remove(WrapNotNull(surface), /* aStopTracking */ true, aAutoLock);
       return LookupResult(MatchType::NOT_FOUND);
     }
 
-    if (aMarkUsed) {
-      MarkUsed(WrapNotNull(surface), WrapNotNull(cache), aAutoLock);
+    if (aMarkUsed &&
+        !MarkUsed(WrapNotNull(surface), WrapNotNull(cache), aAutoLock)) {
+      Remove(WrapNotNull(surface), /* aStopTracking */ false, aAutoLock);
+      return LookupResult(MatchType::NOT_FOUND);
     }
 
     MOZ_ASSERT(surface->GetSurfaceKey() == aSurfaceKey,
                "Lookup() not returning an exact match?");
     return LookupResult(Move(drawableSurface), MatchType::EXACT);
   }
 
   LookupResult LookupBestMatch(const ImageKey         aImageKey,
@@ -917,30 +938,32 @@ public:
 
       drawableSurface = surface->GetDrawableSurface();
       if (drawableSurface) {
         break;
       }
 
       // The surface was released by the operating system. Remove the cache
       // entry as well.
-      Remove(WrapNotNull(surface), aAutoLock);
+      Remove(WrapNotNull(surface), /* aStopTracking */ true, aAutoLock);
     }
 
     MOZ_ASSERT_IF(matchType == MatchType::EXACT,
                   surface->GetSurfaceKey() == aSurfaceKey);
     MOZ_ASSERT_IF(matchType == MatchType::SUBSTITUTE_BECAUSE_NOT_FOUND ||
                   matchType == MatchType::SUBSTITUTE_BECAUSE_PENDING,
       surface->GetSurfaceKey().SVGContext() == aSurfaceKey.SVGContext() &&
       surface->GetSurfaceKey().Playback() == aSurfaceKey.Playback() &&
       surface->GetSurfaceKey().Flags() == aSurfaceKey.Flags());
 
     if (matchType == MatchType::EXACT ||
         matchType == MatchType::SUBSTITUTE_BECAUSE_BEST) {
-      MarkUsed(WrapNotNull(surface), WrapNotNull(cache), aAutoLock);
+      if (!MarkUsed(WrapNotNull(surface), WrapNotNull(cache), aAutoLock)) {
+        Remove(WrapNotNull(surface), /* aStopTracking */ false, aAutoLock);
+      }
     }
 
     // When the caller may choose to decode at an uncached size because there is
     // no pending decode at the requested size, we should give it the alternative
     // size it should decode at.
     if (matchType == MatchType::SUBSTITUTE_BECAUSE_NOT_FOUND ||
         matchType == MatchType::NOT_FOUND) {
       return LookupResult(Move(drawableSurface), matchType, suggestedSize);
@@ -1023,17 +1046,18 @@ public:
     }
 
     // Discard all of the cached surfaces for this image.
     // XXX(seth): This is O(n^2) since for each item in the cache we are
     // removing an element from the costs array. Since n is expected to be
     // small, performance should be good, but if usage patterns change we should
     // change the data structure used for mCosts.
     for (auto iter = cache->ConstIter(); !iter.Done(); iter.Next()) {
-      StopTracking(WrapNotNull(iter.UserData()), aAutoLock);
+      StopTracking(WrapNotNull(iter.UserData()),
+                   /* aIsTracked */ true, aAutoLock);
     }
 
     // The per-image cache isn't needed anymore, so remove it as well.
     // This implicitly unlocks the image if it was locked.
     mImageCaches.Remove(aImageKey);
 
     // Since we did not actually remove any of the surfaces from the cache
     // itself, only stopped tracking them, we should free it outside the lock.
@@ -1043,27 +1067,27 @@ public:
   void PruneImage(const ImageKey aImageKey, const StaticMutexAutoLock& aAutoLock)
   {
     RefPtr<ImageSurfaceCache> cache = GetImageCache(aImageKey);
     if (!cache) {
       return;  // No cached surfaces for this image, so nothing to do.
     }
 
     cache->Prune([this, &aAutoLock](NotNull<CachedSurface*> aSurface) -> void {
-      StopTracking(aSurface, aAutoLock);
+      StopTracking(aSurface, /* aIsTracked */ true, aAutoLock);
     });
   }
 
   void DiscardAll(const StaticMutexAutoLock& aAutoLock)
   {
     // Remove in order of cost because mCosts is an array and the other data
     // structures are all hash tables. Note that locked surfaces are not
     // removed, since they aren't present in mCosts.
     while (!mCosts.IsEmpty()) {
-      Remove(mCosts.LastElement().Surface(), aAutoLock);
+      Remove(mCosts.LastElement().Surface(), /* aStopTracking */ true, aAutoLock);
     }
   }
 
   void DiscardForMemoryPressure(const StaticMutexAutoLock& aAutoLock)
   {
     // Compute our discardable cost. Since locked surfaces aren't discardable,
     // we exclude them.
     const Cost discardableCost = (mMaxCost - mAvailableCost) - mLockedCost;
@@ -1079,17 +1103,17 @@ public:
       MOZ_ASSERT_UNREACHABLE("Target cost is more than we can discard");
       DiscardAll(aAutoLock);
       return;
     }
 
     // Discard surfaces until we've reduced our cost to our target cost.
     while (mAvailableCost < targetCost) {
       MOZ_ASSERT(!mCosts.IsEmpty(), "Removed everything and still not done");
-      Remove(mCosts.LastElement().Surface(), aAutoLock);
+      Remove(mCosts.LastElement().Surface(), /* aStopTracking */ true, aAutoLock);
     }
   }
 
   void TakeDiscard(nsTArray<RefPtr<CachedSurface>>& aDiscard,
                    const StaticMutexAutoLock& aAutoLock)
   {
     MOZ_ASSERT(aDiscard.IsEmpty());
     aDiscard = Move(mCachedSurfacesDiscard);
@@ -1097,21 +1121,22 @@ public:
 
   void LockSurface(NotNull<CachedSurface*> aSurface,
                    const StaticMutexAutoLock& aAutoLock)
   {
     if (aSurface->IsPlaceholder() || aSurface->IsLocked()) {
       return;
     }
 
-    StopTracking(aSurface, aAutoLock);
+    StopTracking(aSurface, /* aIsTracked */ true, aAutoLock);
 
     // Lock the surface. This can fail.
     aSurface->SetLocked(true);
-    StartTracking(aSurface, aAutoLock);
+    DebugOnly<bool> tracking = StartTracking(aSurface, aAutoLock);
+    MOZ_ASSERT(tracking);
   }
 
   NS_IMETHOD
   CollectReports(nsIHandleReportCallback* aHandleReport,
                  nsISupports*             aData,
                  bool                     aAnonymize) override
   {
     StaticMutexAutoLock lock(sInstanceMutex);
@@ -1164,42 +1189,60 @@ private:
   // publicly because we permit multithreaded access to the surface cache, which
   // means that the result would be meaningless: another thread could insert a
   // surface or lock an image at any time.
   bool CanHoldAfterDiscarding(const Cost aCost) const
   {
     return aCost <= mMaxCost - mLockedCost;
   }
 
-  void MarkUsed(NotNull<CachedSurface*> aSurface,
+  bool MarkUsed(NotNull<CachedSurface*> aSurface,
                 NotNull<ImageSurfaceCache*> aCache,
                 const StaticMutexAutoLock& aAutoLock)
   {
     if (aCache->IsLocked()) {
       LockSurface(aSurface, aAutoLock);
-    } else {
-      mExpirationTracker.MarkUsedLocked(aSurface, aAutoLock);
+      return true;
     }
+
+    nsresult rv = mExpirationTracker.MarkUsedLocked(aSurface, aAutoLock);
+    if (NS_WARN_IF(NS_FAILED(rv))) {
+      // If mark used fails, it is because it failed to reinsert the surface
+      // after removing it from the tracker. Thus we need to update our
+      // own accounting but otherwise expect it to be untracked.
+      StopTracking(aSurface, /* aIsTracked */ false, aAutoLock);
+      return false;
+    }
+    return true;
   }
 
   void DoUnlockSurfaces(NotNull<ImageSurfaceCache*> aCache, bool aStaticOnly,
                         const StaticMutexAutoLock& aAutoLock)
   {
+    AutoTArray<NotNull<CachedSurface*>, 8> discard;
+
     // Unlock all the surfaces the per-image cache is holding.
     for (auto iter = aCache->ConstIter(); !iter.Done(); iter.Next()) {
       NotNull<CachedSurface*> surface = WrapNotNull(iter.UserData());
       if (surface->IsPlaceholder() || !surface->IsLocked()) {
         continue;
       }
       if (aStaticOnly && surface->GetSurfaceKey().Playback() != PlaybackType::eStatic) {
         continue;
       }
-      StopTracking(surface, aAutoLock);
+      StopTracking(surface, /* aIsTracked */ true, aAutoLock);
       surface->SetLocked(false);
-      StartTracking(surface, aAutoLock);
+      if (MOZ_UNLIKELY(!StartTracking(surface, aAutoLock))) {
+        discard.AppendElement(surface);
+      }
+    }
+
+    // Discard any that we failed to track.
+    for (auto iter = discard.begin(); iter != discard.end(); ++iter) {
+      Remove(*iter, /* aStopTracking */ false, aAutoLock);
     }
   }
 
   void RemoveEntry(const ImageKey    aImageKey,
                    const SurfaceKey& aSurfaceKey,
                    const StaticMutexAutoLock& aAutoLock)
   {
     RefPtr<ImageSurfaceCache> cache = GetImageCache(aImageKey);
@@ -1208,17 +1251,17 @@ private:
     }
 
     RefPtr<CachedSurface> surface =
       cache->Lookup(aSurfaceKey, /* aForAccess = */ false);
     if (!surface) {
       return;  // Lookup in the per-image cache missed.
     }
 
-    Remove(WrapNotNull(surface), aAutoLock);
+    Remove(WrapNotNull(surface), /* aStopTracking */ true, aAutoLock);
   }
 
   class SurfaceTracker final :
     public ExpirationTrackerImpl<CachedSurface, 2,
                                  StaticMutex,
                                  StaticMutexAutoLock>
   {
   public:
@@ -1228,17 +1271,17 @@ private:
           aSurfaceCacheExpirationTimeMS, "SurfaceTracker",
           SystemGroup::EventTargetFor(TaskCategory::Other))
     { }
 
   protected:
     void NotifyExpiredLocked(CachedSurface* aSurface,
                              const StaticMutexAutoLock& aAutoLock) override
     {
-      sInstance->Remove(WrapNotNull(aSurface), aAutoLock);
+      sInstance->Remove(WrapNotNull(aSurface), /* aStopTracking */ true, aAutoLock);
     }
 
     void NotifyHandlerEndLocked(const StaticMutexAutoLock& aAutoLock) override
     {
       sInstance->TakeDiscard(mDiscard, aAutoLock);
     }
 
     void NotifyHandlerEnd() override