Bug 784859 - Part 2: Avoid locking to store the computed result in the global variable in CalibratedPerformanceCounter; r=bbondy
author Ehsan Akhgari <ehsan@mozilla.com>
Thu, 06 Sep 2012 11:01:06 -0400
changeset 104649 2a8acb221714
parent 104648 d291e2674dc6
child 104650 9b7184f6e154
push id 23436
push user ryanvm@gmail.com
push date 2012-09-09 01:10 +0000
reviewers bbondy
bugs 784859
milestone 18.0a1
xpcom/ds/TimeStamp_windows.cpp
--- a/xpcom/ds/TimeStamp_windows.cpp
+++ b/xpcom/ds/TimeStamp_windows.cpp
@@ -24,22 +24,37 @@
 #include "prlog.h"
 #include <stdio.h>
 
 #include <intrin.h>
 
 static bool
 HasStableTSC()
 {
+  union {
+    int regs[4];
+    struct {
+      int nIds;
+      char cpuString[12];
+    };
+  } cpuInfo;
+
+  __cpuid(cpuInfo.regs, 0);
+  // Only allow Intel CPUs for now.
+  // CPUID returns "GenuineIntel" split across reg[1], reg[3], reg[2], so we
+  // adjust the comparison string to the memory order of regs[1..3].
+  if (_strnicmp(cpuInfo.cpuString, "GenuntelineI", sizeof(cpuInfo.cpuString)))
+    return false;
+
   int regs[4];
 
   // detect if the Advanced Power Management feature is supported
   __cpuid(regs, 0x80000000);
   if (regs[0] < 0x80000007)
-          return false;
+    return false;
 
   __cpuid(regs, 0x80000007);
   // if bit 8 is set then TSC will run at a constant rate
   // in all ACPI P-states, C-states and T-states
   return regs[3] & (1 << 8);
 }
 
 
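For reference, here is a minimal standalone sketch of the register layout that the "GenuntelineI" comparison above relies on; it is illustrative only and not part of the patch:

    #include <intrin.h>
    #include <stdio.h>
    #include <string.h>

    int main()
    {
      int regs[4];  // regs[0]=EAX, regs[1]=EBX, regs[2]=ECX, regs[3]=EDX
      __cpuid(regs, 0);

      // CPUID leaf 0 returns the vendor string "GenuineIntel" split as
      // EBX="Genu", EDX="ineI", ECX="ntel".  Reading regs[1..3] as one flat
      // 12-byte buffer therefore yields "Genu" "ntel" "ineI".
      char vendor[13];
      memcpy(vendor, &regs[1], 12);
      vendor[12] = '\0';

      printf("%s\n", vendor);  // prints "GenuntelineI" on an Intel CPU
      return 0;
    }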
@@ -196,16 +211,28 @@ static bool sForceRecalibrate = false;
 
 namespace mozilla {
 
 
 static ULONGLONG
 CalibratedPerformanceCounter();
 
 
+static inline ULONGLONG
+InterlockedRead64(volatile ULONGLONG* destination)
+{
+#ifdef _WIN64
+  // Aligned 64-bit reads on x86-64 are atomic
+  return *destination;
+#else
+  // Dirty hack since Windows doesn't provide an atomic 64-bit read function
+  return _InterlockedCompareExchange64(reinterpret_cast<volatile __int64*> (destination), 0, 0);
+#endif
+}
+
 // ----------------------------------------------------------------------------
 // Critical Section helper class
 // ----------------------------------------------------------------------------
 
 class AutoCriticalSection
 {
 public:
   AutoCriticalSection(LPCRITICAL_SECTION section)
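The InterlockedRead64 helper added above exploits the semantics of _InterlockedCompareExchange64(destination, exchange, comparand): the intrinsic atomically compares *destination with comparand, stores exchange on a match, and always returns the original value. With exchange == comparand == 0 nothing observable is written, but the return value is an atomic 64-bit snapshot even on 32-bit x86. A minimal sketch of the same trick on a hypothetical shared counter (sCounter and ReadCounter are illustrative names, not from the patch):

    #include <windows.h>
    #include <intrin.h>

    static volatile ULONGLONG sCounter = 0;  // hypothetical shared value

    static inline ULONGLONG
    ReadCounter()
    {
      // Compare *destination with 0 and exchange 0 on a match: either way
      // the memory is left unchanged, but the returned original value is an
      // atomic 64-bit read (LOCK CMPXCHG8B under the hood on 32-bit x86).
      return _InterlockedCompareExchange64(
          reinterpret_cast<volatile __int64*>(&sCounter), 0, 0);
    }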
@@ -489,62 +516,92 @@ CheckCalibration(LONGLONG overflow, ULON
       mt2ms_d(sSkew), sForceRecalibrate));
 
     sForceRecalibrate = false;
   }
 
   return true;
 }
 
+// AtomicStoreIfGreaterThan stores the maximum of two values in *destination
+// without locking.  With a plain read-compare-store, two racing threads
+// could corrupt the maximum when both try to increase the value without
+// knowing about each other, like below:
+//
+// Thread 1 reads 1000.  newValue in thread 1 is 1005.
+// Thread 2 reads 1000.  newValue in thread 2 is 1001.
+// Thread 1 stores: its read value is less than its newValue, so the store
+//                  happens.  *destination is now 1005.
+// Thread 2 stores: its read value is less than its newValue, so the store
+//                  happens.  *destination is now 1001.
+//
+// Had these updates happened serially, the stored value would be 1005.  The
+// compare-and-swap loop below detects the intervening store and retries.
+//
+// The return value is the maximum value.
+ULONGLONG
+AtomicStoreIfGreaterThan(ULONGLONG* destination, ULONGLONG newValue)
+{
+  ULONGLONG readValue;
+  do {
+    readValue = InterlockedRead64(destination);
+    if (readValue > newValue)
+      return readValue;
+  } while (readValue != _InterlockedCompareExchange64(reinterpret_cast<volatile __int64*> (destination),
+                                                      newValue, readValue));
+
+  return newValue;
+}
+
 // The main function.  The result is in [mt] units, is guaranteed to never
 // go backwards, and is mostly reliable at the highest possible resolution.
 static ULONGLONG
 CalibratedPerformanceCounter()
 {
   // XXX This is using ObserverService, which cannot be instantiated during
   // static startup; this really needs better initialization code here.
   StandbyObserver::Ensure();
 
   // Don't hold the lock over the call to QueryPerformanceCounter, since it
   // is the largest bottleneck; letting threads read the value concurrently
   // possibly gives better performance.
 
   ULONGLONG qpc = PerformanceCounter();
   DWORD gtcw = GetTickCount();
 
+  ULONGLONG result;
+  {
   AutoCriticalSection lock(&sTimeStampLock);
 
   // Rollover protection
   ULONGLONG gtc = TickCount64(gtcw);
 
   LONGLONG diff = qpc - ms2mt(gtc) - sSkew;
   LONGLONG overflow = 0;
 
   if (diff < sUnderrunThreshold) {
     overflow = sUnderrunThreshold - diff;
   }
   else if (diff > sOverrunThreshold) {
     overflow = diff - sOverrunThreshold;
   }
 
-  ULONGLONG result = qpc;
+  result = qpc;
   if (!CheckCalibration(overflow, qpc, gtc)) {
     // We are back on GTC, QPC has been observed unreliable
     result = ms2mt(gtc) + sSkew;
   }
 
 #if 0
   LOG(("TimeStamp: result = %1.2fms, diff = %1.4fms",
       mt2ms_d(result), mt2ms_d(diff)));
 #endif
+  }
 
-  if (result > sLastResult)
-    sLastResult = result;
-
-  return sLastResult;
+  return AtomicStoreIfGreaterThan(&sLastResult, result);
 }
 
 // ----------------------------------------------------------------------------
 // TimeDuration and TimeStamp implementation
 // ----------------------------------------------------------------------------
 
 double
 TimeDuration::ToSeconds() const
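As an aside, the lock-free monotonic-maximum pattern that AtomicStoreIfGreaterThan builds from the Windows intrinsics can be restated portably with C++11 std::atomic. The sketch below is illustrative only (StoreMax is a hypothetical name, not part of the patch):

    #include <atomic>
    #include <cstdint>

    uint64_t
    StoreMax(std::atomic<uint64_t>& destination, uint64_t newValue)
    {
      uint64_t readValue = destination.load();
      // compare_exchange_weak writes the current value back into readValue
      // when it fails, so the loop retries until either our store lands or
      // another thread has already published a value >= newValue.
      while (readValue < newValue &&
             !destination.compare_exchange_weak(readValue, newValue)) {
      }
      return readValue < newValue ? newValue : readValue;
    }

The shape mirrors the loop in the patch: read, bail out early if the stored value is already larger, otherwise attempt the store and retry on interference.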