Bug 1464509: Dump profiler counters to the ProfileBuffer and JSON r=mstange
authorRandell Jesup <rjesup@jesup.org>
Tue, 09 Oct 2018 22:28:26 -0400
changeset 498903 eed77f6052d967a82e8097b12b3928cd7c646970
parent 498902 1ea62adf090265c003ed0385e97e59229f0a85f4
child 498904 b69c28ab39fc3b8f36d72143c731e10c6bfa1023
push id1864
push userffxbld-merge
push dateMon, 03 Dec 2018 15:51:40 +0000
treeherdermozilla-release@f040763d99ad [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersmstange
bugs1464509
milestone64.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1464509: Dump profiler counters to the ProfileBuffer and JSON r=mstange Don't allow DuplicateLastSample to duplicate counters or memory use values
tools/profiler/core/ProfileBuffer.h
tools/profiler/core/ProfileBufferEntry.cpp
tools/profiler/core/ProfileBufferEntry.h
tools/profiler/core/platform.cpp
tools/profiler/public/ProfilerCounts.h
--- a/tools/profiler/core/ProfileBuffer.h
+++ b/tools/profiler/core/ProfileBuffer.h
@@ -72,16 +72,22 @@ public:
                            UniqueStacks& aUniqueStacks) const;
 
   void StreamMarkersToJSON(SpliceableJSONWriter& aWriter, int aThreadId,
                            const mozilla::TimeStamp& aProcessStartTime,
                            double aSinceTime,
                            UniqueStacks& aUniqueStacks) const;
   void StreamPausedRangesToJSON(SpliceableJSONWriter& aWriter,
                                 double aSinceTime) const;
+  void StreamCountersToJSON(SpliceableJSONWriter& aWriter,
+                            const mozilla::TimeStamp& aProcessStartTime,
+                            double aSinceTime) const;
+  void StreamMemoryToJSON(SpliceableJSONWriter& aWriter,
+                          const mozilla::TimeStamp& aProcessStartTime,
+                          double aSinceTime) const;
 
   // Find (via |aLastSample|) the most recent sample for the thread denoted by
   // |aThreadId| and clone it, patching in the current time as appropriate.
   // Mutate |aLastSample| to point to the newly inserted sample.
   // Returns whether duplication was successful.
   bool DuplicateLastSample(int aThreadId,
                            const mozilla::TimeStamp& aProcessStartTime,
                            mozilla::Maybe<uint64_t>& aLastSample);
--- a/tools/profiler/core/ProfileBufferEntry.cpp
+++ b/tools/profiler/core/ProfileBufferEntry.cpp
@@ -63,16 +63,28 @@ ProfileBufferEntry::ProfileBufferEntry(K
 }
 
 ProfileBufferEntry::ProfileBufferEntry(Kind aKind, int aInt)
   : mKind(aKind)
 {
   u.mInt = aInt;
 }
 
+ProfileBufferEntry::ProfileBufferEntry(Kind aKind, int64_t aInt64)
+  : mKind(aKind)
+{
+  u.mInt64 = aInt64;
+}
+
+ProfileBufferEntry::ProfileBufferEntry(Kind aKind, uint64_t aUint64)
+  : mKind(aKind)
+{
+  u.mUint64 = aUint64;
+}
+
 // END ProfileBufferEntry
 ////////////////////////////////////////////////////////////////////////
 
 class JSONSchemaWriter
 {
   JSONWriter& mWriter;
   uint32_t mIndex;
 
@@ -174,58 +186,68 @@ MakeForEachTrackedOptimizationTypeInfoLa
 //     ... etc ...
 //
 //     The elements need to be added in-order.
 class MOZ_RAII AutoArraySchemaWriter
 {
 public:
   AutoArraySchemaWriter(SpliceableJSONWriter& aWriter, UniqueJSONStrings& aStrings)
     : mJSONWriter(aWriter)
-    , mStrings(aStrings)
+    , mStrings(&aStrings)
+    , mNextFreeIndex(0)
+  {
+    mJSONWriter.StartArrayElement(SpliceableJSONWriter::SingleLineStyle);
+  }
+
+  explicit AutoArraySchemaWriter(SpliceableJSONWriter& aWriter)
+    : mJSONWriter(aWriter)
+    , mStrings(nullptr)
     , mNextFreeIndex(0)
   {
     mJSONWriter.StartArrayElement(SpliceableJSONWriter::SingleLineStyle);
   }
 
   ~AutoArraySchemaWriter() {
     mJSONWriter.EndArray();
   }
 
-  void IntElement(uint32_t aIndex, uint32_t aValue) {
+  void IntElement(uint64_t aIndex, uint64_t aValue) {
     FillUpTo(aIndex);
     mJSONWriter.IntElement(aValue);
   }
 
   void DoubleElement(uint32_t aIndex, double aValue) {
     FillUpTo(aIndex);
     mJSONWriter.DoubleElement(aValue);
   }
 
   void StringElement(uint32_t aIndex, const char* aValue) {
+    MOZ_RELEASE_ASSERT(mStrings);
     FillUpTo(aIndex);
-    mStrings.WriteElement(mJSONWriter, aValue);
+    mStrings->WriteElement(mJSONWriter, aValue);
   }
 
   // Write an element using a callback that takes a JSONWriter& and a
   // UniqueJSONStrings&.
   template<typename LambdaT>
   void FreeFormElement(uint32_t aIndex, LambdaT aCallback) {
+    MOZ_RELEASE_ASSERT(mStrings);
     FillUpTo(aIndex);
-    aCallback(mJSONWriter, mStrings);
+    aCallback(mJSONWriter, *mStrings);
   }
 
 private:
   void FillUpTo(uint32_t aIndex) {
     MOZ_ASSERT(aIndex >= mNextFreeIndex);
     mJSONWriter.NullElements(aIndex - mNextFreeIndex);
     mNextFreeIndex = aIndex + 1;
   }
 
   SpliceableJSONWriter& mJSONWriter;
-  UniqueJSONStrings& mStrings;
+  UniqueJSONStrings* mStrings;
   uint32_t mNextFreeIndex;
 };
 
 template<typename LambdaT>
 class ForEachTrackedOptimizationAttemptsLambdaOp
   : public JS::ForEachTrackedOptimizationAttemptOp
 {
 public:
@@ -791,28 +813,38 @@ private:
   const ProfileBuffer& mBuffer;
   uint64_t mReadPos;
 };
 
 // The following grammar shows legal sequences of profile buffer entries.
 // The sequences beginning with a ThreadId entry are known as "samples".
 //
 // (
-//   (
+//   ( /* Samples */
 //     ThreadId
 //     Time
 //     ( NativeLeafAddr
 //     | Label DynamicStringFragment* LineNumber? Category?
 //     | JitReturnAddr
 //     )+
 //     Marker*
 //     Responsiveness?
 //     ResidentMemory?
 //     UnsharedMemory?
 //   )
+//   | ( ResidentMemory UnsharedMemory? Time)  /* Memory */
+//   | ( /* Counters */
+//       CounterId
+//       Time
+//       (
+//         CounterKey
+//         Count
+//         Number?
+//       )*
+//     )
 //   | CollectionStart
 //   | CollectionEnd
 //   | Pause
 //   | Resume
 // )*
 //
 // The most complicated part is the stack entry sequence that begins with
 // Label. Here are some examples.
@@ -895,47 +927,48 @@ private:
 //     DynamicStringFragment("u (https")
 //     DynamicStringFragment("://perf-")
 //     DynamicStringFragment("html.io/")
 //     DynamicStringFragment("ac0da204")
 //     DynamicStringFragment("aaa44d75")
 //     DynamicStringFragment("a800.bun")
 //     DynamicStringFragment("dle.js:2")
 //     DynamicStringFragment("5)")
+
+// Because this is a format entirely internal to the Profiler, any parsing
+// error indicates a bug in the ProfileBuffer writing or the parser itself,
+// or possibly flaky hardware.
+#define ERROR_AND_CONTINUE(msg) \
+  { \
+    fprintf(stderr, "ProfileBuffer parse error: %s", msg); \
+    MOZ_ASSERT(false, msg); \
+    continue; \
+  }
+
 void
 ProfileBuffer::StreamSamplesToJSON(SpliceableJSONWriter& aWriter, int aThreadId,
                                    double aSinceTime,
                                    UniqueStacks& aUniqueStacks) const
 {
   UniquePtr<char[]> strbuf = MakeUnique<char[]>(kMaxFrameKeyLength);
 
-  // Because this is a format entirely internal to the Profiler, any parsing
-  // error indicates a bug in the ProfileBuffer writing or the parser itself,
-  // or possibly flaky hardware.
-  #define ERROR_AND_CONTINUE(msg) \
-    { \
-      fprintf(stderr, "ProfileBuffer parse error: %s", msg); \
-      MOZ_ASSERT(false, msg); \
-      continue; \
-    }
-
   EntryGetter e(*this);
 
   for (;;) {
     // This block skips entries until we find the start of the next sample.
     // This is useful in three situations.
     //
     // - The circular buffer overwrites old entries, so when we start parsing
     //   we might be in the middle of a sample, and we must skip forward to the
     //   start of the next sample.
     //
     // - We skip samples that don't have an appropriate ThreadId or Time.
     //
-    // - We skip range Pause, Resume, CollectionStart, Marker, and CollectionEnd
-    //   entries between samples.
+    // - We skip range Pause, Resume, CollectionStart, Marker, Counter
+    //   and CollectionEnd entries between samples.
     while (e.Has()) {
       if (e.Get().IsThreadId()) {
         break;
       } else {
         e.Next();
       }
     }
 
@@ -1092,18 +1125,16 @@ ProfileBuffer::StreamSamplesToJSON(Splic
 
     if (e.Has() && e.Get().IsUnsharedMemory()) {
       sample.mUSS = Some(e.Get().u.mDouble);
       e.Next();
     }
 
     WriteSample(aWriter, *aUniqueStacks.mUniqueStrings, sample);
   }
-
-  #undef ERROR_AND_CONTINUE
 }
 
 void
 ProfileBuffer::AddJITInfoForRange(uint64_t aRangeStart,
                                   int aThreadId, JSContext* aContext,
                                   JITFrameInfo& aJITFrameInfo) const
 {
   // We can only process JitReturnAddr entries if we have a JSContext.
@@ -1166,16 +1197,249 @@ ProfileBuffer::StreamMarkersToJSON(Splic
           marker->GetThreadId() == aThreadId) {
         marker->StreamJSON(aWriter, aProcessStartTime, aUniqueStacks);
       }
     }
     e.Next();
   }
 }
 
+
+struct CounterKeyedSample
+{
+  double mTime;
+  uint64_t mNumber;
+  int64_t mCount;
+};
+
+typedef nsTArray<CounterKeyedSample> CounterKeyedSamples;
+
+
+typedef nsDataHashtable<nsUint64HashKey, CounterKeyedSamples> CounterMap;
+
+void
+ProfileBuffer::StreamCountersToJSON(SpliceableJSONWriter& aWriter,
+                                    const TimeStamp& aProcessStartTime,
+                                    double aSinceTime) const
+{
+  // Because this is a format entirely internal to the Profiler, any parsing
+  // error indicates a bug in the ProfileBuffer writing or the parser itself,
+  // or possibly flaky hardware.
+
+  EntryGetter e(*this);
+  enum Schema : uint32_t {
+    TIME = 0,
+    NUMBER = 1,
+    COUNT = 2
+  };
+
+  // Stream all counters. We skip other entries, because we process them in
+  // StreamSamplesToJSON()/etc.
+  //
+  // Valid sequence in the buffer:
+  // CounterID
+  // Time
+  // ( CounterKey Count Number? )*
+  //
+  // And the JSON (example):
+  // "counters": {
+  //  "name": "malloc",
+  //  "category": "Memory",
+  //  "description": "Amount of allocated memory",
+  //  "sample_groups": {
+  //   "id": 0,
+  //   "samples": {
+  //    "schema": {"time": 0, "number": 1, "count": 2},
+  //    "data": [
+  //     [
+  //      16117.033968000002,
+  //      2446216,
+  //      6801320
+  //     ],
+  //     [
+  //      16118.037638,
+  //      2446216,
+  //      6801320
+  //     ],
+  //    ],
+  //   }
+  //  }
+  // },
+
+  // Build the map of counters and populate it
+  nsDataHashtable<nsVoidPtrHashKey, CounterMap> counters;
+
+  while (e.Has()) {
+    // skip all non-Counters, including if we start in the middle of a counter
+    if (e.Get().IsCounterId()) {
+      void* id = e.Get().u.mPtr;
+      CounterMap& counter = counters.GetOrInsert(id);
+      e.Next();
+      if (!e.Has() || !e.Get().IsTime()) {
+        ERROR_AND_CONTINUE("expected a Time entry");
+      }
+      double time = e.Get().u.mDouble;
+      if (time >= aSinceTime) {
+        e.Next();
+        while (e.Has() && e.Get().IsCounterKey()) {
+          uint64_t key = e.Get().u.mUint64;
+          CounterKeyedSamples& data = counter.GetOrInsert(key);
+          e.Next();
+          if (!e.Has() || !e.Get().IsCount()) {
+            ERROR_AND_CONTINUE("expected a Count entry");
+          }
+          int64_t count = e.Get().u.mUint64;
+          e.Next();
+          uint64_t number;
+          if (!e.Has() || !e.Get().IsNumber()) {
+            number = 0;
+          } else {
+            number = e.Get().u.mInt64;
+          }
+          CounterKeyedSample sample = {time, number, count};
+          data.AppendElement(sample);
+        }
+      } else {
+        // skip counter sample - only need to skip the initial counter
+        // id, then let the loop at the top skip the rest
+      }
+    }
+    e.Next();
+  }
+  // we have a map of a map of counter entries; dump them to JSON
+  if (counters.Count() == 0) {
+    return;
+  }
+
+  aWriter.StartArrayProperty("counters");
+  for (auto iter = counters.Iter(); !iter.Done(); iter.Next()) {
+    CounterMap& counter = iter.Data();
+    const BaseProfilerCount* base_counter = static_cast<const BaseProfilerCount*>(iter.Key());
+
+    aWriter.Start();
+    aWriter.StringProperty("name", base_counter->mLabel);
+    aWriter.StringProperty("category", base_counter->mCategory);
+    aWriter.StringProperty("description", base_counter->mDescription);
+
+    aWriter.StartObjectProperty("sample_groups");
+    for (auto counter_iter = counter.Iter(); !counter_iter.Done(); counter_iter.Next()) {
+      CounterKeyedSamples& samples = counter_iter.Data();
+      uint64_t key = counter_iter.Key();
+
+      size_t size = samples.Length();
+      if (size == 0) {
+        continue;
+      }
+      aWriter.IntProperty("id", static_cast<int64_t>(key));
+      aWriter.StartObjectProperty("samples");
+      {
+        // XXX Can we assume a missing count means 0?
+        JSONSchemaWriter schema(aWriter);
+        schema.WriteField("time");
+        schema.WriteField("number");
+        schema.WriteField("count");
+      }
+
+      aWriter.StartArrayProperty("data");
+      uint64_t previousNumber = 0;
+      int64_t previousCount = 0;
+      for (size_t i = 0; i < size; i++) {
+        // Encode as deltas, and only encode if different than the last sample
+        if (i == 0 || samples[i].mNumber != previousNumber || samples[i].mCount != previousCount) {
+          MOZ_ASSERT(i == 0 ||
+                     samples[i].mTime >= samples[i - 1].mTime);
+          MOZ_ASSERT(samples[i].mNumber >= previousNumber);
+
+          aWriter.StartArrayElement(SpliceableJSONWriter::SingleLineStyle);
+          aWriter.DoubleElement(samples[i].mTime);
+          aWriter.IntElement(samples[i].mNumber - previousNumber); // uint64_t
+          aWriter.IntElement(samples[i].mCount - previousCount); // int64_t
+          aWriter.EndArray();
+          previousNumber = samples[i].mNumber;
+          previousCount = samples[i].mCount;
+        }
+      }
+      aWriter.EndArray(); // data
+      aWriter.EndObject(); // samples
+    }
+    aWriter.EndObject(); // sample groups
+    aWriter.End(); // for each counter
+  }
+  aWriter.EndArray(); // counters
+}
+
+void
+ProfileBuffer::StreamMemoryToJSON(SpliceableJSONWriter& aWriter,
+                                  const TimeStamp& aProcessStartTime,
+                                  double aSinceTime) const
+{
+  enum Schema : uint32_t {
+    TIME = 0,
+    RSS = 1,
+    USS = 2
+  };
+
+  EntryGetter e(*this);
+
+  aWriter.StartObjectProperty("memory");
+  // Stream all memory (rss/uss) data. We skip other entries, because we
+  // process them in StreamSamplesToJSON()/etc.
+  aWriter.IntProperty("initial_heap", 0); // XXX FIX
+  aWriter.StartObjectProperty("samples");
+  {
+    JSONSchemaWriter schema(aWriter);
+    schema.WriteField("time");
+    schema.WriteField("rss");
+    schema.WriteField("uss");
+  }
+
+  aWriter.StartArrayProperty("data");
+  int64_t previous_rss = 0;
+  int64_t previous_uss = 0;
+  while (e.Has()) {
+    // valid sequence: Resident, Unshared?, Time
+    if (e.Get().IsResidentMemory()) {
+      int64_t rss = e.Get().u.mInt64;
+      int64_t uss = 0;
+      e.Next();
+      if (e.Has()) {
+        if (e.Get().IsUnsharedMemory()) {
+          uss = e.Get().u.mDouble;
+          e.Next();
+          if (!e.Has()) {
+            break;
+          }
+        }
+        if (e.Get().IsTime()) {
+          double time = e.Get().u.mDouble;
+          if (time >= aSinceTime &&
+              (previous_rss != rss || previous_uss != uss)) {
+            aWriter.StartArrayElement(SpliceableJSONWriter::SingleLineStyle);
+            aWriter.DoubleElement(time);
+            aWriter.IntElement(rss); // int64_t
+            if (uss != 0) {
+              aWriter.IntElement(uss); // int64_t
+            }
+            aWriter.EndArray();
+            previous_rss = rss;
+            previous_uss = uss;
+          }
+        } else {
+          ERROR_AND_CONTINUE("expected a Time entry");
+        }
+      }
+    }
+    e.Next();
+  }
+  aWriter.EndArray(); // data
+  aWriter.EndObject(); // samples
+  aWriter.EndObject(); // memory
+}
+#undef ERROR_AND_CONTINUE
+
 static void
 AddPausedRange(SpliceableJSONWriter& aWriter, const char* aReason,
                const Maybe<double>& aStartTime, const Maybe<double>& aEndTime)
 {
   aWriter.Start();
   if (aStartTime) {
     aWriter.DoubleProperty("startTime", *aStartTime);
   } else {
@@ -1261,25 +1525,44 @@ ProfileBuffer::DuplicateLastSample(int a
         // We're done.
         return true;
       case ProfileBufferEntry::Kind::Time:
         // Copy with new time
         AddEntry(ProfileBufferEntry::Time(
           (TimeStamp::Now() - aProcessStartTime).ToMilliseconds()));
         break;
       case ProfileBufferEntry::Kind::Marker:
-        // Don't copy markers
+      case ProfileBufferEntry::Kind::ResidentMemory:
+      case ProfileBufferEntry::Kind::UnsharedMemory:
+      case ProfileBufferEntry::Kind::CounterKey:
+      case ProfileBufferEntry::Kind::Number:
+      case ProfileBufferEntry::Kind::Count:
+      case ProfileBufferEntry::Kind::Responsiveness:
+        // Don't copy anything not part of a thread's stack sample
+        break;
+      case ProfileBufferEntry::Kind::CounterId:
+        // CounterId is normally followed by Time - if so, we'd like
+        // to skip it.  If we duplicate Time, it won't hurt anything, just
+        // waste buffer space (and this can happen if the CounterId has
+        // fallen off the end of the buffer, but Time (and Number/Count)
+        // are still in the buffer).
+        e.Next();
+        if (e.Has() && e.Get().GetKind() != ProfileBufferEntry::Kind::Time) {
+          // this would only happen if there was an invalid sequence
+          // in the buffer.  Don't skip it.
+          continue;
+        }
+        // we've skipped Time
         break;
       default: {
         // Copy anything else we don't know about.
         ProfileBufferEntry entry = e.Get();
         AddEntry(entry);
         break;
       }
     }
     e.Next();
   }
   return true;
 }
 
 // END ProfileBuffer
 ////////////////////////////////////////////////////////////////////////
-
--- a/tools/profiler/core/ProfileBufferEntry.h
+++ b/tools/profiler/core/ProfileBufferEntry.h
@@ -25,34 +25,41 @@
 #include "mozilla/HashFunctions.h"
 #include "mozilla/UniquePtr.h"
 #include "nsClassHashtable.h"
 #include "mozilla/Variant.h"
 #include "nsTArray.h"
 
 class ProfilerMarker;
 
+// NOTE!  If you add entries, you need to verify if they need to be added to the
+// switch statement in DuplicateLastSample!
 #define FOR_EACH_PROFILE_BUFFER_ENTRY_KIND(macro) \
   macro(Category,              int) \
   macro(CollectionStart,       double) \
   macro(CollectionEnd,         double) \
   macro(Label,                 const char*) \
   macro(DynamicStringFragment, char*) /* char[kNumChars], really */ \
   macro(JitReturnAddr,         void*) \
   macro(LineNumber,            int) \
   macro(ColumnNumber,          int) \
   macro(NativeLeafAddr,        void*) \
   macro(Marker,                ProfilerMarker*) \
   macro(Pause,                 double) \
-  macro(ResidentMemory,        double) \
   macro(Responsiveness,        double) \
   macro(Resume,                double) \
   macro(ThreadId,              int) \
   macro(Time,                  double) \
-  macro(UnsharedMemory,        double)
+  macro(ResidentMemory,        uint64_t) \
+  macro(UnsharedMemory,        uint64_t) \
+  macro(CounterId,             void*) \
+  macro(CounterKey,            uint64_t) \
+  macro(Number,                uint64_t) \
+  macro(Count,                 int64_t)
+
 
 // NB: Packing this structure has been shown to cause SIGBUS issues on ARM.
 #if !defined(GP_ARCH_arm)
 #pragma pack(push, 1)
 #endif
 
 class ProfileBufferEntry
 {
@@ -73,16 +80,18 @@ public:
 
 private:
   // aString must be a static string.
   ProfileBufferEntry(Kind aKind, const char *aString);
   ProfileBufferEntry(Kind aKind, char aChars[kNumChars]);
   ProfileBufferEntry(Kind aKind, void *aPtr);
   ProfileBufferEntry(Kind aKind, ProfilerMarker *aMarker);
   ProfileBufferEntry(Kind aKind, double aDouble);
+  ProfileBufferEntry(Kind aKind, int64_t aInt64);
+  ProfileBufferEntry(Kind aKind, uint64_t aUint64);
   ProfileBufferEntry(Kind aKind, int aInt);
 
 public:
   #define CTOR(k, t) \
     static ProfileBufferEntry k(t aVal) { \
       return ProfileBufferEntry(Kind::k, aVal); \
     }
   FOR_EACH_PROFILE_BUFFER_ENTRY_KIND(CTOR)
@@ -105,16 +114,18 @@ private:
   Kind mKind;
   union {
     const char*     mString;
     char            mChars[kNumChars];
     void*           mPtr;
     ProfilerMarker* mMarker;
     double          mDouble;
     int             mInt;
+    int64_t         mInt64;
+    uint64_t        mUint64;
   } u;
 };
 
 #if !defined(GP_ARCH_arm)
 // Packed layout: 1 byte for the tag + 8 bytes for the value.
 static_assert(sizeof(ProfileBufferEntry) == 9, "bad ProfileBufferEntry size");
 #pragma pack(pop)
 #endif
@@ -349,18 +360,16 @@ private:
 //   "tid": 42,
 //   "samples":
 //   {
 //     "schema":
 //     {
 //       "stack": 0,           /* index into stackTable */
 //       "time": 1,            /* number */
 //       "responsiveness": 2,  /* number */
-//       "rss": 3,             /* number */
-//       "uss": 4              /* number */
 //     },
 //     "data":
 //     [
 //       [ 1, 0.0, 0.0 ]       /* { stack: 1, time: 0.0, responsiveness: 0.0 } */
 //     ]
 //   },
 //
 //   "markers":
@@ -413,10 +422,66 @@ private:
 //   [
 //     "(root)",
 //     "foo.js",
 //     "baseline",
 //     "example marker"
 //   ]
 // }
 //
-
+// Process:
+// {
+//   "name": "Bar",
+//   "pid": 24,
+//   "threads":
+//   [
+//     <0-N threads from above>
+//   ],
+//   "counters": /* includes the memory counter */
+//   [
+//     {
+//       "name": "qwerty",
+//       "category": "uiop",
+//       "description": "this is qwerty uiop",
+//       "sample_groups":
+//       [
+//         {
+//           "id": 42,    /* number (thread id, or object identifier (tab), etc) */
+//           "samples":
+//           {
+//             "schema":
+//             {
+//               "time": 0,            /* number */
+//               "number": 1,          /* number (of times the counter was touched) */
+//               "count": 2            /* number (total for the counter) */
+//             },
+//             "data":
+//             [
+//               [ 0.1, 1824, 454622 ] /* { time: 0.1, number: 1824, count: 454622 } */
+//             ]
+//           },
+//         },
+//         /* more sample-group objects with different id's */
+//       ]
+//     },
+//     /* more counters */
+//   ],
+//   "memory":
+//   {
+//     "initial_heap": 12345678,
+//     "samples":
+//     {
+//       "schema":
+//       {
+//         "time": 0,            /* number */
+//         "rss": 1,             /* number */
+//         "uss": 2              /* number */
+//       },
+//       "data":
+//       [
+//         /* { time: 0.1, rss: 12345678, uss: 87654321} */
+//         [ 0.1, 12345678, 87654321 ]
+//       ]
+//     },
+//   },
+// }
+//
 #endif /* ndef ProfileBufferEntry_h */
--- a/tools/profiler/core/platform.cpp
+++ b/tools/profiler/core/platform.cpp
@@ -1476,18 +1476,17 @@ DoSyncSample(PSLockRef aLock, Registered
   DoSharedSample(aLock, /* aIsSynchronous = */ true, aRegisteredThread, aNow,
                  aRegs, /* aLastSample = */ nullptr, aBuffer);
 }
 
 // Writes the components of a periodic sample to ActivePS's ProfileBuffer.
 static void
 DoPeriodicSample(PSLockRef aLock, RegisteredThread& aRegisteredThread,
                  ProfiledThreadData& aProfiledThreadData,
-                 const TimeStamp& aNow, const Registers& aRegs,
-                 int64_t aRSSMemory, int64_t aUSSMemory)
+                 const TimeStamp& aNow, const Registers& aRegs)
 {
   // WARNING: this function runs within the profiler's "critical section".
 
   ProfileBuffer& buffer = ActivePS::Buffer(aLock);
 
   DoSharedSample(aLock, /* aIsSynchronous = */ false, aRegisteredThread, aNow,
                  aRegs, &aProfiledThreadData.LastSample(), buffer);
 
@@ -1500,26 +1499,16 @@ DoPeriodicSample(PSLockRef aLock, Regist
   }
 
   ThreadResponsiveness* resp = aProfiledThreadData.GetThreadResponsiveness();
   if (resp && resp->HasData()) {
     double delta = resp->GetUnresponsiveDuration(
       (aNow - CorePS::ProcessStartTime()).ToMilliseconds());
     buffer.AddEntry(ProfileBufferEntry::Responsiveness(delta));
   }
-
-  if (aRSSMemory != 0) {
-    double rssMemory = static_cast<double>(aRSSMemory);
-    buffer.AddEntry(ProfileBufferEntry::ResidentMemory(rssMemory));
-  }
-
-  if (aUSSMemory != 0) {
-    double ussMemory = static_cast<double>(aUSSMemory);
-    buffer.AddEntry(ProfileBufferEntry::UnsharedMemory(ussMemory));
-  }
 }
 
 // END sampling/unwinding code
 ////////////////////////////////////////////////////////////////////////
 
 ////////////////////////////////////////////////////////////////////////
 // BEGIN saving/streaming code
 
@@ -1880,16 +1869,19 @@ locked_profiler_stream_json_for_this_pro
 
   // Put meta data
   aWriter.StartObjectProperty("meta");
   {
     StreamMetaJSCustomObject(aLock, aWriter, aIsShuttingDown);
   }
   aWriter.EndObject();
 
+  buffer.StreamCountersToJSON(aWriter, CorePS::ProcessStartTime(), aSinceTime);
+  buffer.StreamMemoryToJSON(aWriter, CorePS::ProcessStartTime(), aSinceTime);
+
   // Data of TaskTracer doesn't belong in the circular buffer.
   if (ActivePS::FeatureTaskTracer(aLock)) {
     aWriter.StartObjectProperty("tasktracer");
     StreamTaskTracer(aLock, aWriter);
     aWriter.EndObject();
   }
 
   // Lists the samples for each thread profile
@@ -2180,23 +2172,55 @@ SamplerThread::Run()
       }
 
       ActivePS::Buffer(lock).DeleteExpiredStoredMarkers();
 
       if (!ActivePS::IsPaused(lock)) {
         const nsTArray<LiveProfiledThreadData>& liveThreads =
           ActivePS::LiveProfiledThreads(lock);
 
+        TimeDuration delta = sampleStart - CorePS::ProcessStartTime();
+        ProfileBuffer& buffer = ActivePS::Buffer(lock);
+
+        // Report memory use
         int64_t rssMemory = 0;
         int64_t ussMemory = 0;
-        if (!liveThreads.IsEmpty() && ActivePS::FeatureMemory(lock)) {
+        if (ActivePS::FeatureMemory(lock)) {
           rssMemory = nsMemoryReporterManager::ResidentFast();
 #if defined(GP_OS_linux) || defined(GP_OS_android)
           ussMemory = nsMemoryReporterManager::ResidentUnique();
 #endif
+          if (rssMemory != 0) {
+            buffer.AddEntry(ProfileBufferEntry::ResidentMemory(rssMemory));
+            if (ussMemory != 0) {
+              buffer.AddEntry(ProfileBufferEntry::UnsharedMemory(ussMemory));
+            }
+          }
+          buffer.AddEntry(ProfileBufferEntry::Time(delta.ToMilliseconds()));
+        }
+
+        // handle per-process generic counters
+        const nsTArray<BaseProfilerCount*>& counters =
+          CorePS::Counters(lock);
+        TimeStamp now = TimeStamp::Now();
+        for (auto& counter : counters) {
+          // create Buffer entries for each counter
+          buffer.AddEntry(ProfileBufferEntry::CounterId(counter));
+          buffer.AddEntry(ProfileBufferEntry::Time(delta.ToMilliseconds()));
+          // XXX support keyed maps of counts
+          // In the future, we'll support keyed counters - for example, counters with a key
+          // which is a thread ID. For "simple" counters we'll just use a key of 0.
+          int64_t count;
+          uint64_t number;
+          counter->Sample(count, number);
+          buffer.AddEntry(ProfileBufferEntry::CounterKey(0));
+          buffer.AddEntry(ProfileBufferEntry::Count(count));
+          if (number) {
+            buffer.AddEntry(ProfileBufferEntry::Number(number));
+          }
         }
 
         for (auto& thread : liveThreads) {
           RegisteredThread* registeredThread = thread.mRegisteredThread;
           ProfiledThreadData* profiledThreadData =
             thread.mProfiledThreadData.get();
           RefPtr<ThreadInfo> info = registeredThread->Info();
 
@@ -2213,21 +2237,25 @@ SamplerThread::Run()
             }
           }
 
           ThreadResponsiveness* resp = profiledThreadData->GetThreadResponsiveness();
           if (resp) {
             resp->Update();
           }
 
-          TimeStamp now = TimeStamp::Now();
+          now = TimeStamp::Now();
           SuspendAndSampleAndResumeThread(lock, *registeredThread,
                                           [&](const Registers& aRegs) {
             DoPeriodicSample(lock, *registeredThread, *profiledThreadData, now,
-                             aRegs, rssMemory, ussMemory);
+                             aRegs);
+            // only report these once per sample-time (if 0 we don't put them in the buffer,
+            // so for the rest of the threads we won't insert them)
+            rssMemory = 0;
+            ussMemory = 0;
           });
         }
 
 #if defined(USE_LUL_STACKWALK)
         // The LUL unwind object accumulates frame statistics. Periodically we
         // should poke it to give it a chance to print those statistics.  This
         // involves doing I/O (fprintf, __android_log_print, etc.) and so
         // can't safely be done from the critical section inside
--- a/tools/profiler/public/ProfilerCounts.h
+++ b/tools/profiler/public/ProfilerCounts.h
@@ -77,31 +77,36 @@ public:
     , mCategory(aCategory)
     , mDescription(aDescription)
     , mCounter(aCounter)
     , mNumber(aNumber)
     {
 #define COUNTER_CANARY 0xDEADBEEF
 #ifdef DEBUG
       mCanary = COUNTER_CANARY;
+      mPrevNumber = 0;
 #endif
       // Can't call profiler_* here since this may be non-xul-library
     }
 #ifdef DEBUG
   ~BaseProfilerCount() {
     mCanary = 0;
   }
 #endif
 
   void Sample(int64_t &aCounter, uint64_t &aNumber)
   {
     MOZ_ASSERT(mCanary == COUNTER_CANARY);
 
     aCounter = *mCounter;
     aNumber = mNumber ? *mNumber : 0;
+#ifdef DEBUG
+    MOZ_ASSERT(aNumber >= mPrevNumber);
+    mPrevNumber = aNumber;
+#endif
   }
 
   // We don't define ++ and Add() here, since the static defines directly
   // increment the atomic counters, and the subclasses implement ++ and
   // Add() directly.
 
   // These typically are static strings (for example if you use the macros
   // below)
@@ -117,16 +122,17 @@ public:
   // by the profiler (see profiler_add/remove_sampled_counter()).  If you're
   // using a lot of these, they probably should be allocated at runtime (see
   // class ProfilerCountOnly below).
   ProfilerAtomicSigned* mCounter;
   ProfilerAtomicUnsigned* mNumber; // may be null
 
 #ifdef DEBUG
   uint32_t mCanary;
+  uint64_t mPrevNumber; // value of number from the last Sample()
 #endif
 };
 
 // Designed to be allocated dynamically, and simply incremented with obj++
 // or obj->Add(n)
 class ProfilerCounter final : public BaseProfilerCount
 {
 public: