author  Jim Blandy <jimb@mozilla.com> 
Thu, 08 Oct 2015 13:05:31 0700  
changeset 267004  0140d126b76b4fe692b22d880f897496370ef480 
parent 267003  de953677a1819ad7e64e647d4c5d6093b2cc76c2 
child 267005  09bde5536609afa6b9cd0af71245433f920b33b4 
push id  29504 
push user  cbook@mozilla.com 
push date  Fri, 09 Oct 2015 09:43:23 +0000 
treeherder  mozillacentral@d01dd42e654b [default view] [failures only] 
perfherder  [talos] [build metrics] [platform microbench] (compared to previous push) 
reviewers  waldo 
bugs  1206357 
milestone  44.0a1 
first release with  nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64

last release without  nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64

new file mode 100644  /dev/null +++ b/mfbt/FastBernoulliTrial.h @@ 0,0 +1,376 @@ +/* * Mode: C++; tabwidth: 8; indenttabsmode: nil; cbasicoffset: 2 * */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozilla_FastBernoulliTrial_h +#define mozilla_FastBernoulliTrial_h + +#include "mozilla/Assertions.h" +#include "mozilla/XorShift128PlusRNG.h" + +#include <cmath> +#include <stdint.h> + +namespace mozilla { + +/** + * class FastBernoulliTrial: Efficient sampling with uniform probability + * + * When gathering statistics about a program's behavior, we may be observing + * events that occur very frequently (e.g., function calls or memory + * allocations) and we may be gathering information that is somewhat expensive + * to produce (e.g., call stacks). Sampling all the events could have a + * significant impact on the program's performance. + * + * Why not just sample every N'th event? This technique is called "systematic + * sampling"; it's simple and efficient, and it's fine if we imagine a + * patternless stream of events. But what if we're sampling allocations, and the + * program happens to have a loop where each iteration does exactly N + * allocations? You would end up sampling the same allocation every time through + * the loop; the entire rest of the loop becomes invisible to your measurements! + * More generally, if each iteration does M allocations, and M and N have any + * common divisor at all, most allocation sites will never be sampled. If + * they're both even, say, the oddnumbered allocations disappear from your + * results. + * + * Ideally, we'd like each event to have some probability P of being sampled, + * independent of its neighbors and of its position in the sequence. This is + * called "Bernoulli sampling", and it doesn't suffer from any of the problems + * mentioned above. + * + * One disadvantage of Bernoulli sampling is that you can't be sure exactly how + * many samples you'll get: technically, it's possible that you might sample + * none of them, or all of them. But if the number of events N is large, these + * aren't likely outcomes; you can generally expect somewhere around P * N + * events to be sampled. + * + * The other disadvantage of Bernoulli sampling is that you have to generate a + * random number for every event, which can be slow. + * + * [significant pause] + * + * BUT NOT WITH THIS CLASS! FastBernoulliTrial lets you do true Bernoulli + * sampling, while generating a fresh random number only when we do decide to + * sample an event, not on every trial. When it decides not to sample, a call to + * FastBernoulliTrial::trial is nothing but decrementing a counter and + * comparing it to zero. So the lower your sampling probability is, the less + * overhead FastBernoulliTrial imposes. + * + * Probabilities of 0 and 1 are handled efficiently. (In neither case need we + * ever generate a random number at all.) + * + * The essential API: + * + *  FastBernoulliTrial(double P) + * Construct an instance that selects events with probability P. + * + *  FastBernoulliTrial::trial() + * Return true with probability P. Call this each time an event occurs, to + * decide whether to sample it or not. + * + *  FastBernoulliTrial::trial(size_t n) + * Equivalent to calling trial() n times, and returning true if any of those + * calls do. However, like trial, this runs in fast constant time. + * + * What is this good for? In some applications, some events are "bigger" than + * others. For example, large allocations are more significant than small + * allocations. Perhaps we'd like to imagine that we're drawing allocations + * from a stream of bytes, and performing a separate Bernoulli trial on every + * byte from the stream. We can accomplish this by calling t.trial(S) for + * the number of bytes S, and sampling the event if that returns true. + * + * Of course, this style of sampling needs to be paired with analysis and + * presentation that makes the size of the event apparent, lest trials with + * large values for n appear to be indistinguishable from those with small + * values for n. + */ +class FastBernoulliTrial { + /* + * This comment should just read, "Generate skip counts with a geometric + * distribution", and leave everyone to go look that up and see why it's the + * right thing to do, if they don't know already. + * + * BUT IF YOU'RE CURIOUS, COMMENTS ARE FREE... + * + * Instead of generating a fresh random number for every trial, we can + * randomly generate a count of how many times we should return false before + * the next time we return true. We call this a "skip count". Once we've + * returned true, we generate a fresh skip count, and begin counting down + * again. + * + * Here's an awesome fact: by exercising a little care in the way we generate + * skip counts, we can produce results indistinguishable from those we would + * get "rolling the dice" afresh for every trial. + * + * In short, skip counts in Bernoulli trials of probability P obey a geometric + * distribution. If a random variable X is uniformly distributed from [0..1), + * then std::floor(std::log(X) / std::log(1P)) has the appropriate geometric + * distribution for the skip counts. + * + * Why that formula? + * + * Suppose we're to return true with some probability P, say, 0.3. Spread + * all possible futures along a line segment of length 1. In portion P of + * those cases, we'll return true on the next call to trial; the skip count + * is 0. For the remaining portion 1P of cases, the skip count is 1 or more. + * + * skip: 0 1 or more + * ^ + * portion: 0.3 0.7 + * P 1P + * + * But the "1 or more" section of the line is subdivided the same way: *within + * that section*, in portion P the second call to trial() returns true, and in + * portion 1P it returns false a second time; the skip count is two or more. + * So we return true on the second call in proportion 0.7 * 0.3, and skip at + * least the first two in proportion 0.7 * 0.7. + * + * skip: 0 1 2 or more + * ^^ + * portion: 0.3 0.7 * 0.3 0.7 * 0.7 + * P (1P)*P (1P)^2 + * + * We can continue to subdivide: + * + * skip >= 0:  (1P)^0  + * skip >= 1:   (1P)^1  + * skip >= 2:   (1P)^2  + * skip >= 3:  ^  (1P)^3  + * skip >= 4:  .  (1P)^4  + * . + * ^X, see below + * + * In other words, the likelihood of the next n calls to trial returning + * false is (1P)^n. The longer a run we require, the more the likelihood + * drops. Further calls may return false too, but this is the probability + * we'll skip at least n. + * + * This is interesting, because we can pick a point along this line segment + * and see which skip count's range it falls within; the point X above, for + * example, is within the ">= 2" range, but not within the ">= 3" range, so it + * designates a skip count of 2. So if we pick points on the line at random + * and use the skip counts they fall under, that will be indistinguishable + * from generating a fresh random number between 0 and 1 for each trial and + * comparing it to P. + * + * So to find the skip count for a point X, we must ask: To what whole power + * must we raise 1P such that we include X, but the next power would exclude + * it? This is exactly std::floor(std::log(X) / std::log(1P)). + * + * Our algorithm is then, simply: When constructed, compute an initial skip + * count. Return false from trial that many times, and then compute a new skip + * count. + * + * For a call to trial(n), if the skip count is greater than n, return false + * and subtract n from the skip count. If the skip count is less than n, + * return true and compute a new skip count. Since each trial is independent, + * it doesn't matter by how much n overshoots the skip count; we can actually + * compute a new skip count at *any* time without affecting the distribution. + * This is really beautiful. + */ + public: + /** + * Construct a fast Bernoulli trial generator. Calls to trial() return true + * with probability aProbability. Use aState0 and aState1 to seed the + * random number generator; both may not be zero. + */ + FastBernoulliTrial(double aProbability, uint64_t aState0, uint64_t aState1) + : mGenerator(aState0, aState1) + { + setProbability(aProbability); + } + + /** + * Return true with probability mProbability. Call this each time an event + * occurs, to decide whether to sample it or not. The lower mProbability is, + * the faster this function runs. + */ + bool trial() { + if (mSkipCount) { + mSkipCount; + return false; + } + + return chooseSkipCount(); + } + + /** + * Equivalent to calling trial() n times, and returning true if any of those + * calls do. However, like trial, this runs in fast constant time. + * + * What is this good for? In some applications, some events are "bigger" than + * others. For example, large allocations are more significant than small + * allocations. Perhaps we'd like to imagine that we're drawing allocations + * from a stream of bytes, and performing a separate Bernoulli trial on every + * byte from the stream. We can accomplish this by calling t.trial(S) for + * the number of bytes S, and sampling the event if that returns true. + * + * Of course, this style of sampling needs to be paired with analysis and + * presentation that makes the "size" of the event apparent, lest trials with + * large values for n appear to be indistinguishable from those with small + * values for n, despite being potentially much more likely to be sampled. + */ + bool trial(size_t aCount) { + if (mSkipCount > aCount) { + mSkipCount = aCount; + return false; + } + + return chooseSkipCount(); + } + + void setRandomState(uint64_t aState0, uint64_t aState1) { + mGenerator.setState(aState0, aState1); + } + + void setProbability(double aProbability) { + MOZ_ASSERT(0 <= aProbability && aProbability <= 1); + mProbability = aProbability; + if (0 < mProbability && mProbability < 1) { + /* + * Let's look carefully at how this calculation plays out in floating + * point arithmetic. We'll assume IEEE, but the final C++ code we arrive + * at would still be fine if our numbers were mathematically perfect. So, + * while we've considered IEEE's edge cases, we haven't done anything that + * should be actively bad when using other representations. + * + * (In the below, read comparisons as exact mathematical comparisons: when + * we say something "equals 1", that means it's exactly equal to 1. We + * treat approximation using intervals with open boundaries: saying a + * value is in (0,1) doesn't specify how close to 0 or 1 the value gets. + * When we use closed boundaries like [1, 2**53], we're careful to ensure + * the boundary values are actually representable.) + * + *  After the comparison above, we know mProbability is in (0,1). + * + *  The gaps below 1 are 2**53, so that interval is (0, 12**53]. + * + *  Because the floatingpoint gaps near 1 are wider than those near + * zero, there are many small positive doubles ε such that 1ε rounds to + * exactly 1. However, 2**53 can be represented exactly. So + * 1mProbability is in [2**53, 1]. + * + *  log(1  mProbability) is thus in (37, 0]. + * + * That range includes zero, but when we use mInvLogNotProbability, it + * would be helpful if we could trust that it's negative. So when log(1 + *  mProbability) is 0, we'll just set mProbability to 0, so that + * mInvLogNotProbability is not used in chooseSkipCount. + * + *  How much of the range of mProbability does this cause us to ignore? + * The only value for which log returns 0 is exactly 1; the slope of log + * at 1 is 1, so for small ε such that 1  ε != 1, log(1  ε) is ε, + * never 0. The gaps near one are larger than the gaps near zero, so if + * 1  ε wasn't 1, then ε is representable. So if log(1  mProbability) + * isn't 0, then 1  mProbability isn't 1, which means that mProbability + * is at least 2**53, as discussed earlier. This is a sampling + * likelihood of roughly one in ten trillion, which is unlikely to be + * distinguishable from zero in practice. + * + * So by forbidding zero, we've tightened our range to (37, 2**53]. + * + *  Finally, 1 / log(1  mProbability) is in [2**53, 1/37). This all + * falls readily within the range of an IEEE double. + * + * ALL THAT HAVING BEEN SAID: here are the five lines of actual code: + */ + double logNotProbability = std::log(1  mProbability); + if (logNotProbability == 0.0) + mProbability = 0.0; + else + mInvLogNotProbability = 1 / logNotProbability; + } + + chooseSkipCount(); + } + + private: + /* The likelihood that any given call to trial should return true. */ + double mProbability; + + /* + * The value of 1/std::log(1  mProbability), cached for repeated use. + * + * If mProbability is exactly 0 or exactly 1, we don't use this value. + * Otherwise, we guarantee this value is in the range [2**53, 1/37), i.e. + * definitely negative, as required by chooseSkipCount. See setProbability for + * the details. + */ + double mInvLogNotProbability; + + /* Our random number generator. */ + non_crypto::XorShift128PlusRNG mGenerator; + + /* The number of times trial should return false before next returning true. */ + size_t mSkipCount; + + /* + * Choose the next skip count. This also returns the value that trial should + * return, since we have to check for the extreme values for mProbability + * anyway, and trial should never return true at all when mProbability is 0. + */ + bool chooseSkipCount() { + /* + * If the probability is 1.0, every call to trial returns true. Make sure + * mSkipCount is 0. + */ + if (mProbability == 1.0) { + mSkipCount = 0; + return true; + } + + /* + * If the probabilility is zero, trial never returns true. Don't bother us + * for a while. + */ + if (mProbability == 0.0) { + mSkipCount = SIZE_MAX; + return false; + } + + /* + * What sorts of values can this call to std::floor produce? + * + * Since mGenerator.nextDouble returns a value in [0, 12**53], std::log + * returns a value in the range [infinity, 2**53], all negative. Since + * mInvLogNotProbability is negative (see its comments), the product is + * positive and possibly infinite. std::floor returns +infinity unchanged. + * So the result will always be positive. + * + * Converting a double to an integer that is out of range for that integer + * is undefined behavior, so we must clamp our result to SIZE_MAX, to ensure + * we get an acceptable value for mSkipCount. + * + * The clamp is written carefully. Note that if we had said: + * + * if (skipCount > SIZE_MAX) + * skipCount = SIZE_MAX; + * + * that leads to undefined behavior 64bit machines: SIZE_MAX coerced to + * double is 2^64, not 2^641, so this doesn't actually set skipCount to a + * value that can be safely assigned to mSkipCount. + * + * Jakub Oleson cleverly suggested flipping the sense of the comparison: if + * we require that skipCount < SIZE_MAX, then because of the gaps (2048) + * between doubles at that magnitude, the highest double less than 2^64 is + * 2^64  2048, which is fine to store in a size_t. + * + * (On 32bit machines, all size_t values can be represented exactly in + * double, so all is well.) + */ + double skipCount = std::floor(std::log(mGenerator.nextDouble()) + * mInvLogNotProbability); + if (skipCount < SIZE_MAX) + mSkipCount = skipCount; + else + mSkipCount = SIZE_MAX; + + return true; + } +}; + +} /* namespace mozilla */ + +#endif /* mozilla_FastBernoulliTrial_h */
 a/mfbt/moz.build +++ b/mfbt/moz.build @@ 34,16 +34,17 @@ EXPORTS.mozilla = [ 'DebugOnly.h', 'decimal/Decimal.h', 'doubleconversion/doubleconversion.h', 'doubleconversion/utils.h', 'Endian.h', 'EnumeratedArray.h', 'EnumeratedRange.h', 'EnumSet.h', + 'FastBernoulliTrial.h', 'FloatingPoint.h', 'Function.h', 'GuardObjects.h', 'HashFunctions.h', 'IndexSequence.h', 'IntegerPrintfMacros.h', 'IntegerRange.h', 'IntegerTypeTraits.h',
new file mode 100644  /dev/null +++ b/mfbt/tests/TestFastBernoulliTrial.cpp @@ 0,0 +1,209 @@ +/* * Mode: C++; tabwidth: 8; indenttabsmode: nil; cbasicoffset: 2 * */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "mozilla/Assertions.h" +#include "mozilla/FastBernoulliTrial.h" + +#include <math.h> + +// Note that because we always provide FastBernoulliTrial with a fixed +// pseudorandom seed in these tests, the results here are completely +// deterministic. +// +// A nonoptimized version of this test runs in .009s on my laptop. Using larger +// sample sizes lets us meet tighter bounds on the counts. + +static void +TestProportions() +{ + mozilla::FastBernoulliTrial bernoulli(1.0, + 698079309544035222ULL, + 6012389156611637584ULL); + + for (size_t i = 0; i < 100; i++) + MOZ_RELEASE_ASSERT(bernoulli.trial()); + + { + bernoulli.setProbability(0.5); + size_t count = 0; + for (size_t i = 0; i < 1000; i++) + count += bernoulli.trial(); + MOZ_RELEASE_ASSERT(count == 496); + } + + { + bernoulli.setProbability(0.001); + size_t count = 0; + for (size_t i = 0; i < 1000; i++) + count += bernoulli.trial(); + MOZ_RELEASE_ASSERT(count == 2); + } + + { + bernoulli.setProbability(0.85); + size_t count = 0; + for (size_t i = 0; i < 1000; i++) + count += bernoulli.trial(); + MOZ_RELEASE_ASSERT(count == 852); + } + + bernoulli.setProbability(0.0); + for (size_t i = 0; i < 100; i++) + MOZ_RELEASE_ASSERT(!bernoulli.trial()); +} + +static void +TestHarmonics() +{ + mozilla::FastBernoulliTrial bernoulli(0.1, + 698079309544035222ULL, + 6012389156611637584ULL); + + const size_t n = 100000; + bool trials[n]; + for (size_t i = 0; i < n; i++) + trials[i] = bernoulli.trial(); + + // For each harmonic and phase, check that the proportion sampled is + // within acceptable bounds. + for (size_t harmonic = 1; harmonic < 20; harmonic++) { + size_t expected = n / harmonic / 10; + size_t low_expected = expected * 85 / 100; + size_t high_expected = expected * 115 / 100; + + for (size_t phase = 0; phase < harmonic; phase++) { + size_t count = 0; + for (size_t i = phase; i < n; i += harmonic) + count += trials[i]; + + MOZ_RELEASE_ASSERT(low_expected <= count && count <= high_expected); + } + } +} + +static void +TestTrialN() +{ + mozilla::FastBernoulliTrial bernoulli(0.01, + 0x67ff17e25d855942ULL, + 0x74f298193fe1c5b1ULL); + + { + size_t count = 0; + for (size_t i = 0; i < 10000; i++) + count += bernoulli.trial(1); + + // Expected value: 0.01 * 10000 == 100 + MOZ_RELEASE_ASSERT(count == 97); + } + + { + size_t count = 0; + for (size_t i = 0; i < 10000; i++) + count += bernoulli.trial(3); + + // Expected value: (1  (1  0.01) ** 3) == 0.0297, + // 0.0297 * 10000 == 297 + MOZ_RELEASE_ASSERT(count == 304); + } + + { + size_t count = 0; + for (size_t i = 0; i < 10000; i++) + count += bernoulli.trial(10); + + // Expected value: (1  (1  0.01) ** 10) == 0.0956, + // 0.0956 * 10000 == 956 + MOZ_RELEASE_ASSERT(count == 936); + } + + { + size_t count = 0; + for (size_t i = 0; i < 10000; i++) + count += bernoulli.trial(100); + + // Expected value: (1  (1  0.01) ** 100) == 0.6339 + // 0.6339 * 10000 == 6339 + MOZ_RELEASE_ASSERT(count == 6372); + } + + { + size_t count = 0; + for (size_t i = 0; i < 10000; i++) + count += bernoulli.trial(1000); + + // Expected value: (1  (1  0.01) ** 1000) == 0.9999 + // 0.9999 * 10000 == 9999 + MOZ_RELEASE_ASSERT(count == 9998); + } +} + +static void +TestChangeProbability() +{ + mozilla::FastBernoulliTrial bernoulli(1.0, + 0x67ff17e25d855942ULL, + 0x74f298193fe1c5b1ULL); + + // Establish a very high skip count. + bernoulli.setProbability(0.0); + + // This should reestablish a zero skip count. + bernoulli.setProbability(1.0); + + // So this should return true. + MOZ_RELEASE_ASSERT(bernoulli.trial()); +} + +static void +TestCuspProbabilities() +{ + /* + * FastBernoulliTrial takes care to avoid screwing up on edge cases. The + * checks here all look pretty dumb, but they exercise paths in the code that + * could exhibit undefined behavior if coded naïvely. + */ + + /* + * This should not be perceptibly different from 1; for 64bit doubles, this + * is a one in ten trillion chance of the trial not succeeding. Overflows + * converting doubles to size_t skip counts may change this, though. + */ + mozilla::FastBernoulliTrial bernoulli(nextafter(1, 0), + 0x67ff17e25d855942ULL, + 0x74f298193fe1c5b1ULL); + + for (size_t i = 0; i < 1000; i++) + MOZ_RELEASE_ASSERT(bernoulli.trial()); + + /* + * This should not be perceptibly different from 0; for 64bit doubles, + * the FastBernoulliTrial will actually treat this as exactly zero. + */ + bernoulli.setProbability(nextafter(0, 1)); + for (size_t i = 0; i < 1000; i++) + MOZ_RELEASE_ASSERT(!bernoulli.trial()); + + /* + * This should be a vanishingly low probability which FastBernoulliTrial does + * *not* treat as exactly zero. + */ + bernoulli.setProbability(1  nextafter(1, 0)); + for (size_t i = 0; i < 1000; i++) + MOZ_RELEASE_ASSERT(!bernoulli.trial()); +} + +int +main() +{ + TestProportions(); + TestHarmonics(); + TestTrialN(); + TestChangeProbability(); + TestCuspProbabilities(); + + return 0; +}
 a/mfbt/tests/moz.build +++ b/mfbt/tests/moz.build @@ 11,16 +11,17 @@ CppUnitTests([ 'TestBloomFilter', 'TestCasting', 'TestCeilingFloor', 'TestCheckedInt', 'TestCountPopulation', 'TestCountZeroes', 'TestEndian', 'TestEnumSet', + 'TestFastBernoulliTrial', 'TestFloatingPoint', 'TestFunction', 'TestIntegerPrintfMacros', 'TestIntegerRange', 'TestJSONWriter', 'TestMacroArgs', 'TestMacroForEach', 'TestMathAlgorithms',