/
ddsketch.hpp
275 lines (237 loc) · 8.94 KB
/
ddsketch.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
/*
* DDSketch.h
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2020 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef DDSKETCH_H
#define DDSKETCH_H
#include <iterator>
#include <limits>
#include <type_traits>
#pragma once
#include <algorithm>
#include <cassert>
#include <cmath>
#include <vector>
// A namespace for fast log() computation.
namespace fastLogger {
// Basically, the goal is to compute log(x)/log(r).
// For double, it is represented as 2^e*(1+s) (0<=s<1), so our goal becomes
// e*log(2)/log(r)*log(1+s), and we approximate log(1+s) with a cubic function.
// See more details on Datadog's paper, or CubicallyInterpolatedMapping.java in
// https://github.com/DataDog/sketches-java/
inline const double correctingFactor = 1.00988652862227438516; // = 7 / (10 * log(2));
constexpr inline const double A = 6.0 / 35.0, B = -3.0 / 5.0, C = 10.0 / 7.0;
inline double fastlog(double value) {
int e;
double s = frexp(value, &e);
s = s * 2 - 1;
return ((A * s + B) * s + C) * s + e - 1;
}
inline double reverseLog(double index) {
long exponent = floor(index);
// Derived from Cardano's formula
double d0 = B * B - 3 * A * C;
double d1 = 2 * B * B * B - 9 * A * B * C - 27 * A * A * (index - exponent);
double p = cbrt((d1 - sqrt(d1 * d1 - 4 * d0 * d0 * d0)) / 2);
double significandPlusOne = -(B + p + d0 / p) / (3 * A) + 1;
return ldexp(significandPlusOne / 2, exponent + 1);
}
} // namespace fastLogger
// DDSketch for non-negative numbers (those < EPS = 10^-18 are
// treated as 0, and huge numbers (>1/EPS) fail ASSERT). This is the base
// class without a concrete log() implementation.
template <class Impl, class T>
class DDSketchBase {
static constexpr T defaultMin() { return std::numeric_limits<T>::max(); }
static constexpr T defaultMax() {
if constexpr (std::is_floating_point_v<T>) {
return -std::numeric_limits<T>::max();
} else {
return std::numeric_limits<T>::min();
}
}
public:
explicit DDSketchBase(double errorGuarantee)
: errorGuarantee(errorGuarantee), populationSize(0), zeroPopulationSize(0), minValue(defaultMin()),
maxValue(defaultMax()), sum(T()) {}
DDSketchBase<Impl, T>& addSample(T sample) {
// Call it addSample for now, while it is not a sample anymore
if (!populationSize)
minValue = maxValue = sample;
if (sample <= EPS) {
zeroPopulationSize++;
} else {
int index = static_cast<Impl*>(this)->getIndex(sample);
assert(index >= 0 && index < int(buckets.size()));
buckets[index]++;
}
populationSize++;
sum += sample;
maxValue = std::max(maxValue, sample);
minValue = std::min(minValue, sample);
return *this;
}
double mean() const {
if (populationSize == 0)
return 0;
return (double)sum / populationSize;
}
T median() { return percentile(0.5); }
T percentile(double percentile) {
assert(percentile >= 0 && percentile <= 1);
if (populationSize == 0)
return T();
uint64_t targetPercentilePopulation = percentile * (populationSize - 1);
// Now find the tPP-th (0-indexed) element
if (targetPercentilePopulation < zeroPopulationSize)
return T(0);
int index = -1;
[[maybe_unused]] bool found = false;
if (percentile <= 0.5) { // count up
uint64_t count = zeroPopulationSize;
for (size_t i = 0; i < buckets.size(); i++) {
if (targetPercentilePopulation < count + buckets[i]) {
// count + buckets[i] = # of numbers so far (from the rightmost to
// this bucket, inclusive), so if target is in this bucket, it should
// means tPP < cnt + bck[i]
found = true;
index = i;
break;
}
count += buckets[i];
}
} else { // and count down
uint64_t count = 0;
for (auto rit = buckets.rbegin(); rit != buckets.rend(); rit++) {
if (targetPercentilePopulation + count + *rit >= populationSize) {
// cnt + bkt[i] is # of numbers to the right of this bucket (incl.),
// so if target is not in this bucket (i.e., to the left of this
// bucket), it would be as right as the left bucket's rightmost
// number, so we would have tPP + cnt + bkt[i] < total population (tPP
// is 0-indexed), that means target is in this bucket if this
// condition is not satisfied.
found = true;
index = std::distance(rit, buckets.rend()) - 1;
break;
}
count += *rit;
}
}
assert(found);
return static_cast<Impl*>(this)->getValue(index);
}
T min() const { return minValue; }
T max() const { return maxValue; }
void clear() {
std::fill(buckets.begin(), buckets.end(), 0);
populationSize = zeroPopulationSize = 0;
sum = 0;
minValue = defaultMin();
maxValue = defaultMax();
}
uint64_t getPopulationSize() const { return populationSize; }
double getErrorGuarantee() const { return errorGuarantee; }
DDSketchBase<Impl, T>& mergeWith(const DDSketchBase<Impl, T>& anotherSketch) {
// Must have the same guarantee
assert(fabs(errorGuarantee - anotherSketch.errorGuarantee) < EPS &&
anotherSketch.buckets.size() == buckets.size());
for (size_t i = 0; i < anotherSketch.buckets.size(); i++) {
buckets[i] += anotherSketch.buckets[i];
}
populationSize += anotherSketch.populationSize;
zeroPopulationSize += anotherSketch.zeroPopulationSize;
minValue = std::min(minValue, anotherSketch.minValue);
maxValue = std::max(maxValue, anotherSketch.maxValue);
sum += anotherSketch.sum;
return *this;
}
constexpr static double EPS = 1e-18; // smaller numbers are considered as 0
protected:
double errorGuarantee; // As defined in the paper
uint64_t populationSize, zeroPopulationSize; // we need to separately count 0s
std::vector<uint64_t> buckets;
T minValue, maxValue, sum;
void setBucketSize(int capacity) { buckets.resize(capacity, 0); }
};
// DDSketch with fast log implementation for float numbers
template <class T>
class DDSketch : public DDSketchBase<DDSketch<T>, T> {
public:
explicit DDSketch(double errorGuarantee = 0.005)
: DDSketchBase<DDSketch<T>, T>(errorGuarantee), gamma((1.0 + errorGuarantee) / (1.0 - errorGuarantee)),
multiplier(fastLogger::correctingFactor * log(2) / log(gamma)) {
offset = getIndex(1.0 / DDSketchBase<DDSketch<T>, T>::EPS);
this->setBucketSize(2 * offset);
}
int getIndex(T sample) {
static_assert(__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__, "Do not support non-little-endian systems");
return ceil(fastLogger::fastlog(sample) * multiplier) + offset;
}
T getValue(int index) { return fastLogger::reverseLog((index - offset) / multiplier) * 2.0 / (1 + gamma); }
private:
double gamma, multiplier;
int offset = 0;
};
// DDSketch with <cmath> log. Slow and only use this when others doesn't work.
template <class T>
class DDSketchSlow : public DDSketchBase<DDSketchSlow<T>, T> {
public:
DDSketchSlow(double errorGuarantee = 0.1)
: DDSketchBase<DDSketchSlow<T>, T>(errorGuarantee), gamma((1.0 + errorGuarantee) / (1.0 - errorGuarantee)),
logGamma(log(gamma)) {
offset = getIndex(1.0 / DDSketchBase<DDSketch<T>, T>::EPS) + 5;
this->setBucketSize(2 * offset);
}
int getIndex(T sample) { return ceil(log(sample) / logGamma) + offset; }
T getValue(int index) { return (T)(2.0 * pow(gamma, (index - offset)) / (1 + gamma)); }
private:
double gamma, logGamma;
int offset = 0;
};
// DDSketch for unsigned int. Faster than the float version. Fixed accuracy.
class DDSketchFastUnsigned : public DDSketchBase<DDSketchFastUnsigned, unsigned> {
public:
DDSketchFastUnsigned() : DDSketchBase<DDSketchFastUnsigned, unsigned>(errorGuarantee) { this->setBucketSize(129); }
int getIndex(unsigned sample) {
__uint128_t v = sample;
v *= v;
v *= v; // sample^4
uint64_t low = (uint64_t)v, high = (uint64_t)(v >> 64);
return 128 - (high == 0 ? ((low == 0 ? 64 : __builtin_clzll(low)) + 64) : __builtin_clzll(high));
}
unsigned getValue(int index) {
double r = 1, g = gamma;
while (index) { // quick power method for power(gamma, index)
if (index & 1)
r *= g;
g *= g;
index >>= 1;
}
// 2.0 * pow(gamma, index) / (1 + gamma) is what we need
return (unsigned)(2.0 * r / (1 + gamma) + 0.5); // round to nearest int
}
private:
constexpr static double errorGuarantee = 0.08642723372;
// getIndex basically calc floor(log_2(x^4)) + 1,
// which is almost ceil(log_2(x^4)) as it only matters when x is a power of 2,
// and it does not change the error bound. Original sketch asks for
// ceil(log_r(x)), so we know r = pow(2, 1/4) = 1.189207115. And r = (1 + eG)
// / (1 - eG) so eG = 0.08642723372.
constexpr static double gamma = 1.189207115;
};
#endif