From 9690aef9cd3ebe76c31ee8a2816dac059f2cb117 Mon Sep 17 00:00:00 2001 From: Diego Hurtado Date: Tue, 9 Aug 2022 19:15:06 +0200 Subject: [PATCH] Add exponential Histogram Fixes #2421 --- CHANGELOG.md | 2 + .../_internal/exponential/aggregation.py | 548 ++++++++++++ .../metrics/_internal/exponential/buckets.py | 253 ++++++ .../metrics/_internal/exponential/config.py | 31 + .../metrics/_internal/exponential/exponent.py | 150 ++++ .../exponential_histogram_mapping.py | 66 ++ .../metrics/_internal/exponential/float64.py | 181 ++++ .../exponential/logarithm_mapping.py | 204 +++++ .../sdk/metrics/_internal/point.py | 124 ++- .../exponential/test_exponent_mapping.py | 99 +++ .../metrics/exponential/test_exponential.py | 92 ++ .../test_exponential_aggregation.py | 818 ++++++++++++++++++ .../metrics/exponential/test_histogram.py | 35 + .../exponential/test_logarithm_mapping.py | 111 +++ 14 files changed, 2713 insertions(+), 1 deletion(-) create mode 100644 opentelemetry-sdk/src/opentelemetry/sdk/metrics/_internal/exponential/aggregation.py create mode 100644 opentelemetry-sdk/src/opentelemetry/sdk/metrics/_internal/exponential/buckets.py create mode 100644 opentelemetry-sdk/src/opentelemetry/sdk/metrics/_internal/exponential/config.py create mode 100644 opentelemetry-sdk/src/opentelemetry/sdk/metrics/_internal/exponential/exponent.py create mode 100644 opentelemetry-sdk/src/opentelemetry/sdk/metrics/_internal/exponential/exponential_histogram_mapping.py create mode 100644 opentelemetry-sdk/src/opentelemetry/sdk/metrics/_internal/exponential/float64.py create mode 100644 opentelemetry-sdk/src/opentelemetry/sdk/metrics/_internal/exponential/logarithm_mapping.py create mode 100644 opentelemetry-sdk/tests/metrics/exponential/test_exponent_mapping.py create mode 100644 opentelemetry-sdk/tests/metrics/exponential/test_exponential.py create mode 100644 opentelemetry-sdk/tests/metrics/exponential/test_exponential_aggregation.py create mode 100644 opentelemetry-sdk/tests/metrics/exponential/test_histogram.py create mode 100644 opentelemetry-sdk/tests/metrics/exponential/test_logarithm_mapping.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 31f01555077..6407f05610c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +- Add Exponential Histogram + ([#2930](https://github.com/open-telemetry/opentelemetry-python/pull/2930)) - Add a configurable max_export_batch_size to the gRPC metrics exporter ([#2809](https://github.com/open-telemetry/opentelemetry-python/pull/2809)) - Remove support for 3.6 diff --git a/opentelemetry-sdk/src/opentelemetry/sdk/metrics/_internal/exponential/aggregation.py b/opentelemetry-sdk/src/opentelemetry/sdk/metrics/_internal/exponential/aggregation.py new file mode 100644 index 00000000000..13862340369 --- /dev/null +++ b/opentelemetry-sdk/src/opentelemetry/sdk/metrics/_internal/exponential/aggregation.py @@ -0,0 +1,548 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from math import inf +from typing import Optional, TypeVar, Union + +from opentelemetry.sdk.metrics._internal.aggregation import ( + AggregationTemporality, + _Aggregation, +) +from opentelemetry.sdk.metrics._internal.exponential.buckets import Buckets +from opentelemetry.sdk.metrics._internal.exponential.config import ( + DEFAULT_MAX_SIZE, +) +from opentelemetry.sdk.metrics._internal.exponential.exponent import ( + ExponentMapping, +) +from opentelemetry.sdk.metrics._internal.exponential.logarithm_mapping import ( + LogarithmExponentialHistogramMapping, +) +from opentelemetry.sdk.metrics._internal.measurement import Measurement +from opentelemetry.sdk.metrics._internal.point import Buckets as BucketsPoint +from opentelemetry.sdk.metrics._internal.point import ( + ExponentialHistogramDataPoint, +) +from opentelemetry.sdk.metrics._internal.point import ( + Histogram as HistogramPoint, +) +from opentelemetry.sdk.metrics._internal.point import ( + HistogramDataPoint, + NumberDataPoint, +) +from opentelemetry.util.types import Attributes + +_DataPointVarT = TypeVar("_DataPointVarT", NumberDataPoint, HistogramDataPoint) + + +# pylint: disable=protected-access +class _ExponentialBucketHistogramAggregation(_Aggregation[HistogramPoint]): + def __init__( + self, + attributes: Attributes, + start_time_unix_nano: int, + max_size: int = DEFAULT_MAX_SIZE, + ): + super().__init__(attributes) + # maxSize is the maximum capacity of the positive and negative ranges. + # it is set by Init(), preserved by Copy and Move.) + self._max_size = max_size + + # sum is the sum of all Updates reflected in the aggregator. It has + # the same type number as the corresponding sdkinstrument.Descriptor. + self._sum = 0 + + # count is incremented by 1 per Update. + self._count = 0 + + # zeroCount is incremented by 1 when the measured value is exactly 0. + self._zero_count = 0 + + # min is set when count > 0 + self._min = 0 + + # max is set when count > 0 + self._max = 0 + + # positive holds the positive values + self._positive = Buckets() + + # negative holds the negative values by their absolute value + self._negative = Buckets() + + # mapping corresponds to the current scale, is shared by both positive + # and negative ranges. + + self._mapping = LogarithmExponentialHistogramMapping( + LogarithmExponentialHistogramMapping._max_scale + ) + self._instrument_temporality = AggregationTemporality.DELTA + self._start_time_unix_nano = start_time_unix_nano + + @property + def _scale(self): + if self._count == self._zero_count: + return 0 + + return self._mapping.scale + + def aggregate(self, measurement: Measurement) -> None: + self._update_by_incr(measurement.value, 1) + + def collect( + self, + aggregation_temporality: AggregationTemporality, + collection_start_nano: int, + ) -> Optional[_DataPointVarT]: + """ + Atomically return a point for the current value of the metric. 
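+
+        Returns None if no values have been recorded since the previous
+        collection.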
+ """ + + with self._lock: + if not any(self._negative._counts) and not any( + self._positive._counts + ): + return None + + start_time_unix_nano = self._start_time_unix_nano + sum_ = self._sum + max_ = self._max + min_ = self._min + + self._negative._counts = [0] + self._positive._counts = [0] + self._start_time_unix_nano = collection_start_nano + self._sum = 0 + self._min = inf + self._max = -inf + + current_point = ExponentialHistogramDataPoint( + attributes=self._attributes, + start_time_unix_nano=start_time_unix_nano, + time_unix_nano=collection_start_nano, + count=( + sum(self._negative._counts) + + sum(self._positive._counts) + + self._zero_count + ), + sum=sum_, + scale=self._scale, + zero_count=self._zero_count, + positive=BucketsPoint( + self._positive.offset, self._positive._counts + ), + negative=BucketsPoint( + self._negative.offset, self._negative._counts + ), + # FIXME: Find the right value for flags + flags=0, + min=min_, + max=max_, + ) + + if self._previous_point is None or ( + self._instrument_temporality is aggregation_temporality + ): + self._previous_point = current_point + return current_point + + max_ = current_point.max + min_ = current_point.min + + if aggregation_temporality is AggregationTemporality.CUMULATIVE: + start_time_unix_nano = self._previous_point.start_time_unix_nano + sum_ = current_point.sum + self._previous_point.sum + # Only update min/max on delta -> cumulative + max_ = max(current_point.max, self._previous_point.max) + min_ = min(current_point.min, self._previous_point.min) + + negative_counts = [ + curr_count + prev_count + for curr_count, prev_count in zip( + current_point.negative.bucket_counts, + self._previous_point.negative.bucket_counts, + ) + ] + positive_counts = [ + curr_count + prev_count + for curr_count, prev_count in zip( + current_point.positive.bucket_counts, + self._previous_point.positive.bucket_counts, + ) + ] + else: + start_time_unix_nano = self._previous_point.time_unix_nano + sum_ = current_point.sum - self._previous_point.sum + + negative_counts = [ + curr_count + prev_count + for curr_count, prev_count in zip( + current_point.negative.bucket_counts, + self._previous_point.negative.bucket_counts, + ) + ] + positive_counts = [ + curr_count + prev_count + for curr_count, prev_count in zip( + current_point.positive.bucket_counts, + self._previous_point.positive.bucket_counts, + ) + ] + + current_point = ExponentialHistogramDataPoint( + attributes=self._attributes, + start_time_unix_nano=start_time_unix_nano, + time_unix_nano=current_point.time_unix_nano, + count=( + sum(negative_counts) + sum(positive_counts) + self._zero_count + ), + sum=sum_, + scale=self._scale, + zero_count=self._zero_count, + positive=BucketsPoint(self._positive.offset, positive_counts), + negative=BucketsPoint(self._negative.offset, negative_counts), + # FIXME: Find the right value for flags + flags=0, + min=min_, + max=max_, + ) + + self._previous_point = current_point + return current_point + + def _clear(self) -> None: + self._positive.clear() + self._negative.clear() + self._sum = 0 + self._count = 0 + self._zero_count = 0 + self._min = 0 + self._max = 0 + self._mapping = LogarithmExponentialHistogramMapping( + LogarithmExponentialHistogramMapping._max_scale + ) + + def _swap(self, other: "_ExponentialBucketHistogramAggregation") -> None: + + for attribute in [ + "_positive", + "_negative", + "_sum", + "_count", + "_zero_count", + "_min", + "_max", + "_mapping", + ]: + temp = getattr(self, attribute) + setattr(self, attribute, getattr(other, 
attribute)) + setattr(other, attribute, temp) + + def _copy_into( + self, other: "_ExponentialBucketHistogramAggregation" + ) -> None: + other._clear() + + for attribute in [ + "_positive", + "_negative", + "_sum", + "_count", + "_zero_count", + "_min", + "_max", + "_mapping", + ]: + setattr(other, attribute, getattr(self, attribute)) + + def _update_by_incr(self, number: Union[int, float], incr: int) -> None: + + value = float(number) + + if self._count == 0: + self._min = number + self._max = number + + else: + if number < self._min: + self._min = number + if number > self._max: + self._max = number + + self._count += incr + + if value == 0: + self._zero_count += incr + return + + self._sum += number * incr + + if value > 0: + buckets = self._positive + else: + value = -value + buckets = self._negative + + self._update(buckets, value, incr) + + def _downscale(self, change: int) -> None: + """ + Substracts change from the current mapping scale + """ + + if change == 0: + return + + if change < 0: + raise Exception(f"Impossible change of scale: {change}") + + new_scale = self._mapping.scale - change + + self._positive.downscale(change) + self._negative.downscale(change) + + if new_scale <= 0: + mapping = ExponentMapping(new_scale) + else: + mapping = LogarithmExponentialHistogramMapping(new_scale) + + self._mapping = mapping + + # pylint: disable=no-self-use + def _change_scale(self, high: int, low: int, size: int) -> int: + """ + Calculates how much downscaling is needed by shifting the high and low + values until they are separated by no more than size. + """ + + change = 0 + + # FIXME this slightly different from the Go implementation. It should + # be functionally equal but avoids an infinite loop in certain + # circumstances (high == 0, low == -1, size == 1). + while high - low > size: + high = high >> 1 + low = low >> 1 + + change += 1 + + if high - low == size: + high = high >> 1 + low = low >> 1 + + change += 1 + + return change + + def _update(self, buckets: Buckets, value: float, incr: int) -> None: + + index = self._mapping.map_to_index(value) + + low, high, success = self._increment_index_by(buckets, index, incr) + + if success: + return + + self._downscale(self._change_scale(high, low, self._max_size)) + + index = self._mapping.map_to_index(value) + + _, _, success = self._increment_index_by(buckets, index, incr) + + if not success: + raise Exception("Downscale logic error") + + def _increment_index_by( + self, buckets: Buckets, index: int, incr: int + ) -> tuple: + """ + Determines if the index lies inside the current range + [indexStart, indexEnd] and, if not, returns the minimum size (up to + maxSize) will satisfy the new value.)] + + Returns a tuple: low, high, success + """ + + if incr == 0: + # Skipping a bunch of work for 0 increment. This + # happens when merging sparse data, for example. + # This also happens UpdateByIncr is used with a 0 + # increment, means it can be safely skipped. + + return 0, 0, True + + if buckets.len() == 0: + # Go initializes its backing here if it hasn't been done before. + # I think we don't need to worry about that because the backing + # has been initialized already. 
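+            # This is the first non-empty update: collapse the bucket
+            # range to the single index that maps this value.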
+ buckets._index_start = index + buckets._index_end = index + buckets._index_base = index + + elif index < buckets._index_start: + span = buckets._index_end - index + + if span >= self._max_size: + # rescaling needed, mapped value to the right + + return index, buckets._index_end, False + + if span >= buckets._backing.size(): + self._grow(buckets, span + 1) + + buckets._index_start = index + + elif index > buckets._index_end: + span = index - buckets._index_start + + if span >= self._max_size: + # rescaling needed, mapped value to the right + + return buckets._index_start, index, False + + if span >= buckets._backing.size(): + + self._grow(buckets, span + 1) + + buckets._index_end = index + + bucket_index = index - buckets._index_base + + if bucket_index < 0: + bucket_index += buckets._backing.size() + + buckets.increment_bucket(bucket_index, incr) + + return 0, 0, True + + def _grow(self, buckets: Buckets, needed: int): + """ + Resizes the backing array by doubling in size up to maxSize. + this extends the array with a bunch of zeros and copies the + existing counts to the same position. + """ + + size = buckets._backing.size() + bias = buckets._index_base - buckets._index_start + old_positive_limit = size - bias + new_size = power_of_two_rounded_up(needed) + if new_size > self._max_size: + new_size = self._max_size + + new_positive_limit = new_size - bias + buckets._backing.grow_to( + new_size, old_positive_limit, new_positive_limit + ) + + def _high_low_at_scale(self, buckets: Buckets, scale: int) -> tuple: + """ + Returns low, high + """ + + if buckets.len() == 0: + return 0, -1 + + shift = self._scale - scale + + return buckets._index_start >> shift, buckets._index_end >> shift + + def _merge_from(self, other: "_ExponentialBucketHistogramAggregation"): + + if self._count == 0: + self._min = other._min + self._max = other._max + + elif other._count != 0: + if other._min < self._min: + self._min = other._min + if other._max > self._max: + self._max = other._max + + self._sum += other._sum + self._count += other._count + self._zero_count += other._zero_count + + min_scale = min(self._scale, other._scale) + + low_positive, high_positive = _with( + *self._high_low_at_scale(self._positive, min_scale), + *other._high_low_at_scale(other._positive, min_scale) + ) + + low_negative, high_negative = _with( + *self._high_low_at_scale(self._negative, min_scale), + *other._high_low_at_scale(other._negative, min_scale) + ) + + min_scale = min( + min_scale - self._change_scale( + high_positive, low_positive, self._max_size + ), + min_scale - self._change_scale( + high_negative, low_negative, self._max_size + ), + ) + + self._downscale(self._scale - min_scale) + + self._merge_buckets(self._positive, other, other._positive, min_scale) + self._merge_buckets(self._negative, other, other._negative, min_scale) + + def _merge_buckets( + self, + mine: Buckets, + other: "_ExponentialBucketHistogramAggregation", + theirs: Buckets, + scale: int, + ) -> None: + + their_offset = theirs.offset() + their_change = other._scale - scale + + for index in range(theirs.len()): + + _, _, success = self._increment_index_by( + mine, (their_offset + index) >> their_change, theirs.at(index) + ) + + if not success: + raise Exception("Incorrect merge scale") + + +def power_of_two_rounded_up(number: int) -> int: + """ + Computes the least power of two that is >= number. 
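+
+    For example (assuming number >= 1): 5 -> 8, 8 -> 8 and 1 -> 1.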
+ """ + + number = number - 1 + number |= number >> 1 + number |= number >> 2 + number |= number >> 4 + number |= number >> 8 + number |= number >> 16 + number = number + 1 + return number + + +def _with(h_low, h_high, o_low, o_high): + """ + Returns low, high + """ + if o_low > o_high: + return h_low, h_high + + if h_low > h_high: + return o_low, o_high + + return min(h_low, o_low), max(h_high, o_high) diff --git a/opentelemetry-sdk/src/opentelemetry/sdk/metrics/_internal/exponential/buckets.py b/opentelemetry-sdk/src/opentelemetry/sdk/metrics/_internal/exponential/buckets.py new file mode 100644 index 00000000000..e923fb4c48b --- /dev/null +++ b/opentelemetry-sdk/src/opentelemetry/sdk/metrics/_internal/exponential/buckets.py @@ -0,0 +1,253 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from abc import ABC, abstractmethod + + +class BucketsBacking(ABC): + @abstractmethod + def size(self) -> int: + """ + Returns the physical size of the backing array, which is + >= buckets.Len() the number allocated. + """ + + @abstractmethod + def grow_to( + self, new_size: int, old_positive_limit: int, new_positive_limit: int + ) -> None: + """ + Grows the backing array into a new size and copies old entries into + their correct new positions. + """ + + @abstractmethod + def reverse(self, start: int, end: int) -> None: + """ + Reverses the items in the backing array from [start, end[. + """ + + @abstractmethod + def empty_bucket(self, src: int) -> int: + """ + Empties the count from a bucket for moving into another one + """ + + @abstractmethod + def try_increment(self, bucket_index: int, incr: int) -> bool: + """ + Increments a bucket by incr, returns False if that operation would + overflow the current backing width. + """ + + @abstractmethod + def count_at(self, pos: int) -> int: + """ + Returns the count at a specific bucket. + """ + + @abstractmethod + def reset(self) -> None: + """ + Resets all buckets to zero count + """ + + +class BucketsVarWidth(BucketsBacking): + def __init__(self): + + self._counts = [0] + + def size(self) -> int: + """ + Returns the physical size of the backing array, which is + >= buckets.Len() the number allocated. + """ + return len(self._counts) + + def grow_to( + self, new_size: int, old_positive_limit: int, new_positive_limit: int + ) -> None: + """ + Grows the backing array into a new size and copies old entries into + their correct new positions. + """ + # FIXME this follows Go implementation maybe too closely. Since we + # don't need to request memory for a larger list, maybe this can be + # implemented in a more pythonical way. + tmp = [0] * new_size + tmp[new_positive_limit:] = self._counts[old_positive_limit:] + tmp[0:old_positive_limit] = self._counts[0:old_positive_limit] + self._counts = tmp + + def reverse(self, start: int, end: int) -> None: + """ + Reverses the items in the backing array from [start, end[. 
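+
+        The interval is half-open: the item at position end is excluded.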
+ """ + + for index, value in enumerate(reversed(self._counts[start:end])): + self._counts[index + start] = value + + def empty_bucket(self, src: int) -> int: + """ + Empties the count from a bucket for moving into another one + returns the count from that bucket before it was set to zero. + """ + + temp = self._counts[src] + self._counts[src] = 0 + return temp + + def try_increment(self, bucket_index: int, incr: int) -> bool: + """ + Increments a bucket by incr, returns False if that operation would + overflow the current backing width. + """ + + # The Go implementation checks the increment before applying it because + # it may result in a bigger quantity that it can be handled by the + # corresponding bucket counter type. Python3 has no limits for ints + # so we don't check anything. + # FIXME Rename this method to something like increment + self._counts[bucket_index] += incr + return True + + def count_at(self, pos: int) -> int: + """ + Returns the count at a specific bucket. + """ + + return self._counts[pos] + + def reset(self) -> None: + """ + Resets all buckets to zero count + """ + + self._counts = [0] * len(self._counts) + + +class Buckets: + def __init__(self): + self._backing = BucketsVarWidth() + + # The term "index" refers to the number of the + # histogram bucket used to determine its boundaries. + # The lower-boundary of a bucket is determined by + # formula base**index and the upper-boundary of a + # bucket is base**(index+1). Index values are signed + # to account for values less than or equal to 1. + + # Index of the 0th position in the backing array: backing[0] is the + # count in the bucket with index self._index_base. + self._index_base = 0 + + # indexStart is the smallest index value represented in the backing + # array. + self._index_start = 0 + + # indexEnd is the largest index value represented in the backing array. + self._index_end = 0 + + def offset(self) -> int: + return self._index_start + + def len(self) -> int: + if self._backing.size() == 0: + return 0 + + if self._index_end == self._index_start and self.at(0) == 0: + return 0 + + return self._index_end - self._index_start + 1 + + # pylint: disable=invalid-name + def at(self, pos_0: int) -> int: + pos = pos_0 + + bias = self._index_base - self._index_start + + if pos < bias: + pos += self._backing.size() + + pos -= bias + + return self._backing.count_at(pos) + + def clear(self) -> None: + + self._index_base = 0 + self._index_start = 0 + self._index_end = 0 + + self._backing.reset() + + # pylint: disable=invalid-name + def downscale(self, by: int) -> None: + """ + Rotates, then collapses 2**`by`-to-1 buckets. 
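+
+        For example, with by=1 the base is squared, so each new bucket
+        covers the range of two old ones: counts at old indices 2*i and
+        2*i + 1 are merged into new index i.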
+ """ + self.rotate() + + size = 1 + self._index_end - self._index_start + each = 1 << by + inpos = 0 + outpos = 0 + + pos = self._index_start + + while pos <= self._index_end: + mod = pos % each + if mod < 0: + mod += each + + index = mod + + while index < each and inpos < size: + self.relocate_bucket(outpos, inpos) + inpos += 1 + pos += 1 + index += 1 + + outpos += 1 + + self._index_start >>= by + self._index_end >>= by + self._index_base = self._index_start + + def rotate(self) -> None: + bias = self._index_base - self._index_start + + if bias == 0: + return + + # Rotate the array so that self._index_base == self._index_start + + self._index_base = self._index_start + + self._backing.reverse(0, self._backing.size()) + self._backing.reverse(0, bias) + self._backing.reverse(bias, self._backing.size()) + + def relocate_bucket(self, dest: int, src: int) -> None: + if dest == src: + return + + self.increment_bucket(dest, self._backing.empty_bucket(src)) + + def increment_bucket(self, bucket_index: int, incr: int): + # Here we just call try_increment, the Go implementation also deals + # with the widening of buckets + + self._backing.try_increment(bucket_index, incr) diff --git a/opentelemetry-sdk/src/opentelemetry/sdk/metrics/_internal/exponential/config.py b/opentelemetry-sdk/src/opentelemetry/sdk/metrics/_internal/exponential/config.py new file mode 100644 index 00000000000..a50a81d43ec --- /dev/null +++ b/opentelemetry-sdk/src/opentelemetry/sdk/metrics/_internal/exponential/config.py @@ -0,0 +1,31 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# DefaultMaxSize is the default maximum number of buckets per positive or +# negative number range. The value 160 is specified by OpenTelemetry--yields a +# maximum relative error of less than 5% for data with contrast 10**5 (e.g., +# latencies in the range 1ms to 100s). +# See the derivation here: +# https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/metrics/sdk.md#exponential-bucket-histogram-aggregation) + +DEFAULT_MAX_SIZE = 160 + +# MinSize is the smallest reasonable configuration, which is small enough to +# contain the entire normal floating point range at MinScale. +MIN_SIZE = 2 + +# MaximumMaxSize is an arbitrary limit meant to limit accidental use +# of giant histograms. +MAX_MAX_SIZE = 16384 diff --git a/opentelemetry-sdk/src/opentelemetry/sdk/metrics/_internal/exponential/exponent.py b/opentelemetry-sdk/src/opentelemetry/sdk/metrics/_internal/exponential/exponent.py new file mode 100644 index 00000000000..e9b126c1dac --- /dev/null +++ b/opentelemetry-sdk/src/opentelemetry/sdk/metrics/_internal/exponential/exponent.py @@ -0,0 +1,150 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from math import ldexp + +from opentelemetry.sdk.metrics._internal.exponential.exponential_histogram_mapping import ( + ExponentialHistogramMapping, + ExponentialMappingOverflowError, + ExponentialMappingUnderflowError, +) +from opentelemetry.sdk.metrics._internal.exponential.float64 import ( + MAX_NORMAL_EXPONENT, + MIN_NORMAL_EXPONENT, + MIN_NORMAL_VALUE, + SIGNIFICAND_WIDTH, + get_ieee_754_exponent, + get_ieee_754_significand, +) + +# The size of the exponential histogram buckets is determined by a parameter +# known as scale, larger values of scale will produce smaller buckets. Bucket +# boundaries of the exponential histogram are located at integer powers of the +# base, where: + +# base = 2 ** (2 ** (-scale)) + +# MinScale defines the point at which the exponential mapping +# function becomes useless for float64. With scale -10, ignoring +# subnormal values, bucket indices range from -1 to 1. +MIN_SCALE = -10 + +# MaxScale is the largest scale supported by exponential mapping. Use +# ../logarithm for larger scales. +MAX_SCALE = 0 + + +# FIXME Fix this name +class ExponentMapping(ExponentialHistogramMapping): + + _exponent_mappings = {} + + def __new__(cls, scale): + + if scale > MAX_SCALE: + raise Exception(f"scale is larger than {MAX_SCALE}") + + if scale < MIN_SCALE: + raise Exception(f"scale is smaller than {MIN_SCALE}") + + if scale not in cls._exponent_mappings.keys(): + cls._exponent_mappings[scale] = super().__new__(cls) + + return cls._exponent_mappings[scale] + + def __init__(self, scale: int): + super().__init__(scale) + self._shift = -self._scale + + def _min_normal_lower_boundary_index(self) -> int: + """ + Returns the largest index such that base ** index <= + opentelemetry.sdk.metrics._internal.exponential.float64.MIN_VALUE. + histogram bucket with this index covers the range + (base**index, base**(index+1)], including MinValue. + """ + index = MIN_NORMAL_EXPONENT >> self._shift + + if self._shift < 2: + index -= 1 + + return index + + def _max_normal_lower_boundary_index(self) -> int: + """ + Returns the index such that base ** index equals the largest + representable boundary. A histogram bucket with this + index covers the range ((2 ** 1024)/base, 2 ** 1024], which includes + opentelemetry.sdk.metrics._internal.exponential.float64.MAX_VALUE. + Note that this bucket is incomplete, since the upper + boundary cannot be represented (FIXME Why?). One greater than this + index corresponds with the bucket containing values > 2 ** 1024. + """ + return MAX_NORMAL_EXPONENT >> self._shift + + def map_to_index(self, value: float) -> int: + if value < MIN_NORMAL_VALUE: + return self._min_normal_lower_boundary_index() + + exponent = get_ieee_754_exponent(value) + + # Positive integers are represented in binary as having an infinite + # amount of leading zeroes, for example 2 is represented as ...00010. + + # A negative integer -x is represented in binary as the complement of + # (x - 1). For example, -4 is represented as the complement of 4 - 1 + # == 3. 3 is represented as ...00011. Its compliment is ...11100, the + # binary representation of -4. 
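+
+        # This can be checked in Python, where ~x == -(x + 1): the
+        # complement of 3 is ~3 == -4, matching the pattern above.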
+ + # get_ieee_754_significand(value) gets the positive integer made up + # from the rightmost SIGNIFICAND_WIDTH bits (the mantissa) of the IEEE + # 754 representation of value. If value is an exact power of 2, all + # these SIGNIFICAND_WIDTH bits would be all zeroes, and when 1 is + # substracted the resulting value is -1. The binary representation of + # -1 is ...111, so when these bits are right shifted SIGNIFICAND_WIDTH + # places, the resulting value for correction is -1. If value is not an + # exact power of 2, at least one of the rightmost SIGNIFICAND_WIDTH + # bits would be 1 (even for values whose decimal part is 0, like 5.0 + # since the IEEE 754 of such number is too the product of a power of 2 + # (defined in the exponent part of the IEEE 754 representation) and the + # value defined in the mantissa). Having at least one of the rightmost + # SIGNIFICAND_WIDTH bit being 1 means that get_ieee_754(value) will + # always be greater or equal to 1, and when 1 is substracted, the + # result will be greater or equal to 0, whose representation in binary + # will be of at most SIGNIFICAND_WIDTH ones that have an infinite + # amount of leading zeroes. When those SIGNIFICAND_WIDTH bits are + # shifted to the right SIGNIFICAND_WIDTH places, the resulting value + # will be 0. + + # In summary, correction will be -1 if value is a power of 2, 0 if not. + + # FIXME Document why we can assume value will not be 0, inf, or NaN. + correction = (get_ieee_754_significand(value) - 1) >> SIGNIFICAND_WIDTH + + # FIXME understand this + return (exponent + correction) >> self._shift + + # FIXME Should this be a property? + def get_lower_boundary(self, index: int) -> float: + if index < self._min_normal_lower_boundary_index(): + raise ExponentialMappingUnderflowError() + + if index > self._max_normal_lower_boundary_index(): + raise ExponentialMappingOverflowError() + + return ldexp(1, index << self._shift) + + @property + def scale(self) -> int: + return -self._shift diff --git a/opentelemetry-sdk/src/opentelemetry/sdk/metrics/_internal/exponential/exponential_histogram_mapping.py b/opentelemetry-sdk/src/opentelemetry/sdk/metrics/_internal/exponential/exponential_histogram_mapping.py new file mode 100644 index 00000000000..4ecdd52a9ca --- /dev/null +++ b/opentelemetry-sdk/src/opentelemetry/sdk/metrics/_internal/exponential/exponential_histogram_mapping.py @@ -0,0 +1,66 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from abc import ABC, abstractmethod + + +# FIXME Make sure this is a good name, find a better one if necessary. +class ExponentialHistogramMapping(ABC): + def __init__(self, scale: int): + self._scale = scale + + @abstractmethod + def map_to_index(self, value: float) -> int: + """ + MapToIndex maps positive floating point values to indexes + corresponding to Scale(). Implementations are not expected + to handle zeros, +Inf, NaN, or negative values. + """ + + # FIXME Should this be a property? 
+ @abstractmethod + def get_lower_boundary(self, index: int) -> float: + """ + LowerBoundary returns the lower boundary of a given bucket + index. The index is expected to map onto a range that is + at least partially inside the range of normalized floating + point values. If the corresponding bucket's upper boundary + is less than or equal to 0x1p-1022, UnderflowError will be + raised. If the corresponding bucket's lower boundary is + greater than math.MaxFloat64, OverflowError will be raised. + """ + + @property + @abstractmethod + def scale(self) -> int: + """ + Scale returns the parameter that controls the resolution of + this mapping. For details see: + https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/metrics/datamodel.md#exponential-scale + """ + + +# FIXME make sure this is a good name +class ExponentialMappingUnderflowError(Exception): + """ + Raised when computing the lower boundary of an index that maps into a + denormalized floating point value. + """ + + +# FIXME make sure this is a good name +class ExponentialMappingOverflowError(Exception): + """ + Raised when computing the lower boundary of an index that maps into +Inf. + """ diff --git a/opentelemetry-sdk/src/opentelemetry/sdk/metrics/_internal/exponential/float64.py b/opentelemetry-sdk/src/opentelemetry/sdk/metrics/_internal/exponential/float64.py new file mode 100644 index 00000000000..5b9d99619f2 --- /dev/null +++ b/opentelemetry-sdk/src/opentelemetry/sdk/metrics/_internal/exponential/float64.py @@ -0,0 +1,181 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import ctypes +from decimal import Decimal +from sys import float_info + +# An IEEE 754 double-precision (64 bit) floating point number is represented +# as: 1 bit for sign, 11 bits for exponent and 52 bits for significand. Since +# thse numbers are in a normalized form (in scientific notation), the first bit +# of the significand will always be 1. Because of that, that bit is not stored +# but implicit, to make room for one more bit and more precision. + +SIGNIFICAND_WIDTH = 52 +EXPONENT_WIDTH = 11 + +# This mask is equivalent to 52 "1" bits (there are 13 hexadecimal 4-bit "f"s +# in the significand mask, 13 * 4 == 52) or 0xfffffffffffff in hexadecimal. +SIGNIFICAND_MASK = (1 << SIGNIFICAND_WIDTH) - 1 + +# There are 11 bits for the exponent, but the exponent bias values 0 (11 "0" +# bits) and 2047 (11 "1" bits) have special meanings so the exponent range is +# from 1 to 2046. To calculate the exponent value, 1023 is substracted from the +# exponent, so the exponent value range is from -1022 to +1023. +EXPONENT_BIAS = (2 ** (EXPONENT_WIDTH - 1)) - 1 + +# All the exponent mask bits are set to 1 for the 11 exponent bits. +EXPONENT_MASK = ((1 << EXPONENT_WIDTH) - 1) << SIGNIFICAND_WIDTH + +# The exponent mask bit is to 1 for the sign bit. 
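+# That is, only the topmost of the 64 bits is set:
+# SIGN_MASK == 1 << (11 + 52) == 1 << 63.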
+SIGN_MASK = 1 << (EXPONENT_WIDTH + SIGNIFICAND_WIDTH) + +MIN_NORMAL_EXPONENT = -EXPONENT_BIAS + 1 +MAX_NORMAL_EXPONENT = EXPONENT_BIAS + +# Smallest possible normal value (2.2250738585072014e-308) +# This value is the result of using the smallest possible number in the +# mantissa, 1.0000000000000000000000000000000000000000000000000000 (52 "0"s in +# the fractional part) = 1.0000000000000000 and a single "1" in the exponent. +# Finally 1.0000000000000000 * 2 ** -1022 = 2.2250738585072014e-308. +MIN_NORMAL_VALUE = float_info.min + +# Greatest possible normal value (1.7976931348623157e+308) +# The binary representation of a float in scientific notation uses (for the +# significand) one bit for the integer part (which is implicit) and 52 bits for +# the fractional part. Consider a float binary 1.111. It is equal to 1 + 1/2 + +# 1/4 + 1/8. The greatest possible value in the 52-bit significand would be +# then 1.1111111111111111111111111111111111111111111111111111 (52 "1"s in the +# fractional part) = 1.9999999999999998. Finally, +# 1.9999999999999998 * 2 ** 1023 = 1.7976931348623157e+308. +MAX_NORMAL_VALUE = float_info.max + + +def get_ieee_754_64_binary(value: float): + """ + The purpose of this function is to more easily illustrate the IEEE 754 + 64-bit float representation. + """ + result = bin(ctypes.c_uint64.from_buffer(ctypes.c_double(value)).value)[2:] + + if result == "0": + result = result * 64 + + if value > 0: + result = f"0{result}" + + decimal_exponent = 0 + + exponent = result[1:12] + + for index, bit in enumerate(reversed(exponent)): + if int(bit): + decimal_exponent += 2**index + + # 0 has a special representation in IEE 574, all exponent and mantissa bits + # are 0. The sign bit still represents its sign, so there is 0 (all bits + # are set to 0) and -0 (the first bit is 1, the rest are 0). + if value == 0: + implicit_bit = 0 + else: + implicit_bit = 1 + + decimal_exponent -= 1023 * implicit_bit + + decimal_mantissa = Decimal(implicit_bit) + + mantissa = result[12:] + + for index, bit in enumerate(mantissa): + if int(bit): + decimal_mantissa += Decimal(1) / Decimal(2 ** (index + 1)) + + sign = result[0] + + return { + "sign": sign, + "exponent": exponent, + "mantissa": mantissa, + # IEEE 754 can only exactly represent a discrete series of numbers, the + # intention of this field is to show the actual decimal value that is + # represented. + "decimal": str( + Decimal(-1 if int(sign) else 1) + * Decimal(2**decimal_exponent) + * decimal_mantissa + ), + } + + +def get_ieee_754_exponent(value: float) -> int: + """ + Gets the exponent of the IEEE 754 representation of a float. + """ + + # 0000 -> 0 + # 0001 -> 1 + # 0010 -> 2 + # 0011 -> 3 + + # 0100 -> 4 + # 0101 -> 5 + # 0110 -> 6 + # 0111 -> 7 + + # 1000 -> 8 + # 1001 -> 9 + # 1010 -> 10 + # 1011 -> 11 + + # 1100 -> 12 + # 1101 -> 13 + # 1110 -> 14 + # 1111 -> 15 + + # 0 & 10 == 0 + # 1 & 10 == 0 + # 2 & 10 == 2 + # 3 & 10 == 2 + # 4 & 10 == 0 + # 6 & 10 == 2 + + # 12 >> 2 == 3 + # 1 >> 2 == 0 + + return ( + ( + # This step gives the integer that corresponds to the IEEE 754 + # representation of a float. + ctypes.c_uint64.from_buffer(ctypes.c_double(value)).value + # This step isolates the exponent bits, turning every bit + # outside of the exponent field to 0. + & EXPONENT_MASK + ) + # This step moves the exponent bits to the right, removing the + # mantissa bits that were set to 0 by the previous step. This + # leaves the IEEE 754 exponent value, ready for the next step. 
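+        # For example, for 4.0 == 1.0 * 2 ** 2 the stored exponent field
+        # is 1025 and the function returns 1025 - 1023 == 2.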
+ >> SIGNIFICAND_WIDTH + # This step substracts the exponent bias from the IEEE 754 value, + # leaving the actual exponent value. + ) - EXPONENT_BIAS + + +def get_ieee_754_significand(value: float) -> int: + return ( + ctypes.c_uint64.from_buffer(ctypes.c_double(value)).value + # This stepe isolates the significand bits. There is no need to do any + # bit shifting as the significand bits are already the rightmost field + # in an IEEE 754 representation. + & SIGNIFICAND_MASK + ) diff --git a/opentelemetry-sdk/src/opentelemetry/sdk/metrics/_internal/exponential/logarithm_mapping.py b/opentelemetry-sdk/src/opentelemetry/sdk/metrics/_internal/exponential/logarithm_mapping.py new file mode 100644 index 00000000000..5a34cdbf243 --- /dev/null +++ b/opentelemetry-sdk/src/opentelemetry/sdk/metrics/_internal/exponential/logarithm_mapping.py @@ -0,0 +1,204 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from math import exp, floor, ldexp, log +from threading import Lock + +from opentelemetry.sdk.metrics._internal.exponential.exponential_histogram_mapping import ( + ExponentialHistogramMapping, + ExponentialMappingOverflowError, + ExponentialMappingUnderflowError, +) +from opentelemetry.sdk.metrics._internal.exponential.float64 import ( + MAX_NORMAL_EXPONENT, + MIN_NORMAL_EXPONENT, + MIN_NORMAL_VALUE, + get_ieee_754_exponent, + get_ieee_754_significand, +) + + +# FIXME Make sure this is a good name +class LogarithmExponentialHistogramMapping(ExponentialHistogramMapping): + + # FIXME make sure this is a good name + _prebuilt_mappings = {} + _prebuilt_mappings_lock = Lock() + + # MinScale ensures that the ../exponent mapper is used for + # zero and negative scale values. Do not use the logarithm + # mapper for scales <= 0. + _min_scale = 1 + + # FIXME Go implementation uses a value of 20 here, find out the right + # value for this implementation. 
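+    # Note: at scale 20 the largest bucket index is
+    # ((MAX_NORMAL_EXPONENT + 1) << 20) - 1 == 2 ** 30 - 1 (see
+    # _max_normal_lower_boundary_index below), so bucket indexes still
+    # fit comfortably in a signed 32-bit integer.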
+ _max_scale = 20 + + def __new__(cls, scale: int): + + if scale > cls._max_scale: + raise Exception(f"scale is larger than {cls._max_scale}") + + if scale < cls._min_scale: + raise Exception(f"scale is smaller than {cls._min_scale}") + + if scale not in cls._prebuilt_mappings.keys(): + cls._prebuilt_mappings[scale] = super().__new__(cls) + + with cls._prebuilt_mappings_lock: + if scale in cls._prebuilt_mappings: + cls._prebuilt_mappings[scale] = super().__new__(cls) + + return cls._prebuilt_mappings[scale] + + def __init__(self, scale: int): + + super().__init__(scale) + + if scale < self._min_scale or scale > self._max_scale: + raise Exception("Scale is out of bounds") + + # FIXME calculate the right value for self._max_index + self._max_index = 1 + # FIXME calculate the right value for self._min_index + self._min_index = 1 + # FIXME calculate the right value for self._scale_factor + + ln_2 = log(2) + + # scaleFactor is used and computed as follows: + # index = log(value) / log(base) + # = log(value) / log(2^(2^-scale)) + # = log(value) / (2^-scale * log(2)) + # = log(value) * (1/log(2) * 2^scale) + # = log(value) * scaleFactor + # where: + # scaleFactor = (1/log(2) * 2^scale) + # = math.Log2E * math.Exp2(scale) + # = math.Ldexp(math.Log2E, scale) + # Because multiplication is faster than division, we define scaleFactor as a multiplier. + # This implementation was copied from a Java prototype. See: + # https://github.com/newrelic-experimental/newrelic-sketch-java/blob/1ce245713603d61ba3a4510f6df930a5479cd3f6/src/main/java/com/newrelic/nrsketch/indexer/LogIndexer.java + # for the equations used here. + self._scale_factor = ldexp(1 / ln_2, scale) + + # log(boundary) = index * log(base) + # log(boundary) = index * log(2^(2^-scale)) + # log(boundary) = index * 2^-scale * log(2) + # boundary = exp(index * inverseFactor) + # where: + # inverseFactor = 2^-scale * log(2) + # = math.Ldexp(math.Ln2, -scale) + self._inverse_factor = ldexp(ln_2, -scale) + + # scale is between MinScale and MaxScale. The exponential + # base is defined as 2**(2**(-scale)). + self._scale = scale + + def _min_normal_lower_boundary_index(self) -> int: + + """ + minNormalLowerBoundaryIndex is the index such that base**index equals + MinValue. A histogram bucket with this index covers the range + (MinValue, MinValue*base]. One less than this index corresponds + with the bucket containing values <= MinValue. + """ + return MIN_NORMAL_EXPONENT << self._scale + + def _max_normal_lower_boundary_index(self) -> int: + + """ + maxNormalLowerBoundaryIndex is the index such that base**index equals the + greatest representable lower boundary. A histogram bucket with this + index covers the range (0x1p+1024/base, 0x1p+1024], which includes + MaxValue; note that this bucket is incomplete, since the upper + boundary cannot be represented. One greater than this index + corresponds with the bucket containing values > 0x1p1024. + """ + return ((MAX_NORMAL_EXPONENT + 1) << self._scale) - 1 + + def map_to_index(self, value: float) -> int: + """ + MapToIndex maps positive floating point values to indexes + corresponding to Scale(). Implementations are not expected + to handle zeros, +Inf, NaN, or negative values. + """ + + # Note: we can assume not a 0, Inf, or NaN; positive sign bit. + if value <= MIN_NORMAL_VALUE: + return self._min_normal_lower_boundary_index() - 1 + + # Exact power-of-two correctness: an optional special case. 
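+        # An exact power of two lies on a bucket boundary and belongs to
+        # the lower bucket, so the correct index is
+        # (exponent << scale) - 1; computing it directly from the IEEE
+        # 754 exponent avoids floating-point rounding in the logarithm
+        # path.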
+ if get_ieee_754_significand(value) == 0: + exponent = get_ieee_754_exponent(value) + return (exponent << self._scale) - 1 + + # Non-power of two cases. Use Floor(x) to round the scaled + # logarithm. We could use Ceil(x)-1 to achieve the same + # result, though Ceil() is typically defined as -Floor(-x) + # and typically not performed in hardware, so this is likely + # less code. + index = floor(log(value) * self._scale_factor) + + max_ = self._max_normal_lower_boundary_index() + + if index >= max_: + return max_ + return index + + def get_lower_boundary(self, index: int) -> float: + """ + LowerBoundary returns the lower boundary of a given bucket + index. The index is expected to map onto a range that is + at least partially inside the range of normalized floating + point values. If the corresponding bucket's upper boundary + is less than or equal to 0x1p-1022, UnderflowError will be + raised. If the corresponding bucket's lower boundary is + greater than math.MaxFloat64, OverflowError will be raised. + """ + + # LowerBoundary implements mapping.Mapping. + max_ = self._max_normal_lower_boundary_index() + + if index >= max_: + if index == max_: + # Note that the equation on the last line of this + # function returns +Inf. Use the alternate equation. + return 2 * exp( + index - (1 << self._scale) * self._inverse_factor + ) + raise ExponentialMappingOverflowError() + min_ = self._min_normal_lower_boundary_index() + if index <= min_: + if index == min_: + return MIN_NORMAL_VALUE + if index == min_ - 1: + # Similar to the logic above, the math.Exp() + # formulation is not accurate for subnormal + # values. + return ( + exp(index + (1 << self._scale) * self._inverse_factor) / 2 + ) + raise ExponentialMappingUnderflowError() + return exp(index * self._inverse_factor) + + @property + def scale(self) -> int: + """ + Scale returns the parameter that controls the resolution of + this mapping. For details see: + https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/metrics/datamodel.md#exponential-scale + """ + + return self._scale diff --git a/opentelemetry-sdk/src/opentelemetry/sdk/metrics/_internal/point.py b/opentelemetry-sdk/src/opentelemetry/sdk/metrics/_internal/point.py index b4d813accaf..786357a3ab4 100644 --- a/opentelemetry-sdk/src/opentelemetry/sdk/metrics/_internal/point.py +++ b/opentelemetry-sdk/src/opentelemetry/sdk/metrics/_internal/point.py @@ -4,7 +4,7 @@ # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http: #www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, @@ -60,6 +60,128 @@ def to_json(self, indent=4) -> str: return dumps(asdict(self), indent=indent) +@dataclass(frozen=True) +class Buckets: + # Buckets are a set of bucket counts, encoded in a contiguous array + # of counts. + + # Offset is the bucket index of the first entry in the bucket_counts array. + # + # Note: This uses a varint encoding as a simple form of compression. + offset: int + + # Count is an array of counts, where count[i] carries the count + # of the bucket at index (offset+i). count[i] is the count of + # values greater than base^(offset+i) and less or equal to than + # base^(offset+i+1). + # + # Note: By contrast, the explicit HistogramDataPoint uses + # fixed64. 
This field is expected to have many buckets, + # especially zeros, so uint64 has been selected to ensure + # varint encoding. + bucket_counts: Sequence[int] + + +@dataclass(frozen=True) +class ExponentialHistogramDataPoint: + """Single data point in a timeseries whose boundaries are defined by an + exponential function. This timeseries describes the time-varying scalar + value of a metric. + """ + + # The set of key/value pairs that uniquely identify the timeseries from + # where this point belongs. The list may be empty (may contain 0 elements). + # Attribute keys MUST be unique (it is not allowed to have more than one + # attribute with the same key). + attributes: Attributes + + # StartTimeUnixNano is optional but strongly encouraged, see the + # the detailed comments above Metric. + # + # Value is UNIX Epoch time in nanoseconds since 00:00:00 UTC on 1 January + # 1970. + start_time_unix_nano: int + + # TimeUnixNano is required, see the detailed comments above Metric. + # + # Value is UNIX Epoch time in nanoseconds since 00:00:00 UTC on 1 January + # 1970. + time_unix_nano: int + + # count is the number of values in the population. Must be + # non-negative. This value must be equal to the sum of the "bucket_counts" + # values in the positive and negative Buckets plus the "zero_count" field. + count: int + + # sum of the values in the population. If count is zero then this field + # must be zero. + # + # Note: Sum should only be filled out when measuring non-negative discrete + # events, and is assumed to be monotonic over the values of these events. + # Negative events *can* be recorded, but sum should not be filled out when + # doing so. This is specifically to enforce compatibility w/ OpenMetrics, + # see: https: #github.com/OpenObservability/OpenMetrics/blob/main/specification/OpenMetrics.md#histogram + sum: Union[int, float] + + # scale describes the resolution of the histogram. Boundaries are + # located at powers of the base, where: + # + # base = (2^(2^-scale)) + # + # The histogram bucket identified by `index`, a signed integer, + # contains values that are greater than (base^index) and + # less than or equal to (base^(index+1)). + # + # The positive and negative ranges of the histogram are expressed + # separately. Negative values are mapped by their absolute value + # into the negative range using the same scale as the positive range. + # + # scale is not restricted by the protocol, as the permissible + # values depend on the range of the data. + scale: int + + # zero_count is the count of values that are either exactly zero or + # within the region considered zero by the instrumentation at the + # tolerated degree of precision. This bucket stores values that + # cannot be expressed using the standard exponential formula as + # well as values that have been rounded to zero. + # + # Implementations MAY consider the zero bucket to have probability + # mass equal to (zero_count / count). + zero_count: int + + # positive carries the positive range of exponential bucket counts. + positive: Buckets + + # negative carries the negative range of exponential bucket counts. + negative: Buckets + + # Flags that apply to this specific data point. See DataPointFlags + # for the available flags and their meaning. + flags: int + + # min is the minimum value over (start_time, end_time]. + min: float + + # max is the maximum value over (start_time, end_time]. 
+ max: float + + def to_json(self, indent=4) -> str: + return dumps(asdict(self), indent=indent) + + +@dataclass(frozen=True) +class ExponentialHistogram: + """Represents the type of a metric that is calculated by aggregating as an + ExponentialHistogram of all reported measurements over a time interval. + """ + + data_points: Sequence[ExponentialHistogramDataPoint] + aggregation_temporality: ( + "opentelemetry.sdk.metrics.export.AggregationTemporality" + ) + + @dataclass(frozen=True) class Sum: """Represents the type of a scalar metric that is calculated as a sum of diff --git a/opentelemetry-sdk/tests/metrics/exponential/test_exponent_mapping.py b/opentelemetry-sdk/tests/metrics/exponential/test_exponent_mapping.py new file mode 100644 index 00000000000..f559166ca02 --- /dev/null +++ b/opentelemetry-sdk/tests/metrics/exponential/test_exponent_mapping.py @@ -0,0 +1,99 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from unittest import TestCase + +from opentelemetry.sdk.metrics._internal.exponential.exponent import ( + ExponentMapping, +) +from opentelemetry.sdk.metrics._internal.exponential.float64 import ( + MAX_NORMAL_EXPONENT, + MAX_NORMAL_VALUE, + MIN_NORMAL_EXPONENT, +) + + +class TestExponentMapping(TestCase): + def test_singleton(self): + + self.assertIs(ExponentMapping(-3), ExponentMapping(-3)) + self.assertIsNot(ExponentMapping(-3), ExponentMapping(-5)) + + def test_exponent_mapping_0(self): + + try: + ExponentMapping(0) + + except Exception as error: + self.fail(f"Unexpected exception raised: {error}") + + def test_map_to_index(self): + + exponent_mapping = ExponentMapping(0) + + # This is the equivalent to 1.1 in hexadecimal + hex_1_1 = 1 + (1 / 16) + + # Testing with values near +infinite + self.assertEqual( + exponent_mapping.map_to_index(MAX_NORMAL_VALUE), + MAX_NORMAL_EXPONENT, + ) + self.assertEqual(exponent_mapping.map_to_index(MAX_NORMAL_VALUE), 1023) + self.assertEqual(exponent_mapping.map_to_index(2**1023), 1022) + self.assertEqual(exponent_mapping.map_to_index(2**1022), 1021) + self.assertEqual( + exponent_mapping.map_to_index(hex_1_1 * (2**1023)), 1023 + ) + self.assertEqual( + exponent_mapping.map_to_index(hex_1_1 * (2**1022)), 1022 + ) + + # Testing with values near 1 + self.assertEqual(exponent_mapping.map_to_index(4), 1) + self.assertEqual(exponent_mapping.map_to_index(3), 1) + self.assertEqual(exponent_mapping.map_to_index(2), 0) + self.assertEqual(exponent_mapping.map_to_index(1), -1) + self.assertEqual(exponent_mapping.map_to_index(0.75), -1) + self.assertEqual(exponent_mapping.map_to_index(0.51), -1) + self.assertEqual(exponent_mapping.map_to_index(0.5), -2) + self.assertEqual(exponent_mapping.map_to_index(0.26), -2) + self.assertEqual(exponent_mapping.map_to_index(0.25), -3) + self.assertEqual(exponent_mapping.map_to_index(0.126), -3) + self.assertEqual(exponent_mapping.map_to_index(0.125), -4) + + # Testing with values near 0 + self.assertEqual(exponent_mapping.map_to_index(2**-1022), -1023) + self.assertEqual( + 
exponent_mapping.map_to_index(hex_1_1 * (2**-1022)), -1022 + ) + self.assertEqual(exponent_mapping.map_to_index(2**-1021), -1022) + self.assertEqual( + exponent_mapping.map_to_index(hex_1_1 * (2**-1021)), -1021 + ) + self.assertEqual( + exponent_mapping.map_to_index(2**-1022), MIN_NORMAL_EXPONENT - 1 + ) + self.assertEqual( + exponent_mapping.map_to_index(2**-1021), MIN_NORMAL_EXPONENT + ) + # The smallest subnormal value in Pyhton is 2 ** -1074 = 5e-324. + # This value is also the result of: + # s = 1 + # while s / 2: + # s = s / 2 + # s == 5e-324 + self.assertEqual( + exponent_mapping.map_to_index(2**-1074), MIN_NORMAL_EXPONENT - 1 + ) diff --git a/opentelemetry-sdk/tests/metrics/exponential/test_exponential.py b/opentelemetry-sdk/tests/metrics/exponential/test_exponential.py new file mode 100644 index 00000000000..d8039137e0e --- /dev/null +++ b/opentelemetry-sdk/tests/metrics/exponential/test_exponential.py @@ -0,0 +1,92 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from unittest import TestCase + +from opentelemetry.sdk.metrics._internal.exponential.float64 import ( + get_ieee_754_64_binary, +) + + +class TestExponential(TestCase): + def test_get_ieee_754_64_binary(self): + """ + Bit 0: 1 for negative values, 0 for positive values + Bits 1 - 11: exponent, substract 1023 from it to get the actual value + Bits 12 - 63: mantissa, a leading 1 is implicit + """ + + # 0 + # 10000000001 == 1025 -> 1025 - 1023 == 2 + # 0000000000000000000000000000000000000000000000000000 + + result = get_ieee_754_64_binary(4.0) + + self.assertEqual(result["sign"], "0") + self.assertEqual(result["exponent"], "10000000001") + self.assertEqual( + result["mantissa"], + "0000000000000000000000000000000000000000000000000000", + ) + self.assertEqual(result["decimal"], "4") + + result = get_ieee_754_64_binary(4.5) + + self.assertEqual(result["sign"], "0") + self.assertEqual(result["exponent"], "10000000001") + self.assertEqual( + result["mantissa"], + "0010000000000000000000000000000000000000000000000000", + ) + self.assertEqual(result["decimal"], "4.500") + + result = get_ieee_754_64_binary(-4.5) + + self.assertEqual(result["sign"], "1") + self.assertEqual(result["exponent"], "10000000001") + self.assertEqual( + result["mantissa"], + "0010000000000000000000000000000000000000000000000000", + ) + self.assertEqual(result["decimal"], "-4.500") + + result = get_ieee_754_64_binary(0.0) + + self.assertEqual(result["sign"], "0") + self.assertEqual(result["exponent"], "00000000000") + self.assertEqual( + result["mantissa"], + "0000000000000000000000000000000000000000000000000000", + ) + self.assertEqual(result["decimal"], "0") + + result = get_ieee_754_64_binary(-0.0) + + self.assertEqual(result["sign"], "1") + self.assertEqual(result["exponent"], "00000000000") + self.assertEqual( + result["mantissa"], + "0000000000000000000000000000000000000000000000000000", + ) + self.assertEqual(result["decimal"], "-0") + + result = get_ieee_754_64_binary(4.3) + + 
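+        # 4.3 has no exact binary representation, so the nearest
+        # representable double is stored; hence the repeating 0011
+        # pattern in the mantissa and the long decimal value below.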
+        self.assertEqual(result["sign"], "0")
+        self.assertEqual(result["exponent"], "10000000001")
+        self.assertEqual(
+            result["mantissa"],
+            "0001001100110011001100110011001100110011001100110011",
+        )
+        self.assertEqual(result["decimal"], "4.299999999999999822364316064")
diff --git a/opentelemetry-sdk/tests/metrics/exponential/test_exponential_aggregation.py b/opentelemetry-sdk/tests/metrics/exponential/test_exponential_aggregation.py
new file mode 100644
index 00000000000..131db88cdb7
--- /dev/null
+++ b/opentelemetry-sdk/tests/metrics/exponential/test_exponential_aggregation.py
@@ -0,0 +1,818 @@
+# Copyright The OpenTelemetry Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from itertools import permutations
+from unittest import TestCase
+from unittest.mock import Mock
+from typing import Sequence
+from random import seed, random
+from sys import float_info
+
+from opentelemetry.sdk.metrics._internal.exponential.aggregation import (
+    _ExponentialBucketHistogramAggregation,
+)
+from opentelemetry.sdk.metrics._internal.exponential.buckets import Buckets
+from opentelemetry.sdk.metrics._internal.exponential.exponent import (
+    ExponentMapping,
+)
+from opentelemetry.sdk.metrics._internal.exponential.exponential_histogram_mapping import (
+    ExponentialHistogramMapping,
+)
+from opentelemetry.sdk.metrics._internal.exponential.logarithm_mapping import (
+    LogarithmExponentialHistogramMapping,
+)
+from opentelemetry.sdk.metrics._internal.measurement import Measurement
+
+
+def get_counts(buckets: Buckets) -> Sequence[int]:
+
+    counts = []
+
+    for index in range(buckets.len()):
+        counts.append(buckets.at(index))
+
+    return counts
+
+
+def center_val(mapping: ExponentialHistogramMapping, index: int) -> float:
+    return (
+        mapping.get_lower_boundary(index)
+        + mapping.get_lower_boundary(index + 1)
+    ) / 2
+
+
+class TestAggregation(TestCase):
+    def require_equal(self, a, b):
+
+        if a._sum == 0 or b._sum == 0:
+            # When either sum is 0, compare with an absolute tolerance.
+            self.assertAlmostEqual(a._sum, b._sum, delta=1e-6)
+        else:
+            self.assertLessEqual(a._sum, (b._sum * (1 + 1e-6)))
+            self.assertGreaterEqual(a._sum, (b._sum * (1 - 1e-6)))
+
+        self.assertEqual(a._count, b._count)
+        self.assertEqual(a._zero_count, b._zero_count)
+        self.assertEqual(a._scale, b._scale)
+
+        self.assertEqual(a._positive.len(), b._positive.len())
+        self.assertEqual(a._negative.len(), b._negative.len())
+
+        for index in range(a._positive.len()):
+            self.assertEqual(a._positive.at(index), b._positive.at(index))
+
+        for index in range(a._negative.len()):
+            self.assertEqual(a._negative.at(index), b._negative.at(index))
+
+    def test_alternating_growth_0(self):
+        """
+        Tests insertion of [2, 4, 1]. The index of 2 (i.e., 0) becomes
+        `indexBase`, the 4 goes to its right and the 1 goes in the last
+        position of the backing array. With 3 binary orders of magnitude
+        and MaxSize=4, this must finish with scale=0; with minimum value 1
+        this must finish with offset=-1 (all scales).
+        """
+
+        # The corresponding Go test is TestAlternatingGrowth1 where:
+        # agg := NewFloat64(NewConfig(WithMaxSize(4)))
+        # agg is an instance of github.com/lightstep/otel-launcher-go/lightstep/sdk/metric/aggregator/histogram/structure.Histogram[float64]
+
+        exponential_histogram_aggregation = (
+            _ExponentialBucketHistogramAggregation(Mock(), Mock(), max_size=4)
+        )
+
+        exponential_histogram_aggregation.aggregate(Measurement(2, Mock()))
+        exponential_histogram_aggregation.aggregate(Measurement(4, Mock()))
+        exponential_histogram_aggregation.aggregate(Measurement(1, Mock()))
+
+        self.assertEqual(
+            exponential_histogram_aggregation._positive.offset(), -1
+        )
+        self.assertEqual(exponential_histogram_aggregation._scale, 0)
+        self.assertEqual(
+            get_counts(exponential_histogram_aggregation._positive), [1, 1, 1]
+        )
+
+    def test_alternating_growth_1(self):
+        """
+        Tests insertion of [2, 2, 2, 1, 8, 0.5]. The test proceeds as
+        above but then downscales once further to scale=-1, so that index -1
+        holds the range (0.25, 1], index 0 holds the range (1, 4] and
+        index 1 holds the range (4, 16].
+        """
+
+        exponential_histogram_aggregation = (
+            _ExponentialBucketHistogramAggregation(Mock(), Mock(), max_size=4)
+        )
+
+        exponential_histogram_aggregation.aggregate(Measurement(2, Mock()))
+        exponential_histogram_aggregation.aggregate(Measurement(2, Mock()))
+        exponential_histogram_aggregation.aggregate(Measurement(2, Mock()))
+        exponential_histogram_aggregation.aggregate(Measurement(1, Mock()))
+        exponential_histogram_aggregation.aggregate(Measurement(8, Mock()))
+        exponential_histogram_aggregation.aggregate(Measurement(0.5, Mock()))
+
+        self.assertEqual(
+            exponential_histogram_aggregation._positive.offset(), -1
+        )
+        self.assertEqual(exponential_histogram_aggregation._scale, -1)
+        self.assertEqual(
+            get_counts(exponential_histogram_aggregation._positive), [2, 3, 1]
+        )
+
+    def test_permutations(self):
+        """
+        Tests that every permutation of certain sequences with maxSize=2
+        results in the same scale=-1 histogram.
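+        Downscaling depends only on which bucket indexes have been occupied,
+        not on the order in which the values arrive, so each permutation must
+        produce an identical final histogram.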
+        """
+
+        for test_values, expected in [
+            [
+                [0.5, 1.0, 2.0],
+                {
+                    "scale": -1,
+                    "offset": -1,
+                    "len": 2,
+                    "at_0": 2,
+                    "at_1": 1,
+                },
+            ],
+            [
+                [1.0, 2.0, 4.0],
+                {
+                    "scale": -1,
+                    "offset": -1,
+                    "len": 2,
+                    "at_0": 1,
+                    "at_1": 2,
+                },
+            ],
+            [
+                [0.25, 0.5, 1],
+                {
+                    "scale": -1,
+                    "offset": -2,
+                    "len": 2,
+                    "at_0": 1,
+                    "at_1": 2,
+                },
+            ],
+        ]:
+
+            for permutation in permutations(test_values):
+
+                exponential_histogram_aggregation = (
+                    _ExponentialBucketHistogramAggregation(
+                        Mock(), Mock(), max_size=2
+                    )
+                )
+
+                for value in permutation:
+
+                    exponential_histogram_aggregation.aggregate(
+                        Measurement(value, Mock())
+                    )
+
+                self.assertEqual(
+                    exponential_histogram_aggregation._scale, expected["scale"]
+                )
+                self.assertEqual(
+                    exponential_histogram_aggregation._positive.offset(),
+                    expected["offset"],
+                )
+                self.assertEqual(
+                    exponential_histogram_aggregation._positive.len(),
+                    expected["len"],
+                )
+                self.assertEqual(
+                    exponential_histogram_aggregation._positive.at(0),
+                    expected["at_0"],
+                )
+                self.assertEqual(
+                    exponential_histogram_aggregation._positive.at(1),
+                    expected["at_1"],
+                )
+
+    def test_ascending_sequence(self):
+
+        for max_size in [3, 4, 6, 9]:
+            for offset in range(-5, 6):
+                for init_scale in [0, 4]:
+                    self.ascending_sequence_test(max_size, offset, init_scale)
+
+    def ascending_sequence_test(
+        self, max_size: int, offset: int, init_scale: int
+    ):
+
+        for step in range(max_size, max_size * 4):
+
+            exponential_histogram_aggregation = (
+                _ExponentialBucketHistogramAggregation(
+                    Mock(), Mock(), max_size=max_size
+                )
+            )
+
+            if init_scale <= 0:
+                mapping = ExponentMapping(init_scale)
+            else:
+                mapping = LogarithmExponentialHistogramMapping(init_scale)
+
+            min_val = center_val(mapping, offset)
+            max_val = center_val(mapping, offset + step)
+
+            sum_ = 0.0
+
+            for index in range(max_size):
+                value = center_val(mapping, offset + index)
+                exponential_histogram_aggregation.aggregate(
+                    Measurement(value, Mock())
+                )
+                sum_ += value
+
+            self.assertEqual(
+                init_scale, exponential_histogram_aggregation._scale
+            )
+            self.assertEqual(
+                offset, exponential_histogram_aggregation._positive.offset()
+            )
+
+            exponential_histogram_aggregation.aggregate(
+                Measurement(max_val, Mock())
+            )
+            sum_ += max_val
+
+            self.assertNotEqual(
+                0, exponential_histogram_aggregation._positive.at(0)
+            )
+
+            # The maximum-index filled bucket is at or above the mid-point
+            # (otherwise we downscaled too much).
+
+            max_fill = 0
+            total_count = 0
+
+            for index in range(
+                exponential_histogram_aggregation._positive.len()
+            ):
+                total_count += exponential_histogram_aggregation._positive.at(
+                    index
+                )
+                if exponential_histogram_aggregation._positive.at(index) != 0:
+                    max_fill = index
+
+            # FIXME the corresponding Go code is
+            # require.GreaterOrEqual(t, maxFill, uint32(maxSize)/2), make sure
+            # this is actually equivalent.
+            # For the max_size values used here (3, 4, 6 and 9), Go's
+            # uint32(maxSize)/2 is integer (floor) division, which agrees
+            # with int(max_size / 2) below.
+            
+ self.assertGreaterEqual(max_fill, int(max_size / 2)) + + self.assertGreaterEqual(max_size + 1, total_count) + self.assertGreaterEqual( + max_size + 1, exponential_histogram_aggregation._count + ) + self.assertGreaterEqual( + sum_, exponential_histogram_aggregation._sum + ) + + if init_scale <= 0: + mapping = ExponentMapping( + exponential_histogram_aggregation._scale + ) + else: + mapping = LogarithmExponentialHistogramMapping( + exponential_histogram_aggregation._scale + ) + index = mapping.map_to_index(min_val) + + self.assertEqual( + index, exponential_histogram_aggregation._positive.offset() + ) + + index = mapping.map_to_index(max_val) + + self.assertEqual( + index, + exponential_histogram_aggregation._positive.offset() + + exponential_histogram_aggregation._positive.len() + - 1, + ) + + def test_merge_simple_event(self): + + exponential_histogram_aggregation_0 = ( + _ExponentialBucketHistogramAggregation(Mock(), Mock(), max_size=4) + ) + exponential_histogram_aggregation_1 = ( + _ExponentialBucketHistogramAggregation(Mock(), Mock(), max_size=4) + ) + exponential_histogram_aggregation_2 = ( + _ExponentialBucketHistogramAggregation(Mock(), Mock(), max_size=4) + ) + exponential_histogram_aggregation_2._flag = True + + for index in range(4): + value_0 = 2 << index + value_1 = 1 / (1 << index) + + exponential_histogram_aggregation_0.aggregate( + Measurement(value_0, Mock()) + ) + exponential_histogram_aggregation_1.aggregate( + Measurement(value_1, Mock()) + ) + exponential_histogram_aggregation_2.aggregate( + Measurement(value_0, Mock()) + ) + exponential_histogram_aggregation_2.aggregate( + Measurement(value_1, Mock()) + ) + + self.assertEqual(0, exponential_histogram_aggregation_0._scale) + self.assertEqual(0, exponential_histogram_aggregation_1._scale) + self.assertEqual(-1, exponential_histogram_aggregation_2._scale) + + self.assertEqual( + 0, exponential_histogram_aggregation_0._positive.offset() + ) + self.assertEqual( + -4, exponential_histogram_aggregation_1._positive.offset() + ) + self.assertEqual( + -2, exponential_histogram_aggregation_2._positive.offset() + ) + + self.assertEqual( + [1, 1, 1, 1], + get_counts(exponential_histogram_aggregation_0._positive) + ) + self.assertEqual( + [1, 1, 1, 1], + get_counts(exponential_histogram_aggregation_1._positive) + ) + self.assertEqual( + [2, 2, 2, 2], + get_counts(exponential_histogram_aggregation_2._positive) + ) + + exponential_histogram_aggregation_0._merge_from( + exponential_histogram_aggregation_1 + ) + + self.assertEqual(-1, exponential_histogram_aggregation_0._scale) + self.assertEqual(-1, exponential_histogram_aggregation_2._scale) + + self.require_equal( + exponential_histogram_aggregation_0, + exponential_histogram_aggregation_2 + ) + + def test_merge_simple_odd(self): + + exponential_histogram_aggregation_0 = ( + _ExponentialBucketHistogramAggregation(Mock(), Mock(), max_size=4) + ) + exponential_histogram_aggregation_1 = ( + _ExponentialBucketHistogramAggregation(Mock(), Mock(), max_size=4) + ) + exponential_histogram_aggregation_2 = ( + _ExponentialBucketHistogramAggregation(Mock(), Mock(), max_size=4) + ) + exponential_histogram_aggregation_2._flag = True + + for index in range(4): + value_0 = 2 << index + value_1 = 2 / (1 << index) + + exponential_histogram_aggregation_0.aggregate( + Measurement(value_0, Mock()) + ) + exponential_histogram_aggregation_1.aggregate( + Measurement(value_1, Mock()) + ) + exponential_histogram_aggregation_2.aggregate( + Measurement(value_0, Mock()) + ) + 
exponential_histogram_aggregation_2.aggregate( + Measurement(value_1, Mock()) + ) + + self.assertEqual(4, exponential_histogram_aggregation_0._count) + self.assertEqual(4, exponential_histogram_aggregation_1._count) + self.assertEqual(8, exponential_histogram_aggregation_2._count) + + self.assertEqual(0, exponential_histogram_aggregation_0._scale) + self.assertEqual(0, exponential_histogram_aggregation_1._scale) + self.assertEqual(-1, exponential_histogram_aggregation_2._scale) + + self.assertEqual( + 0, exponential_histogram_aggregation_0._positive.offset() + ) + self.assertEqual( + -3, exponential_histogram_aggregation_1._positive.offset() + ) + self.assertEqual( + -2, exponential_histogram_aggregation_2._positive.offset() + ) + + self.assertEqual( + [1, 1, 1, 1], + get_counts(exponential_histogram_aggregation_0._positive) + ) + self.assertEqual( + [1, 1, 1, 1], + get_counts(exponential_histogram_aggregation_1._positive) + ) + self.assertEqual( + [1, 2, 3, 2], + get_counts(exponential_histogram_aggregation_2._positive) + ) + + exponential_histogram_aggregation_0._merge_from( + exponential_histogram_aggregation_1 + ) + + self.assertEqual(-1, exponential_histogram_aggregation_0._scale) + self.assertEqual(-1, exponential_histogram_aggregation_2._scale) + + self.require_equal( + exponential_histogram_aggregation_0, + exponential_histogram_aggregation_2 + ) + + def test_merge_exhaustive(self): + + factor = 1024.0 + count = 16 + + means = [0.0, factor] + stddevs = [1.0, factor] + + for mean in means: + for stddev in stddevs: + seed(77777677777) + + values = [] + + for _ in range(count): + # FIXME random() is not equivalent to the corresponding + # function in the Go implementation. + values.append(mean + random() * stddev) + + for partition in range(1, count): + + for size in [2, 6, 8, 9, 16]: + for incr in [ + int(1), + int(0x100), + int(0x10000), + int(0x100000000), + ]: + self._test_merge_exhaustive( + values[0:partition], + values[partition:count], + size, + incr + ) + + def _test_merge_exhaustive( + self, + values_0: Sequence[float], + values_1: Sequence[float], + size: int, + incr: int + ): + + exponential_histogram_aggregation_0 = ( + _ExponentialBucketHistogramAggregation( + Mock(), Mock(), max_size=size + ) + ) + exponential_histogram_aggregation_1 = ( + _ExponentialBucketHistogramAggregation( + Mock(), Mock(), max_size=size + ) + ) + exponential_histogram_aggregation_2 = ( + _ExponentialBucketHistogramAggregation( + Mock(), Mock(), max_size=size + ) + ) + + for value_0 in values_0: + exponential_histogram_aggregation_0._update_by_incr(value_0, incr) + exponential_histogram_aggregation_2._update_by_incr(value_0, incr) + + for value_1 in values_1: + exponential_histogram_aggregation_1._update_by_incr(value_1, incr) + exponential_histogram_aggregation_2._update_by_incr(value_1, incr) + + exponential_histogram_aggregation_0._merge_from( + exponential_histogram_aggregation_1 + ) + + self.require_equal( + exponential_histogram_aggregation_2, + exponential_histogram_aggregation_0 + ) + + def test_integer_aggregation(self): + exponential_histogram_aggregation_0 = ( + _ExponentialBucketHistogramAggregation( + Mock(), Mock(), max_size=256 + ) + ) + exponential_histogram_aggregation_1 = ( + _ExponentialBucketHistogramAggregation( + Mock(), Mock(), max_size=256 + ) + ) + + expect = 0 + for index in range(2, 257): + expect += index + exponential_histogram_aggregation_0.aggregate( + Measurement(index, Mock()) + ) + exponential_histogram_aggregation_1.aggregate( + Measurement(index, Mock()) + ) + + 
self.assertEqual(expect, exponential_histogram_aggregation_0._sum)
+        self.assertEqual(255, exponential_histogram_aggregation_0._count)
+
+        # Scale should be 5. The upper power-of-two is 256 == 2 ** 8. The
+        # exponential base 2 ** (2 ** -5) raised to the 256th power should be
+        # 256:
+        # 2 ** ((2 ** -5) * 256) =
+        # 2 ** ((2 ** -5) * (2 ** 8)) =
+        # 2 ** (2 ** 3) =
+        # 2 ** 8
+
+        scale = exponential_histogram_aggregation_0._scale
+        self.assertEqual(5, scale)
+
+        def expect_0(buckets: Buckets):
+            self.assertEqual(0, buckets.len())
+
+        def expect_256(buckets: Buckets, factor: int):
+            # The minimum value 2 has index (1 << scale) - 1, which determines
+            # the length and the offset:
+
+            self.assertEqual(256 - ((1 << scale) - 1), buckets.len())
+            self.assertEqual((1 << scale) - 1, buckets.offset())
+
+            for index in range(256):
+                self.assertLessEqual(buckets.at(index), int(6 * factor))
+
+        expect_256(exponential_histogram_aggregation_0._positive, 1)
+        expect_0(exponential_histogram_aggregation_0._negative)
+
+        exponential_histogram_aggregation_0._merge_from(
+            exponential_histogram_aggregation_1
+        )
+        expect_256(exponential_histogram_aggregation_0._positive, 2)
+
+        self.assertEqual(2 * expect, exponential_histogram_aggregation_0._sum)
+
+        exponential_histogram_aggregation_0._clear()
+        exponential_histogram_aggregation_1._clear()
+
+        expect = 0
+
+        for index in range(2, 257):
+            expect -= index
+
+            exponential_histogram_aggregation_0.aggregate(
+                Measurement(-index, Mock())
+            )
+            exponential_histogram_aggregation_1.aggregate(
+                Measurement(-index, Mock())
+            )
+
+        self.assertEqual(expect, exponential_histogram_aggregation_0._sum)
+        self.assertEqual(255, exponential_histogram_aggregation_0._count)
+
+        expect_256(exponential_histogram_aggregation_0._negative, 1)
+        expect_0(exponential_histogram_aggregation_0._positive)
+
+        exponential_histogram_aggregation_0._merge_from(
+            exponential_histogram_aggregation_1
+        )
+
+        expect_256(exponential_histogram_aggregation_0._negative, 2)
+
+        self.assertEqual(2 * expect, exponential_histogram_aggregation_0._sum)
+        self.assertEqual(5, exponential_histogram_aggregation_0._scale)
+
+    def test_reset(self):
+
+        exponential_histogram_aggregation = (
+            _ExponentialBucketHistogramAggregation(
+                Mock(), Mock(), max_size=256
+            )
+        )
+
+        for incr in [0x1, 0x100, 0x10000, 0x100000000, 0x200000000]:
+            exponential_histogram_aggregation._clear()
+
+            self.assertEqual(0, exponential_histogram_aggregation._scale)
+            expect = 0
+
+            for index in range(2, 257):
+                expect += index * incr
+                exponential_histogram_aggregation._update_by_incr(index, incr)
+
+            self.assertEqual(expect, exponential_histogram_aggregation._sum)
+            self.assertEqual(
+                255 * incr, exponential_histogram_aggregation._count
+            )
+
+            # See test_integer_aggregation about why scale is 5, len is
+            # 256 - ((1 << scale) - 1) and offset is (1 << scale) - 1.
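+            # Concretely, with scale == 5: (1 << scale) - 1 == 31, so the
+            # expected len is 256 - 31 == 225 and the expected offset is 31.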
+ scale = exponential_histogram_aggregation._scale + self.assertEqual(5, scale) + + self.assertEqual( + 256 - ((1 << scale) - 1), + exponential_histogram_aggregation._positive.len() + ) + self.assertEqual( + (1 << scale) - 1, + exponential_histogram_aggregation._positive.offset() + ) + + for index in range(0, 256): + self.assertLessEqual( + exponential_histogram_aggregation._positive.at(index), + 6 * incr + ) + + def test_move_into(self): + + exponential_histogram_aggregation_0 = ( + _ExponentialBucketHistogramAggregation( + Mock(), Mock(), max_size=256 + ) + ) + exponential_histogram_aggregation_1 = ( + _ExponentialBucketHistogramAggregation( + Mock(), Mock(), max_size=256 + ) + ) + + expect = 0 + + for index in range(2, 257): + expect += index + exponential_histogram_aggregation_0.aggregate( + Measurement(index, Mock()) + ) + exponential_histogram_aggregation_0.aggregate( + Measurement(0, Mock()) + ) + + exponential_histogram_aggregation_0._swap( + exponential_histogram_aggregation_1 + ) + + self.assertEqual(0, exponential_histogram_aggregation_0._sum) + self.assertEqual(0, exponential_histogram_aggregation_0._count) + self.assertEqual(0, exponential_histogram_aggregation_0._zero_count) + self.assertEqual(0, exponential_histogram_aggregation_0._scale) + + self.assertEqual(expect, exponential_histogram_aggregation_1._sum) + self.assertEqual(255 * 2, exponential_histogram_aggregation_1._count) + self.assertEqual(255, exponential_histogram_aggregation_1._zero_count) + + scale = exponential_histogram_aggregation_1._scale + self.assertEqual(5, scale) + + self.assertEqual( + 256 - ((1 << scale) - 1), + exponential_histogram_aggregation_1._positive.len() + ) + self.assertEqual( + (1 << scale) - 1, + exponential_histogram_aggregation_1._positive.offset() + ) + + for index in range(0, 256): + self.assertLessEqual( + exponential_histogram_aggregation_1._positive.at(index), + 6 + ) + + def test_very_large_numbers(self): + + exponential_histogram_aggregation = ( + _ExponentialBucketHistogramAggregation( + Mock(), Mock(), max_size=2 + ) + ) + + def expect_balanced(count: int): + self.assertEqual( + 2, + exponential_histogram_aggregation._positive.len() + ) + self.assertEqual( + -1, + exponential_histogram_aggregation._positive.offset() + ) + self.assertEqual( + count, + exponential_histogram_aggregation._positive.at(0) + ) + self.assertEqual( + count, + exponential_histogram_aggregation._positive.at(1) + ) + + exponential_histogram_aggregation.aggregate( + Measurement(2 ** -100, Mock()) + ) + exponential_histogram_aggregation.aggregate( + Measurement(2 ** 100, Mock()) + ) + + self.assertLessEqual( + 2 ** 100, (exponential_histogram_aggregation._sum * (1 + 1e-5)) + ) + self.assertGreaterEqual( + 2 ** 100, (exponential_histogram_aggregation._sum * (1 - 1e-5)) + ) + + self.assertEqual(2, exponential_histogram_aggregation._count) + self.assertEqual(-7, exponential_histogram_aggregation._scale) + + expect_balanced(1) + + exponential_histogram_aggregation.aggregate( + Measurement(2 ** -127, Mock()) + ) + exponential_histogram_aggregation.aggregate( + Measurement(2 ** 128, Mock()) + ) + + self.assertLessEqual( + 2 ** 128, (exponential_histogram_aggregation._sum * (1 + 1e-5)) + ) + self.assertGreaterEqual( + 2 ** 128, (exponential_histogram_aggregation._sum * (1 - 1e-5)) + ) + + self.assertEqual(4, exponential_histogram_aggregation._count) + self.assertEqual(-7, exponential_histogram_aggregation._scale) + + expect_balanced(2) + + exponential_histogram_aggregation.aggregate( + Measurement(2 ** -129, Mock()) + 
) + exponential_histogram_aggregation.aggregate( + Measurement(2 ** 255, Mock()) + ) + + self.assertLessEqual( + 2 ** 255, (exponential_histogram_aggregation._sum * (1 + 1e-5)) + ) + self.assertGreaterEqual( + 2 ** 255, (exponential_histogram_aggregation._sum * (1 - 1e-5)) + ) + self.assertEqual(6, exponential_histogram_aggregation._count) + self.assertEqual(-8, exponential_histogram_aggregation._scale) + + expect_balanced(3) + + def test_full_range(self): + + exponential_histogram_aggregation = ( + _ExponentialBucketHistogramAggregation( + Mock(), Mock(), max_size=1 + ) + ) + + exponential_histogram_aggregation.aggregate( + Measurement(float_info.max, Mock()) + ) + exponential_histogram_aggregation.aggregate( + Measurement(1, Mock()) + ) + exponential_histogram_aggregation.aggregate( + Measurement(2 ** -1074, Mock()) + ) + + self.assertEqual( + float_info.max, exponential_histogram_aggregation._sum + ) diff --git a/opentelemetry-sdk/tests/metrics/exponential/test_histogram.py b/opentelemetry-sdk/tests/metrics/exponential/test_histogram.py new file mode 100644 index 00000000000..1c0f1ab0e00 --- /dev/null +++ b/opentelemetry-sdk/tests/metrics/exponential/test_histogram.py @@ -0,0 +1,35 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from unittest import TestCase + +from opentelemetry.sdk.metrics._internal.exponential.aggregation import ( + power_of_two_rounded_up, +) + + +class TestHistogram(TestCase): + def test_power_of_two_rounded_up(self): + + self.assertEqual(power_of_two_rounded_up(2), 2) + self.assertEqual(power_of_two_rounded_up(4), 4) + self.assertEqual(power_of_two_rounded_up(8), 8) + self.assertEqual(power_of_two_rounded_up(16), 16) + self.assertEqual(power_of_two_rounded_up(32), 32) + + self.assertEqual(power_of_two_rounded_up(3), 4) + self.assertEqual(power_of_two_rounded_up(5), 8) + self.assertEqual(power_of_two_rounded_up(9), 16) + self.assertEqual(power_of_two_rounded_up(17), 32) + self.assertEqual(power_of_two_rounded_up(33), 64) diff --git a/opentelemetry-sdk/tests/metrics/exponential/test_logarithm_mapping.py b/opentelemetry-sdk/tests/metrics/exponential/test_logarithm_mapping.py new file mode 100644 index 00000000000..7b20234d061 --- /dev/null +++ b/opentelemetry-sdk/tests/metrics/exponential/test_logarithm_mapping.py @@ -0,0 +1,111 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
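+
+# The logarithm mapping exercised below computes, in effect,
+# index = ceil(log2(value) * (2 ** scale)) - 1; the expected indexes in
+# these tests are consistent with that formula (a summary inferred from
+# the assertions, not new behavior).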
+
+from unittest import TestCase
+
+from opentelemetry.sdk.metrics._internal.exponential.logarithm_mapping import (
+    LogarithmExponentialHistogramMapping,
+)
+
+
+class TestLogarithmMapping(TestCase):
+    def test_invalid_scale(self):
+        with self.assertRaises(Exception):
+            LogarithmExponentialHistogramMapping(-1)
+
+    def test_logarithm_mapping_scale_one(self):
+
+        # The exponentiation factor for this logarithm exponent histogram
+        # mapping is the square root of 2.
+        # Scale 1 means 1 division between every power of two: each upper
+        # boundary is the square root of 2 times its lower boundary.
+        logarithm_exponent_histogram_mapping = (
+            LogarithmExponentialHistogramMapping(1)
+        )
+
+        self.assertEqual(logarithm_exponent_histogram_mapping.scale, 1)
+
+        # Note: Do not test exact boundaries, with the exception of
+        # 1, because we expect errors in that case (e.g.,
+        # MapToIndex(8) returns 5, an off-by-one); see the following
+        # test.
+        self.assertEqual(
+            logarithm_exponent_histogram_mapping.map_to_index(15), 7
+        )
+        self.assertEqual(
+            logarithm_exponent_histogram_mapping.map_to_index(9), 6
+        )
+        self.assertEqual(
+            logarithm_exponent_histogram_mapping.map_to_index(7), 5
+        )
+        self.assertEqual(
+            logarithm_exponent_histogram_mapping.map_to_index(5), 4
+        )
+        self.assertEqual(
+            logarithm_exponent_histogram_mapping.map_to_index(3), 3
+        )
+        self.assertEqual(
+            logarithm_exponent_histogram_mapping.map_to_index(2.5), 2
+        )
+        self.assertEqual(
+            logarithm_exponent_histogram_mapping.map_to_index(1.5), 1
+        )
+        self.assertEqual(
+            logarithm_exponent_histogram_mapping.map_to_index(1.2), 0
+        )
+        # This one is actually an exact test
+        self.assertEqual(
+            logarithm_exponent_histogram_mapping.map_to_index(1), -1
+        )
+        self.assertEqual(
+            logarithm_exponent_histogram_mapping.map_to_index(0.75), -1
+        )
+        self.assertEqual(
+            logarithm_exponent_histogram_mapping.map_to_index(0.55), -2
+        )
+        self.assertEqual(
+            logarithm_exponent_histogram_mapping.map_to_index(0.45), -3
+        )
+
+    def test_logarithm_boundary(self):
+
+        for scale in [1, 2, 3, 4, 10, 15]:
+            logarithm_exponent_histogram_mapping = (
+                LogarithmExponentialHistogramMapping(scale)
+            )
+
+            for index in [-100, -10, -1, 0, 1, 10, 100]:
+
+                lower_boundary = (
+                    logarithm_exponent_histogram_mapping.get_lower_boundary(
+                        index
+                    )
+                )
+
+                mapped_index = (
+                    logarithm_exponent_histogram_mapping.map_to_index(
+                        lower_boundary
+                    )
+                )
+
+                self.assertLessEqual(index - 1, mapped_index)
+                self.assertGreaterEqual(index, mapped_index)
+
+                # FIXME assert that the difference between lower boundary and
+                # rounded lower boundary is very small.
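+                # A sketch of what that assertion could look like. The
+                # reference boundary 2 ** (index / (1 << scale)) and the 1e-9
+                # relative tolerance are assumptions rather than part of this
+                # change, so the sketch is left commented out:
+                #
+                #     reference_boundary = 2 ** (index / (1 << scale))
+                #     self.assertAlmostEqual(
+                #         lower_boundary / reference_boundary, 1.0, delta=1e-9
+                #     )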