From 1f26c26696d90878117256fdbdb4768e2fd27709 Mon Sep 17 00:00:00 2001 From: Michi Hoffmann Date: Wed, 10 Apr 2024 14:32:51 +0200 Subject: [PATCH] Update metric normalization (#1729) Co-authored-by: Alex Bouma --- src/Serializer/EnvelopItems/MetricsItem.php | 119 ++++++++++++------ .../EnvelopeItems/MetricsItemTest.php | 30 +++++ tests/Serializer/PayloadSerializerTest.php | 14 +-- 3 files changed, 117 insertions(+), 46 deletions(-) create mode 100644 tests/Serializer/EnvelopeItems/MetricsItemTest.php diff --git a/src/Serializer/EnvelopItems/MetricsItem.php b/src/Serializer/EnvelopItems/MetricsItem.php index 92776c989..7109f09b7 100644 --- a/src/Serializer/EnvelopItems/MetricsItem.php +++ b/src/Serializer/EnvelopItems/MetricsItem.php @@ -6,6 +6,7 @@ use Sentry\Event; use Sentry\Metrics\MetricsUnit; +use Sentry\Metrics\Types\AbstractType; use Sentry\Serializer\Traits\StacktraceFrameSeralizerTrait; use Sentry\Util\JSON; @@ -19,12 +20,17 @@ class MetricsItem implements EnvelopeItemInterface /** * @var string */ - private const KEY_PATTERN = '/[^a-zA-Z0-9_\/.-]+/i'; + private const KEY_PATTERN = '/[^\w\-.]+/i'; /** * @var string */ - private const VALUE_PATTERN = '/[^\w\d\s_:\/@\.{}\[\]$-]+/i'; + private const UNIT_PATTERN = '/[^\w]+/i'; + + /** + * @var string + */ + private const TAG_KEY_PATTERN = '/[^\w\-.\/]+/i'; public static function toEnvelopeItem(Event $event): string { @@ -37,43 +43,7 @@ public static function toEnvelopeItem(Event $event): string $metricMetaPayload = []; foreach ($metrics as $metric) { - /** - * In case of us adding support for emitting metrics from other namespaces, - * we have to alter the RateLimiter::class to properly handle these - * namespaces. - */ - - // key - my.metric - $line = preg_replace(self::KEY_PATTERN, '_', $metric->getKey()); - - if ($metric->getUnit() !== MetricsUnit::none()) { - // unit - @second - $line .= '@' . $metric->getunit(); - } - - foreach ($metric->serialize() as $value) { - // value - 2:3:4... - $line .= ':' . $value; - } - - // type - |c|, |d|, ... - $line .= '|' . $metric->getType() . '|'; - - $tags = []; - foreach ($metric->getTags() as $key => $value) { - $tags[] = preg_replace(self::KEY_PATTERN, '_', $key) . - ':' . preg_replace(self::VALUE_PATTERN, '', $value); - } - - if (!empty($tags)) { - // tags - #key:value,key:value... - $line .= '#' . implode(',', $tags) . '|'; - } - - // timestamp - T123456789 - $line .= 'T' . $metric->getTimestamp(); - - $statsdPayload[] = $line; + $statsdPayload[] = self::seralizeMetric($metric); if ($metric->hasCodeLocation()) { $metricMetaPayload[$metric->getMri()][] = array_merge( @@ -116,4 +86,75 @@ public static function toEnvelopeItem(Event $event): string $statsdPayload ); } + + public static function seralizeMetric(AbstractType $metric): string + { + /** + * In case of us adding support for emitting metrics from other namespaces, + * we have to alter the RateLimiter::class to properly handle these + * namespaces. + */ + + // key - my.metric + $line = preg_replace(self::KEY_PATTERN, '_', $metric->getKey()); + + if ($metric->getUnit() !== MetricsUnit::none()) { + // unit - @second + $line .= '@' . preg_replace(self::UNIT_PATTERN, '', (string) $metric->getUnit()); + } + + foreach ($metric->serialize() as $value) { + // value - 2:3:4... + $line .= ':' . $value; + } + + // type - |c|, |d|, ... + $line .= '|' . $metric->getType() . '|'; + + $tags = []; + foreach ($metric->getTags() as $key => $value) { + $tags[] = preg_replace(self::TAG_KEY_PATTERN, '', $key) . + ':' . self::escapeTagValues($value); + } + + if (!empty($tags)) { + // tags - #key:value,key:value... + $line .= '#' . implode(',', $tags) . '|'; + } + + // timestamp - T123456789 + $line .= 'T' . $metric->getTimestamp(); + + return $line; + } + + public static function escapeTagValues(string $tagValue): string + { + $result = ''; + + for ($i = 0; $i < mb_strlen($tagValue); ++$i) { + $character = mb_substr($tagValue, $i, 1); + $result .= str_replace( + [ + "\n", + "\r", + "\t", + '\\', + '|', + ',', + ], + [ + '\n', + '\r', + '\t', + '\\\\', + '\u{7c}', + '\u{2c}', + ], + $character + ); + } + + return $result; + } } diff --git a/tests/Serializer/EnvelopeItems/MetricsItemTest.php b/tests/Serializer/EnvelopeItems/MetricsItemTest.php new file mode 100644 index 000000000..399fcf40b --- /dev/null +++ b/tests/Serializer/EnvelopeItems/MetricsItemTest.php @@ -0,0 +1,30 @@ +assertSame('plain', MetricsItem::escapeTagValues('plain')); + $this->assertSame('plain text', MetricsItem::escapeTagValues('plain text')); + $this->assertSame('plain%text', MetricsItem::escapeTagValues('plain%text')); + + // Escape sequences + $this->assertSame('plain \\\\\\\\ text', MetricsItem::escapeTagValues('plain \\\\ text')); + $this->assertSame('plain\\u{2c}text', MetricsItem::escapeTagValues('plain,text')); + $this->assertSame('plain\\u{7c}text', MetricsItem::escapeTagValues('plain|text')); + $this->assertSame('plain 😅', MetricsItem::escapeTagValues('plain 😅')); + + // Escapable control characters + $this->assertSame('plain\\\\ntext', MetricsItem::escapeTagValues("plain\ntext")); + $this->assertSame('plain\\\\rtext', MetricsItem::escapeTagValues("plain\rtext")); + $this->assertSame('plain\\\\ttext', MetricsItem::escapeTagValues("plain\ttext")); + } +} diff --git a/tests/Serializer/PayloadSerializerTest.php b/tests/Serializer/PayloadSerializerTest.php index 2ebd5e823..0e3cd8a4f 100644 --- a/tests/Serializer/PayloadSerializerTest.php +++ b/tests/Serializer/PayloadSerializerTest.php @@ -415,9 +415,9 @@ public static function serializeAsEnvelopeDataProvider(): iterable ]; $counter = new CounterType('counter', 1.0, MetricsUnit::second(), ['foo' => 'bar', 'route' => 'GET /foo'], 1597790835); - $distribution = new DistributionType('distribution', 1.0, MetricsUnit::second(), ['$foo$' => '%bar%'], 1597790835); - $gauge = new GaugeType('gauge', 1.0, MetricsUnit::second(), ['föö' => 'bär'], 1597790835); - $set = new SetType('set', 1.0, MetricsUnit::second(), ['%{key}' => '$value$'], 1597790835); + $distribution = new DistributionType('distribution', 1.0, MetricsUnit::second(), ['foo' => 'bar'], 1597790835); + $gauge = new GaugeType('gauge', 1.0, MetricsUnit::second(), ['foo' => 'bar', 'bar' => 'baz'], 1597790835); + $set = new SetType('set', 1.0, MetricsUnit::second(), ['key' => 'value'], 1597790835); $noTags = new CounterType('no_tags', 1.0, MetricsUnit::second(), [], 1597790835); $event = Event::createMetrics(new EventId('fc9442f5aef34234bb22b9a615e30ccd')); @@ -433,11 +433,11 @@ public static function serializeAsEnvelopeDataProvider(): iterable $event, <<