From 17e92b3e12383e429b5bdaa390cca8add7915143 Mon Sep 17 00:00:00 2001
From: Tony Xiao <txiao@sentry.io>
Date: Thu, 13 Oct 2022 16:08:06 -0400
Subject: [PATCH] ref(profiling): Rename profiling frame keys (#1680)

Standardizing the names of the keys in the frames across SDKs so we're going to
rename them.
---
 sentry_sdk/profiler.py |  93 ++++++++++----
 tests/test_profiler.py | 274 ++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 338 insertions(+), 29 deletions(-)

diff --git a/sentry_sdk/profiler.py b/sentry_sdk/profiler.py
index 5120be2420..aafb4129bb 100644
--- a/sentry_sdk/profiler.py
+++ b/sentry_sdk/profiler.py
@@ -29,6 +29,8 @@
 from sentry_sdk._types import MYPY
 from sentry_sdk.utils import nanosecond_time
 
+RawFrameData = namedtuple("RawFrameData", ["function", "abs_path", "lineno"])
+
 if MYPY:
     from types import FrameType
     from typing import Any
@@ -39,10 +41,46 @@
     from typing import List
     from typing import Optional
     from typing import Sequence
+    from typing import Tuple
+    from typing_extensions import TypedDict
     import sentry_sdk.tracing
 
-
-FrameData = namedtuple("FrameData", ["name", "file", "line"])
+    RawSampleData = Tuple[int, Sequence[Tuple[int, Sequence[RawFrameData]]]]
+
+    ProcessedStack = Tuple[int, ...]
+
+    ProcessedSample = TypedDict(
+        "ProcessedSample",
+        {
+            "elapsed_since_start_ns": str,
+            "thread_id": str,
+            "stack_id": int,
+        },
+    )
+
+    ProcessedFrame = TypedDict(
+        "ProcessedFrame",
+        {
+            "function": str,
+            "filename": str,
+            "lineno": int,
+        },
+    )
+
+    ProcessedThreadMetadata = TypedDict(
+        "ProcessedThreadMetadata",
+        {"name": str},
+    )
+
+    ProcessedProfile = TypedDict(
+        "ProcessedProfile",
+        {
+            "frames": List[ProcessedFrame],
+            "stacks": List[ProcessedStack],
+            "samples": List[ProcessedSample],
+            "thread_metadata": Dict[str, ProcessedThreadMetadata],
+        },
+    )
 
 
 _sample_buffer = None  # type: Optional[SampleBuffer]
@@ -132,7 +170,7 @@ def _sample_stack(*args, **kwargs):
 
 
 def extract_stack(frame, max_stack_depth=MAX_STACK_DEPTH):
-    # type: (Optional[FrameType], int) -> Sequence[FrameData]
+    # type: (Optional[FrameType], int) -> Sequence[RawFrameData]
     """
     Extracts the stack starting the specified frame. The extracted stack
     assumes the specified frame is the top of the stack, and works back
@@ -149,10 +187,10 @@ def extract_stack(frame, max_stack_depth=MAX_STACK_DEPTH):
         frame = frame.f_back
 
     return [
-        FrameData(
-            name=get_frame_name(frame),
-            file=frame.f_code.co_filename,
-            line=frame.f_lineno,
+        RawFrameData(
+            function=get_frame_name(frame),
+            abs_path=frame.f_code.co_filename,
+            lineno=frame.f_lineno,
         )
         for frame in stack
     ]
@@ -268,12 +306,12 @@ class SampleBuffer(object):
     def __init__(self, capacity):
         # type: (int) -> None
 
-        self.buffer = [None] * capacity
-        self.capacity = capacity
-        self.idx = 0
+        self.buffer = [None] * capacity  # type: List[Optional[RawSampleData]]
+        self.capacity = capacity  # type: int
+        self.idx = 0  # type: int
 
     def write(self, sample):
-        # type: (Any) -> None
+        # type: (RawSampleData) -> None
         """
         Writing to the buffer is not thread safe. There is the possibility
         that parallel writes will overwrite one another.
@@ -290,12 +328,12 @@ def write(self, sample):
         self.idx = (idx + 1) % self.capacity
 
     def slice_profile(self, start_ns, stop_ns):
-        # type: (int, int) -> Dict[str, Any]
-        samples = []  # type: List[Any]
-        stacks = dict()  # type: Dict[Any, int]
-        stacks_list = list()  # type: List[Any]
-        frames = dict()  # type: Dict[FrameData, int]
-        frames_list = list()  # type: List[Any]
+        # type: (int, int) -> ProcessedProfile
+        samples = []  # type: List[ProcessedSample]
+        stacks = dict()  # type: Dict[ProcessedStack, int]
+        stacks_list = list()  # type: List[ProcessedStack]
+        frames = dict()  # type: Dict[RawFrameData, int]
+        frames_list = list()  # type: List[ProcessedFrame]
 
         # TODO: This is doing an naive iteration over the
         # buffer and extracting the appropriate samples.
@@ -311,10 +349,6 @@ def slice_profile(self, start_ns, stop_ns):
                 continue
 
             for tid, stack in raw_sample[1]:
-                sample = {
-                    "elapsed_since_start_ns": str(ts - start_ns),
-                    "thread_id": str(tid),
-                }
                 current_stack = []
 
                 for frame in stack:
@@ -322,9 +356,9 @@ def slice_profile(self, start_ns, stop_ns):
                         frames[frame] = len(frames)
                         frames_list.append(
                             {
-                                "name": frame.name,
-                                "file": frame.file,
-                                "line": frame.line,
+                                "function": frame.function,
+                                "filename": frame.abs_path,
+                                "lineno": frame.lineno,
                             }
                         )
                     current_stack.append(frames[frame])
@@ -334,8 +368,13 @@ def slice_profile(self, start_ns, stop_ns):
                     stacks[current_stack] = len(stacks)
                     stacks_list.append(current_stack)
 
-                sample["stack_id"] = stacks[current_stack]
-                samples.append(sample)
+                samples.append(
+                    {
+                        "elapsed_since_start_ns": str(ts - start_ns),
+                        "thread_id": str(tid),
+                        "stack_id": stacks[current_stack],
+                    }
+                )
 
         # This collects the thread metadata at the end of a profile. Doing it
         # this way means that any threads that terminate before the profile ends
@@ -345,7 +384,7 @@ def slice_profile(self, start_ns, stop_ns):
                 "name": thread.name,
             }
             for thread in threading.enumerate()
-        }
+        }  # type: Dict[str, ProcessedThreadMetadata]
 
         return {
             "stacks": stacks_list,
diff --git a/tests/test_profiler.py b/tests/test_profiler.py
index 8b5d1fb5a6..2cd50e9a86 100644
--- a/tests/test_profiler.py
+++ b/tests/test_profiler.py
@@ -7,6 +7,8 @@
 import pytest
 
 from sentry_sdk.profiler import (
+    RawFrameData,
+    SampleBuffer,
     SleepScheduler,
     extract_stack,
     get_frame_name,
@@ -149,11 +151,11 @@ def test_extract_stack_with_max_depth(depth, max_stack_depth, actual_depth):
     assert len(stack) == base_stack_depth + actual_depth
 
     for i in range(actual_depth):
-        assert stack[i].name == "get_frame", i
+        assert stack[i].function == "get_frame", i
 
     # index 0 contains the inner most frame on the stack, so the lamdba
     # should be at index `actual_depth`
-    assert stack[actual_depth].name == "<lambda>", actual_depth
+    assert stack[actual_depth].function == "<lambda>", actual_depth
 
 
 def get_scheduler_threads(scheduler):
@@ -201,3 +203,271 @@ def sampler():
 
     # there should be 0 scheduler threads now because they stopped
     assert len(get_scheduler_threads(scheduler)) == 0
+
+
+current_thread = threading.current_thread()
+thread_metadata = {
+    str(current_thread.ident): {
+        "name": current_thread.name,
+    },
+}
+
+
+@pytest.mark.parametrize(
+    ("capacity", "start_ns", "stop_ns", "samples", "profile"),
+    [
+        pytest.param(
+            10,
+            0,
+            1,
+            [],
+            {
+                "frames": [],
+                "samples": [],
+                "stacks": [],
+                "thread_metadata": thread_metadata,
+            },
+            id="empty",
+        ),
+        pytest.param(
+            10,
+            0,
+            1,
+            [(2, [(1, [RawFrameData("name", "file", 1)])])],
+            {
+                "frames": [],
+                "samples": [],
+                "stacks": [],
+                "thread_metadata": thread_metadata,
+            },
+            id="single sample out of range",
+        ),
+        pytest.param(
+            10,
+            0,
+            1,
+            [(0, [(1, [RawFrameData("name", "file", 1)])])],
+            {
+                "frames": [
+                    {
+                        "function": "name",
+                        "filename": "file",
+                        "lineno": 1,
+                    },
+                ],
+                "samples": [
+                    {
+                        "elapsed_since_start_ns": "0",
+                        "thread_id": "1",
+                        "stack_id": 0,
+                    },
+                ],
+                "stacks": [(0,)],
+                "thread_metadata": thread_metadata,
+            },
+            id="single sample in range",
+        ),
+        pytest.param(
+            10,
+            0,
+            1,
+            [
+                (0, [(1, [RawFrameData("name", "file", 1)])]),
+                (1, [(1, [RawFrameData("name", "file", 1)])]),
+            ],
+            {
+                "frames": [
+                    {
+                        "function": "name",
+                        "filename": "file",
+                        "lineno": 1,
+                    },
+                ],
+                "samples": [
+                    {
+                        "elapsed_since_start_ns": "0",
+                        "thread_id": "1",
+                        "stack_id": 0,
+                    },
+                    {
+                        "elapsed_since_start_ns": "1",
+                        "thread_id": "1",
+                        "stack_id": 0,
+                    },
+                ],
+                "stacks": [(0,)],
+                "thread_metadata": thread_metadata,
+            },
+            id="two identical stacks",
+        ),
+        pytest.param(
+            10,
+            0,
+            1,
+            [
+                (0, [(1, [RawFrameData("name1", "file", 1)])]),
+                (
+                    1,
+                    [
+                        (
+                            1,
+                            [
+                                RawFrameData("name1", "file", 1),
+                                RawFrameData("name2", "file", 2),
+                            ],
+                        )
+                    ],
+                ),
+            ],
+            {
+                "frames": [
+                    {
+                        "function": "name1",
+                        "filename": "file",
+                        "lineno": 1,
+                    },
+                    {
+                        "function": "name2",
+                        "filename": "file",
+                        "lineno": 2,
+                    },
+                ],
+                "samples": [
+                    {
+                        "elapsed_since_start_ns": "0",
+                        "thread_id": "1",
+                        "stack_id": 0,
+                    },
+                    {
+                        "elapsed_since_start_ns": "1",
+                        "thread_id": "1",
+                        "stack_id": 1,
+                    },
+                ],
+                "stacks": [(0,), (0, 1)],
+                "thread_metadata": thread_metadata,
+            },
+            id="two identical frames",
+        ),
+        pytest.param(
+            10,
+            0,
+            1,
+            [
+                (
+                    0,
+                    [
+                        (
+                            1,
+                            [
+                                RawFrameData("name1", "file", 1),
+                                RawFrameData("name2", "file", 2),
+                            ],
+                        )
+                    ],
+                ),
+                (
+                    1,
+                    [
+                        (
+                            1,
+                            [
+                                RawFrameData("name3", "file", 3),
+                                RawFrameData("name4", "file", 4),
+                            ],
+                        )
+                    ],
+                ),
+            ],
+            {
+                "frames": [
+                    {
+                        "function": "name1",
+                        "filename": "file",
+                        "lineno": 1,
+                    },
+                    {
+                        "function": "name2",
+                        "filename": "file",
+                        "lineno": 2,
+                    },
+                    {
+                        "function": "name3",
+                        "filename": "file",
+                        "lineno": 3,
+                    },
+                    {
+                        "function": "name4",
+                        "filename": "file",
+                        "lineno": 4,
+                    },
+                ],
+                "samples": [
+                    {
+                        "elapsed_since_start_ns": "0",
+                        "thread_id": "1",
+                        "stack_id": 0,
+                    },
+                    {
+                        "elapsed_since_start_ns": "1",
+                        "thread_id": "1",
+                        "stack_id": 1,
+                    },
+                ],
+                "stacks": [(0, 1), (2, 3)],
+                "thread_metadata": thread_metadata,
+            },
+            id="two unique stacks",
+        ),
+        pytest.param(
+            1,
+            0,
+            1,
+            [
+                (0, [(1, [RawFrameData("name1", "file", 1)])]),
+                (
+                    1,
+                    [
+                        (
+                            1,
+                            [
+                                RawFrameData("name2", "file", 2),
+                                RawFrameData("name3", "file", 3),
+                            ],
+                        )
+                    ],
+                ),
+            ],
+            {
+                "frames": [
+                    {
+                        "function": "name2",
+                        "filename": "file",
+                        "lineno": 2,
+                    },
+                    {
+                        "function": "name3",
+                        "filename": "file",
+                        "lineno": 3,
+                    },
+                ],
+                "samples": [
+                    {
+                        "elapsed_since_start_ns": "1",
+                        "thread_id": "1",
+                        "stack_id": 0,
+                    },
+                ],
+                "stacks": [(0, 1)],
+                "thread_metadata": thread_metadata,
+            },
+            id="wraps around buffer",
+        ),
+    ],
+)
+def test_sample_buffer(capacity, start_ns, stop_ns, samples, profile):
+    buffer = SampleBuffer(capacity)
+    for sample in samples:
+        buffer.write(sample)
+    result = buffer.slice_profile(start_ns, stop_ns)
+    assert result == profile