From 17e92b3e12383e429b5bdaa390cca8add7915143 Mon Sep 17 00:00:00 2001 From: Tony Xiao Date: Thu, 13 Oct 2022 16:08:06 -0400 Subject: [PATCH] ref(profiling): Rename profiling frame keys (#1680) Standardizing the names of the keys in the frames across SDKs so we're going to rename them. --- sentry_sdk/profiler.py | 93 ++++++++++---- tests/test_profiler.py | 274 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 338 insertions(+), 29 deletions(-) diff --git a/sentry_sdk/profiler.py b/sentry_sdk/profiler.py index 5120be2420..aafb4129bb 100644 --- a/sentry_sdk/profiler.py +++ b/sentry_sdk/profiler.py @@ -29,6 +29,8 @@ from sentry_sdk._types import MYPY from sentry_sdk.utils import nanosecond_time +RawFrameData = namedtuple("RawFrameData", ["function", "abs_path", "lineno"]) + if MYPY: from types import FrameType from typing import Any @@ -39,10 +41,46 @@ from typing import List from typing import Optional from typing import Sequence + from typing import Tuple + from typing_extensions import TypedDict import sentry_sdk.tracing - -FrameData = namedtuple("FrameData", ["name", "file", "line"]) + RawSampleData = Tuple[int, Sequence[Tuple[int, Sequence[RawFrameData]]]] + + ProcessedStack = Tuple[int, ...] + + ProcessedSample = TypedDict( + "ProcessedSample", + { + "elapsed_since_start_ns": str, + "thread_id": str, + "stack_id": int, + }, + ) + + ProcessedFrame = TypedDict( + "ProcessedFrame", + { + "function": str, + "filename": str, + "lineno": int, + }, + ) + + ProcessedThreadMetadata = TypedDict( + "ProcessedThreadMetadata", + {"name": str}, + ) + + ProcessedProfile = TypedDict( + "ProcessedProfile", + { + "frames": List[ProcessedFrame], + "stacks": List[ProcessedStack], + "samples": List[ProcessedSample], + "thread_metadata": Dict[str, ProcessedThreadMetadata], + }, + ) _sample_buffer = None # type: Optional[SampleBuffer] @@ -132,7 +170,7 @@ def _sample_stack(*args, **kwargs): def extract_stack(frame, max_stack_depth=MAX_STACK_DEPTH): - # type: (Optional[FrameType], int) -> Sequence[FrameData] + # type: (Optional[FrameType], int) -> Sequence[RawFrameData] """ Extracts the stack starting the specified frame. The extracted stack assumes the specified frame is the top of the stack, and works back @@ -149,10 +187,10 @@ def extract_stack(frame, max_stack_depth=MAX_STACK_DEPTH): frame = frame.f_back return [ - FrameData( - name=get_frame_name(frame), - file=frame.f_code.co_filename, - line=frame.f_lineno, + RawFrameData( + function=get_frame_name(frame), + abs_path=frame.f_code.co_filename, + lineno=frame.f_lineno, ) for frame in stack ] @@ -268,12 +306,12 @@ class SampleBuffer(object): def __init__(self, capacity): # type: (int) -> None - self.buffer = [None] * capacity - self.capacity = capacity - self.idx = 0 + self.buffer = [None] * capacity # type: List[Optional[RawSampleData]] + self.capacity = capacity # type: int + self.idx = 0 # type: int def write(self, sample): - # type: (Any) -> None + # type: (RawSampleData) -> None """ Writing to the buffer is not thread safe. There is the possibility that parallel writes will overwrite one another. @@ -290,12 +328,12 @@ def write(self, sample): self.idx = (idx + 1) % self.capacity def slice_profile(self, start_ns, stop_ns): - # type: (int, int) -> Dict[str, Any] - samples = [] # type: List[Any] - stacks = dict() # type: Dict[Any, int] - stacks_list = list() # type: List[Any] - frames = dict() # type: Dict[FrameData, int] - frames_list = list() # type: List[Any] + # type: (int, int) -> ProcessedProfile + samples = [] # type: List[ProcessedSample] + stacks = dict() # type: Dict[ProcessedStack, int] + stacks_list = list() # type: List[ProcessedStack] + frames = dict() # type: Dict[RawFrameData, int] + frames_list = list() # type: List[ProcessedFrame] # TODO: This is doing an naive iteration over the # buffer and extracting the appropriate samples. @@ -311,10 +349,6 @@ def slice_profile(self, start_ns, stop_ns): continue for tid, stack in raw_sample[1]: - sample = { - "elapsed_since_start_ns": str(ts - start_ns), - "thread_id": str(tid), - } current_stack = [] for frame in stack: @@ -322,9 +356,9 @@ def slice_profile(self, start_ns, stop_ns): frames[frame] = len(frames) frames_list.append( { - "name": frame.name, - "file": frame.file, - "line": frame.line, + "function": frame.function, + "filename": frame.abs_path, + "lineno": frame.lineno, } ) current_stack.append(frames[frame]) @@ -334,8 +368,13 @@ def slice_profile(self, start_ns, stop_ns): stacks[current_stack] = len(stacks) stacks_list.append(current_stack) - sample["stack_id"] = stacks[current_stack] - samples.append(sample) + samples.append( + { + "elapsed_since_start_ns": str(ts - start_ns), + "thread_id": str(tid), + "stack_id": stacks[current_stack], + } + ) # This collects the thread metadata at the end of a profile. Doing it # this way means that any threads that terminate before the profile ends @@ -345,7 +384,7 @@ def slice_profile(self, start_ns, stop_ns): "name": thread.name, } for thread in threading.enumerate() - } + } # type: Dict[str, ProcessedThreadMetadata] return { "stacks": stacks_list, diff --git a/tests/test_profiler.py b/tests/test_profiler.py index 8b5d1fb5a6..2cd50e9a86 100644 --- a/tests/test_profiler.py +++ b/tests/test_profiler.py @@ -7,6 +7,8 @@ import pytest from sentry_sdk.profiler import ( + RawFrameData, + SampleBuffer, SleepScheduler, extract_stack, get_frame_name, @@ -149,11 +151,11 @@ def test_extract_stack_with_max_depth(depth, max_stack_depth, actual_depth): assert len(stack) == base_stack_depth + actual_depth for i in range(actual_depth): - assert stack[i].name == "get_frame", i + assert stack[i].function == "get_frame", i # index 0 contains the inner most frame on the stack, so the lamdba # should be at index `actual_depth` - assert stack[actual_depth].name == "", actual_depth + assert stack[actual_depth].function == "", actual_depth def get_scheduler_threads(scheduler): @@ -201,3 +203,271 @@ def sampler(): # there should be 0 scheduler threads now because they stopped assert len(get_scheduler_threads(scheduler)) == 0 + + +current_thread = threading.current_thread() +thread_metadata = { + str(current_thread.ident): { + "name": current_thread.name, + }, +} + + +@pytest.mark.parametrize( + ("capacity", "start_ns", "stop_ns", "samples", "profile"), + [ + pytest.param( + 10, + 0, + 1, + [], + { + "frames": [], + "samples": [], + "stacks": [], + "thread_metadata": thread_metadata, + }, + id="empty", + ), + pytest.param( + 10, + 0, + 1, + [(2, [(1, [RawFrameData("name", "file", 1)])])], + { + "frames": [], + "samples": [], + "stacks": [], + "thread_metadata": thread_metadata, + }, + id="single sample out of range", + ), + pytest.param( + 10, + 0, + 1, + [(0, [(1, [RawFrameData("name", "file", 1)])])], + { + "frames": [ + { + "function": "name", + "filename": "file", + "lineno": 1, + }, + ], + "samples": [ + { + "elapsed_since_start_ns": "0", + "thread_id": "1", + "stack_id": 0, + }, + ], + "stacks": [(0,)], + "thread_metadata": thread_metadata, + }, + id="single sample in range", + ), + pytest.param( + 10, + 0, + 1, + [ + (0, [(1, [RawFrameData("name", "file", 1)])]), + (1, [(1, [RawFrameData("name", "file", 1)])]), + ], + { + "frames": [ + { + "function": "name", + "filename": "file", + "lineno": 1, + }, + ], + "samples": [ + { + "elapsed_since_start_ns": "0", + "thread_id": "1", + "stack_id": 0, + }, + { + "elapsed_since_start_ns": "1", + "thread_id": "1", + "stack_id": 0, + }, + ], + "stacks": [(0,)], + "thread_metadata": thread_metadata, + }, + id="two identical stacks", + ), + pytest.param( + 10, + 0, + 1, + [ + (0, [(1, [RawFrameData("name1", "file", 1)])]), + ( + 1, + [ + ( + 1, + [ + RawFrameData("name1", "file", 1), + RawFrameData("name2", "file", 2), + ], + ) + ], + ), + ], + { + "frames": [ + { + "function": "name1", + "filename": "file", + "lineno": 1, + }, + { + "function": "name2", + "filename": "file", + "lineno": 2, + }, + ], + "samples": [ + { + "elapsed_since_start_ns": "0", + "thread_id": "1", + "stack_id": 0, + }, + { + "elapsed_since_start_ns": "1", + "thread_id": "1", + "stack_id": 1, + }, + ], + "stacks": [(0,), (0, 1)], + "thread_metadata": thread_metadata, + }, + id="two identical frames", + ), + pytest.param( + 10, + 0, + 1, + [ + ( + 0, + [ + ( + 1, + [ + RawFrameData("name1", "file", 1), + RawFrameData("name2", "file", 2), + ], + ) + ], + ), + ( + 1, + [ + ( + 1, + [ + RawFrameData("name3", "file", 3), + RawFrameData("name4", "file", 4), + ], + ) + ], + ), + ], + { + "frames": [ + { + "function": "name1", + "filename": "file", + "lineno": 1, + }, + { + "function": "name2", + "filename": "file", + "lineno": 2, + }, + { + "function": "name3", + "filename": "file", + "lineno": 3, + }, + { + "function": "name4", + "filename": "file", + "lineno": 4, + }, + ], + "samples": [ + { + "elapsed_since_start_ns": "0", + "thread_id": "1", + "stack_id": 0, + }, + { + "elapsed_since_start_ns": "1", + "thread_id": "1", + "stack_id": 1, + }, + ], + "stacks": [(0, 1), (2, 3)], + "thread_metadata": thread_metadata, + }, + id="two unique stacks", + ), + pytest.param( + 1, + 0, + 1, + [ + (0, [(1, [RawFrameData("name1", "file", 1)])]), + ( + 1, + [ + ( + 1, + [ + RawFrameData("name2", "file", 2), + RawFrameData("name3", "file", 3), + ], + ) + ], + ), + ], + { + "frames": [ + { + "function": "name2", + "filename": "file", + "lineno": 2, + }, + { + "function": "name3", + "filename": "file", + "lineno": 3, + }, + ], + "samples": [ + { + "elapsed_since_start_ns": "1", + "thread_id": "1", + "stack_id": 0, + }, + ], + "stacks": [(0, 1)], + "thread_metadata": thread_metadata, + }, + id="wraps around buffer", + ), + ], +) +def test_sample_buffer(capacity, start_ns, stop_ns, samples, profile): + buffer = SampleBuffer(capacity) + for sample in samples: + buffer.write(sample) + result = buffer.slice_profile(start_ns, stop_ns) + assert result == profile