From e4bd4dd2a8e404846299664ee7fd3a2d4fe2a32f Mon Sep 17 00:00:00 2001 From: Carl Friedrich Bolz-Tereick Date: Mon, 30 May 2022 13:51:32 +0200 Subject: [PATCH 1/4] cache the bound method of _trace on self this speeds up pure python tracing because we don't have to re-create a bound method object all the time --- coverage/pytracer.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/coverage/pytracer.py b/coverage/pytracer.py index 08050b586..a0665ad0a 100644 --- a/coverage/pytracer.py +++ b/coverage/pytracer.py @@ -67,6 +67,11 @@ def __init__(self): # On exit, self.in_atexit = True atexit.register(setattr, self, 'in_atexit', True) + # cache a bound method on the instance, so that we don't have to + # re-create a bound method object all the time + self._cached_bound_method_trace = self._trace + + def __repr__(self): return "".format( id(self), @@ -105,7 +110,7 @@ def _trace(self, frame, event, arg_unused): #self.log(":", frame.f_code.co_filename, frame.f_lineno, frame.f_code.co_name + "()", event) - if (self.stopped and sys.gettrace() == self._trace): # pylint: disable=comparison-with-callable + if (self.stopped and sys.gettrace() == self._cached_bound_method_trace): # pylint: disable=comparison-with-callable # The PyTrace.stop() method has been called, possibly by another # thread, let's deactivate ourselves now. if 0: @@ -225,7 +230,7 @@ def _trace(self, frame, event, arg_unused): if self.started_context: self.context = None self.switch_context(None) - return self._trace + return self._cached_bound_method_trace def start(self): """Start this Tracer. @@ -243,10 +248,10 @@ def start(self): # function, but we are marked as running again, so maybe it # will be ok? 
#self.log("~", "starting on different threads") - return self._trace + return self._cached_bound_method_trace - sys.settrace(self._trace) - return self._trace + sys.settrace(self._cached_bound_method_trace) + return self._cached_bound_method_trace def stop(self): """Stop this Tracer.""" @@ -271,9 +276,9 @@ def stop(self): # so don't warn if we are in atexit on PyPy and the trace function # has changed to None. dont_warn = (env.PYPY and env.PYPYVERSION >= (5, 4) and self.in_atexit and tf is None) - if (not dont_warn) and tf != self._trace: # pylint: disable=comparison-with-callable + if (not dont_warn) and tf != self._cached_bound_method_trace: # pylint: disable=comparison-with-callable self.warn( - f"Trace function changed, data is likely wrong: {tf!r} != {self._trace!r}", + f"Trace function changed, data is likely wrong: {tf!r} != {self._cached_bound_method_trace!r}", slug="trace-changed", ) From 85d482b3b9b00af641112bd588fad88b5ee1064a Mon Sep 17 00:00:00 2001 From: Carl Friedrich Bolz-Tereick Date: Mon, 30 May 2022 13:59:41 +0200 Subject: [PATCH 2/4] optimize checking whether a file should be traced the optimization works based on the following heuristic: in a majority of cases, functions call other functions in the same file. In that situation we don't have to re-check whether we should trace the file --- coverage/pytracer.py | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/coverage/pytracer.py b/coverage/pytracer.py index a0665ad0a..a9db42f52 100644 --- a/coverage/pytracer.py +++ b/coverage/pytracer.py @@ -151,20 +151,29 @@ def _trace(self, frame, event, arg_unused): self.started_context, ) ) + + # improve tracing performance: when calling a function, both caller + # and callee are often within the same file. 
If that's the case, we + # don't have to re-check whether to trace the corresponding + # function (which is a little bit expensive since it involves + # dictionary lookups) filename = frame.f_code.co_filename - self.cur_file_name = filename - disp = self.should_trace_cache.get(filename) - if disp is None: - disp = self.should_trace(filename, frame) - self.should_trace_cache[filename] = disp - - self.cur_file_data = None - if disp.trace: - tracename = disp.source_filename - if tracename not in self.data: - self.data[tracename] = set() - self.cur_file_data = self.data[tracename] - else: + if filename != self.cur_file_name: + self.cur_file_name = filename + disp = self.should_trace_cache.get(filename) + if disp is None: + disp = self.should_trace(filename, frame) + self.should_trace_cache[filename] = disp + + self.cur_file_data = None + if disp.trace: + tracename = disp.source_filename + if tracename not in self.data: + self.data[tracename] = set() + self.cur_file_data = self.data[tracename] + else: + frame.f_trace_lines = False + elif not self.cur_file_data: + frame.f_trace_lines = False # The call event is really a "start frame" event, and happens for From 893aed46b352f8dc08e27ad1b3e7dc6493898284 Mon Sep 17 00:00:00 2001 From: Carl Friedrich Bolz-Tereick Date: Mon, 30 May 2022 14:17:39 +0200 Subject: [PATCH 3/4] fix optimization in the presence of contexts --- coverage/pytracer.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/coverage/pytracer.py b/coverage/pytracer.py index a9db42f52..e83c8b369 100644 --- a/coverage/pytracer.py +++ b/coverage/pytracer.py @@ -134,12 +134,13 @@ def _trace(self, frame, event, arg_unused): context_maybe = self.should_start_context(frame) if context_maybe is not None: self.context = context_maybe - self.started_context = True + started_context = True self.switch_context(self.context) else: - self.started_context = False + started_context = False else: - self.started_context = False + started_context = 
False + self.started_context = started_context # Entering a new frame. Decide if we should trace in this file. self._activity = True @@ -148,17 +149,18 @@ def _trace(self, frame, event, arg_unused): self.cur_file_data, self.cur_file_name, self.last_line, - self.started_context, + started_context, ) ) - # improve tracing performance: when calling a function, both caller + # Improve tracing performance: when calling a function, both caller # and callee are often within the same file. If that's the case, we # don't have to re-check whether to trace the corresponding # function (which is a little bit expensive since it involves - # dictionary lookups) + # dictionary lookups). This optimization is only correct if we + # didn't start a context. filename = frame.f_code.co_filename - if filename != self.cur_file_name: + if filename != self.cur_file_name or started_context: self.cur_file_name = filename disp = self.should_trace_cache.get(filename) if disp is None: From 6212c1aa3352ce0a088fb73e3db6f811885180cb Mon Sep 17 00:00:00 2001 From: Carl Friedrich Bolz-Tereick Date: Mon, 30 May 2022 15:35:54 +0200 Subject: [PATCH 4/4] fix too long line --- coverage/pytracer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/coverage/pytracer.py b/coverage/pytracer.py index e83c8b369..4389c9ed7 100644 --- a/coverage/pytracer.py +++ b/coverage/pytracer.py @@ -289,7 +289,8 @@ def stop(self): dont_warn = (env.PYPY and env.PYPYVERSION >= (5, 4) and self.in_atexit and tf is None) if (not dont_warn) and tf != self._cached_bound_method_trace: # pylint: disable=comparison-with-callable self.warn( - f"Trace function changed, data is likely wrong: {tf!r} != {self._cached_bound_method_trace!r}", + f"Trace function changed, data is likely wrong: " + f"{tf!r} != {self._cached_bound_method_trace!r}", slug="trace-changed", )