Skip to content

Commit

Permalink
Add EXPLAIN output to sql sentry breadcrumbs (#499)
Browse files Browse the repository at this point in the history
* Add EXPLAIN output to sql sentry breadcrumbs

* Put EXPLAIN queries behind a config, defaulting to false

* enable explain breadcrumbs when x-debug is active

* pass X-Debug header downstream
  • Loading branch information
Simon Davy committed Feb 12, 2020
1 parent 93034dd commit 3a90b62
Show file tree
Hide file tree
Showing 9 changed files with 162 additions and 35 deletions.
11 changes: 11 additions & 0 deletions talisker/config.py
Expand Up @@ -129,6 +129,7 @@ class Config():
'TALISKER_NETWORKS': [],
'TALISKER_ID_HEADER': 'X-Request-Id',
'TALISKER_DEADLINE_HEADER': 'X-Request-Deadline',
'TALISKER_EXPLAIN_SQL': False,
}

Metadata = collections.namedtuple(
Expand Down Expand Up @@ -263,6 +264,16 @@ def slowquery_threshold(self, raw_name):
"""
return force_int(self[raw_name])

@config_property('TALISKER_EXPLAIN_SQL')
def explain_sql(self, raw_name):
"""Include EXPLAIN plans in sql sentry breadcrumbs. Defaults to false.
When enabled, this will issue extra queries to the db when generating
sentry reports. The EXPLAIN queries will be quick to execute, as it
doesn't actually run the query, but still, caution is advised.
"""
return self.is_active(raw_name)

@config_property('TALISKER_SOFT_REQUEST_TIMEOUT')
def soft_request_timeout(self, raw_name):
"""Set the threshold (in ms) over which WSGI requests will report a
Expand Down
47 changes: 35 additions & 12 deletions talisker/postgresql.py
Expand Up @@ -71,6 +71,7 @@ def prettify_sql(sql):
class TaliskerConnection(connection):
_logger = None
_threshold = None
_explain = None
_safe_dsn = None
_safe_dsn_format = '{user}@{host}:{port}/{dbname}'

Expand Down Expand Up @@ -99,32 +100,51 @@ def query_threshold(self):
self._threshold = talisker.get_config().slowquery_threshold
return self._threshold

@property
def explain_breadcrumbs(self):
if self._explain is None:
self._explain = talisker.get_config().explain_sql
return self._explain

def cursor(self, *args, **kwargs):
kwargs.setdefault('cursor_factory', TaliskerCursor)
return super().cursor(*args, **kwargs)

def _format_query(self, query):
def _format_query(self, query, vars):
if callable(query):
query = query()
query = prettify_sql(query)
query = FILTERED if query is None else query
if query is None or vars is None:
return FILTERED
return query

def _record(self, msg, query, duration, extra={}):
def _record(self, msg, query, vars, duration, extra={}):
talisker.Context.track('sql', duration)

qdata = collections.OrderedDict()
qdata['duration_ms'] = duration
qdata['connection'] = self.safe_dsn
qdata.update(extra)

# grab a reference here, where super() works
base_connection = super()

if self.query_threshold >= 0 and duration > self.query_threshold:
formatted = self._format_query(query)
formatted = self._format_query(query, vars)
self.logger.info(
'slow ' + msg, extra=dict(qdata, trailer=formatted))

def processor(data):
qdata['query'] = self._format_query(query)
qdata['query'] = self._format_query(query, vars)
if self.explain_breadcrumbs or talisker.Context.debug:
try:
cursor = base_connection.cursor()
cursor.execute('EXPLAIN ' + query, vars)
plan = '\n'.join(l[0] for l in cursor.fetchall())
qdata['plan'] = plan
except Exception as e:
qdata['plan'] = 'could not explain query: ' + str(e)

data['data'].update(qdata)

breadcrumb = dict(
Expand All @@ -141,7 +161,7 @@ def apply_timeout(self):
return None

ms = int(ctx_timeout * 1000)
super(TaliskerCursor, self).execute(
super().execute(
'SET LOCAL statement_timeout TO %s', (ms,)
)
return ms
Expand All @@ -153,7 +173,7 @@ def execute(self, query, vars=None):
extra['timeout'] = timeout
timestamp = time.time()
try:
return super(TaliskerCursor, self).execute(query, vars)
return super().execute(query, vars)
except psycopg2.OperationalError as exc:
extra['pgcode'] = exc.pgcode
extra['pgerror'] = exc.pgerror
Expand All @@ -162,9 +182,7 @@ def execute(self, query, vars=None):
raise
finally:
duration = get_rounded_ms(timestamp)
if vars is None:
query = None
self.connection._record('query', query, duration, extra)
self.connection._record('query', query, vars, duration, extra)

def callproc(self, procname, vars=None):
extra = collections.OrderedDict()
Expand All @@ -173,7 +191,7 @@ def callproc(self, procname, vars=None):
extra['timeout'] = timeout
timestamp = time.time()
try:
return super(TaliskerCursor, self).callproc(procname, vars)
return super().callproc(procname, vars)
except psycopg2.OperationalError as exc:
extra['pgcode'] = exc.pgcode
extra['pgerror'] = exc.pgerror
Expand All @@ -184,4 +202,9 @@ def callproc(self, procname, vars=None):
duration = get_rounded_ms(timestamp)
# no query parameters, cannot safely record
self.connection._record(
'stored proc: {}'.format(procname), None, duration, extra)
'stored proc: {}'.format(procname),
None,
vars,
duration,
extra,
)
3 changes: 3 additions & 0 deletions talisker/requests.py
Expand Up @@ -72,6 +72,7 @@
STORAGE = Local()
STORAGE.sessions = {}
HOSTS = module_dict()
DEBUG_HEADER = future.utils.text_to_native_str('X-Debug')


def clear():
Expand Down Expand Up @@ -169,6 +170,8 @@ def send(request, **kwargs):
deadline = datetime.utcfromtimestamp(Context.current.deadline)
formatted = deadline.isoformat() + 'Z'
request.headers[config.deadline_header] = formatted
if Context.debug:
request.headers[DEBUG_HEADER] = '1'
try:
return func(request, **kwargs)
except Exception as e:
Expand Down
14 changes: 8 additions & 6 deletions talisker/sentry.py
Expand Up @@ -262,6 +262,7 @@ def __init__(self, dsn):

def start(self):
self.client = get_client()
new_context()
self.orig_remote = self.client.remote
self.client.set_dsn(self.dsn, DummySentryTransport)
self.transport = self.client.remote.get_transport()
Expand Down Expand Up @@ -432,7 +433,7 @@ def add_talisker_context(data):
def sql_summary(sql_crumbs, start_time):

def duration(crumb):
return float(crumb['data'].get('duration', 0))
return float(crumb['data'].get('duration_ms', 0))

sql_crumbs.sort(key=duration, reverse=True)
sql_time = sum(duration(c) for c in sql_crumbs)
Expand All @@ -445,13 +446,14 @@ def duration(crumb):
sql_summary['non_sql_time'] = request_time - sql_time
sql_summary['total_time'] = request_time

sql_summary['slowest queries'] = [
OrderedDict([
('duration', c['data']['duration']),
slowest = []
for c in sql_crumbs[:5]:
slow = OrderedDict([
('duration_ms', c['data']['duration_ms']),
('query', c['data']['query'])]
)
for c in sql_crumbs[:5]
]
slowest.append(slow)
sql_summary['slowest queries'] = slowest

return sql_summary

Expand Down
2 changes: 1 addition & 1 deletion tests/flask_app.py
Expand Up @@ -72,7 +72,7 @@ def logging():

@app.route('/error/')
def error():
conn.execute(select([users]))
conn.execute(select([users]).where(users.c.id == 1))
talisker.requests.get_session().post(
'http://httpbin.org/post', json={'foo': 'bar'})
logger.info('halp', extra={'foo': 'bar'})
Expand Down
6 changes: 6 additions & 0 deletions tests/test_config.py
Expand Up @@ -61,6 +61,7 @@ def test_config_defaults():
debuglog=None,
colour=False,
slowquery_threshold=-1,
explain_sql=False,
soft_request_timeout=-1,
request_timeout=None,
logstatus=False,
Expand Down Expand Up @@ -127,6 +128,11 @@ def test_query_threshold_config():
assert msg == "'garbage' is not a valid integer"


def test_explain_sql_config():
assert_config({'TALISKER_EXPLAIN_SQL': '1'}, explain_sql=True)
assert_config({'TALISKER_EXPLAIN_SQL': 'garbage'}, explain_sql=False)


def test_request_timeout_config():
assert_config(
{'TALISKER_SOFT_REQUEST_TIMEOUT': '3000'}, soft_request_timeout=3000)
Expand Down
8 changes: 4 additions & 4 deletions tests/test_postgresql.py
Expand Up @@ -58,9 +58,9 @@ def cursor(conn):


def test_connection_record_slow(conn, context, get_breadcrumbs):
query = 'select * from table'
query = 'select * from table where id=%s'
conn._threshold = 0
conn._record('msg', query, 10000)
conn._record('msg', query, (1,), 10000)
records = context.logs.filter(name='talisker.slowqueries')
assert records[0].extra['duration_ms'] == 10000.0
assert records[0]._trailer == prettify_sql(query)
Expand All @@ -69,14 +69,14 @@ def test_connection_record_slow(conn, context, get_breadcrumbs):
@pytest.mark.skipif(not talisker.sentry.enabled, reason='need raven installed')
def test_connection_record_fast(conn, context):
query = 'select * from table'
conn._record('msg', query, 0)
conn._record('msg', query, None, 0)
context.assert_not_log(name='talisker.slowqueries')


@pytest.mark.skipif(not talisker.sentry.enabled, reason='need raven installed')
def test_connection_record_breadcrumb(conn, get_breadcrumbs):
query = 'select * from table'
conn._record('msg', query, 1000)
conn._record('msg', query, None, 1000)
breadcrumb = get_breadcrumbs()[0]
assert breadcrumb['message'] == 'msg'
assert breadcrumb['category'] == 'sql'
Expand Down
2 changes: 2 additions & 0 deletions tests/test_requests.py
Expand Up @@ -223,6 +223,7 @@ def test_metric_hook_registered_endpoint(
def test_configured_session(context, get_breadcrumbs):
deadline = time.time() + 10
expected_deadline = datetime.utcfromtimestamp(deadline).isoformat() + 'Z'
Context.set_debug()
Context.current.deadline = deadline
session = requests.Session()
talisker.requests.configure(session)
Expand All @@ -242,6 +243,7 @@ def test_configured_session(context, get_breadcrumbs):
headers = responses.calls[0].request.headers
assert headers['X-Request-Id'] == 'XXX'
assert headers['X-Request-Deadline'] == expected_deadline
assert headers['X-Debug'] == '1'
assert context.statsd[0] == 'requests.count.localhost.view:1|c'
assert context.statsd[1].startswith(
'requests.latency.localhost.view.200:')
Expand Down
104 changes: 92 additions & 12 deletions tests/test_sentry.py
Expand Up @@ -44,6 +44,8 @@

from talisker import testing

from tests.conftest import require_module

DATESTRING = '2016-01-02 03:04:05.1234'


Expand Down Expand Up @@ -272,13 +274,13 @@ def test_add_talisker_context():
@freeze_time(DATESTRING)
def test_sql_summary_crumb():
crumbs = [
{'category': 'sql', 'data': {'duration': 10.0, 'query': '1'}},
{'category': 'sql', 'data': {'duration': 15.0, 'query': '2'}},
{'category': 'sql', 'data': {'duration': 13.0, 'query': '3'}},
{'category': 'sql', 'data': {'duration': 5.0, 'query': '4'}},
{'category': 'sql', 'data': {'duration': 11.0, 'query': '5'}},
{'category': 'sql', 'data': {'duration': 7.0, 'query': '6'}},
{'category': 'sql', 'data': {'duration': 4.0, 'query': '7'}},
{'category': 'sql', 'data': {'duration_ms': 10.0, 'query': '1'}},
{'category': 'sql', 'data': {'duration_ms': 15.0, 'query': '2'}},
{'category': 'sql', 'data': {'duration_ms': 13.0, 'query': '3'}},
{'category': 'sql', 'data': {'duration_ms': 5.0, 'query': '4'}},
{'category': 'sql', 'data': {'duration_ms': 11.0, 'query': '5'}},
{'category': 'sql', 'data': {'duration_ms': 7.0, 'query': '6'}},
{'category': 'sql', 'data': {'duration_ms': 4.0, 'query': '7'}},
]

start_time = time.time() - 0.5 # 500ms
Expand All @@ -289,15 +291,93 @@ def test_sql_summary_crumb():
'total_time': 500.0,
'non_sql_time': 435.0,
'slowest queries': [
{'duration': 15.0, 'query': '2'},
{'duration': 13.0, 'query': '3'},
{'duration': 11.0, 'query': '5'},
{'duration': 10.0, 'query': '1'},
{'duration': 7.0, 'query': '6'},
{'duration_ms': 15.0, 'query': '2'},
{'duration_ms': 13.0, 'query': '3'},
{'duration_ms': 11.0, 'query': '5'},
{'duration_ms': 10.0, 'query': '1'},
{'duration_ms': 7.0, 'query': '6'},
],
}


@pytest.fixture
def test_db(postgresql):
import psycopg2
from talisker.postgresql import TaliskerConnection

with psycopg2.connect(postgresql.dsn) as ddl:
with ddl.cursor() as cur:
cur.execute("""
CREATE TABLE IF NOT EXISTS test (
id INTEGER PRIMARY KEY,
value TEXT
);
""")
cur.execute("INSERT INTO test VALUES (1, 'foo')")

return TaliskerConnection(postgresql.dsn)


@require_module('psycopg2')
def test_sql_crumbs_functional_explain_disabled(test_db, context):
cur = test_db.cursor()

q = 'SELECT * FROM test WHERE id = %s'
cur.execute(q, (1,))
cur.fetchall()
talisker.sentry.get_client().captureMessage('test')
msg = context.sentry[0]
sql_crumbs = [
c for c in msg['breadcrumbs']['values'] if c['category'] == 'sql'
]
data = sql_crumbs[0]['data']
assert data['query'] == talisker.postgresql.prettify_sql(q)
assert 'plan' not in data
assert 'duration_ms' in data
assert 'connection' in data


@require_module('psycopg2')
def test_sql_crumbs_functional_explain_config(test_db, context, config):
config['TALISKER_EXPLAIN_SQL'] = '1'

cur = test_db.cursor()

q = 'SELECT * FROM test WHERE id = %s'
cur.execute(q, (1,))
cur.fetchall()
talisker.sentry.get_client().captureMessage('test')
msg = context.sentry[0]
sql_crumbs = [
c for c in msg['breadcrumbs']['values'] if c['category'] == 'sql'
]
data = sql_crumbs[0]['data']
assert data['query'] == talisker.postgresql.prettify_sql(q)
assert data['plan'].startswith('Index Scan using test_pkey on test')
assert 'duration_ms' in data
assert 'connection' in data


@require_module('psycopg2')
def test_sql_crumbs_functional_explain_context(test_db, context):
talisker.Context.set_debug()
cur = test_db.cursor()

q = 'SELECT * FROM test WHERE id = %s'
cur.execute(q, (1,))
cur.fetchall()
talisker.sentry.get_client().captureMessage('test')
msg = context.sentry[0]
sql_crumbs = [
c for c in msg['breadcrumbs']['values'] if c['category'] == 'sql'
]
data = sql_crumbs[0]['data']
assert data['query'] == talisker.postgresql.prettify_sql(q)
assert data['plan'].startswith('Index Scan using test_pkey on test')
assert 'duration_ms' in data
assert 'connection' in data


def test_proxy_mixin():
client = talisker.sentry.get_client()
lh = talisker.sentry.get_log_handler()
Expand Down

0 comments on commit 3a90b62

Please sign in to comment.