From df6a91a925ae2eb3d02dffc93029f6478be14dae Mon Sep 17 00:00:00 2001 From: Fantix King Date: Fri, 8 Mar 2024 17:24:43 -0500 Subject: [PATCH] Add more metrics (#6447) * branches_current * graphql_query_compilations_total * sql_queries_total and sql_compilations_total * query_compilation_duration * client_connection_duration * queries_per_connection * transaction_serialization_errors_total * connection_errors_total * query_size * auth_api_calls_total and auth_api_calls_total * auth_providers * auth_successful_logins_total --- docs/reference/http.rst | 63 ++++++++++++++++++-- edb/common/prometheus.py | 4 +- edb/graphql/extension.pyx | 53 ++++++++++++----- edb/server/dbview/dbview.pxd | 2 + edb/server/dbview/dbview.pyx | 37 ++++++++++++ edb/server/metrics.py | 87 ++++++++++++++++++++++++++++ edb/server/pgcon/pgcon.pyx | 17 ++++++ edb/server/protocol/auth_ext/http.py | 5 +- edb/server/protocol/binary.pyx | 13 +++++ edb/server/protocol/execute.pyx | 5 ++ edb/server/protocol/frontend.pxd | 2 + edb/server/protocol/frontend.pyx | 27 ++++++++- edb/server/protocol/pg_ext.pyx | 72 +++++++++++++++++------ edb/server/protocol/protocol.pxd | 1 + edb/server/protocol/protocol.pyx | 27 ++++++++- 15 files changed, 369 insertions(+), 46 deletions(-) diff --git a/docs/reference/http.rst b/docs/reference/http.rst index 31bb0ce2ee..6f34f1fd02 100644 --- a/docs/reference/http.rst +++ b/docs/reference/http.rst @@ -76,8 +76,8 @@ Retrieve instance metrics. All EdgeDB instances expose a Prometheus-compatible endpoint available via GET request. The following metrics are made available. -Processes -^^^^^^^^^ +System +^^^^^^ ``compiler_process_spawns_total`` **Counter.** Total number of compiler processes spawned. @@ -85,6 +85,9 @@ Processes ``compiler_processes_current`` **Gauge.** Current number of active compiler processes. +``branches_current`` + **Gauge.** Current number of branches. + Backend connections and performance ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ``backend_connections_total`` @@ -117,27 +120,75 @@ Client connections ``client_connections_idle_total`` **Counter.** Total number of forcefully closed idle client connections. -Query compilation -^^^^^^^^^^^^^^^^^ +``client_connection_duration`` + **Histogram.** Time a client connection is open. + +Queries and compilation +^^^^^^^^^^^^^^^^^^^^^^^ ``edgeql_query_compilations_total`` **Counter.** Number of compiled/cached queries or scripts since instance startup. A query is compiled and then cached on first use, increasing the ``path="compiler"`` parameter. Subsequent uses of the same query only use the cache, thus only increasing the ``path="cache"`` parameter. - - ``edgeql_query_compilation_duration`` + Deprecated in favor of ``query_compilation_duration[interface="edgeql"]``. + **Histogram.** Time it takes to compile an EdgeQL query or script, in seconds. +``graphql_query_compilations_total`` + **Counter.** Number of compiled/cached GraphQL queries since instance + startup. A query is compiled and then cached on first use, increasing the + ``path="compiler"`` parameter. Subsequent uses of the same query only use + the cache, thus only increasing the ``path="cache"`` parameter. + +``sql_queries_total`` + **Counter.** Number of SQL queries since instance startup. + +``sql_compilations_total`` + **Counter.** Number of SQL compilations since instance startup. + +``query_compilation_duration`` + **Histogram.** Time it takes to compile a query or script, in seconds. + +``queries_per_connection`` + **Histogram.** Number of queries per connection. 
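As an operator-facing aside: the counters above make it easy to derive higher-level indicators. Below is a minimal sketch that scrapes the metrics endpoint and computes an EdgeQL compilation cache hit ratio from ``edgeql_query_compilations_total``; the ``localhost:5656`` address, plain-HTTP transport, and ``/metrics`` path are assumptions — adjust them for your deployment::

    import urllib.request

    def scrape(url="http://localhost:5656/metrics"):
        # Parse the Prometheus text exposition format into {sample: value}.
        samples = {}
        with urllib.request.urlopen(url) as resp:
            for line in resp.read().decode("utf-8").splitlines():
                if not line or line.startswith("#"):
                    continue  # skip blanks and HELP/TYPE comment lines
                name, _, value = line.rpartition(" ")
                samples[name] = float(value)
        return samples

    m = scrape()
    cached = sum(v for k, v in m.items()
                 if k.startswith("edgeql_query_compilations_total")
                 and 'path="cache"' in k)
    compiled = sum(v for k, v in m.items()
                   if k.startswith("edgeql_query_compilations_total")
                   and 'path="compiler"' in k)
    print(f"EdgeQL cache hit ratio: {cached / ((cached + compiled) or 1):.1%}")

The same parsing works for any of the counters and gauges listed on this page.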
+ +``query_size`` + **Histogram.** Number of bytes in a query, where the label + ``interface=edgeql`` means the size of an EdgeQL query, ``=graphql`` for a + GraphQL query, ``=sql`` for a readonly SQL query from the user, and + ``=compiled`` for a backend SQL query compiled and issued by the server. + +Auth Extension +^^^^^^^^^^^^^^ + +``auth_api_calls_total`` + **Counter.** Number of API calls to the Auth extension. + +``auth_ui_renders_total`` + **Counter.** Number of UI pages rendered by the Auth extension. + +``auth_providers`` + **Histogram.** Number of Auth providers configured. + +``auth_successful_logins_total`` + **Counter.** Number of successful logins in the Auth extension. + Errors ^^^^^^ ``background_errors_total`` **Counter.** Number of unhandled errors in background server routines. +``transaction_serialization_errors_total`` + **Counter.** Number of transaction serialization errors. + +``connection_errors_total`` + **Counter.** Number of network connection errors. + .. _ref_reference_http_querying: Querying diff --git a/edb/common/prometheus.py b/edb/common/prometheus.py index fbabb6f2a9..f286806906 100644 --- a/edb/common/prometheus.py +++ b/edb/common/prometheus.py @@ -171,7 +171,7 @@ def new_labeled_gauge( /, *, unit: Unit | None = None, - labels: tuple[str], + labels: tuple[str, ...], ) -> LabeledGauge: gauge = LabeledGauge(self, name, desc, unit, labels=labels) self._add_metric(gauge) @@ -198,7 +198,7 @@ def new_labeled_histogram( *, unit: Unit | None = None, buckets: list[float] | None = None, - labels: tuple[str], + labels: tuple[str, ...], ) -> LabeledHistogram: hist = LabeledHistogram( self, name, desc, unit, buckets=buckets, labels=labels diff --git a/edb/graphql/extension.pyx b/edb/graphql/extension.pyx index f9045e819c..aabba5bab6 100644 --- a/edb/graphql/extension.pyx +++ b/edb/graphql/extension.pyx @@ -29,6 +29,7 @@ import cython import http import json import logging +import time import urllib.parse from graphql.language import lexer as gql_lexer @@ -37,7 +38,7 @@ from edb import _graphql_rewrite from edb import errors from edb.graphql import errors as gql_errors from edb.server.dbview cimport dbview -from edb.server import compiler +from edb.server import compiler, metrics from edb.server import defines as edbdef from edb.server.pgcon import errors as pgerrors from edb.server.protocol import execute @@ -96,6 +97,7 @@ async def handle_request( globals = None deprecated_globals = None query = None + query_bytes_len = 0 try: if request.method == b'POST': @@ -105,10 +107,12 @@ async def handle_request( raise TypeError( 'the body of the request must be a JSON object') query = body.get('query') + query_bytes_len = len(query.encode('utf-8')) operation_name = body.get('operationName') variables = body.get('variables') deprecated_globals = body.get('globals') elif request.content_type == 'application/graphql': + query_bytes_len = len(request.body) query = request.body.decode('utf-8') else: raise TypeError( @@ -122,6 +126,7 @@ async def handle_request( query = qs.get('query') if query is not None: query = query[0] + query_bytes_len = len(query.encode('utf-8')) operation_name = qs.get('operationName') if operation_name is not None: @@ -148,6 +153,9 @@ async def handle_request( if not query: raise TypeError('invalid GraphQL request: query is missing') + metrics.query_size.observe( + query_bytes_len, tenant.get_instance_name(), 'graphql' + ) if (operation_name is not None and not isinstance(operation_name, str)): @@ -234,21 +242,28 @@ async def compile( ): server = 
tenant.server compiler_pool = server.get_compiler_pool() - return await compiler_pool.compile_graphql( - db.name, - db.user_schema_pickle, - tenant.get_global_schema_pickle(), - db.reflection_cache, - db.db_config, - db._index.get_compilation_system_config(), - query, - tokens, - substitutions, - operation_name, - variables, - client_id=tenant.client_id, - ) - + started_at = time.monotonic() + try: + return await compiler_pool.compile_graphql( + db.name, + db.user_schema_pickle, + tenant.get_global_schema_pickle(), + db.reflection_cache, + db.db_config, + db._index.get_compilation_system_config(), + query, + tokens, + substitutions, + operation_name, + variables, + client_id=tenant.client_id, + ) + finally: + metrics.query_compilation_duration.observe( + time.monotonic() - started_at, + tenant.get_instance_name(), + "graphql", + ) async def _execute(db, tenant, query, operation_name, variables, globals): dbver = db.dbver @@ -348,11 +363,17 @@ async def _execute(db, tenant, query, operation_name, variables, globals): query_cache[cache_key2] = qug, gql_op else: query_cache[cache_key] = qug, gql_op + metrics.graphql_query_compilations.inc( + 1.0, tenant.get_instance_name(), 'compiler' + ) else: qug, gql_op = entry # This is at least the second time this query is used # and it's safe to cache. use_prep_stmt = True + metrics.graphql_query_compilations.inc( + 1.0, tenant.get_instance_name(), 'cache' + ) compiled = dbview.CompiledQuery(query_unit_group=qug) diff --git a/edb/server/dbview/dbview.pxd b/edb/server/dbview/dbview.pxd index badcaed1e9..2008f0a4ba 100644 --- a/edb/server/dbview/dbview.pxd +++ b/edb/server/dbview/dbview.pxd @@ -58,6 +58,7 @@ cdef class DatabaseIndex: object _cached_compiler_args cdef invalidate_caches(self) + cdef inline set_current_branches(self) cdef class Database: @@ -87,6 +88,7 @@ cdef class Database: cdef _cache_compiled_query(self, key, compiled) cdef _new_view(self, query_cache, protocol_version) cdef _remove_view(self, view) + cdef _observe_auth_ext_config(self) cdef _update_backend_ids(self, new_types) cdef _set_and_signal_new_user_schema( self, diff --git a/edb/server/dbview/dbview.pyx b/edb/server/dbview/dbview.pyx index 0551238d8c..e667be6808 100644 --- a/edb/server/dbview/dbview.pyx +++ b/edb/server/dbview/dbview.pyx @@ -138,6 +138,7 @@ cdef class Database: self.reflection_cache = reflection_cache self.backend_ids = backend_ids self.extensions = extensions + self._observe_auth_ext_config() @property def server(self): @@ -176,8 +177,27 @@ cdef class Database: self.reflection_cache = reflection_cache if db_config is not None: self.db_config = db_config + self._observe_auth_ext_config() self._invalidate_caches() + cdef _observe_auth_ext_config(self): + key = "ext::auth::AuthConfig::providers" + if ( + self.db_config is not None and + self.user_config_spec is not None and + key in self.user_config_spec + ): + providers = config.lookup( + key, + self.db_config, + spec=self.user_config_spec, + ) + metrics.auth_providers.set( + len(providers), + self.tenant.get_instance_name(), + self.name, + ) + cdef _update_backend_ids(self, new_types): self.backend_ids.update(new_types) @@ -1192,6 +1212,11 @@ cdef class DatabaseConnectionView: time.monotonic() - started_at, self.tenant.get_instance_name(), ) + metrics.query_compilation_duration.observe( + time.monotonic() - started_at, + self.tenant.get_instance_name(), + "edgeql", + ) unit_group, self._last_comp_state, self._last_comp_state_id = result @@ -1352,10 +1377,22 @@ cdef class DatabaseIndex: 
ext_config_settings=ext_config_settings, ) self._dbs[dbname] = db + self.set_current_branches() return db def unregister_db(self, dbname): self._dbs.pop(dbname) + self.set_current_branches() + + cdef inline set_current_branches(self): + metrics.current_branches.set( + sum( + 1 + for dbname in self._dbs + if dbname != defines.EDGEDB_SYSTEM_DB + ), + self._tenant.get_instance_name(), + ) def iter_dbs(self): return iter(self._dbs.values()) diff --git a/edb/server/metrics.py b/edb/server/metrics.py index 8488665a90..c83ba70759 100644 --- a/edb/server/metrics.py +++ b/edb/server/metrics.py @@ -32,6 +32,12 @@ 'Current number of active compiler processes.' ) +current_branches = registry.new_labeled_gauge( + 'branches_current', + 'Current number of branches.', + labels=('tenant',), +) + total_backend_connections = registry.new_labeled_counter( 'backend_connections_total', 'Total number of backend connections established.', @@ -88,6 +94,13 @@ labels=('tenant',), ) +client_connection_duration = registry.new_labeled_histogram( + 'client_connection_duration', + 'Time a client connection is open.', + unit=prom.Unit.SECONDS, + labels=('tenant', 'interface'), +) + edgeql_query_compilations = registry.new_labeled_counter( 'edgeql_query_compilations_total', 'Number of compiled/cached queries or scripts.', @@ -101,14 +114,88 @@ labels=('tenant',), ) +graphql_query_compilations = registry.new_labeled_counter( + 'graphql_query_compilations_total', + 'Number of compiled/cached GraphQL queries.', + labels=('tenant', 'path') +) + +query_compilation_duration = registry.new_labeled_histogram( + 'query_compilation_duration', + 'Time it takes to compile a query or script.', + unit=prom.Unit.SECONDS, + labels=('tenant', 'interface'), +) + +sql_queries = registry.new_labeled_counter( + 'sql_queries_total', + 'Number of SQL queries.', + labels=('tenant',) +) + +sql_compilations = registry.new_labeled_counter( + 'sql_compilations_total', + 'Number of SQL compilations.', + labels=('tenant',) +) + +queries_per_connection = registry.new_labeled_histogram( + 'queries_per_connection', + 'Number of queries per connection.', + labels=('tenant', 'interface'), +) + +query_size = registry.new_labeled_histogram( + 'query_size', + 'The size of a query.', + unit=prom.Unit.BYTES, + labels=('tenant', 'interface'), +) + background_errors = registry.new_labeled_counter( 'background_errors_total', 'Number of unhandled errors in background server routines.', labels=('tenant', 'source') ) +transaction_serialization_errors = registry.new_labeled_counter( + 'transaction_serialization_errors_total', + 'Number of transaction serialization errors.', + labels=('tenant',) +) + +connection_errors = registry.new_labeled_counter( + 'connection_errors_total', + 'Number of network connection errors.', + labels=('tenant',) +) + ha_events_total = registry.new_labeled_counter( "ha_events_total", "Number of each high-availability watch event.", labels=("dsn", "event"), ) + +auth_api_calls = registry.new_labeled_counter( + "auth_api_calls_total", + "Number of API calls to the Auth extension.", + labels=("tenant",), +) + +auth_ui_renders = registry.new_labeled_counter( + "auth_ui_renders_total", + "Number of UI pages rendered by the Auth extension.", + labels=("tenant",), +) + +auth_providers = registry.new_labeled_gauge( + 'auth_providers', + 'Number of Auth providers configured.', + labels=('tenant', 'branch'), +) + +auth_successful_logins = registry.new_labeled_counter( + "auth_successful_logins_total", + "Number of successful logins in the Auth 
extension.", + labels=("tenant",), +) diff --git a/edb/server/pgcon/pgcon.pyx b/edb/server/pgcon/pgcon.pyx index 2089a0431f..68a855ec18 100644 --- a/edb/server/pgcon/pgcon.pyx +++ b/edb/server/pgcon/pgcon.pyx @@ -968,6 +968,11 @@ cdef class PGConnection: out.write_buffer(buf.end_message()) parse_array[idx] = True parsed.add(stmt_name) + metrics.query_size.observe( + len(query_unit.sql[0]), + self.get_tenant_label(), + 'compiled', + ) buf = WriteBuffer.new_message(b'B') buf.write_bytestring(b'') # portal name @@ -987,6 +992,9 @@ cdef class PGConnection: buf.write_bytestring(sql) buf.write_int16(0) out.write_buffer(buf.end_message()) + metrics.query_size.observe( + len(sql), self.get_tenant_label(), 'compiled' + ) buf = WriteBuffer.new_message(b'B') buf.write_bytestring(b'') # portal name @@ -1233,6 +1241,9 @@ cdef class PGConnection: buf.write_int16(0) out.write_buffer(buf.end_message()) i += 1 + metrics.query_size.observe( + len(sql), self.get_tenant_label(), 'compiled' + ) else: if len(query.sql) != 1: raise errors.InternalServerError( @@ -1244,6 +1255,9 @@ cdef class PGConnection: buf.write_bytestring(query.sql[0]) buf.write_int16(0) out.write_buffer(buf.end_message()) + metrics.query_size.observe( + len(query.sql[0]), self.get_tenant_label(), 'compiled' + ) assert bind_data is not None if stmt_name == b'' and msgs_num > 1: @@ -1678,6 +1692,9 @@ cdef class PGConnection: msg_buf.write_bytestring(sql_text) msg_buf.write_bytes(data) buf.write_buffer(msg_buf.end_message()) + metrics.query_size.observe( + len(sql_text), self.get_tenant_label(), 'compiled' + ) if self.debug: self.debug_print( 'Parse', action.stmt_name, sql_text, data diff --git a/edb/server/protocol/auth_ext/http.py b/edb/server/protocol/auth_ext/http.py index e051bcd0cf..2f7e5ab8c2 100644 --- a/edb/server/protocol/auth_ext/http.py +++ b/edb/server/protocol/auth_ext/http.py @@ -38,7 +38,7 @@ from edb.common import debug from edb.common import markup from edb.ir import statypes -from edb.server import tenant as edbtenant +from edb.server import tenant as edbtenant, metrics from edb.server.config.types import CompositeConfigType from . import ( @@ -1660,6 +1660,9 @@ def _make_session_token(self, identity_id: str) -> str: claims=claims, ) session_token.make_signed_token(signing_key) + metrics.auth_successful_logins.inc( + 1.0, self.tenant.get_instance_name() + ) return session_token.serialize() def _get_from_claims(self, state: str, key: str) -> str: diff --git a/edb/server/protocol/binary.pyx b/edb/server/protocol/binary.pyx index e578f37974..689cec91b4 100644 --- a/edb/server/protocol/binary.pyx +++ b/edb/server/protocol/binary.pyx @@ -148,6 +148,7 @@ cdef inline bint parse_boolean(value: bytes, header: str): cdef class EdgeConnection(frontend.FrontendConnection): + interface = "edgeql" def __init__( self, @@ -790,6 +791,10 @@ cdef class EdgeConnection(frontend.FrontendConnection): if not query: raise errors.BinaryProtocolError('empty query') + metrics.query_size.observe( + len(query), self.get_tenant_label(), 'edgeql' + ) + _dbview = self.get_dbview() state_tid = self.buffer.read_bytes(16) state_data = self.buffer.read_len_prefixed_bytes() @@ -905,6 +910,8 @@ cdef class EdgeConnection(frontend.FrontendConnection): extra_blobs=query_req.source.extra_blobs(), ) + self._query_count += 1 + # Clear the _last_anon_compiled so that the next Execute - if # identical - will always lookup in the cache and honor the # `cacheable` flag to compile the query again. 
@@ -1138,6 +1145,10 @@ cdef class EdgeConnection(frontend.FrontendConnection): fields = {} if isinstance(exc, errors.EdgeDBError): fields.update(exc._attrs) + if isinstance(exc, errors.TransactionSerializationError): + metrics.transaction_serialization_errors.inc( + 1.0, self.get_tenant_label() + ) try: formatted_error = exc.__formatted_error__ @@ -1803,6 +1814,7 @@ def new_edge_connection( auth_data: bytes = b'', protocol_version: edbdef.ProtocolVersion = edbdef.CURRENT_PROTOCOL, conn_params: dict[str, str] | None = None, + connection_made_at: float | None = None, ): return EdgeConnection( server, @@ -1813,6 +1825,7 @@ def new_edge_connection( auth_data=auth_data, protocol_version=protocol_version, conn_params=conn_params, + connection_made_at=connection_made_at, ) diff --git a/edb/server/protocol/execute.pyx b/edb/server/protocol/execute.pyx index 02f491bdde..78baf73034 100644 --- a/edb/server/protocol/execute.pyx +++ b/edb/server/protocol/execute.pyx @@ -42,6 +42,7 @@ from edb.edgeql import qltypes from edb.server import compiler from edb.server import config from edb.server import defines as edbdef +from edb.server import metrics from edb.server.compiler import errormech from edb.server.compiler cimport rpc from edb.server.dbview cimport dbview @@ -694,6 +695,10 @@ async def parse_execute_json( query_cache=query_cache_enabled, protocol_version=edbdef.CURRENT_PROTOCOL, ) + if use_metrics: + metrics.query_size.observe( + len(query.encode('utf-8')), tenant.get_instance_name(), 'edgeql' + ) query_req = rpc.CompilationRequest( db.server.compilation_config_serializer diff --git a/edb/server/protocol/frontend.pxd b/edb/server/protocol/frontend.pxd index 36c2e2dd0d..47b13f7563 100644 --- a/edb/server/protocol/frontend.pxd +++ b/edb/server/protocol/frontend.pxd @@ -46,6 +46,8 @@ cdef class FrontendConnection(AbstractFrontendConnection): object _transport WriteBuffer _write_buf object _write_waiter + object connection_made_at + int _query_count ReadBuffer buffer object _msg_take_waiter diff --git a/edb/server/protocol/frontend.pyx b/edb/server/protocol/frontend.pyx index 9713084722..1d7829c4de 100644 --- a/edb/server/protocol/frontend.pyx +++ b/edb/server/protocol/frontend.pyx @@ -26,7 +26,7 @@ from edgedb import scram from edb import errors from edb.common import debug -from edb.server import args as srvargs +from edb.server import args as srvargs, metrics from edb.server.pgcon import errors as pgerror from . cimport auth_helpers @@ -46,6 +46,7 @@ cdef class AbstractFrontendConnection: cdef class FrontendConnection(AbstractFrontendConnection): + interface = "frontend" def __init__( self, @@ -55,6 +56,7 @@ cdef class FrontendConnection(AbstractFrontendConnection): passive: bool, transport: srvargs.ServerConnTransport, external_auth: bool, + connection_made_at: float | None = None, ): self._id = server.on_binary_client_created() self.server = server @@ -66,6 +68,8 @@ cdef class FrontendConnection(AbstractFrontendConnection): self._pinned_pgcon_in_tx = False self._get_pgcon_cc = 0 + self.connection_made_at = connection_made_at + self._query_count = 0 self._transport = None self._write_buf = None self._write_waiter = None @@ -405,7 +409,12 @@ cdef class FrontendConnection(AbstractFrontendConnection): # in this situation we just silently exit. 
pass - except (ConnectionError, pgerror.BackendQueryCancelledError): + except ConnectionError: + metrics.connection_errors.inc( + 1.0, self.get_tenant_label(), + ) + + except pgerror.BackendQueryCancelledError: pass except Exception as ex: @@ -491,6 +500,20 @@ cdef class FrontendConnection(AbstractFrontendConnection): # 3. We can interrupt some operations like auth with a CancelledError. # Again, those operations don't mutate global state. + if self.connection_made_at is not None: + tenant_label = self.get_tenant_label() + metrics.client_connection_duration.observe( + time.monotonic() - self.connection_made_at, + tenant_label, + self.interface, + ) + if self.authed: + metrics.queries_per_connection.observe( + self._query_count, tenant_label, self.interface + ) + if isinstance(exc, ConnectionError): + metrics.connection_errors.inc(1.0, tenant_label) + if (self._msg_take_waiter is not None and not self._msg_take_waiter.done()): # We're parsing the protocol. We can abort that. diff --git a/edb/server/protocol/pg_ext.pyx b/edb/server/protocol/pg_ext.pyx index 34e0ff8fd0..13594afe7b 100644 --- a/edb/server/protocol/pg_ext.pyx +++ b/edb/server/protocol/pg_ext.pyx @@ -27,6 +27,7 @@ import hashlib import json import os import sys +import time from collections import deque cimport cython @@ -37,7 +38,7 @@ from edb import errors from edb.common import debug from edb.pgsql.parser import exceptions as parser_errors from edb.server import args as srvargs -from edb.server import defines +from edb.server import defines, metrics from edb.server.compiler import dbstate from edb.server.pgcon import errors as pgerror from edb.server.pgcon.pgcon cimport PGAction, PGMessage @@ -340,6 +341,8 @@ cdef class ConnectionView: cdef class PgConnection(frontend.FrontendConnection): + interface = "sql" + def __init__(self, server, sslctx, endpoint_security, **kwargs): super().__init__(server, None, **kwargs) self._dbview = ConnectionView() @@ -393,7 +396,10 @@ cdef class PgConnection(frontend.FrontendConnection): buf = WriteBuffer.new_message(b'E') if isinstance(exc, pgerror.BackendError): - pass + if exc.code_is(pgerror.ERROR_SERIALIZATION_FAILURE): + metrics.transaction_serialization_errors.inc( + 1.0, self.get_tenant_label() + ) elif isinstance(exc, parser_errors.PSqlUnsupportedError): exc = pgerror.FeatureNotSupported(str(exc)) elif isinstance(exc, parser_errors.PSqlParseError): @@ -423,6 +429,10 @@ cdef class PgConnection(frontend.FrontendConnection): str(exc), **args, ) + if isinstance(exc, errors.TransactionSerializationError): + metrics.transaction_serialization_errors.inc( + 1.0, self.get_tenant_label() + ) else: exc = pgerror.new( pgerror.ERROR_INTERNAL_ERROR, @@ -806,11 +816,16 @@ cdef class PgConnection(frontend.FrontendConnection): elif mtype == b'Q': # Query try: - query_str = self.buffer.read_null_str().decode("utf8") + query = self.buffer.read_null_str() + metrics.query_size.observe( + len(query), self.get_tenant_label(), 'sql' + ) + query_str = query.decode("utf8") self.buffer.finish_message() if self.debug: self.debug_print("Query", query_str) actions = await self.simple_query(query_str) + del query_str, query except Exception as ex: self.write_error(ex) self.write(self.ready_for_query()) @@ -882,6 +897,10 @@ cdef class PgConnection(frontend.FrontendConnection): dbv = self._dbview query_units = await self.compile(query_str, dbv) already_in_implicit_tx = dbv._in_tx_implicit + metrics.sql_queries.inc( + len(query_units), self.tenant.get_instance_name() + ) + self._query_count += len(query_units) if not 
already_in_implicit_tx: actions.append(PGMessage(PGAction.START_IMPLICIT_TX)) @@ -991,6 +1010,9 @@ cdef class PgConnection(frontend.FrontendConnection): data = self.buffer.consume_message() if self.debug: self.debug_print("Parse", repr(stmt_name), query_str, data) + metrics.query_size.observe( + len(query_bytes), self.get_tenant_label(), 'sql' + ) with managed_error(): if ( @@ -1088,6 +1110,8 @@ cdef class PgConnection(frontend.FrontendConnection): if self.debug: self.debug_print("Execute", repr(portal_name), max_rows) + metrics.sql_queries.inc(1.0, self.tenant.get_instance_name()) + self._query_count += 1 with managed_error(): unit = dbv.find_portal(portal_name) actions.append( @@ -1433,21 +1457,32 @@ cdef class PgConnection(frontend.FrontendConnection): # the next identical query could get recompiled on the new schema. schema_version = self.database.schema_version compiler_pool = self.server.get_compiler_pool() - result = await compiler_pool.compile_sql( - self.dbname, - self.database.user_schema_pickle, - self.database._index._global_schema_pickle, - self.database.reflection_cache, - self.database.db_config, - self.database._index.get_compilation_system_config(), - query_str, - dbv.fe_transaction_state(), - self.sql_prepared_stmts_map, - self.dbname, - self.username, - client_id=self.tenant.client_id, - ) + started_at = time.monotonic() + try: + result = await compiler_pool.compile_sql( + self.dbname, + self.database.user_schema_pickle, + self.database._index._global_schema_pickle, + self.database.reflection_cache, + self.database.db_config, + self.database._index.get_compilation_system_config(), + query_str, + dbv.fe_transaction_state(), + self.sql_prepared_stmts_map, + self.dbname, + self.username, + client_id=self.tenant.client_id, + ) + finally: + metrics.query_compilation_duration.observe( + time.monotonic() - started_at, + self.tenant.get_instance_name(), + "sql", + ) self.database.cache_compiled_sql(key, result, schema_version) + metrics.sql_compilations.inc( + len(result), self.tenant.get_instance_name() + ) if self.debug: self.debug_print("Compile result", result) return result @@ -1491,7 +1526,7 @@ cdef class PgConnection(frontend.FrontendConnection): return qu -def new_pg_connection(server, sslctx, endpoint_security): +def new_pg_connection(server, sslctx, endpoint_security, connection_made_at): return PgConnection( server, sslctx, @@ -1499,4 +1534,5 @@ def new_pg_connection(server, sslctx, endpoint_security): passive=False, transport=srvargs.ServerConnTransport.TCP_PG, external_auth=False, + connection_made_at=connection_made_at, ) diff --git a/edb/server/protocol/protocol.pxd b/edb/server/protocol/protocol.pxd index 56a7ee6277..c4ab0b376f 100644 --- a/edb/server/protocol/protocol.pxd +++ b/edb/server/protocol/protocol.pxd @@ -67,6 +67,7 @@ cdef class HttpProtocol: object binary_endpoint_security object http_endpoint_security object tenant + object connection_made_at HttpRequest current_request diff --git a/edb/server/protocol/protocol.pyx b/edb/server/protocol/protocol.pyx index 0584088b07..5fe3014e27 100644 --- a/edb/server/protocol/protocol.pyx +++ b/edb/server/protocol/protocol.pyx @@ -26,6 +26,7 @@ import http import http.cookies import re import ssl +import time import urllib.parse import httptools @@ -37,7 +38,7 @@ from edb.common import markup from edb.graphql import extension as graphql_ext from edb.server import args as srvargs -from edb.server import config +from edb.server import config, metrics as srv_metrics from edb.server.protocol cimport binary from 
edb.server.protocol import binary from edb.server.protocol import pg_ext @@ -122,12 +123,24 @@ cdef class HttpProtocol: self.is_tls = False def connection_made(self, transport): + self.connection_made_at = time.monotonic() self.transport = transport def connection_lost(self, exc): + srv_metrics.client_connection_duration.observe( + time.monotonic() - self.connection_made_at, + self.get_tenant_label(), + "http", + ) self.transport = None self.unprocessed = None + def get_tenant_label(self): + if self.tenant is None: + return "unknown" + else: + return self.tenant.get_instance_name() + def pause_writing(self): pass @@ -192,6 +205,7 @@ cdef class HttpProtocol: self.server, self.sslctx_pgext, self.binary_endpoint_security, + connection_made_at=self.connection_made_at, ) self.transport.set_protocol(pg_ext_conn) pg_ext_conn.connection_made(self.transport) @@ -352,6 +366,7 @@ cdef class HttpProtocol: self.server, self.tenant, external_auth=self.external_auth, + connection_made_at=self.connection_made_at, ) self.transport.set_protocol(binproto) binproto.connection_made(self.transport) @@ -643,6 +658,16 @@ cdef class HttpProtocol: tenant=self.tenant, ) await handler.handle_request(request, response, args) + if args: + if args[0] == 'ui': + if not (len(args) > 1 and args[1] == "_static"): + srv_metrics.auth_ui_renders.inc( + 1.0, self.get_tenant_label() + ) + else: + srv_metrics.auth_api_calls.inc( + 1.0, self.get_tenant_label() + ) elif route == 'auth': if await self._handle_cors(