Skip to content

Commit

Permalink
Add more metrics (#6447)
Browse files Browse the repository at this point in the history
* branches_current
* graphql_query_compilations_total
* sql_queries_total and sql_compilations_total
* query_compilation_duration
* client_connection_duration
* queries_per_connection
* transaction_serialization_errors_total
* connection_errors_total
* query_size
* auth_api_calls_total and auth_ui_renders_total
* auth_providers
* auth_successful_logins_total
  • Loading branch information
fantix committed Mar 8, 2024
1 parent dca360a commit df6a91a
Show file tree
Hide file tree
Showing 15 changed files with 369 additions and 46 deletions.
63 changes: 57 additions & 6 deletions docs/reference/http.rst
Expand Up @@ -76,15 +76,18 @@ Retrieve instance metrics.
All EdgeDB instances expose a Prometheus-compatible endpoint available via GET
request. The following metrics are made available.

Processes
^^^^^^^^^
System
^^^^^^

``compiler_process_spawns_total``
**Counter.** Total number of compiler processes spawned.

``compiler_processes_current``
**Gauge.** Current number of active compiler processes.

``branches_current``
**Gauge.** Current number of branches.

Backend connections and performance
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
``backend_connections_total``
Expand Down Expand Up @@ -117,27 +120,75 @@ Client connections
``client_connections_idle_total``
**Counter.** Total number of forcefully closed idle client connections.

Query compilation
^^^^^^^^^^^^^^^^^
``client_connection_duration``
**Histogram.** Time a client connection is open.

Queries and compilation
^^^^^^^^^^^^^^^^^^^^^^^

``edgeql_query_compilations_total``
**Counter.** Number of compiled/cached queries or scripts since instance
startup. A query is compiled and then cached on first use, increasing the
``path="compiler"`` parameter. Subsequent uses of the same query only use
the cache, thus only increasing the ``path="cache"`` parameter.



``edgeql_query_compilation_duration``
Deprecated in favor of ``query_compilation_duration[interface="edgeql"]``.

**Histogram.** Time it takes to compile an EdgeQL query or script, in
seconds.

``graphql_query_compilations_total``
**Counter.** Number of compiled/cached GraphQL queries since instance
startup. A query is compiled and then cached on first use, increasing the
``path="compiler"`` parameter. Subsequent uses of the same query only use
the cache, thus only increasing the ``path="cache"`` parameter.

``sql_queries_total``
**Counter.** Number of SQL queries since instance startup.

``sql_compilations_total``
**Counter.** Number of SQL compilations since instance startup.

``query_compilation_duration``
**Histogram.** Time it takes to compile a query or script, in seconds.

``queries_per_connection``
**Histogram.** Number of queries per connection.

``query_size``
**Histogram.** Number of bytes in a query. The ``interface`` label tells
which kind of query was measured: ``edgeql`` for an EdgeQL query,
``graphql`` for a GraphQL query, ``sql`` for a readonly SQL query from the
user, and ``compiled`` for a backend SQL query compiled and issued by the
server.

Auth Extension
^^^^^^^^^^^^^^

``auth_api_calls_total``
**Counter.** Number of API calls to the Auth extension.

``auth_ui_renders_total``
**Counter.** Number of UI pages rendered by the Auth extension.

``auth_providers``
**Gauge.** Number of Auth providers configured.

``auth_successful_logins_total``
**Counter.** Number of successful logins in the Auth extension.

Errors
^^^^^^

``background_errors_total``
**Counter.** Number of unhandled errors in background server routines.

``transaction_serialization_errors_total``
**Counter.** Number of transaction serialization errors.

``connection_errors_total``
**Counter.** Number of network connection errors.

.. _ref_reference_http_querying:

Querying
Expand Down
4 changes: 2 additions & 2 deletions edb/common/prometheus.py
Expand Up @@ -171,7 +171,7 @@ def new_labeled_gauge(
/,
*,
unit: Unit | None = None,
labels: tuple[str],
labels: tuple[str, ...],
) -> LabeledGauge:
gauge = LabeledGauge(self, name, desc, unit, labels=labels)
self._add_metric(gauge)
Expand All @@ -198,7 +198,7 @@ def new_labeled_histogram(
*,
unit: Unit | None = None,
buckets: list[float] | None = None,
labels: tuple[str],
labels: tuple[str, ...],
) -> LabeledHistogram:
hist = LabeledHistogram(
self, name, desc, unit, buckets=buckets, labels=labels
Expand Down
53 changes: 37 additions & 16 deletions edb/graphql/extension.pyx
Expand Up @@ -29,6 +29,7 @@ import cython
import http
import json
import logging
import time
import urllib.parse

from graphql.language import lexer as gql_lexer
Expand All @@ -37,7 +38,7 @@ from edb import _graphql_rewrite
from edb import errors
from edb.graphql import errors as gql_errors
from edb.server.dbview cimport dbview
from edb.server import compiler
from edb.server import compiler, metrics
from edb.server import defines as edbdef
from edb.server.pgcon import errors as pgerrors
from edb.server.protocol import execute
Expand Down Expand Up @@ -96,6 +97,7 @@ async def handle_request(
globals = None
deprecated_globals = None
query = None
query_bytes_len = 0

try:
if request.method == b'POST':
Expand All @@ -105,10 +107,12 @@ async def handle_request(
raise TypeError(
'the body of the request must be a JSON object')
query = body.get('query')
query_bytes_len = len(query.encode('utf-8'))
operation_name = body.get('operationName')
variables = body.get('variables')
deprecated_globals = body.get('globals')
elif request.content_type == 'application/graphql':
query_bytes_len = len(request.body)
query = request.body.decode('utf-8')
else:
raise TypeError(
Expand All @@ -122,6 +126,7 @@ async def handle_request(
query = qs.get('query')
if query is not None:
query = query[0]
query_bytes_len = len(query.encode('utf-8'))

operation_name = qs.get('operationName')
if operation_name is not None:
Expand All @@ -148,6 +153,9 @@ async def handle_request(

if not query:
raise TypeError('invalid GraphQL request: query is missing')
metrics.query_size.observe(
query_bytes_len, tenant.get_instance_name(), 'graphql'
)

if (operation_name is not None and
not isinstance(operation_name, str)):
Expand Down Expand Up @@ -234,21 +242,28 @@ async def compile(
):
server = tenant.server
compiler_pool = server.get_compiler_pool()
return await compiler_pool.compile_graphql(
db.name,
db.user_schema_pickle,
tenant.get_global_schema_pickle(),
db.reflection_cache,
db.db_config,
db._index.get_compilation_system_config(),
query,
tokens,
substitutions,
operation_name,
variables,
client_id=tenant.client_id,
)

started_at = time.monotonic()
try:
return await compiler_pool.compile_graphql(
db.name,
db.user_schema_pickle,
tenant.get_global_schema_pickle(),
db.reflection_cache,
db.db_config,
db._index.get_compilation_system_config(),
query,
tokens,
substitutions,
operation_name,
variables,
client_id=tenant.client_id,
)
finally:
metrics.query_compilation_duration.observe(
time.monotonic() - started_at,
tenant.get_instance_name(),
"graphql",
)

async def _execute(db, tenant, query, operation_name, variables, globals):
dbver = db.dbver
Expand Down Expand Up @@ -348,11 +363,17 @@ async def _execute(db, tenant, query, operation_name, variables, globals):
query_cache[cache_key2] = qug, gql_op
else:
query_cache[cache_key] = qug, gql_op
metrics.graphql_query_compilations.inc(
1.0, tenant.get_instance_name(), 'compiler'
)
else:
qug, gql_op = entry
# This is at least the second time this query is used
# and it's safe to cache.
use_prep_stmt = True
metrics.graphql_query_compilations.inc(
1.0, tenant.get_instance_name(), 'cache'
)

compiled = dbview.CompiledQuery(query_unit_group=qug)

Expand Down
2 changes: 2 additions & 0 deletions edb/server/dbview/dbview.pxd
Expand Up @@ -58,6 +58,7 @@ cdef class DatabaseIndex:
object _cached_compiler_args

cdef invalidate_caches(self)
cdef inline set_current_branches(self)


cdef class Database:
Expand Down Expand Up @@ -87,6 +88,7 @@ cdef class Database:
cdef _cache_compiled_query(self, key, compiled)
cdef _new_view(self, query_cache, protocol_version)
cdef _remove_view(self, view)
cdef _observe_auth_ext_config(self)
cdef _update_backend_ids(self, new_types)
cdef _set_and_signal_new_user_schema(
self,
Expand Down
37 changes: 37 additions & 0 deletions edb/server/dbview/dbview.pyx
Expand Up @@ -138,6 +138,7 @@ cdef class Database:
self.reflection_cache = reflection_cache
self.backend_ids = backend_ids
self.extensions = extensions
self._observe_auth_ext_config()

@property
def server(self):
Expand Down Expand Up @@ -176,8 +177,27 @@ cdef class Database:
self.reflection_cache = reflection_cache
if db_config is not None:
self.db_config = db_config
self._observe_auth_ext_config()
self._invalidate_caches()

cdef _observe_auth_ext_config(self):
    # Publish the number of configured auth-extension providers for this
    # database to the ``auth_providers`` metric.
    #
    # Nothing is recorded unless the database config and the user config
    # spec are both present and the spec contains the providers setting.
    setting = "ext::auth::AuthConfig::providers"
    if (
        self.db_config is None
        or self.user_config_spec is None
        or setting not in self.user_config_spec
    ):
        return

    configured = config.lookup(
        setting,
        self.db_config,
        spec=self.user_config_spec,
    )
    provider_count = len(configured)
    # Label values: the tenant's instance name, then this database's name.
    metrics.auth_providers.set(
        provider_count,
        self.tenant.get_instance_name(),
        self.name,
    )

cdef _update_backend_ids(self, new_types):
self.backend_ids.update(new_types)

Expand Down Expand Up @@ -1192,6 +1212,11 @@ cdef class DatabaseConnectionView:
time.monotonic() - started_at,
self.tenant.get_instance_name(),
)
metrics.query_compilation_duration.observe(
time.monotonic() - started_at,
self.tenant.get_instance_name(),
"edgeql",
)

unit_group, self._last_comp_state, self._last_comp_state_id = result

Expand Down Expand Up @@ -1352,10 +1377,22 @@ cdef class DatabaseIndex:
ext_config_settings=ext_config_settings,
)
self._dbs[dbname] = db
self.set_current_branches()
return db

def unregister_db(self, dbname):
    """Remove *dbname* from the index, then refresh the branch-count metric.

    Removing a name that is not registered raises the lookup error of the
    underlying mapping; the metric is not refreshed in that case.
    """
    del self._dbs[dbname]
    self.set_current_branches()

cdef inline set_current_branches(self):
    # Set the current-branches gauge to the number of registered
    # databases, leaving the system database out of the count.
    branch_count = len([
        name
        for name in self._dbs
        if name != defines.EDGEDB_SYSTEM_DB
    ])
    metrics.current_branches.set(
        branch_count,
        self._tenant.get_instance_name(),
    )

def iter_dbs(self):
return iter(self._dbs.values())
Expand Down

0 comments on commit df6a91a

Please sign in to comment.