Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion bigquery_etl/cli/stage.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,7 @@ def _view_dependencies(artifact_files, sql_dir):
)
project, dataset, name = dependency_components

file_path = Path(view.path).parent.parent.parent / dataset / name
file_path = Path(sql_dir) / project / dataset / name

file_exists_for_dependency = False
for file in [VIEW_FILE, QUERY_FILE, QUERY_SCRIPT, MATERIALIZED_VIEW]:
Expand Down
24 changes: 24 additions & 0 deletions sql_generators/telemetry_health/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Telemetry Health

Generates queries for the Glean Health Scorecard, which monitors telemetry quality across Firefox applications.

Each query combines data from Firefox Desktop, Firefox Android (Fenix), and Firefox iOS into a single table with an `application` column for filtering. All queries use a 1% sample (`sample_id = 0`).

## Generated Queries

Generated tables are written to `monitoring_derived` with user-facing views in `monitoring`.

- `glean_errors_v1`: Counts Glean metrics (from metrics pings) with recording errors exceeding 1% of sampled clients per day
- `ping_latency_v1`: Reports latency percentiles (p95, median) for baseline pings: collection-to-submission, submission-to-ingestion, and collection-to-ingestion
- `ping_volume_p80_v1`: 80th percentile of baseline ping count per client per day
- `sequence_holes_v1`: Percentage of sampled clients with sequence number gaps in baseline pings within a single day

## Running the Generator

```bash
./bqetl generate telemetry_health
```

## Configuration

Application list and query settings are defined in `templates/templating.yaml`.
102 changes: 102 additions & 0 deletions sql_generators/telemetry_health/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
"""Telemetry health scorecard query generation."""

import os
from pathlib import Path

import click
import yaml
from jinja2 import Environment, FileSystemLoader

from bigquery_etl.cli.utils import use_cloud_function_option
from bigquery_etl.format_sql.formatter import reformat
from bigquery_etl.util.common import write_sql

FILE_PATH = Path(os.path.dirname(__file__))


def generate_queries(project, path, write_dir):
"""Generate telemetry health queries."""
with open(Path(path) / "templating.yaml", "r") as f:
template_config = yaml.safe_load(f) or {}

applications = template_config.get("applications", [])
application_names = template_config.get("application_names", {})

for query, args in template_config.get("queries", {}).items():
template_query_dir = FILE_PATH / "templates" / query
env = Environment(
loader=FileSystemLoader(template_query_dir),
keep_trailing_newline=True,
)

sql_template = env.get_template("query.sql")
metadata_template = env.get_template("metadata.yaml")

destination_dataset = args.get("destination_dataset", "monitoring_derived")

render_args = {
"project_id": project,
"applications": applications,
"application_names": application_names,
"destination_table": query,
**args,
}

write_sql(
write_dir / project,
f"{project}.{destination_dataset}.{query}",
"query.sql",
reformat(sql_template.render(**render_args)),
)

write_path = Path(write_dir) / project / destination_dataset / query
(write_path / "metadata.yaml").write_text(
metadata_template.render(**render_args)
)

schema_path = template_query_dir / "schema.yaml"
if schema_path.exists():
(write_path / "schema.yaml").write_text(schema_path.read_text())

# Generate user-facing view (strip _derived suffix and _v1 from name)
view_dataset = destination_dataset.replace("_derived", "")
view_name = query.rsplit("_v", 1)[0]
view_sql = f"""CREATE OR REPLACE VIEW
`{project}.{view_dataset}.{view_name}`
AS
SELECT
*
FROM
`{project}.{destination_dataset}.{query}`
"""
view_path = Path(write_dir) / project / view_dataset / view_name
view_path.mkdir(parents=True, exist_ok=True)
(view_path / "view.sql").write_text(view_sql)


@click.command("generate")
@click.option(
"--target-project",
"--target_project",
help="Which project the queries should be written to.",
default="moz-fx-data-shared-prod",
)
@click.option(
"--path",
help="Where query directories will be searched for.",
default="sql_generators/telemetry_health/templates",
required=False,
type=click.Path(file_okay=False),
)
@click.option(
"--output-dir",
"--output_dir",
help="The location to write to. Defaults to sql/.",
default=Path("sql"),
type=click.Path(file_okay=False),
)
@use_cloud_function_option
def generate(target_project, path, output_dir, use_cloud_function):
"""Generate the telemetry health queries."""
output_dir = Path(output_dir)
generate_queries(target_project, path, output_dir)
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
friendly_name: Telemetry Health Glean Errors
description: |
Counts the number of Glean metrics with recording errors that exceed 1% of clients per day.
Combines data from Firefox Desktop, Firefox Android, and Firefox iOS.
owners:
- [email protected]
labels:
incremental: true
schedule: daily
scheduling:
dag_name: bqetl_monitoring
bigquery:
time_partitioning:
type: day
field: submission_date
require_partition_filter: true
expiration_days: 775
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
-- Query for telemetry health glean errors across all applications
{% for app_id in applications %}
(
WITH sample AS (
SELECT
"{{ application_names[app_id] }}" AS application,
client_info.client_id,
normalized_channel,
DATE(submission_timestamp) AS submission_date,
metrics.labeled_counter.glean_error_invalid_value AS ev,
metrics.labeled_counter.glean_error_invalid_label AS el,
metrics.labeled_counter.glean_error_invalid_state AS es,
metrics.labeled_counter.glean_error_invalid_overflow AS eo
FROM
`{{ project_id }}.{{ app_id }}_stable.metrics_v1`
WHERE
sample_id = 0
AND DATE(submission_timestamp) = @submission_date
),
-- Denominator: distinct clients per app and day
app_day_totals AS (
SELECT
application,
submission_date,
normalized_channel,
COUNT(DISTINCT client_id) AS total_clients
FROM
sample
GROUP BY
application,
submission_date,
normalized_channel
),
-- Numerator per metric key: distinct clients with any error for that key on that day
metric_clients_by_day AS (
SELECT
s.application,
s.normalized_channel,
s.submission_date,
e.key AS metric_key,
COUNT(DISTINCT s.client_id) AS clients_with_error
FROM
sample AS s
JOIN
UNNEST(ARRAY_CONCAT(IFNULL(ev, []), IFNULL(el, []), IFNULL(es, []), IFNULL(eo, []))) AS e
WHERE
NOT STARTS_WITH(e.key, 'glean')
AND NOT STARTS_WITH(e.key, 'fog')
AND e.value > 0
GROUP BY
s.application,
s.submission_date,
s.normalized_channel,
metric_key
)
SELECT
m.application,
m.normalized_channel,
m.submission_date,
COUNTIF(SAFE_DIVIDE(m.clients_with_error, t.total_clients) > 0.01) AS num_metrics_over_1pct
FROM
metric_clients_by_day AS m
JOIN
app_day_totals AS t
USING (application, submission_date, normalized_channel)
GROUP BY
m.application,
m.submission_date,
m.normalized_channel
)
{% if not loop.last %}
UNION ALL
{% endif %}
{% endfor %}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
fields:
- name: application
type: STRING
mode: NULLABLE
- name: normalized_channel
type: STRING
mode: NULLABLE
- name: submission_date
type: DATE
mode: NULLABLE
- name: num_metrics_over_1pct
type: INTEGER
mode: NULLABLE
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
friendly_name: Telemetry Health Ping Latency
description: |
Reports latency percentiles (p95, median) for collection-to-submission, submission-to-ingestion,
and collection-to-ingestion for telemetry pings per day.
Combines data from Firefox Desktop, Firefox Android, and Firefox iOS.
owners:
- [email protected]
labels:
incremental: true
schedule: daily
scheduling:
dag_name: bqetl_monitoring
bigquery:
time_partitioning:
type: day
field: submission_date
require_partition_filter: true
expiration_days: 775
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
-- Query for telemetry health ping latency across all applications
{% for app_id in applications %}
(
WITH sample AS (
SELECT
"{{ application_names[app_id] }}" AS application,
normalized_channel AS channel,
metadata.header.parsed_date,
ping_info.parsed_end_time,
submission_timestamp,
FROM
`{{ project_id }}.{{ app_id }}.baseline`
WHERE
sample_id = 0
AND DATE(submission_timestamp) = @submission_date
),
latency_quantiles AS (
SELECT
application,
channel,
DATE(submission_timestamp) AS submission_date,
APPROX_QUANTILES(
TIMESTAMP_DIFF(parsed_date, parsed_end_time, SECOND),
100
) AS collection_to_submission_latency,
APPROX_QUANTILES(
TIMESTAMP_DIFF(submission_timestamp, parsed_date, SECOND),
100
) AS submission_to_ingestion_latency,
APPROX_QUANTILES(
TIMESTAMP_DIFF(submission_timestamp, parsed_end_time, SECOND),
100
) AS collection_to_ingestion_latency
FROM
sample
GROUP BY
application,
channel,
submission_date
)
SELECT
application,
channel,
submission_date,
collection_to_submission_latency[OFFSET(95)] AS collection_to_submission_latency_p95,
collection_to_submission_latency[OFFSET(50)] AS collection_to_submission_latency_median,
submission_to_ingestion_latency[OFFSET(95)] AS submission_to_ingestion_latency_p95,
submission_to_ingestion_latency[OFFSET(50)] AS submission_to_ingestion_latency_median,
collection_to_ingestion_latency[OFFSET(95)] AS collection_to_ingestion_latency_p95,
collection_to_ingestion_latency[OFFSET(50)] AS collection_to_ingestion_latency_median
FROM
latency_quantiles
)
{% if not loop.last %}
UNION ALL
{% endif %}
{% endfor %}
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
fields:
- name: application
type: STRING
mode: NULLABLE
- name: channel
type: STRING
mode: NULLABLE
- name: submission_date
type: DATE
mode: NULLABLE
- name: collection_to_submission_latency_p95
type: INTEGER
mode: NULLABLE
- name: collection_to_submission_latency_median
type: INTEGER
mode: NULLABLE
- name: submission_to_ingestion_latency_p95
type: INTEGER
mode: NULLABLE
- name: submission_to_ingestion_latency_median
type: INTEGER
mode: NULLABLE
- name: collection_to_ingestion_latency_p95
type: INTEGER
mode: NULLABLE
- name: collection_to_ingestion_latency_median
type: INTEGER
mode: NULLABLE
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
friendly_name: Telemetry Health Ping Volume P80
description: |
Calculates the 80th percentile of ping volume per client per day.
Combines data from Firefox Desktop, Firefox Android, and Firefox iOS.
owners:
- [email protected]
labels:
incremental: true
schedule: daily
scheduling:
dag_name: bqetl_monitoring
bigquery:
time_partitioning:
type: day
field: submission_date
require_partition_filter: true
expiration_days: 775
Loading