Update status.io metrics upgrade script to use the dynamic infra metrics
- Use thanos instead of old prometheus
- Several webapps now expose the same metric in thanos, so the query is updated to take the max per `load_task_status` before summing, instead of summing the duplicated series from every webapp
- Support the future 'pending' status
Related to swh/infra/sysadm-environment#5227 (closed)
octocatalog diff
*** Running octocatalog-diff on host pergamon.softwareheritage.org
I, [2024-01-29T09:48:20.639931 #673046] INFO -- : Catalogs compiled for pergamon.softwareheritage.org
W, [2024-01-29T09:48:21.559760 #673046] WARN -- : Resource File[/tmp/ocd-ipc-20240129-673046-ct93lx/ocd-builddir-20240129-673053-1ncikmc/routes.yaml] appears to depend on catalog compilation directory. Suppressed from results.
W, [2024-01-29T09:48:21.559820 #673046] WARN -- : Resource File[/tmp/ocd-ipc-20240129-673046-ct93lx/ocd-builddir-20240129-673052-16c54jm/routes.yaml] appears to depend on catalog compilation directory. Suppressed from results.
W, [2024-01-29T09:48:21.559966 #673046] WARN -- : Resource key Ini_setting[puppetdbserver_urls] parameters => path may depend on catalog compilation directory, but there may be differences. This is included in results for now, but please verify.
W, [2024-01-29T09:48:21.559981 #673046] WARN -- : Resource key Ini_setting[puppetdbserver_urls] parameters => path appears to depend on catalog compilation directory. Suppressed from results.
W, [2024-01-29T09:48:21.560011 #673046] WARN -- : Resource key Ini_setting[soft_write_failure] parameters => path may depend on catalog compilation directory, but there may be differences. This is included in results for now, but please verify.
W, [2024-01-29T09:48:21.560021 #673046] WARN -- : Resource key Ini_setting[soft_write_failure] parameters => path appears to depend on catalog compilation directory. Suppressed from results.
I, [2024-01-29T09:48:21.560137 #673046] INFO -- : Diffs computed for pergamon.softwareheritage.org
diff origin/production/pergamon.softwareheritage.org current/pergamon.softwareheritage.org
*******************************************
Concat_fragment[profile::cron::statusio_scn_metrics] =>
parameters =>
content =>
@@ -1,2 +1,2 @@
# Cron snippet statusio_scn_metrics
-*/5 * * * * root chronic sh -c '/usr/local/bin/statusio_export_archive_counters.py -m swh_web_accepted_save_requests --api-id status_io::api_id --api-key status_io::api_key --status-page-id status_io::status_page --metric-id status_io::metrics::scn -f environment=production -f "load_task_status=~scheduled|not_yet_scheduled" -f instance=moma.internal.softwareheritage.org'__
+*/5 * * * * root chronic sh -c '/usr/local/bin/statusio_export_archive_counters.py --api-id status_io::api_id --api-key status_io::api_key --status-page-id status_io::status_page --metric-id status_io::metrics::scn -q "sum (max by (load_task_status) (swh_web_accepted_save_requests{environment="production", load_task_status=~"pending|scheduled|not_yet_scheduled"}))" -s thanos.internal.admin.swh.network -p 19191'__
*******************************************
File[/etc/bind/keys/local-update] =>
parameters =>
content =>
@@ -2,4 +2,4 @@
key local-update {
algorithm hmac-sha256;
- secret "0nYmgHmmAjpiH96vPDV8/PujaHgyVGQ/3yN/4QZeDGJvO5Gh8xQTwr+IXwVelCqPqDnfvhF37LkEAPrwxutT7w==";
+ secret "jUU2ArBP0s0yfm06aGkvx2E7OLpfrOtgrsHJ3DMUzneCpbzwBRBNIfI05GHli2hcnjsnyrRJrhD2yChYdXm+eg==";
};
*******************************************
File[/etc/bind/rndc.key] =>
parameters =>
content =>
@@ -2,4 +2,4 @@
key rndc-key {
algorithm hmac-md5;
- secret "6WCLMWnCIcaIf0HbKESBEPX6IMBaDWHbwL4f0dzlRyuvS2oZXr7bEAyCclg8esqC+3ctOVN5tBDJRKfOgWdGWA==";
+ secret "q79E6IvRibFbRWpfSSuA0FlmkncQtP6QX9P6utydjUrskZZK7bPtSB+mo0YbVobMzqyGNlCwn6qVRZuaeGQzDg==";
};
*******************************************
File[/usr/local/bin/statusio_export_archive_counters.py] =>
parameters =>
content =>
@@ -3,5 +3,5 @@
_
# python3 update_metrics.py -m swh_web_accepted_save_requests --api-id 1234 --api-key 456 --status-page-id 123 \
-# --metric-id 456 -f environment="production" -f "load_task_status=~scheduled|not_yet_scheduled" -f instance=moma.internal.softwareheritage.org
+# --metric-id 456 -f environment="production" -f "load_task_status=~scheduled|not_yet_scheduled|pending" -f instance=thanos.internal.softwareheritage.org
import statusio
import requests
@@ -23,30 +23,17 @@
_
_
-def escape_filter(filter: str) -> str:
- if "=~" in filter:
- separator = "=~"
- else:
- separator = "="
-
- terms = filter.split(separator)
-
- return f'{terms[0]}{separator}"{terms[1]}"'
-
_
def get_prometheus_values(
prometheus_url: str,
- metric: str,
- filters: List[str],
+ query: str,
start: int,
end: int,
interval: int,
) -> List[List]:
- escaped_filters = [escape_filter(filter) for filter in filters]
-
- metric_filters = ",".join(escaped_filters)
_
- url = f"{prometheus_url}?query=sum({metric}{{{metric_filters}}})&start={start}&end={end}&step={interval}"
+ url = f"{prometheus_url}?query={query}&start={start}&end={end}&step={interval}"
_
response = requests.get(url)
+
if response.ok == False:
raise ValueError(f"Unable to get prometheus metrics: {response.text}")
@@ -83,14 +70,8 @@
)
@click.option(
- "--prometheus-metric",
- "-m",
+ "--prometheus-query",
+ "-q",
required=True,
- help="Prometheus metric to query",
-)
-@click.option(
- "--prometheus-filter",
- "-f",
- multiple=True,
- help="Prometheus metric to query",
+ help="Prometheus query to select the metrics",
)
@click.option(
@@ -117,6 +98,5 @@
prometheus_server: str,
prometheus_port: int,
- prometheus_metric: str,
- prometheus_filter: List[str],
+ prometheus_query: str,
api_id: str,
api_key: str,
@@ -140,6 +120,5 @@
raw_values = get_prometheus_values(
prometheus_url,
- prometheus_metric,
- prometheus_filter,
+ prometheus_query,
day_start.timestamp(),
current_time.timestamp(),
@@ -152,6 +131,5 @@
raw_values = get_prometheus_values(
prometheus_url,
- prometheus_metric,
- prometheus_filter,
+ prometheus_query,
week_start.timestamp(),
current_time.timestamp(),
@@ -164,6 +142,5 @@
raw_values = get_prometheus_values(
prometheus_url,
- prometheus_metric,
- prometheus_filter,
+ prometheus_query,
month_start.timestamp(),
current_time.timestamp(),
@@ -191,5 +168,5 @@
)
_
- # this line will be sent by email via cron_
+ # this line will be sent by email via cron
# if the return code is not 0
print(result)
*******************************************
Profile::Cron::D[statusio_scn_metrics] =>
parameters =>
command =>
- chronic sh -c '/usr/local/bin/statusio_export_archive_counters.py -m swh_web_accepted_save_requests --api-id status_io::api_id --api-key status_io::api_key --status-page-id status_io::status_page --metric-id status_io::metrics::scn -f environment=production -f "load_task_status=~scheduled|not_yet_scheduled" -f instance=moma.internal.softwareheritage.org'__
+ chronic sh -c '/usr/local/bin/statusio_export_archive_counters.py --api-id status_io::api_id --api-key status_io::api_key --status-page-id status_io::status_page --metric-id status_io::metrics::scn -q "sum (max by (load_task_status) (swh_web_accepted_save_requests{environment="production", load_task_status=~"pending|scheduled|not_yet_scheduled"}))" -s thanos.internal.admin.swh.network -p 19191'__
*******************************************
*** End octocatalog-diff on pergamon.softwareheritage.org