From a6d6265ed6ae16873f90ffc5ea837ef4efe104cd Mon Sep 17 00:00:00 2001 From: Shuhei Kitagawa Date: Wed, 29 Apr 2026 10:32:10 +0200 Subject: [PATCH 01/54] Add metrics migration section 1 dependencies --- TASKS.md | 395 ++++++++++++++++++++++++++++ quickwit/Cargo.lock | 162 +++++++++++- quickwit/Cargo.toml | 6 + quickwit/quickwit-cli/Cargo.toml | 2 + quickwit/quickwit-common/Cargo.toml | 5 + 5 files changed, 565 insertions(+), 5 deletions(-) create mode 100644 TASKS.md diff --git a/TASKS.md b/TASKS.md new file mode 100644 index 00000000000..037bc57f444 --- /dev/null +++ b/TASKS.md @@ -0,0 +1,395 @@ +# Metrics-RS Macro Migration Tasks + +This task list breaks `PLAN.md` into reviewable sections. Each section should be implemented and reviewed independently unless its prerequisites say otherwise. + +## Section 0: Ground Rules And Compatibility Targets + +Status: Done. + +Goal: make the migration safe to review by preserving externally visible behavior. + +Tasks: +- [x] Preserve metric names as `quickwit_{subsystem}_{name}`. +- [x] Preserve empty-subsystem names as `quickwit_{name}`, without a double underscore. +- [x] Preserve existing label names and label values. +- [x] Preserve `/metrics` as the Prometheus text endpoint. +- [x] Do not preserve the old `new_counter`, `new_gauge`, `new_histogram`, or `*Vec` APIs as compatibility shims unless a later section proves a temporary shim is needed for incremental compilation. +- [x] Treat Quickwit metric ingestion features and metrics index logic as unrelated to this migration. + +Review checklist: +- [x] No compatibility target is contradicted by later implementation sections. +- [x] Any intentional metric name, label, or bucket change is explicitly called out in the PR. + +## Section 1: Add Workspace Dependencies + +Status: Done. + +Goal: add the metrics-rs exporter and metricspp support dependencies without changing behavior. 
+ +Tasks: +- [x] Add workspace dependencies in `quickwit/Cargo.toml`: + - [x] `metrics-util` + - [x] `metrics-exporter-prometheus` + - [x] `metrics-exporter-otel` + - [x] `inventory` + - [x] `const_format` + - [x] `atomic_float` +- [x] Add the needed dependencies to `quickwit/quickwit-common/Cargo.toml`. +- [x] Add exporter dependencies to the crate that installs recorders, expected to be `quickwit-cli`. +- [x] Keep `prometheus` temporarily until all direct usages are migrated. +- [x] Run a dependency update/build check to refresh `quickwit/Cargo.lock`. + +Validation: +- [x] `cargo check -p quickwit-common` +- [x] `cargo check -p quickwit-cli` + +Review checklist: +- [x] Dependency versions match the existing workspace style. +- [x] New dependencies are only added to crates that use them. + +## Section 2: Port Metricspp Core Into quickwit-common + +Goal: implement the new `quickwit_common::metrics` API while keeping the scope local to `quickwit-common`. + +Tasks: +- Move the current `quickwit/quickwit-common/src/metrics.rs` implementation into a module layout such as `quickwit/quickwit-common/src/metrics/`. +- Port metricspp core types: + - `Counter` + - `Gauge` + - `Histogram` + - `GaugeGuard` + - observable `CounterShadow` and `GaugeShadow` + - `MetricInfo` + - `HistogramConfig` +- Port macros: + - `counter!` + - `gauge!` + - `histogram!` + - hidden helper macros for key names, metadata, and label counts. +- Keep the public module path as `quickwit_common::metrics`. +- Implement `SYSTEM = "quickwit"`. +- Implement `describe_metrics()`. +- Implement `metrics_info()` and `histogram_buckets()` for inventory introspection. +- Ensure the macros work for: + - base metrics with no labels + - base metrics with static labels + - parent metrics with dynamic label values + - nested parent extension + - observable counters and gauges +- Add or keep bucket helper functions if call sites still rely on `linear_buckets` and `exponential_buckets`. 
+ +Quickwit-specific adjustments: +- Extend `GaugeGuard` or add an equivalent guard so existing in-flight byte/count use cases can add and subtract variable deltas over the guard lifetime. +- Decide how to handle `OwnedGaugeGuard` use sites. Prefer adapting the new guard to cover the same behavior instead of keeping a Prometheus-specific type. +- Provide a histogram timer helper if needed by existing `start_timer()` call sites, or plan those call sites for manual `Instant` plus `record()`. + +Validation: +- `cargo test -p quickwit-common metrics` + +Review checklist: +- The port is inside `quickwit-common`; no new Cargo crate is introduced. +- The API does not expose Prometheus crate types. +- `observable: true` is opt-in and only affects counters/gauges. + +## Section 3: Port Metricspp Unit Tests + +Goal: validate the new metrics primitives before migrating call sites. + +Tasks: +- Port metricspp tests into `quickwit-common`. +- Cover: + - counter increment and absolute values + - gauge set/increment/decrement + - histogram record + - static labels + - parent labels + - dynamic parent labels + - nested parent extension + - observable counter and gauge `get()` + - non-observable sentinel values + - `GaugeGuard` + - histogram bucket inventory + - `describe_metrics()` +- Use `metrics_util::debugging::DebuggingRecorder` for value assertions where possible. + +Validation: +- `cargo test -p quickwit-common metrics` + +Review checklist: +- Tests assert key names include the `quickwit` prefix. +- Tests cover empty subsystem behavior. +- Tests do not depend on the Prometheus crate registry. + +## Section 4: Install Global Metrics Recorder And Prometheus Handle + +Goal: install one global metrics-rs recorder during CLI startup and keep `/metrics` working. + +Tasks: +- Replace Prometheus crate registration/gathering with a metrics-rs recorder setup. +- Install a Prometheus recorder in `quickwit-cli` startup. 
+- Store a `metrics_exporter_prometheus::PrometheusHandle` somewhere accessible to `quickwit_common::metrics::metrics_text_payload()` or directly to `quickwit-serve`'s metrics handler. +- Configure Prometheus histogram buckets from `quickwit_common::metrics::histogram_buckets()` before metrics are first used. +- Call `quickwit_common::metrics::describe_metrics()` after installing the recorder. +- Keep the existing `/metrics` route in `quickwit-serve`. +- Replace `quickwit_common::metrics::metrics_text_payload()` internals with Prometheus handle rendering. + +Ordering risk: +- Metrics declared through `LazyLock` register on first access. The global recorder must be installed before production metrics are first accessed. Check current startup order around runtime metrics, build info metrics, jemalloc metrics, and CLI setup. + +Validation: +- `cargo test -p quickwit-serve metrics_api` +- A manual or integration scrape returns Prometheus text. + +Review checklist: +- There is exactly one global recorder installation path in production startup. +- `/metrics` does not call `prometheus::gather()`. +- Histogram buckets are configured before metrics registration. + +## Section 5: Preserve DogStatsD And Invariant Metrics + +Goal: keep existing DogStatsD and invariant behavior while using the new recorder path. + +Tasks: +- Replace direct `metrics_exporter_dogstatsd::DogStatsDBuilder::install()` if it conflicts with the single global recorder. +- Fan out metrics to: + - Prometheus + - DogStatsD + - optional OTLP metrics + - the existing invariant recorder path +- Keep existing DogStatsD global labels and prefix behavior. +- Update invariant metrics from raw `metrics::counter!` calls only if needed to fit the fanout recorder. + +Validation: +- `cargo test -p quickwit-cli logger` +- Existing invariant tests, if any, still pass. + +Review checklist: +- DogStatsD is not silently disabled. +- Existing global labels are preserved. 
+- Recorder fanout does not double-record any metric. + +## Section 6: Add Optional OTLP Metrics Export + +Goal: add OTLP metrics export behind the existing telemetry flag. + +Tasks: +- Enable OTLP metrics only when `QW_ENABLE_OPENTELEMETRY_OTLP_EXPORTER=true`. +- Read `OTEL_EXPORTER_OTLP_METRICS_PROTOCOL`. +- Fall back to `OTEL_EXPORTER_OTLP_PROTOCOL`. +- Support: + - `grpc` + - `http/protobuf` + - `http/json` +- Reuse the existing protocol parsing style from tracing/log export where possible. +- Configure OTLP histogram buckets from `quickwit_common::metrics::histogram_buckets()`. +- Add in-memory exporter tests if the dependency stack supports it cleanly. + +Validation: +- `cargo test -p quickwit-cli logger` +- OTLP metrics test with in-memory or local test exporter. + +Review checklist: +- OTLP traces/logs behavior remains unchanged. +- Metrics protocol env vars do not affect trace/log protocol selection. +- Unsupported protocol errors are clear. + +## Section 7: Replace Build Info Metric + +Goal: replace `register_info("build_info", ...)` with the new metrics API. + +Tasks: +- Remove `register_info` usage from `quickwit-cli`. +- Declare a build info counter metric with build labels. +- Set the metric to `1` using the new counter API. +- Preserve the Prometheus output shape as closely as possible: + - name: `quickwit_build_info` + - labels: build date, commit hash, version, optional tags, target + - value: `1` +- Account for dynamic label values from `BuildInfo`. + +Validation: +- `/metrics` output contains `quickwit_build_info`. +- `cargo test -p quickwit-cli logger` + +Review checklist: +- Build labels match the previous labels. +- The metric is registered after the global recorder is installed. + +## Section 8: Migrate quickwit-common Internal Metrics + +Goal: convert `quickwit-common` call sites and helpers to the new API. + +Tasks: +- Convert `quickwit-common/src/metrics.rs` consumers to the new module layout. 
+- Convert `MEMORY_METRICS` and in-flight gauges. +- Convert `quickwit-common/src/tower/metrics.rs`. +- Convert `quickwit-common/src/tower/circuit_breaker.rs`. +- Convert `quickwit-common/src/thread_pool.rs`. +- Convert `quickwit-common/src/stream_utils.rs`. +- Remove direct Prometheus crate imports from `quickwit-common`. + +Special cases: +- Tests in tower metrics currently read `.get()` from counters. Mark those test metrics `observable: true` or use `DebuggingRecorder`. +- In-flight data guards rely on variable add/sub behavior. + +Validation: +- `cargo test -p quickwit-common metrics` +- `cargo test -p quickwit-common tower` +- `cargo check -p quickwit-common --all-features` + +Review checklist: +- `quickwit-common` no longer exposes Prometheus types. +- Guard behavior is preserved for byte accounting. + +## Section 9: Migrate Server And Search Metrics + +Goal: migrate the REST/gRPC/search-facing metric modules and their call sites. + +Tasks: +- Convert `quickwit-serve/src/metrics.rs`. +- Convert HTTP request metrics in `quickwit-serve/src/rest.rs`. +- Convert `quickwit-serve` circuit breaker metric call sites. +- Convert `quickwit-search/src/metrics.rs`. +- Convert `quickwit-search/src/metrics_trackers.rs`. +- Convert search permit and scroll context gauge guards. +- Convert histogram timers in search code. + +Special cases: +- `SplitSearchOutcomeCounters` has local unregistered counters. Decide whether to replace with observable local counters, `DebuggingRecorder` in tests, or a small non-exported local helper type. + +Validation: +- `cargo test -p quickwit-serve metrics_api` +- `cargo test -p quickwit-search metrics` +- `cargo check -p quickwit-serve --all-features` +- `cargo check -p quickwit-search --all-features` + +Review checklist: +- HTTP metric names and labels match previous output. +- Search display/debug code that reads counters remains correct. 
+ +## Section 10: Migrate Indexing, Ingest, And Storage Metrics + +Goal: migrate the high-volume ingestion, indexing, and storage metric modules. + +Tasks: +- Convert `quickwit-indexing/src/metrics.rs`. +- Convert indexing actors that use counters, gauges, and gauge guards. +- Convert `quickwit-ingest/src/metrics.rs`. +- Convert `quickwit-ingest/src/ingest_v2/metrics.rs`. +- Convert ingest router, ingester, and replication call sites. +- Convert `quickwit-storage/src/metrics.rs`. +- Convert object storage request counters and histograms. +- Convert cache metrics. +- Convert histogram timers in object storage code. + +Validation: +- `cargo check -p quickwit-indexing --all-features` +- `cargo check -p quickwit-ingest --all-features` +- `cargo check -p quickwit-storage --all-features` +- Run focused unit tests in changed modules. + +Review checklist: +- High-cardinality dynamic labels use parent extension consistently. +- Per-index metric behavior through `index_label()` is preserved. +- In-flight byte gauges balance on drop as before. + +## Section 11: Migrate Remaining Service Crates + +Goal: remove Prometheus-specific metric types from the remaining Quickwit-owned crates. + +Tasks: +- Convert `quickwit-jaeger/src/metrics.rs` and Jaeger call sites. +- Convert `quickwit-cluster/src/metrics.rs` and cluster call sites. +- Convert `quickwit-actors` mailbox/backpressure metrics. +- Convert `quickwit-opentelemetry` OTLP ingest metrics. +- Convert `quickwit-lambda-client` metrics. +- Convert `quickwit-parquet-engine` metrics. +- Convert `quickwit-metastore` Postgres metrics. +- Convert `quickwit-control-plane` metrics. +- Convert `quickwit-janitor` metrics if present. 
+ +Validation: +- `cargo check -p quickwit-jaeger --all-features` +- `cargo check -p quickwit-cluster --all-features` +- `cargo check -p quickwit-actors --all-features` +- `cargo check -p quickwit-opentelemetry --all-features` +- `cargo check -p quickwit-lambda-client --all-features` +- `cargo check -p quickwit-parquet-engine --all-features` +- `cargo check -p quickwit-metastore --all-features` +- `cargo check -p quickwit-control-plane --all-features` + +Review checklist: +- No remaining Quickwit-owned metric modules import Prometheus crate metric types. +- Counter/gauge/histogram operation names are migrated consistently. + +## Section 12: Remove Direct Prometheus Usage From Quickwit-Owned Code + +Goal: finish the backend migration and remove obsolete dependencies. + +Tasks: +- Search for direct Prometheus imports: + - `prometheus::` + - `IntCounter` + - `IntGauge` + - `HistogramVec` + - `IntCounterVec` + - `IntGaugeVec` + - `new_counter` + - `new_gauge` + - `new_histogram` + - `register_info` +- Remove old constructor functions and vector wrapper types. +- Remove `prometheus` from `quickwit-common` dependencies if no longer needed. +- Keep any third-party or generated Prometheus references only if they are not Quickwit-owned migration targets. +- Update docs and comments that describe Prometheus crate semantics. + +Validation: +- `rg "prometheus::|IntCounter|IntGauge|HistogramVec|IntCounterVec|IntGaugeVec|new_counter|new_gauge|new_histogram|register_info" quickwit -g '*.rs'` +- `cargo check --workspace --all-features` + +Review checklist: +- No compatibility shim remains by accident. +- Dependency cleanup does not remove unrelated Prometheus functionality. + +## Section 13: Add /metrics Integration Coverage + +Goal: prove the Prometheus rendering contract survived the migration. + +Tasks: +- Add integration tests for `/metrics` output. 
+- Assert: + - metric names + - labels + - descriptions/help text + - histogram bucket boundaries + - build info metric +- Include at least one counter, gauge, and histogram. +- Include at least one labeled child metric. + +Validation: +- `cargo test -p quickwit-serve metrics_api` +- Relevant integration test target if tests live outside `quickwit-serve`. + +Review checklist: +- Tests validate Prometheus text output, not internal implementation details. +- Tests avoid depending on global metric state in a flaky way. + +## Section 14: Final Workspace Verification + +Goal: verify the whole migration after all sections land. + +Tasks: +- Run focused tests: + - `cargo test -p quickwit-common metrics` + - `cargo test -p quickwit-serve metrics_api` + - `cargo test -p quickwit-cli logger` +- Run broader checks: + - `cargo clippy --workspace --tests --all-features` + - `make fmt` +- If runtime confidence is needed, start a local Quickwit node and scrape `/metrics`. +- Document any tests that are skipped due to environment requirements. + +Review checklist: +- Formatting uses the repository entrypoint `make fmt`. +- Clippy and tests are reported with exact failures if they cannot fully pass. +- The final PR description lists any expected metric-output differences. 
diff --git a/quickwit/Cargo.lock b/quickwit/Cargo.lock index 0a6807c3884..298ca6cc003 100644 --- a/quickwit/Cargo.lock +++ b/quickwit/Cargo.lock @@ -704,6 +704,12 @@ version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" +[[package]] +name = "atomic_float" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "628d228f918ac3b82fe590352cc719d30664a0c13ca3a60266fe02c7132d480a" + [[package]] name = "autocfg" version = "1.5.0" @@ -2324,6 +2330,27 @@ version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "413d67b29ef1021b4d60f4aa1e925ca031751e213832b4b1d588fae623c05c60" +[[package]] +name = "const_format" +version = "0.2.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4481a617ad9a412be3b97c5d403fef8ed023103368908b9c50af598ff467cc1e" +dependencies = [ + "const_format_proc_macros", + "konst", +] + +[[package]] +name = "const_format_proc_macros" +version = "0.2.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d57c2eccfb16dbac1f4e61e206105db5820c9d26c3c472bc17c774259ef7744" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + [[package]] name = "constant_time_eq" version = "0.4.2" @@ -2454,6 +2481,12 @@ dependencies = [ "itertools 0.13.0", ] +[[package]] +name = "critical-section" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "790eea4361631c5e7d22598ecd5723ff611904e3344ce8720784c93e3d83d40b" + [[package]] name = "cron" version = "0.16.0" @@ -4034,6 +4067,17 @@ dependencies = [ "pin-project-lite", ] +[[package]] +name = "evmap" +version = "11.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b8874945f036109c72242964c1174cf99434e30cfa45bf45fedc983f50046f8" +dependencies = [ + "hashbag", + "left-right", + "smallvec", +] + 
[[package]] name = "fail" version = "0.5.1" @@ -4414,6 +4458,21 @@ dependencies = [ "slab", ] +[[package]] +name = "generator" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52f04ae4152da20c76fe800fa48659201d5cf627c5149ca0b707b69d7eef6cf9" +dependencies = [ + "cc", + "cfg-if", + "libc", + "log", + "rustversion", + "windows-link", + "windows-result", +] + [[package]] name = "generic-array" version = "0.14.7" @@ -4694,6 +4753,12 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "hashbag" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7040a10f52cba493ddb09926e15d10a9d8a28043708a405931fe4c6f19fac064" + [[package]] name = "hashbrown" version = "0.12.3" @@ -5572,6 +5637,21 @@ dependencies = [ "cpufeatures 0.2.17", ] +[[package]] +name = "konst" +version = "0.2.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "128133ed7824fcd73d6e7b17957c5eb7bacb885649bd8c69708b2331a10bcefb" +dependencies = [ + "konst_macro_rules", +] + +[[package]] +name = "konst_macro_rules" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4933f3f57a8e9d9da04db23fb153356ecaf00cbd14aee46279c33dc80925c37" + [[package]] name = "krb5-src" version = "0.3.4" @@ -5676,6 +5756,17 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" +[[package]] +name = "left-right" +version = "0.11.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f0c21e4c8ff95f487fb34e6f9182875f42c84cef966d29216bf115d9bba835a" +dependencies = [ + "crossbeam-utils", + "loom", + "slab", +] + [[package]] name = "levenshtein_automata" version = "0.2.1" @@ -5860,6 +5951,19 @@ version = "0.4.29" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" +[[package]] +name = "loom" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "419e0dc8046cb947daa77eb95ae174acfbddb7673b4151f56d1eed8e93fbfaca" +dependencies = [ + "cfg-if", + "generator", + "scoped-tls", + "tracing", + "tracing-subscriber", +] + [[package]] name = "lru" version = "0.16.4" @@ -5985,12 +6089,12 @@ dependencies = [ [[package]] name = "metrics" -version = "0.24.3" +version = "0.24.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d5312e9ba3771cfa961b585728215e3d972c950a3eed9252aa093d6301277e8" +checksum = "b7cd3e9eb685089c784f5769b1197d348c7274bc20d4e1349650f63b91b6d0af" dependencies = [ - "ahash", "portable-atomic", + "rapidhash", ] [[package]] @@ -6008,11 +6112,39 @@ dependencies = [ "tracing", ] +[[package]] +name = "metrics-exporter-otel" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "58b8984fa38406b80c094943c0ba90e53d5fff0aea051ff9fac96cf6940993c8" +dependencies = [ + "metrics", + "metrics-util", + "opentelemetry", + "portable-atomic", + "scc", +] + +[[package]] +name = "metrics-exporter-prometheus" +version = "0.18.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c0ca2990f7f78a72c4000ddce186db7d1b700477426563ee851c95ea3c0d0c4" +dependencies = [ + "base64 0.22.1", + "evmap", + "indexmap 2.14.0", + "metrics", + "metrics-util", + "quanta", + "thiserror 2.0.18", +] + [[package]] name = "metrics-util" -version = "0.20.1" +version = "0.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdfb1365fea27e6dd9dc1dbc19f570198bc86914533ad639dae939635f096be4" +checksum = "55ff5c12b797ebf094dc7c1d87e905efc0329cba332f96d51db03875441012b5" dependencies = [ "aho-corasick", "crossbeam-epoch", @@ -6025,6 +6157,7 @@ dependencies = [ "radix_trie", "rand 0.9.4", "rand_xoshiro", + "rapidhash", 
"sketches-ddsketch 0.3.1", ] @@ -7609,6 +7742,9 @@ name = "portable-atomic" version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" +dependencies = [ + "critical-section", +] [[package]] name = "portable-atomic-util" @@ -8210,6 +8346,8 @@ dependencies = [ "itertools 0.14.0", "metrics", "metrics-exporter-dogstatsd", + "metrics-exporter-otel", + "metrics-exporter-prometheus", "numfmt", "openssl-probe 0.1.6", "opentelemetry", @@ -8322,9 +8460,11 @@ dependencies = [ "anyhow", "async-speed-limit", "async-trait", + "atomic_float", "backtrace", "bytesize", "coarsetime", + "const_format", "dyn-clone", "env_logger", "fnv", @@ -8335,7 +8475,10 @@ dependencies = [ "http 1.4.0", "hyper 1.9.0", "hyper-util", + "inventory", "itertools 0.14.0", + "metrics", + "metrics-util", "pin-project", "pnet", "prometheus", @@ -9445,6 +9588,15 @@ dependencies = [ "rand_core 0.9.5", ] +[[package]] +name = "rapidhash" +version = "4.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5e48930979c155e2f33aa36ab3119b5ee81332beb6482199a8ecd6029b80b59" +dependencies = [ + "rustversion", +] + [[package]] name = "raw-cpuid" version = "11.6.0" diff --git a/quickwit/Cargo.toml b/quickwit/Cargo.toml index 04244aeee40..59d3ce85d44 100644 --- a/quickwit/Cargo.toml +++ b/quickwit/Cargo.toml @@ -97,6 +97,7 @@ assert-json-diff = "2" async-compression = { version = "0.4", features = ["tokio", "gzip"] } async-speed-limit = "0.4" async-trait = "0.1" +atomic_float = "1.1" backtrace = "0.3" base64 = "0.22" binggan = { version = "0.15" } @@ -113,6 +114,7 @@ clap = { version = "4.5", features = ["env", "string"] } coarsetime = "0.1" colored = "3.0" console-subscriber = "0.5" +const_format = "0.2" criterion = { version = "0.8", features = ["async_tokio"] } cron = "0.16" dialoguer = { version = "0.12", default-features = false } @@ -154,6 +156,7 @@ hyper-util = { version = "0.1", 
default-features = false, features = [ ] } indexmap = { version = "2.12", features = ["serde"] } indicatif = "0.18" +inventory = "0.3" itertools = "0.14" lambda_runtime = "0.13" json_comments = "0.2" @@ -163,6 +166,9 @@ matches = "0.1" md5 = "0.8" metrics = "0.24" metrics-exporter-dogstatsd = "0.9" +metrics-exporter-otel = "0.3" +metrics-exporter-prometheus = { version = "0.18", default-features = false } +metrics-util = "0.20" mime_guess = "2.0" mini-moka = "0.10.3" mockall = "0.14" diff --git a/quickwit/quickwit-cli/Cargo.toml b/quickwit/quickwit-cli/Cargo.toml index ebf3ae90e8c..17a8db55bf9 100644 --- a/quickwit/quickwit-cli/Cargo.toml +++ b/quickwit/quickwit-cli/Cargo.toml @@ -57,6 +57,8 @@ tracing-subscriber = { workspace = true } metrics = { workspace = true } metrics-exporter-dogstatsd = { workspace = true } +metrics-exporter-otel = { workspace = true } +metrics-exporter-prometheus = { workspace = true } quickwit-actors = { workspace = true } quickwit-cluster = { workspace = true } diff --git a/quickwit/quickwit-common/Cargo.toml b/quickwit/quickwit-common/Cargo.toml index 14c05e19c5e..9ed6850b695 100644 --- a/quickwit/quickwit-common/Cargo.toml +++ b/quickwit/quickwit-common/Cargo.toml @@ -14,9 +14,11 @@ license.workspace = true anyhow = { workspace = true } async-speed-limit = { workspace = true } async-trait = { workspace = true } +atomic_float = { workspace = true } backtrace = { workspace = true, optional = true } bytesize = { workspace = true } coarsetime = { workspace = true } +const_format = { workspace = true } dyn-clone = { workspace = true } env_logger = { workspace = true } fnv = { workspace = true } @@ -28,6 +30,9 @@ http = { workspace = true } hyper = { workspace = true } hyper-util = { workspace = true, optional = true } itertools = { workspace = true } +inventory = { workspace = true } +metrics = { workspace = true } +metrics-util = { workspace = true } pin-project = { workspace = true } pnet = { workspace = true } prometheus = { workspace = 
true } From c93fa2eb1c5dbff1047a7d8b41320e268fb86579 Mon Sep 17 00:00:00 2001 From: Shuhei Kitagawa Date: Wed, 29 Apr 2026 13:28:44 +0200 Subject: [PATCH 02/54] Port quickwit-common metrics core --- quickwit/quickwit-common/src/io.rs | 56 ++- quickwit/quickwit-common/src/metrics.rs | 453 ------------------ .../quickwit-common/src/metrics/counter.rs | 267 +++++++++++ quickwit/quickwit-common/src/metrics/gauge.rs | 344 +++++++++++++ .../quickwit-common/src/metrics/histogram.rs | 151 ++++++ quickwit/quickwit-common/src/metrics/mod.rs | 181 +++++++ .../quickwit-common/src/metrics/quickwit.rs | 188 ++++++++ quickwit/quickwit-common/src/metrics/tests.rs | 317 ++++++++++++ quickwit/quickwit-common/src/runtimes.rs | 86 ++-- quickwit/quickwit-common/src/stream_utils.rs | 50 +- quickwit/quickwit-common/src/thread_pool.rs | 60 +-- .../src/tower/circuit_breaker.rs | 20 +- quickwit/quickwit-common/src/tower/metrics.rs | 213 ++++---- 13 files changed, 1709 insertions(+), 677 deletions(-) delete mode 100644 quickwit/quickwit-common/src/metrics.rs create mode 100644 quickwit/quickwit-common/src/metrics/counter.rs create mode 100644 quickwit/quickwit-common/src/metrics/gauge.rs create mode 100644 quickwit/quickwit-common/src/metrics/histogram.rs create mode 100644 quickwit/quickwit-common/src/metrics/mod.rs create mode 100644 quickwit/quickwit-common/src/metrics/quickwit.rs create mode 100644 quickwit/quickwit-common/src/metrics/tests.rs diff --git a/quickwit/quickwit-common/src/io.rs b/quickwit/quickwit-common/src/io.rs index e1d9ad796f1..466c16d5fd1 100644 --- a/quickwit/quickwit-common/src/io.rs +++ b/quickwit/quickwit-common/src/io.rs @@ -34,10 +34,9 @@ use async_speed_limit::clock::StandardClock; use async_speed_limit::limiter::Consume; use bytesize::ByteSize; use pin_project::pin_project; -use prometheus::IntCounter; use tokio::io::AsyncWrite; -use crate::metrics::{IntCounterVec, new_counter_vec}; +use crate::metrics::{Counter, counter}; use crate::{KillSwitch, Progress, 
ProtectedZoneGuard}; // Max 1MB at a time. @@ -48,25 +47,14 @@ fn truncate_bytes(bytes: &[u8]) -> &[u8] { &bytes[..num_bytes] } -struct IoMetrics { - write_bytes: IntCounterVec<1>, -} - -impl Default for IoMetrics { - fn default() -> Self { - let write_bytes = new_counter_vec( - "write_bytes", - "Number of bytes written by a given component in [indexer, merger, deleter, \ - split_downloader_{merge,delete}]", - "", - &[], - ["component"], - ); - Self { write_bytes } - } -} - -static IO_METRICS: LazyLock = LazyLock::new(IoMetrics::default); +static WRITE_BYTES: LazyLock = LazyLock::new(|| { + counter!( + name: "write_bytes", + description: "Number of bytes written by a given component in [indexer, merger, deleter, split_downloader_{merge,delete}]", + subsystem: "", + observable: true, + ) +}); /// Parameter used in `async_speed_limit`. /// @@ -91,20 +79,18 @@ pub fn limiter(throughput: ByteSize) -> Limiter { #[derive(Clone)] pub struct IoControls { throughput_limiter_opt: Option, - bytes_counter: IntCounter, + bytes_counter: Counter, progress: Progress, kill_switch: KillSwitch, } impl Default for IoControls { fn default() -> Self { - let default_bytes_counter = - IntCounter::new("default_write_num_bytes", "Default write counter.").unwrap(); IoControls { throughput_limiter_opt: None, progress: Progress::default(), kill_switch: KillSwitch::default(), - bytes_counter: default_bytes_counter, + bytes_counter: DEFAULT_WRITE_BYTES.clone(), } } } @@ -132,7 +118,10 @@ impl IoControls { } pub fn set_component(mut self, component: &str) -> Self { - self.bytes_counter = IO_METRICS.write_bytes.with_label_values([component]); + self.bytes_counter = counter!( + parent: &*WRITE_BYTES, + "component" => component.to_string(), + ); self } @@ -148,7 +137,7 @@ impl IoControls { self } - pub fn set_bytes_counter(mut self, bytes_counter: IntCounter) -> Self { + pub fn set_bytes_counter(mut self, bytes_counter: Counter) -> Self { self.bytes_counter = bytes_counter; self } @@ -167,11 +156,20 
@@ impl IoControls { if let Some(throughput_limiter) = &self.throughput_limiter_opt { throughput_limiter.blocking_consume(num_bytes); } - self.bytes_counter.inc_by(num_bytes as u64); + self.bytes_counter.increment(num_bytes as u64); Ok(()) } } +static DEFAULT_WRITE_BYTES: LazyLock = LazyLock::new(|| { + counter!( + name: "default_write_num_bytes", + description: "Default write counter.", + subsystem: "", + observable: true, + ) +}); + #[pin_project] pub struct ControlledWrite { #[pin] @@ -220,7 +218,7 @@ impl ControlledWrite { let len = *obj.as_ref().unwrap_or(&0); if len > 0 { let waiter = this.io_controls_access.apply(|io_controls| { - io_controls.bytes_counter.inc_by(len as u64); + io_controls.bytes_counter.increment(len as u64); io_controls .throughput_limiter_opt .as_ref() diff --git a/quickwit/quickwit-common/src/metrics.rs b/quickwit/quickwit-common/src/metrics.rs deleted file mode 100644 index 193def5e01a..00000000000 --- a/quickwit/quickwit-common/src/metrics.rs +++ /dev/null @@ -1,453 +0,0 @@ -// Copyright 2021-Present Datadog, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -use std::collections::{BTreeMap, HashMap}; -use std::sync::{LazyLock, OnceLock}; - -use prometheus::{Gauge, HistogramOpts, Opts, TextEncoder}; -pub use prometheus::{ - Histogram, HistogramTimer, HistogramVec as PrometheusHistogramVec, IntCounter, - IntCounterVec as PrometheusIntCounterVec, IntGauge, IntGaugeVec as PrometheusIntGaugeVec, - exponential_buckets, linear_buckets, -}; - -#[derive(Clone)] -pub struct HistogramVec { - underlying: PrometheusHistogramVec, -} - -impl HistogramVec { - pub fn with_label_values(&self, label_values: [&str; N]) -> Histogram { - self.underlying.with_label_values(&label_values) - } -} - -#[derive(Clone)] -pub struct IntCounterVec { - underlying: PrometheusIntCounterVec, -} - -impl IntCounterVec { - pub fn new( - name: &str, - help: &str, - subsystem: &str, - const_labels: &[(&str, &str)], - label_names: [&str; N], - ) -> IntCounterVec { - let owned_const_labels: HashMap = const_labels - .iter() - .map(|(label_name, label_value)| (label_name.to_string(), label_value.to_string())) - .collect(); - let counter_opts = Opts::new(name, help) - .namespace("quickwit") - .subsystem(subsystem) - .const_labels(owned_const_labels); - let underlying = PrometheusIntCounterVec::new(counter_opts, &label_names) - .expect("failed to create counter vec"); - IntCounterVec { underlying } - } - - pub fn with_label_values(&self, label_values: [&str; N]) -> IntCounter { - self.underlying.with_label_values(&label_values) - } -} - -#[derive(Clone)] -pub struct IntGaugeVec { - underlying: PrometheusIntGaugeVec, -} - -impl IntGaugeVec { - pub fn with_label_values(&self, label_values: [&str; N]) -> IntGauge { - self.underlying.with_label_values(&label_values) - } -} - -pub fn register_info(name: &'static str, help: &'static str, kvs: BTreeMap<&'static str, String>) { - let mut counter_opts = Opts::new(name, help).namespace("quickwit"); - for (k, v) in kvs { - counter_opts = counter_opts.const_label(k, v); - } - let counter = 
IntCounter::with_opts(counter_opts).expect("failed to create counter"); - counter.inc(); - prometheus::register(Box::new(counter)).expect("failed to register counter"); -} - -pub fn new_counter( - name: &str, - help: &str, - subsystem: &str, - const_labels: &[(&str, &str)], -) -> IntCounter { - let owned_const_labels: HashMap = const_labels - .iter() - .map(|(label_name, label_value)| (label_name.to_string(), label_value.to_string())) - .collect(); - let counter_opts = Opts::new(name, help) - .namespace("quickwit") - .subsystem(subsystem) - .const_labels(owned_const_labels); - let counter = IntCounter::with_opts(counter_opts).expect("failed to create counter"); - prometheus::register(Box::new(counter.clone())).expect("failed to register counter"); - counter -} - -pub fn new_counter_vec( - name: &str, - help: &str, - subsystem: &str, - const_labels: &[(&str, &str)], - label_names: [&str; N], -) -> IntCounterVec { - let int_counter_vec = IntCounterVec::new(name, help, subsystem, const_labels, label_names); - let collector = Box::new(int_counter_vec.underlying.clone()); - prometheus::register(collector).expect("failed to register counter vec"); - int_counter_vec -} - -pub fn new_float_gauge( - name: &str, - help: &str, - subsystem: &str, - const_labels: &[(&str, &str)], -) -> Gauge { - let owned_const_labels: HashMap = const_labels - .iter() - .map(|(label_name, label_value)| (label_name.to_string(), label_value.to_string())) - .collect(); - let gauge_opts = Opts::new(name, help) - .namespace("quickwit") - .subsystem(subsystem) - .const_labels(owned_const_labels); - let gauge = Gauge::with_opts(gauge_opts).expect("failed to create float gauge"); - prometheus::register(Box::new(gauge.clone())).expect("failed to register float gauge"); - gauge -} - -pub fn new_gauge( - name: &str, - help: &str, - subsystem: &str, - const_labels: &[(&str, &str)], -) -> IntGauge { - let owned_const_labels: HashMap = const_labels - .iter() - .map(|(label_name, label_value)| 
(label_name.to_string(), label_value.to_string())) - .collect(); - let gauge_opts = Opts::new(name, help) - .namespace("quickwit") - .subsystem(subsystem) - .const_labels(owned_const_labels); - let gauge = IntGauge::with_opts(gauge_opts).expect("failed to create gauge"); - prometheus::register(Box::new(gauge.clone())).expect("failed to register gauge"); - gauge -} - -pub fn new_gauge_vec( - name: &str, - help: &str, - subsystem: &str, - const_labels: &[(&str, &str)], - label_names: [&str; N], -) -> IntGaugeVec { - let owned_const_labels: HashMap = const_labels - .iter() - .map(|(label_name, label_value)| (label_name.to_string(), label_value.to_string())) - .collect(); - let gauge_opts = Opts::new(name, help) - .namespace("quickwit") - .subsystem(subsystem) - .const_labels(owned_const_labels); - let underlying = - PrometheusIntGaugeVec::new(gauge_opts, &label_names).expect("failed to create gauge vec"); - - let collector = Box::new(underlying.clone()); - prometheus::register(collector).expect("failed to register counter vec"); - - IntGaugeVec { underlying } -} - -pub fn new_histogram(name: &str, help: &str, subsystem: &str, buckets: Vec) -> Histogram { - let histogram_opts = HistogramOpts::new(name, help) - .namespace("quickwit") - .subsystem(subsystem) - .buckets(buckets); - let histogram = Histogram::with_opts(histogram_opts).expect("failed to create histogram"); - prometheus::register(Box::new(histogram.clone())).expect("failed to register histogram"); - histogram -} - -pub fn new_histogram_vec( - name: &str, - help: &str, - subsystem: &str, - const_labels: &[(&str, &str)], - label_names: [&str; N], - buckets: Vec, -) -> HistogramVec { - let owned_const_labels: HashMap = const_labels - .iter() - .map(|(label_name, label_value)| (label_name.to_string(), label_value.to_string())) - .collect(); - let histogram_opts = HistogramOpts::new(name, help) - .namespace("quickwit") - .subsystem(subsystem) - .const_labels(owned_const_labels) - .buckets(buckets); - let 
underlying = PrometheusHistogramVec::new(histogram_opts, &label_names) - .expect("failed to create histogram vec"); - - let collector = Box::new(underlying.clone()); - prometheus::register(collector).expect("failed to register histogram vec"); - - HistogramVec { underlying } -} - -pub struct GaugeGuard<'a> { - gauge: &'a IntGauge, - delta: i64, -} - -impl std::fmt::Debug for GaugeGuard<'_> { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - self.delta.fmt(f) - } -} - -impl<'a> GaugeGuard<'a> { - pub fn from_gauge(gauge: &'a IntGauge) -> Self { - Self { gauge, delta: 0i64 } - } - - pub fn get(&self) -> i64 { - self.delta - } - - pub fn add(&mut self, delta: i64) { - self.gauge.add(delta); - self.delta += delta; - } - - pub fn sub(&mut self, delta: i64) { - self.gauge.sub(delta); - self.delta -= delta; - } -} - -impl Drop for GaugeGuard<'_> { - fn drop(&mut self) { - self.gauge.sub(self.delta) - } -} - -pub struct OwnedGaugeGuard { - gauge: IntGauge, - delta: i64, -} - -impl std::fmt::Debug for OwnedGaugeGuard { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - self.delta.fmt(f) - } -} - -impl OwnedGaugeGuard { - pub fn from_gauge(gauge: IntGauge) -> Self { - Self { gauge, delta: 0i64 } - } - - pub fn get(&self) -> i64 { - self.delta - } - - pub fn add(&mut self, delta: i64) { - self.gauge.add(delta); - self.delta += delta; - } - - pub fn sub(&mut self, delta: i64) { - self.gauge.sub(delta); - self.delta -= delta; - } -} - -impl Drop for OwnedGaugeGuard { - fn drop(&mut self) { - self.gauge.sub(self.delta) - } -} - -pub fn metrics_text_payload() -> Result { - let metric_families = prometheus::gather(); - // Arbitrary non-zero size in order to skip a bunch of - // buffer growth-reallocations when encoding metrics. 
- let mut buffer = String::with_capacity(1024); - let encoder = TextEncoder::new(); - match encoder.encode_utf8(&metric_families, &mut buffer) { - Ok(()) => Ok(buffer), - Err(e) => Err(e.to_string()), - } -} - -#[derive(Clone)] -pub struct MemoryMetrics { - pub active_bytes: IntGauge, - pub allocated_bytes: IntGauge, - pub resident_bytes: IntGauge, - pub in_flight: InFlightDataGauges, -} - -impl Default for MemoryMetrics { - fn default() -> Self { - Self { - active_bytes: new_gauge( - "active_bytes", - "Total number of bytes in active pages allocated by the application, as reported \ - by jemalloc `stats.active`.", - "memory", - &[], - ), - allocated_bytes: new_gauge( - "allocated_bytes", - "Total number of bytes allocated by the application, as reported by jemalloc \ - `stats.allocated`.", - "memory", - &[], - ), - resident_bytes: new_gauge( - "resident_bytes", - " Total number of bytes in physically resident data pages mapped by the \ - allocator, as reported by jemalloc `stats.resident`.", - "memory", - &[], - ), - in_flight: InFlightDataGauges::default(), - } - } -} - -#[derive(Clone)] -pub struct InFlightDataGauges { - pub rest_server: IntGauge, - pub ingest_router: IntGauge, - pub ingester_persist: IntGauge, - pub ingester_replicate: IntGauge, - pub wal: IntGauge, - pub fetch_stream: IntGauge, - pub multi_fetch_stream: IntGauge, - pub doc_processor_mailbox: IntGauge, - pub indexer_mailbox: IntGauge, - pub index_writer: IntGauge, - in_flight_gauge_vec: IntGaugeVec<1>, -} - -impl Default for InFlightDataGauges { - fn default() -> Self { - let in_flight_gauge_vec = new_gauge_vec( - "in_flight_data_bytes", - "Amount of data in-flight in various buffers in bytes.", - "memory", - &[], - ["component"], - ); - Self { - rest_server: in_flight_gauge_vec.with_label_values(["rest_server"]), - ingest_router: in_flight_gauge_vec.with_label_values(["ingest_router"]), - ingester_persist: in_flight_gauge_vec.with_label_values(["ingester_persist"]), - ingester_replicate: 
in_flight_gauge_vec.with_label_values(["ingester_replicate"]), - wal: in_flight_gauge_vec.with_label_values(["wal"]), - fetch_stream: in_flight_gauge_vec.with_label_values(["fetch_stream"]), - multi_fetch_stream: in_flight_gauge_vec.with_label_values(["multi_fetch_stream"]), - doc_processor_mailbox: in_flight_gauge_vec.with_label_values(["doc_processor_mailbox"]), - indexer_mailbox: in_flight_gauge_vec.with_label_values(["indexer_mailbox"]), - index_writer: in_flight_gauge_vec.with_label_values(["index_writer"]), - in_flight_gauge_vec: in_flight_gauge_vec.clone(), - } - } -} - -impl InFlightDataGauges { - #[inline] - pub fn file(&self) -> &IntGauge { - static GAUGE: OnceLock = OnceLock::new(); - GAUGE.get_or_init(|| self.in_flight_gauge_vec.with_label_values(["file_source"])) - } - - #[inline] - pub fn ingest(&self) -> &IntGauge { - static GAUGE: OnceLock = OnceLock::new(); - GAUGE.get_or_init(|| { - self.in_flight_gauge_vec - .with_label_values(["ingest_source"]) - }) - } - - #[inline] - pub fn kafka(&self) -> &IntGauge { - static GAUGE: OnceLock = OnceLock::new(); - GAUGE.get_or_init(|| self.in_flight_gauge_vec.with_label_values(["kafka_source"])) - } - - #[inline] - pub fn kinesis(&self) -> &IntGauge { - static GAUGE: OnceLock = OnceLock::new(); - GAUGE.get_or_init(|| { - self.in_flight_gauge_vec - .with_label_values(["kinesis_source"]) - }) - } - - #[inline] - pub fn pubsub(&self) -> &IntGauge { - static GAUGE: OnceLock = OnceLock::new(); - GAUGE.get_or_init(|| { - self.in_flight_gauge_vec - .with_label_values(["pubsub_source"]) - }) - } - - #[inline] - pub fn pulsar(&self) -> &IntGauge { - static GAUGE: OnceLock = OnceLock::new(); - GAUGE.get_or_init(|| { - self.in_flight_gauge_vec - .with_label_values(["pulsar_source"]) - }) - } - - #[inline] - pub fn other(&self) -> &IntGauge { - static GAUGE: OnceLock = OnceLock::new(); - GAUGE.get_or_init(|| { - self.in_flight_gauge_vec - .with_label_values(["pulsar_source"]) - }) - } -} - -/// This function returns 
`index_id` as is if per-index metrics are enabled, or projects it to -/// `"__any__"` otherwise. -pub fn index_label(index_id: &str) -> &str { - static PER_INDEX_METRICS_ENABLED: LazyLock = - LazyLock::new(|| !crate::get_bool_from_env("QW_DISABLE_PER_INDEX_METRICS", false)); - - if *PER_INDEX_METRICS_ENABLED { - index_id - } else { - "__any__" - } -} - -pub static MEMORY_METRICS: LazyLock = LazyLock::new(MemoryMetrics::default); diff --git a/quickwit/quickwit-common/src/metrics/counter.rs b/quickwit/quickwit-common/src/metrics/counter.rs new file mode 100644 index 00000000000..3c93f91e431 --- /dev/null +++ b/quickwit/quickwit-common/src/metrics/counter.rs @@ -0,0 +1,267 @@ +// Copyright 2021-Present Datadog, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
use std::sync::Arc;
use std::sync::atomic::{AtomicU64, Ordering};

use metrics::CounterFn;

use super::MetricInfo;

/// Process-local storage that mirrors ("shadows") the value pushed to the
/// metrics-rs recorder so that `Counter::get()` can read it back.
///
/// The metrics-rs recorder API is write-only; observable counters therefore
/// keep their own atomic alongside the recorder handle.
#[doc(hidden)]
pub enum CounterShadow {
    /// No shadow: the counter is write-only and `get()` returns a sentinel.
    Noop,
    /// Shadow declared as a `static` by the `counter!` macro (base metrics).
    Ref(&'static AtomicU64),
    /// Heap-allocated shadow for counters derived from a parent at runtime.
    /// NOTE(review): each `counter!(parent: ..)` invocation allocates a fresh
    /// shadow, so two handles created with identical labels do NOT share a
    /// shadow even though the recorder-side counter is shared — confirm this
    /// is acceptable for `get()` consumers.
    Arc(Arc<AtomicU64>),
}

impl Clone for CounterShadow {
    fn clone(&self) -> Self {
        match self {
            Self::Noop => Self::Noop,
            Self::Ref(value) => Self::Ref(value),
            Self::Arc(value) => Self::Arc(Arc::clone(value)),
        }
    }
}

impl CounterShadow {
    /// Adds `value` to the shadow (no-op for `Noop`).
    fn increment(&self, value: u64) {
        match self {
            Self::Noop => {}
            Self::Ref(atomic) => {
                atomic.fetch_add(value, Ordering::Relaxed);
            }
            Self::Arc(atomic) => {
                atomic.fetch_add(value, Ordering::Relaxed);
            }
        }
    }

    /// Overwrites the shadow with an absolute value (no-op for `Noop`).
    fn absolute(&self, value: u64) {
        match self {
            Self::Noop => {}
            Self::Ref(atomic) => atomic.store(value, Ordering::Relaxed),
            Self::Arc(atomic) => atomic.store(value, Ordering::Relaxed),
        }
    }

    /// Reads the shadow. `Noop` returns `u64::MAX` as a "not tracked"
    /// sentinel rather than a misleading `0`.
    fn get(&self) -> u64 {
        match self {
            Self::Noop => u64::MAX,
            Self::Ref(atomic) => atomic.load(Ordering::Relaxed),
            Self::Arc(atomic) => atomic.load(Ordering::Relaxed),
        }
    }
}

/// Monotonic counter handle combining the metrics-rs recorder counter with
/// its static metadata and an optional readable shadow.
#[derive(Clone)]
pub struct Counter {
    pub(crate) info: &'static MetricInfo,
    pub(crate) key: metrics::Key,
    pub(crate) inner: metrics::Counter,
    pub(crate) shadow: CounterShadow,
}

impl std::fmt::Debug for Counter {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("Counter").field("key", &self.key).finish()
    }
}

impl Counter {
    /// Constructor used by the `counter!` macro; not part of the public API.
    #[doc(hidden)]
    pub fn __new(
        info: &'static MetricInfo,
        key: metrics::Key,
        inner: metrics::Counter,
        shadow: CounterShadow,
    ) -> Self {
        Self {
            info,
            key,
            inner,
            shadow,
        }
    }

    /// Metric metadata accessor used by the `counter!(parent: ..)` macro arm.
    #[doc(hidden)]
    pub const fn __info(&self) -> &'static MetricInfo {
        self.info
    }

    /// Full metrics-rs key (name + labels) of this counter.
    pub const fn key(&self) -> &metrics::Key {
        &self.key
    }

    /// Increments both the shadow and the recorder-side counter.
    pub fn increment(&self, value: u64) {
        self.shadow.increment(value);
        self.inner.increment(value);
    }

    /// Sets both the shadow and the recorder-side counter to an absolute
    /// value.
    pub fn absolute(&self, value: u64) {
        self.shadow.absolute(value);
        self.inner.absolute(value);
    }

    /// Reads the current value from the shadow. Only meaningful for
    /// observable counters; returns `u64::MAX` for non-observable ones.
    pub fn get(&self) -> u64 {
        self.shadow.get()
    }
}

impl CounterFn for Counter {
    fn increment(&self, value: u64) {
        Self::increment(self, value);
    }

    fn absolute(&self, value: u64) {
        Self::absolute(self, value);
    }
}

/// Declares a counter metric.
///
/// Arms:
/// * `name` / `description` / `subsystem` (+ optional static labels) — base
///   metric. `subsystem: ""` yields `quickwit_{name}`; a non-empty subsystem
///   yields `quickwit_{subsystem}_{name}` (see `key_info_metadata!`).
/// * `observable: true` — additionally wires a process-local shadow so the
///   current value can be read back via `Counter::get()`.
/// * `parent: <expr>, "label" => value, ..` — derives a child counter that
///   extends the parent's key with dynamic label values.
#[macro_export]
macro_rules! counter {
    (
        name: $name:literal,
        description: $description:literal,
        subsystem: ""
        $(, $label:literal => $value:literal)* $(,)?
    ) => {{
        // Expands to static KEY / INFO / METADATA items in this block scope.
        $crate::key_info_metadata!(
            kind: $crate::metrics::MetricKind::Counter,
            observable: false,
            name: $name,
            description: $description,
            subsystem: ""
            $(, $label => $value)*
        );
        let inner = $crate::metrics::__metrics::with_recorder(|recorder| {
            recorder.register_counter(&KEY, &METADATA)
        });
        $crate::metrics::Counter::__new(
            &INFO,
            KEY.clone(),
            inner,
            $crate::metrics::CounterShadow::Noop,
        )
    }};

    (
        name: $name:literal,
        description: $description:literal,
        subsystem: "",
        observable: true
        $(, $label:literal => $value:literal)* $(,)?
    ) => {{
        $crate::key_info_metadata!(
            kind: $crate::metrics::MetricKind::Counter,
            observable: true,
            name: $name,
            description: $description,
            subsystem: ""
            $(, $label => $value)*
        );
        // One static shadow per macro expansion site.
        static SHADOW: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0);
        let inner = $crate::metrics::__metrics::with_recorder(|recorder| {
            recorder.register_counter(&KEY, &METADATA)
        });
        $crate::metrics::Counter::__new(
            &INFO,
            KEY.clone(),
            inner,
            $crate::metrics::CounterShadow::Ref(&SHADOW),
        )
    }};

    (
        name: $name:literal,
        description: $description:literal,
        subsystem: $subsystem:literal
        $(, $label:literal => $value:literal)* $(,)?
    ) => {{
        $crate::key_info_metadata!(
            kind: $crate::metrics::MetricKind::Counter,
            observable: false,
            name: $name,
            description: $description,
            subsystem: $subsystem
            $(, $label => $value)*
        );
        let inner = $crate::metrics::__metrics::with_recorder(|recorder| {
            recorder.register_counter(&KEY, &METADATA)
        });
        $crate::metrics::Counter::__new(
            &INFO,
            KEY.clone(),
            inner,
            $crate::metrics::CounterShadow::Noop,
        )
    }};

    (
        name: $name:literal,
        description: $description:literal,
        subsystem: $subsystem:literal,
        observable: true
        $(, $label:literal => $value:literal)* $(,)?
    ) => {{
        $crate::key_info_metadata!(
            kind: $crate::metrics::MetricKind::Counter,
            observable: true,
            name: $name,
            description: $description,
            subsystem: $subsystem
            $(, $label => $value)*
        );
        static SHADOW: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0);
        let inner = $crate::metrics::__metrics::with_recorder(|recorder| {
            recorder.register_counter(&KEY, &METADATA)
        });
        $crate::metrics::Counter::__new(
            &INFO,
            KEY.clone(),
            inner,
            $crate::metrics::CounterShadow::Ref(&SHADOW),
        )
    }};

    (
        parent: $parent:expr,
        $($label:literal => $value:expr),+ $(,)?
    ) => {{
        // Child key = parent's labels + the dynamic label values given here.
        let parent_key = $parent.key();
        let mut labels =
            Vec::with_capacity(parent_key.labels().len() + $crate::count!($($label)*));
        labels.extend(parent_key.labels().cloned());
        $(labels.push($crate::metrics::__metrics::Label::new($label, $value));)+

        let info = $parent.__info();
        let key = $crate::metrics::__metrics::Key::from_parts(info.key_name, labels);
        let metadata = $crate::metadata!(info.subsystem);

        let inner = $crate::metrics::__metrics::with_recorder(|recorder| {
            recorder.register_counter(&key, &metadata)
        });

        // Observability is inherited from the parent; the shadow is freshly
        // allocated per invocation (see `CounterShadow::Arc` note).
        let shadow = if info.observable {
            $crate::metrics::CounterShadow::Arc(std::sync::Arc::new(
                std::sync::atomic::AtomicU64::new(0),
            ))
        } else {
            $crate::metrics::CounterShadow::Noop
        };

        $crate::metrics::Counter::__new(info, key, inner, shadow)
    }};
}

// ---- quickwit/quickwit-common/src/metrics/gauge.rs ----
// Copyright 2021-Present Datadog, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use std::sync::atomic::Ordering;

use atomic_float::AtomicF64;
use metrics::GaugeFn;

use super::MetricInfo;

/// Process-local storage mirroring the value pushed to the metrics-rs
/// recorder so `Gauge::get()` can read it back (the recorder is write-only).
#[doc(hidden)]
pub enum GaugeShadow {
    /// No shadow: the gauge is write-only and `get()` returns a sentinel.
    Noop,
    /// Shadow declared as a `static` by the `gauge!` macro (base metrics).
    Ref(&'static AtomicF64),
    /// Heap-allocated shadow for gauges derived from a parent at runtime.
    /// NOTE(review): a fresh shadow per `gauge!(parent: ..)` invocation —
    /// handles with identical labels do not share it; confirm acceptable.
    Arc(Arc<AtomicF64>),
}

impl Clone for GaugeShadow {
    fn clone(&self) -> Self {
        match self {
            Self::Noop => Self::Noop,
            Self::Ref(value) => Self::Ref(value),
            Self::Arc(value) => Self::Arc(Arc::clone(value)),
        }
    }
}

impl GaugeShadow {
    /// Adds `value` to the shadow (no-op for `Noop`).
    fn increment(&self, value: f64) {
        match self {
            Self::Noop => {}
            Self::Ref(atomic) => {
                atomic.fetch_add(value, Ordering::Relaxed);
            }
            Self::Arc(atomic) => {
                atomic.fetch_add(value, Ordering::Relaxed);
            }
        }
    }

    /// Subtracts `value` from the shadow (no-op for `Noop`).
    fn decrement(&self, value: f64) {
        match self {
            Self::Noop => {}
            Self::Ref(atomic) => {
                atomic.fetch_sub(value, Ordering::Relaxed);
            }
            Self::Arc(atomic) => {
                atomic.fetch_sub(value, Ordering::Relaxed);
            }
        }
    }

    /// Overwrites the shadow (no-op for `Noop`).
    fn set(&self, value: f64) {
        match self {
            Self::Noop => {}
            Self::Ref(atomic) => atomic.store(value, Ordering::Relaxed),
            Self::Arc(atomic) => atomic.store(value, Ordering::Relaxed),
        }
    }

    /// Reads the shadow. `Noop` returns `f64::NAN` as a "not tracked"
    /// sentinel rather than a misleading `0.0`.
    fn get(&self) -> f64 {
        match self {
            Self::Noop => f64::NAN,
            Self::Ref(atomic) => atomic.load(Ordering::Relaxed),
            Self::Arc(atomic) => atomic.load(Ordering::Relaxed),
        }
    }
}

/// Gauge handle combining the metrics-rs recorder gauge with its static
/// metadata and an optional readable shadow.
#[derive(Clone)]
pub struct Gauge {
    pub(crate) info: &'static MetricInfo,
    pub(crate) key: metrics::Key,
    pub(crate) inner: metrics::Gauge,
    pub(crate) shadow: GaugeShadow,
}

impl std::fmt::Debug for Gauge {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("Gauge").field("key", &self.key).finish()
    }
}

impl Gauge {
    /// Constructor used by the `gauge!` macro; not part of the public API.
    #[doc(hidden)]
    pub fn __new(
        info: &'static MetricInfo,
        key: metrics::Key,
        inner: metrics::Gauge,
        shadow: GaugeShadow,
    ) -> Self {
        Self {
            info,
            key,
            inner,
            shadow,
        }
    }

    /// Metric metadata accessor used by the `gauge!(parent: ..)` macro arm.
    #[doc(hidden)]
    pub const fn __info(&self) -> &'static MetricInfo {
        self.info
    }

    /// Full metrics-rs key (name + labels) of this gauge.
    pub const fn key(&self) -> &metrics::Key {
        &self.key
    }

    /// Increments both the shadow and the recorder-side gauge.
    pub fn increment(&self, value: f64) {
        self.shadow.increment(value);
        self.inner.increment(value);
    }

    /// Decrements both the shadow and the recorder-side gauge.
    pub fn decrement(&self, value: f64) {
        self.shadow.decrement(value);
        self.inner.decrement(value);
    }

    /// Sets both the shadow and the recorder-side gauge.
    pub fn set(&self, value: f64) {
        self.shadow.set(value);
        self.inner.set(value);
    }

    /// Reads the current value from the shadow. Only meaningful for
    /// observable gauges; returns `f64::NAN` for non-observable ones.
    pub fn get(&self) -> f64 {
        self.shadow.get()
    }
}

impl GaugeFn for Gauge {
    fn increment(&self, value: f64) {
        Self::increment(self, value);
    }

    fn decrement(&self, value: f64) {
        Self::decrement(self, value);
    }

    fn set(&self, value: f64) {
        Self::set(self, value);
    }
}

/// RAII guard that tracks a running delta applied to a gauge and undoes the
/// whole delta on drop (replaces the old prometheus-based `GaugeGuard`).
#[derive(Debug)]
pub struct GaugeGuard {
    gauge: Gauge,
    // Net amount added to the gauge so far; reverted in `Drop`.
    delta: f64,
}

impl GaugeGuard {
    /// Creates a guard with zero applied delta.
    pub fn from_gauge(gauge: &Gauge) -> Self {
        Self {
            gauge: gauge.clone(),
            delta: 0.0,
        }
    }

    /// Convenience: create a guard and immediately add `value`.
    pub fn increment(gauge: &Gauge, value: f64) -> Self {
        let mut guard = Self::from_gauge(gauge);
        guard.add_f64(value);
        guard
    }

    /// Integer-flavored add, kept for call sites migrated from `IntGauge`.
    pub fn add(&mut self, delta: i64) {
        self.add_f64(delta as f64);
    }

    /// Integer-flavored sub, kept for call sites migrated from `IntGauge`.
    pub fn sub(&mut self, delta: i64) {
        self.sub_f64(delta as f64);
    }

    /// Adds `delta` to the gauge and records it for the drop-time revert.
    pub fn add_f64(&mut self, delta: f64) {
        self.gauge.increment(delta);
        self.delta += delta;
    }

    /// Subtracts `delta` from the gauge and records it for the revert.
    pub fn sub_f64(&mut self, delta: f64) {
        self.gauge.decrement(delta);
        self.delta -= delta;
    }

    /// Current net delta, truncated to an integer.
    pub fn get(&self) -> i64 {
        self.delta as i64
    }

    /// Current net delta as a float.
    pub fn value(&self) -> f64 {
        self.delta
    }
}

impl Drop for GaugeGuard {
    fn drop(&mut self) {
        // Undo the entire net delta; a negative delta is added back because
        // decrementing by a negative value increments the gauge.
        self.gauge.decrement(self.delta);
    }
}

/// Declares a gauge metric. Arms mirror `counter!`: base metric with empty or
/// named subsystem, optional `observable: true` (readable via `get()`), and a
/// `parent:` arm extending an existing gauge with dynamic label values.
#[macro_export]
macro_rules! gauge {
    (
        name: $name:literal,
        description: $description:literal,
        subsystem: ""
        $(, $label:literal => $value:literal)* $(,)?
    ) => {{
        // Expands to static KEY / INFO / METADATA items in this block scope.
        $crate::key_info_metadata!(
            kind: $crate::metrics::MetricKind::Gauge,
            observable: false,
            name: $name,
            description: $description,
            subsystem: ""
            $(, $label => $value)*
        );
        let inner = $crate::metrics::__metrics::with_recorder(|recorder| {
            recorder.register_gauge(&KEY, &METADATA)
        });
        $crate::metrics::Gauge::__new(
            &INFO,
            KEY.clone(),
            inner,
            $crate::metrics::GaugeShadow::Noop,
        )
    }};

    (
        name: $name:literal,
        description: $description:literal,
        subsystem: "",
        observable: true
        $(, $label:literal => $value:literal)* $(,)?
    ) => {{
        $crate::key_info_metadata!(
            kind: $crate::metrics::MetricKind::Gauge,
            observable: true,
            name: $name,
            description: $description,
            subsystem: ""
            $(, $label => $value)*
        );
        // One static shadow per macro expansion site.
        static SHADOW: $crate::metrics::__atomic_float::AtomicF64 =
            $crate::metrics::__atomic_float::AtomicF64::new(0.0);
        let inner = $crate::metrics::__metrics::with_recorder(|recorder| {
            recorder.register_gauge(&KEY, &METADATA)
        });
        $crate::metrics::Gauge::__new(
            &INFO,
            KEY.clone(),
            inner,
            $crate::metrics::GaugeShadow::Ref(&SHADOW),
        )
    }};

    (
        name: $name:literal,
        description: $description:literal,
        subsystem: $subsystem:literal
        $(, $label:literal => $value:literal)* $(,)?
    ) => {{
        $crate::key_info_metadata!(
            kind: $crate::metrics::MetricKind::Gauge,
            observable: false,
            name: $name,
            description: $description,
            subsystem: $subsystem
            $(, $label => $value)*
        );
        let inner = $crate::metrics::__metrics::with_recorder(|recorder| {
            recorder.register_gauge(&KEY, &METADATA)
        });
        $crate::metrics::Gauge::__new(
            &INFO,
            KEY.clone(),
            inner,
            $crate::metrics::GaugeShadow::Noop,
        )
    }};

    (
        name: $name:literal,
        description: $description:literal,
        subsystem: $subsystem:literal,
        observable: true
        $(, $label:literal => $value:literal)* $(,)?
    ) => {{
        $crate::key_info_metadata!(
            kind: $crate::metrics::MetricKind::Gauge,
            observable: true,
            name: $name,
            description: $description,
            subsystem: $subsystem
            $(, $label => $value)*
        );
        static SHADOW: $crate::metrics::__atomic_float::AtomicF64 =
            $crate::metrics::__atomic_float::AtomicF64::new(0.0);
        let inner = $crate::metrics::__metrics::with_recorder(|recorder| {
            recorder.register_gauge(&KEY, &METADATA)
        });
        $crate::metrics::Gauge::__new(
            &INFO,
            KEY.clone(),
            inner,
            $crate::metrics::GaugeShadow::Ref(&SHADOW),
        )
    }};

    (
        parent: $parent:expr,
        $($label:literal => $value:expr),+ $(,)?
    ) => {{
        // Child key = parent's labels + the dynamic label values given here.
        let parent_key = $parent.key();
        let mut labels =
            Vec::with_capacity(parent_key.labels().len() + $crate::count!($($label)*));
        labels.extend(parent_key.labels().cloned());
        $(labels.push($crate::metrics::__metrics::Label::new($label, $value));)+

        let info = $parent.__info();
        let key = $crate::metrics::__metrics::Key::from_parts(info.key_name, labels);
        let metadata = $crate::metadata!(info.subsystem);

        let inner = $crate::metrics::__metrics::with_recorder(|recorder| {
            recorder.register_gauge(&key, &metadata)
        });

        // Observability is inherited from the parent; shadow is allocated
        // fresh per invocation (see `GaugeShadow::Arc` note).
        let shadow = if info.observable {
            $crate::metrics::GaugeShadow::Arc(std::sync::Arc::new(
                $crate::metrics::__atomic_float::AtomicF64::new(0.0),
            ))
        } else {
            $crate::metrics::GaugeShadow::Noop
        };

        $crate::metrics::Gauge::__new(info, key, inner, shadow)
    }};
}

// ---- quickwit/quickwit-common/src/metrics/histogram.rs ----
// Copyright 2021-Present Datadog, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use metrics::HistogramFn;

use super::MetricInfo;

/// Static description of a histogram: its metric metadata plus a function
/// producing its bucket boundaries. Collected through `inventory` so the
/// Prometheus exporter can configure buckets at recorder-install time
/// (see `histogram_buckets()` in the parent module).
#[doc(hidden)]
#[derive(Clone, Copy)]
pub struct HistogramConfig {
    pub info: &'static MetricInfo,
    // Function pointer rather than a value so the config can live in a
    // `static` (bucket vectors are built lazily, at introspection time).
    pub buckets_fn: fn() -> Vec<f64>,
}

inventory::collect!(HistogramConfig);

/// Histogram handle combining the metrics-rs recorder histogram with its
/// static configuration. Unlike `Counter`/`Gauge`, there is no shadow: the
/// recorded distribution is not readable from the handle.
#[derive(Clone)]
pub struct Histogram {
    pub(crate) info: &'static HistogramConfig,
    pub(crate) key: metrics::Key,
    pub(crate) inner: metrics::Histogram,
}

impl std::fmt::Debug for Histogram {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("Histogram").field("key", &self.key).finish()
    }
}

impl Histogram {
    /// Constructor used by the `histogram!` macro; not part of the public API.
    #[doc(hidden)]
    pub fn __new(
        info: &'static HistogramConfig,
        key: metrics::Key,
        inner: metrics::Histogram,
    ) -> Self {
        Self { info, key, inner }
    }

    /// Configuration accessor used by the `histogram!(parent: ..)` macro arm.
    #[doc(hidden)]
    pub const fn __info(&self) -> &'static HistogramConfig {
        self.info
    }

    /// Full metrics-rs key (name + labels) of this histogram.
    pub const fn key(&self) -> &metrics::Key {
        &self.key
    }

    /// Records one observation.
    pub fn record(&self, value: f64) {
        self.inner.record(value);
    }
}

impl HistogramFn for Histogram {
    fn record(&self, value: f64) {
        Self::record(self, value);
    }
}

/// Declares a histogram metric.
///
/// Arms mirror `counter!`/`gauge!` (empty or named subsystem, optional static
/// labels) with an extra mandatory `buckets:` expression, plus a `parent:`
/// arm that extends an existing histogram with dynamic label values. There is
/// no `observable:` variant. Children created via `parent:` reuse the
/// parent's inventory-registered bucket configuration.
#[macro_export]
macro_rules! histogram {
    (
        name: $name:literal,
        description: $description:literal,
        subsystem: "",
        buckets: $buckets:expr
        $(, $label:literal => $value:literal)* $(,)?
    ) => {{
        // Expands to static KEY / INFO / METADATA items in this block scope.
        $crate::key_info_metadata!(
            kind: $crate::metrics::MetricKind::Histogram,
            observable: false,
            name: $name,
            description: $description,
            subsystem: ""
            $(, $label => $value)*
        );

        // Registered in the `inventory` so exporters can look buckets up.
        static HISTOGRAM_CONFIG: $crate::metrics::HistogramConfig =
            $crate::metrics::HistogramConfig {
                info: &INFO,
                buckets_fn: || $buckets,
            };
        $crate::metrics::__inventory::submit!(HISTOGRAM_CONFIG);

        let inner = $crate::metrics::__metrics::with_recorder(|recorder| {
            recorder.register_histogram(&KEY, &METADATA)
        });
        $crate::metrics::Histogram::__new(&HISTOGRAM_CONFIG, KEY.clone(), inner)
    }};

    (
        name: $name:literal,
        description: $description:literal,
        subsystem: $subsystem:literal,
        buckets: $buckets:expr
        $(, $label:literal => $value:literal)* $(,)?
    ) => {{
        $crate::key_info_metadata!(
            kind: $crate::metrics::MetricKind::Histogram,
            observable: false,
            name: $name,
            description: $description,
            subsystem: $subsystem
            $(, $label => $value)*
        );

        static HISTOGRAM_CONFIG: $crate::metrics::HistogramConfig =
            $crate::metrics::HistogramConfig {
                info: &INFO,
                buckets_fn: || $buckets,
            };
        $crate::metrics::__inventory::submit!(HISTOGRAM_CONFIG);

        let inner = $crate::metrics::__metrics::with_recorder(|recorder| {
            recorder.register_histogram(&KEY, &METADATA)
        });
        $crate::metrics::Histogram::__new(&HISTOGRAM_CONFIG, KEY.clone(), inner)
    }};

    (
        parent: $parent:expr,
        $($label:literal => $value:expr),+ $(,)?
    ) => {{
        // Child key = parent's labels + the dynamic label values given here.
        let parent_key = $parent.key();
        let mut labels =
            Vec::with_capacity(parent_key.labels().len() + $crate::count!($($label)*));
        labels.extend(parent_key.labels().cloned());
        $(labels.push($crate::metrics::__metrics::Label::new($label, $value));)+

        // `info` is a &HistogramConfig here, hence the `info.info` accesses.
        let info = $parent.__info();
        let key = $crate::metrics::__metrics::Key::from_parts(info.info.key_name, labels);
        let metadata = $crate::metadata!(info.info.subsystem);

        let inner = $crate::metrics::__metrics::with_recorder(|recorder| {
            recorder.register_histogram(&key, &metadata)
        });

        $crate::metrics::Histogram::__new(info, key, inner)
    }};
}

// ---- quickwit/quickwit-common/src/metrics/mod.rs ----
// Copyright 2021-Present Datadog, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
+
+#[doc(hidden)]
+pub use atomic_float as __atomic_float;
+#[doc(hidden)]
+pub use const_format::concatcp as __concatcp;
+#[doc(hidden)]
+pub use inventory as __inventory;
+#[doc(hidden)]
+pub use metrics as __metrics;
+pub use metrics::{CounterFn, GaugeFn, HistogramFn};
+pub use metrics_util::MetricKind;
+pub use prometheus::{exponential_buckets, linear_buckets};
+
+mod counter;
+mod gauge;
+mod histogram;
+mod quickwit;
+
+pub use counter::Counter;
+#[doc(hidden)]
+pub use counter::CounterShadow;
+#[doc(hidden)]
+pub use gauge::GaugeShadow;
+pub use gauge::{Gauge, GaugeGuard};
+pub use histogram::{Histogram, HistogramConfig};
+pub use quickwit::{
+    InFlightDataGauges, MEMORY_METRICS, MemoryMetrics, index_label, metrics_text_payload,
+    register_info,
+};
+
+#[cfg(test)]
+mod tests;
+
+/// System-level prefix prepended to every metric name.
+pub const SYSTEM: &str = "quickwit";
+
+/// Static metadata describing one declared metric, registered into the
+/// `inventory` registry by the `counter!`/`gauge!`/`histogram!` macros.
+#[doc(hidden)]
+#[derive(Clone, Copy)]
+pub struct MetricInfo {
+    pub name: &'static str,
+    pub subsystem: &'static str,
+    pub key_name: &'static str,
+    pub description: &'static str,
+    pub kind: MetricKind,
+    pub observable: bool,
+}
+
+inventory::collect!(MetricInfo);
+
+/// Pushes the description of every inventoried metric to the installed recorder.
+pub fn describe_metrics() {
+    metrics::with_recorder(|recorder| {
+        for info in inventory::iter::<MetricInfo> {
+            let key_name = metrics::KeyName::from_const_str(info.key_name);
+            let description: metrics::SharedString = info.description.into();
+            match info.kind {
+                MetricKind::Counter => recorder.describe_counter(key_name, None, description),
+                MetricKind::Gauge => recorder.describe_gauge(key_name, None, description),
+                MetricKind::Histogram => recorder.describe_histogram(key_name, None, description),
+            }
+        }
+    });
+}
+
+/// Iterates over the metadata of every metric declared through the macros.
+pub fn metrics_info() -> impl Iterator<Item = &'static MetricInfo> {
+    inventory::iter::<MetricInfo>.into_iter()
+}
+
+/// Iterates over `(key_name, buckets)` pairs for every declared histogram.
+pub fn histogram_buckets() -> impl Iterator<Item = (&'static str, Vec<f64>)> {
+    inventory::iter::<HistogramConfig>
+        .into_iter()
+        .map(|config| (config.info.key_name, (config.buckets_fn)()))
+}
+
+#[doc(hidden)]
+#[macro_export]
+macro_rules! 
key_name { + ("", $name:literal) => { + $crate::metrics::__concatcp!($crate::metrics::SYSTEM, "_", $name) + }; + ($subsystem:literal, $name:literal) => { + $crate::metrics::__concatcp!($crate::metrics::SYSTEM, "_", $subsystem, "_", $name) + }; +} + +#[doc(hidden)] +#[macro_export] +macro_rules! count { + () => { + 0usize + }; + ($head:tt $($tail:tt)*) => { + 1usize + $crate::count!($($tail)*) + }; +} + +#[doc(hidden)] +#[macro_export] +macro_rules! metadata { + ($subsystem:expr) => { + $crate::metrics::__metrics::Metadata::new( + $subsystem, + $crate::metrics::__metrics::Level::INFO, + Some(module_path!()), + ) + }; +} + +#[doc(hidden)] +#[macro_export] +macro_rules! key_info_metadata { + ( + kind: $kind:expr, + observable: $observable:expr, + name: $name:literal, + description: $description:literal, + subsystem: "" + $(, $label:literal => $value:literal)* $(,)? + ) => { + const KEY_NAME: &str = $crate::metrics::__concatcp!($crate::metrics::SYSTEM, "_", $name); + static INFO: $crate::metrics::MetricInfo = $crate::metrics::MetricInfo { + name: $name, + subsystem: "", + key_name: KEY_NAME, + description: $description, + kind: $kind, + observable: $observable, + }; + $crate::metrics::__inventory::submit!(INFO); + + static LABELS: [$crate::metrics::__metrics::Label; $crate::count!($($label)*)] = [ + $($crate::metrics::__metrics::Label::from_static_parts($label, $value)),* + ]; + static KEY: $crate::metrics::__metrics::Key = + $crate::metrics::__metrics::Key::from_static_parts(KEY_NAME, &LABELS); + static METADATA: $crate::metrics::__metrics::Metadata<'static> = + $crate::metadata!(""); + }; + + ( + kind: $kind:expr, + observable: $observable:expr, + name: $name:literal, + description: $description:literal, + subsystem: $subsystem:literal + $(, $label:literal => $value:literal)* $(,)? 
+ ) => { + const KEY_NAME: &str = $crate::key_name!($subsystem, $name); + static INFO: $crate::metrics::MetricInfo = $crate::metrics::MetricInfo { + name: $name, + subsystem: $subsystem, + key_name: KEY_NAME, + description: $description, + kind: $kind, + observable: $observable, + }; + $crate::metrics::__inventory::submit!(INFO); + + static LABELS: [$crate::metrics::__metrics::Label; $crate::count!($($label)*)] = [ + $($crate::metrics::__metrics::Label::from_static_parts($label, $value)),* + ]; + static KEY: $crate::metrics::__metrics::Key = + $crate::metrics::__metrics::Key::from_static_parts(KEY_NAME, &LABELS); + static METADATA: $crate::metrics::__metrics::Metadata<'static> = + $crate::metadata!($subsystem); + }; +} + +pub use crate::{counter, gauge, histogram}; diff --git a/quickwit/quickwit-common/src/metrics/quickwit.rs b/quickwit/quickwit-common/src/metrics/quickwit.rs new file mode 100644 index 00000000000..9254a819be8 --- /dev/null +++ b/quickwit/quickwit-common/src/metrics/quickwit.rs @@ -0,0 +1,188 @@ +// Copyright 2021-Present Datadog, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+use std::collections::BTreeMap;
+use std::sync::{LazyLock, OnceLock};
+
+use super::{Gauge, SYSTEM};
+use crate::gauge;
+
+/// Registers an "info"-style counter (value pinned to 1) whose labels carry
+/// the provided key/value metadata, mirroring the legacy prometheus pattern.
+pub fn register_info(name: &'static str, help: &'static str, kvs: BTreeMap<&'static str, String>) {
+    let key_name = metric_key_name("", name);
+    let labels = kvs
+        .into_iter()
+        .map(|(label, value)| metrics::Label::new(label, value))
+        .collect::<Vec<_>>();
+    let key = metrics::Key::from_parts(key_name.clone(), labels);
+    let metadata = metrics::Metadata::new("", metrics::Level::INFO, Some(module_path!()));
+    metrics::with_recorder(|recorder| {
+        recorder.describe_counter(metrics::KeyName::from(key_name), None, help.into());
+        recorder.register_counter(&key, &metadata).increment(1);
+    });
+}
+
+/// Placeholder until the Prometheus exporter is installed in a later section.
+pub fn metrics_text_payload() -> Result<String, String> {
+    Err("Prometheus metrics rendering is not installed yet".to_string())
+}
+
+#[derive(Clone)]
+pub struct MemoryMetrics {
+    pub active_bytes: Gauge,
+    pub allocated_bytes: Gauge,
+    pub resident_bytes: Gauge,
+    pub in_flight: InFlightDataGauges,
+}
+
+impl Default for MemoryMetrics {
+    fn default() -> Self {
+        Self {
+            active_bytes: MEMORY_ACTIVE_BYTES.clone(),
+            allocated_bytes: MEMORY_ALLOCATED_BYTES.clone(),
+            resident_bytes: MEMORY_RESIDENT_BYTES.clone(),
+            in_flight: InFlightDataGauges::default(),
+        }
+    }
+}
+
+#[derive(Clone)]
+pub struct InFlightDataGauges {
+    pub rest_server: Gauge,
+    pub ingest_router: Gauge,
+    pub ingester_persist: Gauge,
+    pub ingester_replicate: Gauge,
+    pub wal: Gauge,
+    pub fetch_stream: Gauge,
+    pub multi_fetch_stream: Gauge,
+    pub doc_processor_mailbox: Gauge,
+    pub indexer_mailbox: Gauge,
+    pub index_writer: Gauge,
+}
+
+impl Default for InFlightDataGauges {
+    fn default() -> Self {
+        Self {
+            rest_server: in_flight_data_gauge("rest_server"),
+            ingest_router: in_flight_data_gauge("ingest_router"),
+            ingester_persist: in_flight_data_gauge("ingester_persist"),
+            ingester_replicate: in_flight_data_gauge("ingester_replicate"),
+            wal: in_flight_data_gauge("wal"),
+            fetch_stream: 
in_flight_data_gauge("fetch_stream"),
+            multi_fetch_stream: in_flight_data_gauge("multi_fetch_stream"),
+            doc_processor_mailbox: in_flight_data_gauge("doc_processor_mailbox"),
+            indexer_mailbox: in_flight_data_gauge("indexer_mailbox"),
+            index_writer: in_flight_data_gauge("index_writer"),
+        }
+    }
+}
+
+impl InFlightDataGauges {
+    #[inline]
+    pub fn file(&self) -> &'static Gauge {
+        static GAUGE: OnceLock<Gauge> = OnceLock::new();
+        GAUGE.get_or_init(|| in_flight_data_gauge("file_source"))
+    }
+
+    #[inline]
+    pub fn ingest(&self) -> &'static Gauge {
+        static GAUGE: OnceLock<Gauge> = OnceLock::new();
+        GAUGE.get_or_init(|| in_flight_data_gauge("ingest_source"))
+    }
+
+    #[inline]
+    pub fn kafka(&self) -> &'static Gauge {
+        static GAUGE: OnceLock<Gauge> = OnceLock::new();
+        GAUGE.get_or_init(|| in_flight_data_gauge("kafka_source"))
+    }
+
+    #[inline]
+    pub fn kinesis(&self) -> &'static Gauge {
+        static GAUGE: OnceLock<Gauge> = OnceLock::new();
+        GAUGE.get_or_init(|| in_flight_data_gauge("kinesis_source"))
+    }
+
+    #[inline]
+    pub fn pubsub(&self) -> &'static Gauge {
+        static GAUGE: OnceLock<Gauge> = OnceLock::new();
+        GAUGE.get_or_init(|| in_flight_data_gauge("pubsub_source"))
+    }
+
+    #[inline]
+    pub fn pulsar(&self) -> &'static Gauge {
+        static GAUGE: OnceLock<Gauge> = OnceLock::new();
+        GAUGE.get_or_init(|| in_flight_data_gauge("pulsar_source"))
+    }
+
+    #[inline]
+    pub fn other(&self) -> &'static Gauge {
+        static GAUGE: OnceLock<Gauge> = OnceLock::new();
+        // Fixed copy-paste: this previously reused the "pulsar_source" label, which would
+        // have folded `other` source traffic into the pulsar gauge. NOTE(review): confirm
+        // "other_source" matches the label value emitted by the legacy prometheus metrics.
+        GAUGE.get_or_init(|| in_flight_data_gauge("other_source"))
+    }
+}
+
+/// Returns `index_id` as-is, or the "__any__" placeholder when per-index
+/// metrics are disabled via `QW_DISABLE_PER_INDEX_METRICS`.
+pub fn index_label(index_id: &str) -> &str {
+    static PER_INDEX_METRICS_ENABLED: LazyLock<bool> =
+        LazyLock::new(|| !crate::get_bool_from_env("QW_DISABLE_PER_INDEX_METRICS", false));
+
+    if *PER_INDEX_METRICS_ENABLED {
+        index_id
+    } else {
+        "__any__"
+    }
+}
+
+pub static MEMORY_METRICS: LazyLock<MemoryMetrics> = LazyLock::new(MemoryMetrics::default);
+
+static MEMORY_ACTIVE_BYTES: LazyLock<Gauge> = LazyLock::new(|| {
+    gauge!(
+        name: "active_bytes",
+        description: "Total number of bytes in active pages allocated by the 
application, as reported by jemalloc `stats.active`.", + subsystem: "memory", + ) +}); + +static MEMORY_ALLOCATED_BYTES: LazyLock = LazyLock::new(|| { + gauge!( + name: "allocated_bytes", + description: "Total number of bytes allocated by the application, as reported by jemalloc `stats.allocated`.", + subsystem: "memory", + ) +}); + +static MEMORY_RESIDENT_BYTES: LazyLock = LazyLock::new(|| { + gauge!( + name: "resident_bytes", + description: " Total number of bytes in physically resident data pages mapped by the allocator, as reported by jemalloc `stats.resident`.", + subsystem: "memory", + ) +}); + +static IN_FLIGHT_DATA_BYTES: LazyLock = LazyLock::new(|| { + gauge!( + name: "in_flight_data_bytes", + description: "Amount of data in-flight in various buffers in bytes.", + subsystem: "memory", + ) +}); + +fn in_flight_data_gauge(component: &'static str) -> Gauge { + gauge!(parent: &*IN_FLIGHT_DATA_BYTES, "component" => component) +} + +fn metric_key_name(subsystem: &str, name: &str) -> String { + if subsystem.is_empty() { + format!("{SYSTEM}_{name}") + } else { + format!("{SYSTEM}_{subsystem}_{name}") + } +} diff --git a/quickwit/quickwit-common/src/metrics/tests.rs b/quickwit/quickwit-common/src/metrics/tests.rs new file mode 100644 index 00000000000..a3f96102028 --- /dev/null +++ b/quickwit/quickwit-common/src/metrics/tests.rs @@ -0,0 +1,317 @@ +// Copyright 2021-Present Datadog, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+use metrics::with_local_recorder;
+use metrics_util::debugging::{DebugValue, DebuggingRecorder};
+
+use super::*;
+
+type MetricEntry = (String, Vec<(String, String)>, DebugValue);
+
+// Runs `f` against a scoped DebuggingRecorder and returns a flattened
+// (name, labels, value) snapshot of everything it recorded.
+fn with_recorder(f: impl FnOnce()) -> Vec<MetricEntry> {
+    let recorder = DebuggingRecorder::new();
+    let snapshotter = recorder.snapshotter();
+    with_local_recorder(&recorder, f);
+    snapshotter
+        .snapshot()
+        .into_vec()
+        .into_iter()
+        .map(|(composite_key, _unit, _description, value)| {
+            let (_, key) = composite_key.into_parts();
+            let labels = key
+                .labels()
+                .map(|label| (label.key().to_string(), label.value().to_string()))
+                .collect();
+            (key.name().to_string(), labels, value)
+        })
+        .collect()
+}
+
+#[test]
+fn counter_increment_and_absolute_values() {
+    let entries = with_recorder(|| {
+        let counter = counter!(
+            name: "test_counter_increment",
+            description: "test counter",
+            subsystem: "metrics_tests",
+        );
+        counter.increment(5);
+
+        let absolute = counter!(
+            name: "test_counter_absolute",
+            description: "absolute counter",
+            subsystem: "metrics_tests",
+        );
+        absolute.absolute(42);
+    });
+
+    assert!(entries.contains(&(
+        "quickwit_metrics_tests_test_counter_increment".to_string(),
+        Vec::new(),
+        DebugValue::Counter(5),
+    )));
+    assert!(entries.contains(&(
+        "quickwit_metrics_tests_test_counter_absolute".to_string(),
+        Vec::new(),
+        DebugValue::Counter(42),
+    )));
+}
+
+#[test]
+fn gauge_set_increment_and_decrement() {
+    let entries = with_recorder(|| {
+        let gauge = gauge!(
+            name: "test_gauge",
+            description: "test gauge",
+            subsystem: "metrics_tests",
+        );
+        gauge.set(10.0);
+        gauge.increment(5.0);
+        gauge.decrement(3.0);
+    });
+
+    assert_eq!(
+        entries[0],
+        (
+            "quickwit_metrics_tests_test_gauge".to_string(),
+            Vec::new(),
+            DebugValue::Gauge(12.0.into()),
+        )
+    );
+}
+
+#[test]
+fn histogram_records_value() {
+    let entries = with_recorder(|| {
+        let histogram = histogram!(
+            name: "test_histogram",
+            description: "test histogram",
+            subsystem: "metrics_tests",
+            
buckets: vec![1.0, 5.0, 10.0], + ); + histogram.record(3.5); + }); + + let (name, labels, value) = &entries[0]; + assert_eq!(name, "quickwit_metrics_tests_test_histogram"); + assert!(labels.is_empty()); + match value { + DebugValue::Histogram(values) => { + assert_eq!(values.len(), 1); + assert_eq!(values[0].into_inner(), 3.5); + } + other => panic!("expected histogram, got {other:?}"), + } +} + +#[test] +fn empty_subsystem_omits_double_underscore() { + let entries = with_recorder(|| { + let counter = counter!( + name: "empty_subsystem_counter", + description: "empty subsystem counter", + subsystem: "", + ); + counter.increment(1); + }); + + assert_eq!(entries[0].0, "quickwit_empty_subsystem_counter"); +} + +#[test] +fn static_labels_are_preserved() { + let entries = with_recorder(|| { + let counter = counter!( + name: "static_labels_counter", + description: "static labels counter", + subsystem: "metrics_tests", + "env" => "prod", + "region" => "eu", + ); + counter.increment(1); + }); + + assert_eq!( + entries[0].1, + vec![ + ("env".to_string(), "prod".to_string()), + ("region".to_string(), "eu".to_string()), + ] + ); +} + +#[test] +fn parent_labels_dynamic_values_and_nested_extension() { + let entries = with_recorder(|| { + let base = counter!( + name: "nested_counter", + description: "nested counter", + subsystem: "metrics_tests", + "env" => "prod", + ); + let region = String::from("us-east"); + let child = counter!(parent: base, "region" => region); + let grandchild = counter!(parent: child, "az" => "use1-a"); + grandchild.increment(7); + }); + + let grandchild = entries + .iter() + .find(|(name, labels, _)| { + name == "quickwit_metrics_tests_nested_counter" && labels.len() == 3 + }) + .expect("grandchild metric should be recorded"); + assert_eq!( + grandchild.1, + vec![ + ("env".to_string(), "prod".to_string()), + ("region".to_string(), "us-east".to_string()), + ("az".to_string(), "use1-a".to_string()), + ] + ); + assert_eq!(grandchild.2, 
DebugValue::Counter(7)); +} + +#[test] +fn observable_counter_and_gauge_get_values() { + with_recorder(|| { + let counter = counter!( + name: "observable_counter", + description: "observable counter", + subsystem: "metrics_tests", + observable: true, + ); + counter.increment(3); + counter.absolute(11); + assert_eq!(counter.get(), 11); + + let gauge = gauge!( + name: "observable_gauge", + description: "observable gauge", + subsystem: "metrics_tests", + observable: true, + ); + gauge.set(10.0); + gauge.increment(2.0); + gauge.decrement(1.0); + assert_eq!(gauge.get(), 11.0); + }); +} + +#[test] +fn non_observable_metrics_return_sentinel_values() { + with_recorder(|| { + let counter = counter!( + name: "non_observable_counter", + description: "non observable counter", + subsystem: "metrics_tests", + ); + counter.increment(1); + assert_eq!(counter.get(), u64::MAX); + + let gauge = gauge!( + name: "non_observable_gauge", + description: "non observable gauge", + subsystem: "metrics_tests", + ); + gauge.set(1.0); + assert!(gauge.get().is_nan()); + }); +} + +#[test] +fn gauge_guard_balances_variable_delta_on_drop() { + let entries = with_recorder(|| { + let gauge = gauge!( + name: "guarded_gauge", + description: "guarded gauge", + subsystem: "metrics_tests", + observable: true, + ); + gauge.set(10.0); + { + let mut guard = GaugeGuard::from_gauge(&gauge); + guard.add(5); + guard.sub(2); + assert_eq!(guard.get(), 3); + assert_eq!(guard.value(), 3.0); + assert_eq!(gauge.get(), 13.0); + } + assert_eq!(gauge.get(), 10.0); + }); + + assert_eq!(entries[0].2, DebugValue::Gauge(10.0.into())); +} + +#[test] +fn histogram_bucket_inventory_contains_declared_buckets() { + with_recorder(|| { + let _ = histogram!( + name: "bucketed_histogram", + description: "bucketed histogram", + subsystem: "metrics_tests", + buckets: vec![0.1, 1.0, 10.0], + ); + }); + + assert!(histogram_buckets().any(|(name, buckets)| { + name == "quickwit_metrics_tests_bucketed_histogram" && buckets == vec![0.1, 1.0, 
10.0] + })); +} + +#[test] +fn metrics_info_contains_declared_metadata() { + with_recorder(|| { + let _ = counter!( + name: "metadata_counter", + description: "metadata counter", + subsystem: "metrics_tests", + observable: true, + ); + }); + + let info = metrics_info() + .find(|info| info.key_name == "quickwit_metrics_tests_metadata_counter") + .expect("metadata counter info should be registered"); + assert_eq!(info.name, "metadata_counter"); + assert_eq!(info.subsystem, "metrics_tests"); + assert_eq!(info.description, "metadata counter"); + assert_eq!(info.kind, MetricKind::Counter); + assert!(info.observable); +} + +#[test] +fn describe_metrics_sets_debugging_recorder_description() { + let recorder = DebuggingRecorder::new(); + let snapshotter = recorder.snapshotter(); + with_local_recorder(&recorder, || { + let counter = counter!( + name: "described_counter", + description: "described counter", + subsystem: "metrics_tests", + ); + describe_metrics(); + counter.increment(1); + }); + + let snapshot = snapshotter.snapshot().into_vec(); + let (_, _, description, _) = snapshot + .into_iter() + .find(|(composite_key, _, _, _)| { + let (_, key) = composite_key.clone().into_parts(); + key.name() == "quickwit_metrics_tests_described_counter" + }) + .expect("described counter should be recorded"); + assert_eq!(description.as_deref(), Some("described counter")); +} diff --git a/quickwit/quickwit-common/src/runtimes.rs b/quickwit/quickwit-common/src/runtimes.rs index 79ac2611bd9..082098c710e 100644 --- a/quickwit/quickwit-common/src/runtimes.rs +++ b/quickwit/quickwit-common/src/runtimes.rs @@ -17,14 +17,46 @@ use std::sync::OnceLock; use std::sync::atomic::{AtomicUsize, Ordering}; use std::time::Duration; -use prometheus::{Gauge, IntCounter, IntGauge}; use tokio::runtime::Runtime; use tokio_metrics::{RuntimeMetrics, RuntimeMonitor}; -use crate::metrics::{new_counter, new_float_gauge, new_gauge}; +use crate::metrics::{Counter, Gauge, counter, gauge}; static RUNTIMES: 
OnceLock> = OnceLock::new(); +static TOKIO_SCHEDULED_TASKS: std::sync::LazyLock = std::sync::LazyLock::new(|| { + gauge!( + name: "tokio_scheduled_tasks", + description: "The total number of tasks currently scheduled in workers' local queues.", + subsystem: "runtime", + ) +}); + +static TOKIO_WORKER_BUSY_DURATION_MILLISECONDS_TOTAL: std::sync::LazyLock = + std::sync::LazyLock::new(|| { + counter!( + name: "tokio_worker_busy_duration_milliseconds_total", + description: " The total amount of time worker threads were busy.", + subsystem: "runtime", + ) + }); + +static TOKIO_WORKER_BUSY_RATIO: std::sync::LazyLock = std::sync::LazyLock::new(|| { + gauge!( + name: "tokio_worker_busy_ratio", + description: "The ratio of time worker threads were busy since the last time runtime metrics were collected.", + subsystem: "runtime", + ) +}); + +static TOKIO_WORKER_THREADS: std::sync::LazyLock = std::sync::LazyLock::new(|| { + gauge!( + name: "tokio_worker_threads", + description: "The number of worker threads used by the runtime.", + subsystem: "runtime", + ) +}); + /// Describes which runtime an actor should run on. 
#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)] pub enum RuntimeType { @@ -165,61 +197,43 @@ pub fn scrape_tokio_runtime_metrics(handle: &tokio::runtime::Handle, label: &'st let runtime_monitor = RuntimeMonitor::new(handle); handle.spawn(async move { let mut interval = tokio::time::interval(Duration::from_secs(1)); - let mut prometheus_runtime_metrics = PrometheusRuntimeMetrics::new(label); + let mut runtime_metrics_recorder = RuntimeMetricsRecorder::new(label); for tokio_runtime_metrics in runtime_monitor.intervals() { interval.tick().await; - prometheus_runtime_metrics.update(&tokio_runtime_metrics); + runtime_metrics_recorder.update(&tokio_runtime_metrics); } }); } -struct PrometheusRuntimeMetrics { - scheduled_tasks: IntGauge, - worker_busy_duration_milliseconds_total: IntCounter, +struct RuntimeMetricsRecorder { + scheduled_tasks: Gauge, + worker_busy_duration_milliseconds_total: Counter, worker_busy_ratio: Gauge, - worker_threads: IntGauge, + worker_threads: Gauge, } -impl PrometheusRuntimeMetrics { +impl RuntimeMetricsRecorder { pub fn new(label: &'static str) -> Self { Self { - scheduled_tasks: new_gauge( - "tokio_scheduled_tasks", - "The total number of tasks currently scheduled in workers' local queues.", - "runtime", - &[("runtime_type", label)], - ), - worker_busy_duration_milliseconds_total: new_counter( - "tokio_worker_busy_duration_milliseconds_total", - " The total amount of time worker threads were busy.", - "runtime", - &[("runtime_type", label)], - ), - worker_busy_ratio: new_float_gauge( - "tokio_worker_busy_ratio", - "The ratio of time worker threads were busy since the last time runtime metrics \ - were collected.", - "runtime", - &[("runtime_type", label)], - ), - worker_threads: new_gauge( - "tokio_worker_threads", - "The number of worker threads used by the runtime.", - "runtime", - &[("runtime_type", label)], + scheduled_tasks: gauge!(parent: &*TOKIO_SCHEDULED_TASKS, "runtime_type" => label), + worker_busy_duration_milliseconds_total: 
counter!( + parent: &*TOKIO_WORKER_BUSY_DURATION_MILLISECONDS_TOTAL, + "runtime_type" => label, ), + worker_busy_ratio: gauge!(parent: &*TOKIO_WORKER_BUSY_RATIO, "runtime_type" => label), + worker_threads: gauge!(parent: &*TOKIO_WORKER_THREADS, "runtime_type" => label), } } pub fn update(&mut self, runtime_metrics: &RuntimeMetrics) { self.scheduled_tasks - .set(runtime_metrics.total_local_queue_depth as i64); + .set(runtime_metrics.total_local_queue_depth as f64); self.worker_busy_duration_milliseconds_total - .inc_by(runtime_metrics.total_busy_duration.as_millis() as u64); + .increment(runtime_metrics.total_busy_duration.as_millis() as u64); self.worker_busy_ratio.set(runtime_metrics.busy_ratio()); self.worker_threads - .set(runtime_metrics.workers_count as i64); + .set(runtime_metrics.workers_count as f64); } } diff --git a/quickwit/quickwit-common/src/stream_utils.rs b/quickwit/quickwit-common/src/stream_utils.rs index 00b40ee4b43..ad060fb5af2 100644 --- a/quickwit/quickwit-common/src/stream_utils.rs +++ b/quickwit/quickwit-common/src/stream_utils.rs @@ -18,12 +18,11 @@ use std::pin::Pin; use bytesize::ByteSize; use futures::{Stream, StreamExt, TryStreamExt, stream}; -use prometheus::IntGauge; use tokio::sync::{mpsc, watch}; use tokio_stream::wrappers::{ReceiverStream, UnboundedReceiverStream, WatchStream}; use tracing::warn; -use crate::metrics::GaugeGuard; +use crate::metrics::{Gauge, GaugeGuard}; use crate::tower::RpcName; pub type BoxStream = Pin + Send + Unpin + 'static>>; @@ -77,7 +76,7 @@ where T: Send + 'static pub fn new_bounded_with_gauge( capacity: usize, - gauge: &'static IntGauge, + gauge: &'static Gauge, ) -> (TrackedSender, Self) { let (sender, receiver) = mpsc::channel(capacity); let tracked_sender = TrackedSender { sender, gauge }; @@ -94,7 +93,7 @@ where T: Send + 'static (sender, receiver.into()) } - pub fn new_unbounded_with_gauge(gauge: &'static IntGauge) -> (TrackedUnboundedSender, Self) { + pub fn new_unbounded_with_gauge(gauge: &'static 
Gauge) -> (TrackedUnboundedSender, Self) { let (sender, receiver) = mpsc::unbounded_channel(); let tracked_sender = TrackedUnboundedSender { sender, gauge }; let receiver_stream = UnboundedReceiverStream::new(receiver) @@ -228,7 +227,7 @@ where T: RpcName } } -pub struct InFlightValue(T, #[allow(dead_code)] GaugeGuard<'static>); +pub struct InFlightValue(T, #[allow(dead_code)] GaugeGuard); impl fmt::Debug for InFlightValue where T: fmt::Debug @@ -239,7 +238,7 @@ where T: fmt::Debug } impl InFlightValue { - pub fn new(value: T, value_size: ByteSize, gauge: &'static IntGauge) -> Self { + pub fn new(value: T, value_size: ByteSize, gauge: &'static Gauge) -> Self { let mut gauge_guard = GaugeGuard::from_gauge(gauge); gauge_guard.add(value_size.as_u64() as i64); @@ -253,7 +252,7 @@ impl InFlightValue { pub struct TrackedSender { sender: mpsc::Sender>, - gauge: &'static IntGauge, + gauge: &'static Gauge, } impl TrackedSender { @@ -271,7 +270,7 @@ impl TrackedSender { pub struct TrackedUnboundedSender { sender: mpsc::UnboundedSender>, - gauge: &'static IntGauge, + gauge: &'static Gauge, } impl TrackedUnboundedSender { @@ -287,7 +286,7 @@ mod tests { use std::sync::LazyLock; use super::*; - use crate::metrics::new_gauge; + use crate::metrics::{Gauge, gauge}; #[tokio::test] async fn test_service_stream_map() { @@ -300,32 +299,37 @@ mod tests { #[tokio::test] async fn test_tracked_service_stream_bounded() { - static TEST_GAUGE: LazyLock = LazyLock::new(|| { - new_gauge("common", "help", "test_tracked_service_stream_bounded", &[]) + static TEST_GAUGE: LazyLock = LazyLock::new(|| { + gauge!( + name: "common", + description: "help", + subsystem: "test_tracked_service_stream_bounded", + observable: true, + ) }); let (service_stream_tx, mut service_stream) = ServiceStream::new_bounded_with_gauge(3, &TEST_GAUGE); service_stream_tx.send(1, ByteSize(42)).await.unwrap(); - assert_eq!(TEST_GAUGE.get(), 42); + assert_eq!(TEST_GAUGE.get(), 42.0); service_stream_tx.send(2, 
ByteSize(1337)).await.unwrap(); - assert_eq!(TEST_GAUGE.get(), 1379); + assert_eq!(TEST_GAUGE.get(), 1379.0); let value = service_stream.next().await.unwrap(); assert_eq!(value, 1); - assert_eq!(TEST_GAUGE.get(), 1337); + assert_eq!(TEST_GAUGE.get(), 1337.0); } #[tokio::test] async fn test_tracked_service_stream_unbounded() { - static TEST_GAUGE: LazyLock = LazyLock::new(|| { - new_gauge( - "common", - "help", - "test_tracked_service_stream_unbounded", - &[], + static TEST_GAUGE: LazyLock = LazyLock::new(|| { + gauge!( + name: "common", + description: "help", + subsystem: "test_tracked_service_stream_unbounded", + observable: true, ) }); @@ -333,13 +337,13 @@ mod tests { ServiceStream::new_unbounded_with_gauge(&TEST_GAUGE); service_stream_tx.send(1, ByteSize(42)).unwrap(); - assert_eq!(TEST_GAUGE.get(), 42); + assert_eq!(TEST_GAUGE.get(), 42.0); service_stream_tx.send(2, ByteSize(1337)).unwrap(); - assert_eq!(TEST_GAUGE.get(), 1379); + assert_eq!(TEST_GAUGE.get(), 1379.0); let value = service_stream.next().await.unwrap(); assert_eq!(value, 1); - assert_eq!(TEST_GAUGE.get(), 1337); + assert_eq!(TEST_GAUGE.get(), 1337.0); } } diff --git a/quickwit/quickwit-common/src/thread_pool.rs b/quickwit/quickwit-common/src/thread_pool.rs index f4b738ef2c0..25a8e74a4a8 100644 --- a/quickwit/quickwit-common/src/thread_pool.rs +++ b/quickwit/quickwit-common/src/thread_pool.rs @@ -13,14 +13,29 @@ // limitations under the License. 
use std::fmt; -use std::sync::{Arc, LazyLock}; +use std::sync::Arc; use futures::{Future, TryFutureExt}; -use prometheus::IntGauge; use tokio::sync::oneshot; use tracing::error; -use crate::metrics::{GaugeGuard, IntGaugeVec, OwnedGaugeGuard, new_gauge_vec}; +use crate::metrics::{Gauge, GaugeGuard, gauge}; + +static THREAD_POOL_ONGOING_TASKS: std::sync::LazyLock = std::sync::LazyLock::new(|| { + gauge!( + name: "ongoing_tasks", + description: "number of tasks being currently processed by threads in the thread pool", + subsystem: "thread_pool", + ) +}); + +static THREAD_POOL_PENDING_TASKS: std::sync::LazyLock = std::sync::LazyLock::new(|| { + gauge!( + name: "pending_tasks", + description: "number of tasks waiting in the queue before being processed by the thread pool", + subsystem: "thread_pool", + ) +}); /// An executor backed by a thread pool to run CPU-intensive tasks. /// @@ -29,8 +44,8 @@ use crate::metrics::{GaugeGuard, IntGaugeVec, OwnedGaugeGuard, new_gauge_vec}; #[derive(Clone)] pub struct ThreadPool { thread_pool: Arc, - ongoing_tasks: IntGauge, - pending_tasks: IntGauge, + ongoing_tasks: Gauge, + pending_tasks: Gauge, } impl ThreadPool { @@ -46,8 +61,8 @@ impl ThreadPool { let thread_pool = rayon_pool_builder .build() .expect("failed to spawn thread pool"); - let ongoing_tasks = THREAD_POOL_METRICS.ongoing_tasks.with_label_values([name]); - let pending_tasks = THREAD_POOL_METRICS.pending_tasks.with_label_values([name]); + let ongoing_tasks = gauge!(parent: &*THREAD_POOL_ONGOING_TASKS, "pool" => name); + let pending_tasks = gauge!(parent: &*THREAD_POOL_PENDING_TASKS, "pool" => name); ThreadPool { thread_pool: Arc::new(thread_pool), ongoing_tasks, @@ -84,8 +99,7 @@ impl ThreadPool { { let span = tracing::Span::current(); let ongoing_tasks = self.ongoing_tasks.clone(); - let mut pending_tasks_guard: OwnedGaugeGuard = - OwnedGaugeGuard::from_gauge(self.pending_tasks.clone()); + let mut pending_tasks_guard = GaugeGuard::from_gauge(&self.pending_tasks); 
pending_tasks_guard.add(1i64); let (tx, rx) = oneshot::channel(); self.thread_pool.spawn(move || { @@ -134,34 +148,6 @@ impl fmt::Display for Panicked { impl std::error::Error for Panicked {} -struct ThreadPoolMetrics { - ongoing_tasks: IntGaugeVec<1>, - pending_tasks: IntGaugeVec<1>, -} - -impl Default for ThreadPoolMetrics { - fn default() -> Self { - ThreadPoolMetrics { - ongoing_tasks: new_gauge_vec( - "ongoing_tasks", - "number of tasks being currently processed by threads in the thread pool", - "thread_pool", - &[], - ["pool"], - ), - pending_tasks: new_gauge_vec( - "pending_tasks", - "number of tasks waiting in the queue before being processed by the thread pool", - "thread_pool", - &[], - ["pool"], - ), - } - } -} - -static THREAD_POOL_METRICS: LazyLock = LazyLock::new(ThreadPoolMetrics::default); - #[cfg(test)] mod tests { use std::sync::Arc; diff --git a/quickwit/quickwit-common/src/tower/circuit_breaker.rs b/quickwit/quickwit-common/src/tower/circuit_breaker.rs index 09ada07e187..aaf98cd1c3d 100644 --- a/quickwit/quickwit-common/src/tower/circuit_breaker.rs +++ b/quickwit/quickwit-common/src/tower/circuit_breaker.rs @@ -19,10 +19,11 @@ use std::task::{Context, Poll}; use std::time::Duration; use pin_project::pin_project; -use prometheus::IntCounter; use tokio::time::Instant; use tower::{Layer, Service}; +use crate::metrics::Counter; + /// The circuit breaker layer implements the [circuit breaker pattern](https://martinfowler.com/bliki/CircuitBreaker.html). 
/// /// It counts the errors emitted by the inner service, and if the number of errors exceeds a certain @@ -49,7 +50,7 @@ pub struct CircuitBreakerLayer { time_window: Duration, timeout: Duration, evaluator: Evaluator, - circuit_break_total: prometheus::IntCounter, + circuit_break_total: Counter, } pub trait CircuitBreakerEvaluator: Clone { @@ -61,7 +62,7 @@ pub trait CircuitBreakerEvaluator: Clone { self, max_num_errors_per_secs: u32, timeout: Duration, - circuit_break_total: prometheus::IntCounter, + circuit_break_total: Counter, ) -> CircuitBreakerLayer { CircuitBreakerLayer { max_error_count_per_time_window: max_num_errors_per_secs, @@ -102,7 +103,7 @@ struct CircuitBreakerInner { timeout: Duration, evaluator: Evaluator, state: CircuitBreakerState, - circuit_break_total: IntCounter, + circuit_break_total: Counter, } impl CircuitBreakerInner { @@ -125,7 +126,7 @@ impl CircuitBreakerInner { fn receive_error(&mut self) { match self.state { CircuitBreakerState::HalfOpen => { - self.circuit_break_total.inc(); + self.circuit_break_total.increment(1); self.state = CircuitBreakerState::Open { until: Instant::now() + self.timeout, } @@ -144,7 +145,7 @@ impl CircuitBreakerInner { } let now = Instant::now(); if now < error_window_end { - self.circuit_break_total.inc(); + self.circuit_break_total.increment(1); self.state = CircuitBreakerState::Open { until: now + self.timeout, }; @@ -301,8 +302,11 @@ mod tests { const TIMEOUT: Duration = Duration::from_millis(500); - let int_counter: prometheus::IntCounter = - IntCounter::new("circuit_break_total_test", "test circuit breaker counter").unwrap(); + let int_counter = crate::metrics::counter!( + name: "circuit_break_total_test", + description: "test circuit breaker counter", + subsystem: "", + ); let mut service = ServiceBuilder::new() .layer(TestCircuitBreakerEvaluator.make_layer(10, TIMEOUT, int_counter)) .service_fn(|_| async { diff --git a/quickwit/quickwit-common/src/tower/metrics.rs 
b/quickwit/quickwit-common/src/tower/metrics.rs index b2d093adbe3..23e8845d1b2 100644 --- a/quickwit/quickwit-common/src/tower/metrics.rs +++ b/quickwit/quickwit-common/src/tower/metrics.rs @@ -13,28 +13,51 @@ // limitations under the License. use std::pin::Pin; +use std::sync::LazyLock; use std::task::{Context, Poll}; use std::time::Instant; use futures::{Future, ready}; use pin_project::{pin_project, pinned_drop}; -use prometheus::exponential_buckets; use tower::{Layer, Service}; -use crate::metrics::{ - HistogramVec, IntCounterVec, IntGaugeVec, new_counter_vec, new_gauge_vec, new_histogram_vec, -}; +use crate::metrics::{Counter, Gauge, Histogram, counter, exponential_buckets, gauge, histogram}; pub trait RpcName { fn rpc_name() -> &'static str; } +static GRPC_REQUESTS_TOTAL: LazyLock = LazyLock::new(|| { + counter!( + name: "grpc_requests_total", + description: "Total number of gRPC requests processed.", + subsystem: "", + ) +}); + +static GRPC_REQUESTS_IN_FLIGHT: LazyLock = LazyLock::new(|| { + gauge!( + name: "grpc_requests_in_flight", + description: "Number of gRPC requests in-flight.", + subsystem: "", + ) +}); + +static GRPC_REQUEST_DURATION_SECONDS: LazyLock = LazyLock::new(|| { + histogram!( + name: "grpc_request_duration_seconds", + description: "Duration of request in seconds.", + subsystem: "", + buckets: exponential_buckets(0.001, 2.0, 12).unwrap(), + ) +}); + #[derive(Clone)] pub struct GrpcMetrics { inner: S, - requests_total: IntCounterVec<2>, - requests_in_flight: IntGaugeVec<1>, - request_duration_seconds: HistogramVec<2>, + requests_total: Counter, + requests_in_flight: Gauge, + request_duration_seconds: Histogram, } impl Service for GrpcMetrics @@ -55,7 +78,7 @@ where let rpc_name = R::rpc_name(); let inner = self.inner.call(request); - self.requests_in_flight.with_label_values([rpc_name]).inc(); + gauge!(parent: &self.requests_in_flight, "rpc" => rpc_name).increment(1.0); ResponseFuture { inner, @@ -71,35 +94,28 @@ where #[derive(Clone)] pub 
struct GrpcMetricsLayer { - requests_total: IntCounterVec<2>, - requests_in_flight: IntGaugeVec<1>, - request_duration_seconds: HistogramVec<2>, + requests_total: Counter, + requests_in_flight: Gauge, + request_duration_seconds: Histogram, } impl GrpcMetricsLayer { pub fn new(subsystem: &'static str, kind: &'static str) -> Self { Self { - requests_total: new_counter_vec( - "grpc_requests_total", - "Total number of gRPC requests processed.", - subsystem, - &[("kind", kind)], - ["rpc", "status"], + requests_total: counter!( + parent: &*GRPC_REQUESTS_TOTAL, + "service" => subsystem, + "kind" => kind, ), - requests_in_flight: new_gauge_vec( - "grpc_requests_in_flight", - "Number of gRPC requests in-flight.", - subsystem, - &[("kind", kind)], - ["rpc"], + requests_in_flight: gauge!( + parent: &*GRPC_REQUESTS_IN_FLIGHT, + "service" => subsystem, + "kind" => kind, ), - request_duration_seconds: new_histogram_vec( - "grpc_request_duration_seconds", - "Duration of request in seconds.", - subsystem, - &[("kind", kind)], - ["rpc", "status"], - exponential_buckets(0.001, 2.0, 12).unwrap(), + request_duration_seconds: histogram!( + parent: &*GRPC_REQUEST_DURATION_SECONDS, + "service" => subsystem, + "kind" => kind, ), } } @@ -118,7 +134,7 @@ impl Layer for GrpcMetricsLayer { } } -/// Response future for [`PrometheusMetrics`]. +/// Response future for [`GrpcMetrics`]. 
#[pin_project(PinnedDrop)] pub struct ResponseFuture { #[pin] @@ -126,24 +142,28 @@ pub struct ResponseFuture { start: Instant, rpc_name: &'static str, status: &'static str, - requests_total: IntCounterVec<2>, - requests_in_flight: IntGaugeVec<1>, - request_duration_seconds: HistogramVec<2>, + requests_total: Counter, + requests_in_flight: Gauge, + request_duration_seconds: Histogram, } #[pinned_drop] impl PinnedDrop for ResponseFuture { fn drop(self: Pin<&mut Self>) { let elapsed = self.start.elapsed().as_secs_f64(); - let label_values = [self.rpc_name, self.status]; - - self.requests_total.with_label_values(label_values).inc(); - self.request_duration_seconds - .with_label_values(label_values) - .observe(elapsed); - self.requests_in_flight - .with_label_values([self.rpc_name]) - .dec(); + counter!( + parent: &self.requests_total, + "rpc" => self.rpc_name, + "status" => self.status, + ) + .increment(1); + histogram!( + parent: &self.request_duration_seconds, + "rpc" => self.rpc_name, + "status" => self.status, + ) + .record(elapsed); + gauge!(parent: &self.requests_in_flight, "rpc" => self.rpc_name).decrement(1.0); } } @@ -162,6 +182,9 @@ where F: Future> #[cfg(test)] mod tests { + use metrics::with_local_recorder; + use metrics_util::debugging::{DebugValue, DebuggingRecorder}; + use super::*; struct HelloRequest; @@ -180,59 +203,67 @@ mod tests { } } - #[tokio::test] - async fn test_grpc_metrics() { - let layer = GrpcMetricsLayer::new("quickwit_test", "server"); - - let mut hello_service = - layer - .clone() - .layer(tower::service_fn(|request: HelloRequest| async move { - Ok::<_, ()>(request) - })); - let mut goodbye_service = - layer - .clone() - .layer(tower::service_fn(|request: GoodbyeRequest| async move { - Ok::<_, ()>(request) - })); - - hello_service.call(HelloRequest).await.unwrap(); - + #[test] + fn test_grpc_metrics() { + let recorder = DebuggingRecorder::new(); + let snapshotter = recorder.snapshotter(); + + with_local_recorder(&recorder, || { + 
futures::executor::block_on(async { + let layer = GrpcMetricsLayer::new("quickwit_test", "server"); + + let mut hello_service = + layer + .clone() + .layer(tower::service_fn(|request: HelloRequest| async move { + Ok::<_, ()>(request) + })); + let mut goodbye_service = + layer + .clone() + .layer(tower::service_fn(|request: GoodbyeRequest| async move { + Ok::<_, ()>(request) + })); + + hello_service.call(HelloRequest).await.unwrap(); + goodbye_service.call(GoodbyeRequest).await.unwrap(); + + let hello_future = hello_service.call(HelloRequest); + drop(hello_future); + }); + }); + + let snapshot = snapshotter.snapshot().into_vec(); + let counter_value = |rpc: &str, status: &str| { + snapshot.iter().find_map(|(composite_key, _, _, value)| { + let (_, key) = composite_key.clone().into_parts(); + let labels = key + .labels() + .map(|label| (label.key(), label.value())) + .collect::>(); + if key.name() == "quickwit_grpc_requests_total" + && labels.contains(&("service", "quickwit_test")) + && labels.contains(&("kind", "server")) + && labels.contains(&("rpc", rpc)) + && labels.contains(&("status", status)) + { + Some(value) + } else { + None + } + }) + }; assert_eq!( - layer - .requests_total - .with_label_values(["hello", "success"]) - .get(), - 1 + counter_value("hello", "success"), + Some(&DebugValue::Counter(1)) ); assert_eq!( - layer - .requests_total - .with_label_values(["goodbye", "success"]) - .get(), - 0 + counter_value("goodbye", "success"), + Some(&DebugValue::Counter(1)) ); - - goodbye_service.call(GoodbyeRequest).await.unwrap(); - - assert_eq!( - layer - .requests_total - .with_label_values(["goodbye", "success"]) - .get(), - 1 - ); - - let hello_future = hello_service.call(HelloRequest); - drop(hello_future); - assert_eq!( - layer - .requests_total - .with_label_values(["hello", "cancelled"]) - .get(), - 1 + counter_value("hello", "cancelled"), + Some(&DebugValue::Counter(1)) ); } } From 19e6a817d435450ede733354f23c5d044311f46b Mon Sep 17 00:00:00 2001 From: 
Shuhei Kitagawa Date: Thu, 30 Apr 2026 09:09:13 +0200 Subject: [PATCH 03/54] Migrate metrics call sites to metrics macros --- PLAN.md | 57 ++ quickwit/quickwit-actors/src/actor_context.rs | 6 +- quickwit/quickwit-actors/src/mailbox.rs | 54 +- quickwit/quickwit-actors/src/spawn_builder.rs | 9 +- quickwit/quickwit-cli/src/jemalloc.rs | 6 +- quickwit/quickwit-cli/src/lib.rs | 3 +- quickwit/quickwit-cli/src/metrics.rs | 22 +- quickwit/quickwit-cluster/src/grpc_gossip.rs | 2 +- quickwit/quickwit-cluster/src/lib.rs | 18 +- quickwit/quickwit-cluster/src/metrics.rs | 220 +++++--- .../quickwit-common/src/metrics/histogram.rs | 37 ++ quickwit/quickwit-common/src/metrics/mod.rs | 2 +- quickwit/quickwit-common/src/metrics/tests.rs | 51 ++ .../src/control_plane.rs | 8 +- .../src/indexing_scheduler/mod.rs | 8 +- .../src/ingest/ingest_controller.rs | 6 +- .../quickwit-control-plane/src/metrics.rs | 180 +++--- .../quickwit-control-plane/src/model/mod.rs | 2 +- .../src/model/shard_table.rs | 36 +- .../src/garbage_collection.rs | 14 +- .../src/actors/doc_processor.rs | 27 +- .../quickwit-indexing/src/actors/indexer.rs | 4 +- .../src/actors/indexing_pipeline.rs | 59 +- .../src/actors/merge_pipeline.rs | 30 +- .../src/actors/merge_scheduler_service.rs | 10 +- .../metrics_pipeline/parquet_uploader.rs | 10 +- .../src/actors/metrics_pipeline/pipeline.rs | 14 +- .../processed_parquet_batch.rs | 2 +- .../quickwit-indexing/src/actors/uploader.rs | 24 +- quickwit/quickwit-indexing/src/metrics.rs | 186 +++--- .../src/models/indexed_split.rs | 4 +- .../src/models/processed_doc.rs | 2 +- .../src/models/raw_doc_batch.rs | 2 +- .../src/source/kafka_source.rs | 4 +- quickwit/quickwit-indexing/src/source/mod.rs | 2 +- .../quickwit-ingest/src/ingest_api_service.rs | 14 +- .../src/ingest_v2/broadcast/local_shards.rs | 8 +- .../quickwit-ingest/src/ingest_v2/ingester.rs | 77 ++- .../quickwit-ingest/src/ingest_v2/metrics.rs | 263 +++++---- .../src/ingest_v2/replication.rs | 4 +- 
.../quickwit-ingest/src/ingest_v2/router.rs | 67 ++- quickwit/quickwit-ingest/src/lib.rs | 35 +- quickwit/quickwit-ingest/src/metrics.rs | 103 ++-- quickwit/quickwit-jaeger/src/lib.rs | 63 ++- quickwit/quickwit-jaeger/src/metrics.rs | 114 ++-- quickwit/quickwit-jaeger/src/v1.rs | 29 +- quickwit/quickwit-jaeger/src/v2.rs | 50 +- .../src/actors/delete_task_planner.rs | 13 +- .../src/actors/garbage_collector.rs | 86 +-- quickwit/quickwit-janitor/src/metrics.rs | 92 +-- .../quickwit-lambda-client/src/invoker.rs | 23 +- .../quickwit-lambda-client/src/metrics.rs | 76 +-- .../src/metastore/postgres/metrics.rs | 53 +- .../src/metastore/postgres/pool.rs | 4 +- .../quickwit-opentelemetry/src/otlp/logs.rs | 83 +-- .../src/otlp/metrics.rs | 132 +++-- .../src/otlp/otel_metrics.rs | 85 +-- .../quickwit-opentelemetry/src/otlp/traces.rs | 83 +-- .../src/index/accumulator.rs | 6 +- .../src/ingest/processor.rs | 30 +- .../src/ingest/sketch_processor.rs | 40 +- .../quickwit-parquet-engine/src/metrics.rs | 160 +++--- quickwit/quickwit-search/src/leaf.rs | 25 +- quickwit/quickwit-search/src/list_terms.rs | 4 +- quickwit/quickwit-search/src/metrics.rs | 435 +++++++++------ .../quickwit-search/src/metrics_trackers.rs | 58 +- .../quickwit-search/src/scroll_context.rs | 2 +- .../quickwit-search/src/search_job_placer.rs | 10 +- .../src/search_permit_provider.rs | 4 +- quickwit/quickwit-serve/src/decompression.rs | 2 +- quickwit/quickwit-serve/src/load_shield.rs | 22 +- quickwit/quickwit-serve/src/metrics.rs | 97 ++-- quickwit/quickwit-serve/src/rest.rs | 24 +- .../quickwit-storage/src/cache/base_cache.rs | 82 +-- .../src/cache/byte_range_cache.rs | 40 +- .../src/file_descriptor_cache.rs | 14 +- quickwit/quickwit-storage/src/metrics.rs | 528 +++++++++++------- .../src/object_storage/azure_blob_storage.rs | 14 +- .../src/object_storage/error.rs | 12 +- .../object_storage/s3_compatible_storage.rs | 20 +- .../src/opendal_storage/base.rs | 12 +- .../quickwit-storage/src/split_cache/mod.rs | 6 +- 
.../src/split_cache/split_table.rs | 12 +- .../src/timeout_and_retry_storage.rs | 6 +- 84 files changed, 2587 insertions(+), 1756 deletions(-) create mode 100644 PLAN.md diff --git a/PLAN.md b/PLAN.md new file mode 100644 index 00000000000..13a662df65f --- /dev/null +++ b/PLAN.md @@ -0,0 +1,57 @@ +# Metrics-RS Macro Migration Plan + +## Summary +Replace Quickwit’s current `prometheus` crate-based metrics with the metricspp API directly. The new implementation lives inside `quickwit-common` as `quickwit_common::metrics`, not as a new Cargo crate, and call sites are migrated to `counter!`, `gauge!`, and `histogram!`. + +Metric names stay compatible as `quickwit_{subsystem}_{name}` so existing dashboards and `/metrics` users keep working. + +## Design Authority +The proposed design in `~/go/src/github.com/DataDog/experimental/users/luca.cominardi/metricspp` is the reference architecture for this migration. Quickwit code should be updated to follow that design, including its API shape, macro behavior, metadata model, observable metric behavior, histogram bucket handling, and exporter setup. + +When existing Quickwit metrics code conflicts with the metricspp design, prefer adapting the existing Quickwit code to the metricspp design. Do not reshape the metricspp approach around Quickwit’s current `prometheus` crate patterns, constructors, or historical implementation details unless there is a concrete Quickwit constraint that makes the proposed design impossible to apply. + +## Key Changes +- Port the metricspp design from `~/go/src/github.com/DataDog/experimental/users/luca.cominardi/metricspp` into `quickwit-common/src/metrics/`: typed `Counter`, `Gauge`, `Histogram`, `GaugeGuard`, observable shadows, inventory metadata, histogram bucket registry, and `describe_metrics()`. +- Add dependencies needed by metricspp: `metrics-util`, `metrics-exporter-prometheus`, `metrics-exporter-otel`, `inventory`, `const_format`, and `atomic_float`. 
+- Replace existing `new_counter`, `new_gauge`, `new_histogram`, and `*Vec` declarations across Quickwit with static `LazyLock` metrics declared through metricspp macros. +- Replace metric operations with the new API: + - counters: `.increment(n)` or `.absolute(n)` + - gauges: `.increment(n)`, `.decrement(n)`, `.set(n)` + - histograms: `.record(value)` + - labeled metrics: `counter!(parent: BASE, "label" => value)`, etc. +- Use `observable: true` only where production code or tests currently read values through `.get()`. + +## Exporters +- Install one global metrics-rs recorder during CLI startup. +- Always include a Prometheus recorder and retain Quickwit’s existing `/metrics` route by rendering the stored `PrometheusHandle`. +- Configure Prometheus and OTLP histogram buckets from `metricspp::histogram_buckets()` before metrics are first used. +- Add OTLP metrics export behind `QW_ENABLE_OPENTELEMETRY_OTLP_EXPORTER=true`, using the existing OTLP protocol env behavior: + - `OTEL_EXPORTER_OTLP_METRICS_PROTOCOL` + - fallback to `OTEL_EXPORTER_OTLP_PROTOCOL` + - support `grpc`, `http/protobuf`, and `http/json` +- Fan out to Prometheus, OTLP, and the existing DogStatsD/invariant recorder path where applicable. + +## Migration Work +- For every migrated module, start from the metricspp design and ask how the existing Quickwit code should change to match it, not how metricspp should be adjusted to preserve the old Quickwit implementation. +- Convert metric modules in `quickwit-serve`, `quickwit-search`, `quickwit-indexing`, `quickwit-ingest`, `quickwit-storage`, `quickwit-opentelemetry`, `quickwit-jaeger`, `quickwit-cluster`, `quickwit-actors`, and `quickwit-common`. +- Remove direct `prometheus` usage from Quickwit-owned code, including tower/circuit-breaker helpers. +- Replace `register_info("build_info", ...)` with a metricspp-declared counter/info metric set to `1` with build labels. 
+- Update docs/comments mentioning Prometheus crate semantics to describe metrics-rs plus Prometheus rendering. 
+
+## Test Plan
+- Port metricspp tests for counters, gauges, histograms, parent labels, observable metrics, guards, and histogram bucket inventory.
+- Add Quickwit integration tests for `/metrics` output: metric names, labels, descriptions, and histogram buckets.
+- Add OTLP metrics tests with an in-memory OpenTelemetry exporter.
+- Update tests that assert metric values to use `observable: true` declarations or `metrics_util::debugging::DebuggingRecorder`.
+- Run:
+  - `cargo test -p quickwit-common metrics`
+  - `cargo test -p quickwit-serve metrics_api`
+  - `cargo clippy --workspace --tests --all-features`
+  - `make fmt`
+
+## Assumptions
+- `quickwit-metrics` means the Quickwit metrics module under `quickwit_common::metrics`.
+- A broad call-site migration is acceptable; preserving old constructor/type APIs is explicitly out of scope.
+- Existing Prometheus metric names must remain stable.
+- The existing Quickwit `metrics` Cargo feature for metrics ingestion is unrelated and unchanged. 
diff --git a/quickwit/quickwit-actors/src/actor_context.rs b/quickwit/quickwit-actors/src/actor_context.rs index 3186e210647..079eb42e453 100644 --- a/quickwit/quickwit-actors/src/actor_context.rs +++ b/quickwit/quickwit-actors/src/actor_context.rs @@ -20,7 +20,7 @@ use std::sync::Arc; use std::sync::atomic::{AtomicBool, Ordering}; use std::time::Duration; -use quickwit_common::metrics::IntCounter; +use quickwit_common::metrics::Counter; use quickwit_common::{KillSwitch, Progress, ProtectedZoneGuard}; use tokio::sync::{oneshot, watch}; use tracing::{debug, error}; @@ -61,7 +61,7 @@ pub struct ActorContextInner { self_mailbox: Mailbox, progress: Progress, actor_state: AtomicState, - backpressure_micros_counter_opt: Option, + backpressure_micros_counter_opt: Option, observable_state_tx: watch::Sender, // Boolean marking the presence of an observe message in the actor's high priority queue. observe_enqueued: AtomicBool, @@ -72,7 +72,7 @@ impl ActorContext { self_mailbox: Mailbox, spawn_ctx: SpawnContext, observable_state_tx: watch::Sender, - backpressure_micros_counter_opt: Option, + backpressure_micros_counter_opt: Option, ) -> Self { ActorContext { inner: ActorContextInner { diff --git a/quickwit/quickwit-actors/src/mailbox.rs b/quickwit/quickwit-actors/src/mailbox.rs index f222294e4c2..41eb67a4c8c 100644 --- a/quickwit/quickwit-actors/src/mailbox.rs +++ b/quickwit/quickwit-actors/src/mailbox.rs @@ -19,7 +19,7 @@ use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::{Arc, LazyLock, Weak}; use std::time::Instant; -use quickwit_common::metrics::{GaugeGuard, IntCounter, IntGauge}; +use quickwit_common::metrics::{Counter, Gauge, GaugeGuard, gauge}; use tokio::sync::oneshot; use crate::channel_with_priority::{Receiver, Sender, TrySendError}; @@ -191,7 +191,7 @@ impl Mailbox { pub async fn send_message_with_backpressure_counter( &self, message: M, - backpressure_micros_counter_opt: Option<&IntCounter>, + backpressure_micros_counter_opt: Option<&Counter>, ) -> 
Result, SendError> where A: DeferableReplyHandler, @@ -205,7 +205,7 @@ impl Mailbox { let now = Instant::now(); self.inner.tx.send_low_priority(envelope).await?; let elapsed = now.elapsed(); - backpressure_micros_counter.inc_by(elapsed.as_micros() as u64); + backpressure_micros_counter.increment(elapsed.as_micros() as u64); } else { self.inner.tx.send_low_priority(envelope).await?; } @@ -273,7 +273,7 @@ impl Mailbox { pub async fn ask_with_backpressure_counter( &self, message: M, - backpressure_micros_counter_opt: Option<&IntCounter>, + backpressure_micros_counter_opt: Option<&Counter>, ) -> Result> where A: DeferableReplyHandler, @@ -308,9 +308,17 @@ impl Mailbox { struct InboxInner { rx: Receiver>, - _inboxes_count_gauge_guard: GaugeGuard<'static>, + _inboxes_count_gauge_guard: GaugeGuard, } +static INBOX_GAUGE: LazyLock = LazyLock::new(|| { + gauge!( + name: "inboxes_count", + description: "overall count of actors", + subsystem: "actor", + ) +}); + pub struct Inbox { inner: Arc>, } @@ -385,15 +393,7 @@ impl Inbox { } } -fn get_actor_inboxes_count_gauge_guard() -> GaugeGuard<'static> { - static INBOX_GAUGE: LazyLock = LazyLock::new(|| { - quickwit_common::metrics::new_gauge( - "inboxes_count", - "overall count of actors", - "actor", - &[], - ) - }); +fn get_actor_inboxes_count_gauge_guard() -> GaugeGuard { let mut gauge_guard = GaugeGuard::from_gauge(&INBOX_GAUGE); gauge_guard.add(1); gauge_guard @@ -452,6 +452,8 @@ mod tests { use std::mem; use std::time::Duration; + use quickwit_common::metrics::counter; + use super::*; use crate::tests::{Ping, PingReceiverActor}; use crate::{ActorContext, ActorExitStatus, Handler, Universe}; @@ -519,8 +521,12 @@ mod tests { .await .unwrap(); // At this point the actor was started and even processed a message entirely. 
- let backpressure_micros_counter = - IntCounter::new("test_counter", "help for test_counter").unwrap(); + let backpressure_micros_counter = counter!( + name: "test_counter_low_backpressure", + description: "help for test_counter", + subsystem: "actor", + observable: true, + ); let wait_duration = Duration::from_millis(1); let processed = mailbox .send_message_with_backpressure_counter( @@ -546,8 +552,12 @@ mod tests { .ask_with_backpressure_counter(Duration::default(), None) .await .unwrap(); - let backpressure_micros_counter = - IntCounter::new("test_counter", "help for test_counter").unwrap(); + let backpressure_micros_counter = counter!( + name: "test_counter_backpressure", + description: "help for test_counter", + subsystem: "actor", + observable: true, + ); let wait_duration = Duration::from_millis(1); mailbox .send_message_with_backpressure_counter( @@ -578,8 +588,12 @@ mod tests { .ask_with_backpressure_counter(Duration::default(), None) .await .unwrap(); - let backpressure_micros_counter = - IntCounter::new("test_counter", "help for test_counter").unwrap(); + let backpressure_micros_counter = counter!( + name: "test_counter_no_waiting_backpressure", + description: "help for test_counter", + subsystem: "actor", + observable: true, + ); let start = Instant::now(); mailbox .ask_with_backpressure_counter(Duration::from_millis(1), None) diff --git a/quickwit/quickwit-actors/src/spawn_builder.rs b/quickwit/quickwit-actors/src/spawn_builder.rs index 6dfc1aa9155..f8359c3f32c 100644 --- a/quickwit/quickwit-actors/src/spawn_builder.rs +++ b/quickwit/quickwit-actors/src/spawn_builder.rs @@ -16,7 +16,7 @@ use std::fmt; use std::time::Duration; use anyhow::Context; -use quickwit_common::metrics::IntCounter; +use quickwit_common::metrics::Counter; use sync_wrapper::SyncWrapper; use tokio::sync::watch; use tracing::{debug, error, info}; @@ -91,7 +91,7 @@ pub struct SpawnBuilder { spawn_ctx: SpawnContext, #[allow(clippy::type_complexity)] mailboxes: Option<(Mailbox, 
Inbox)>, - backpressure_micros_counter_opt: Option, + backpressure_micros_counter_opt: Option, } impl SpawnBuilder { @@ -129,10 +129,7 @@ impl SpawnBuilder { /// /// When using `.ask` the amount of time counted may be misleading. /// (See `Mailbox::ask_with_backpressure_counter` for more details) - pub fn set_backpressure_micros_counter( - mut self, - backpressure_micros_counter: IntCounter, - ) -> Self { + pub fn set_backpressure_micros_counter(mut self, backpressure_micros_counter: Counter) -> Self { self.backpressure_micros_counter_opt = Some(backpressure_micros_counter); self } diff --git a/quickwit/quickwit-cli/src/jemalloc.rs b/quickwit/quickwit-cli/src/jemalloc.rs index f22caff5a37..ab7f5772e61 100644 --- a/quickwit/quickwit-cli/src/jemalloc.rs +++ b/quickwit/quickwit-cli/src/jemalloc.rs @@ -48,13 +48,13 @@ pub async fn jemalloc_metrics_loop() -> tikv_jemalloc_ctl::Result<()> { // Read statistics using MIB keys: let active = active_mib.read()?; - memory_metrics.active_bytes.set(active as i64); + memory_metrics.active_bytes.set(active as f64); let allocated = allocated_mib.read()?; - memory_metrics.allocated_bytes.set(allocated as i64); + memory_metrics.allocated_bytes.set(allocated as f64); let resident = resident_mib.read()?; - memory_metrics.resident_bytes.set(resident as i64); + memory_metrics.resident_bytes.set(resident as f64); } } diff --git a/quickwit/quickwit-cli/src/lib.rs b/quickwit/quickwit-cli/src/lib.rs index 45275c3ff5d..174b385145b 100644 --- a/quickwit/quickwit-cli/src/lib.rs +++ b/quickwit/quickwit-cli/src/lib.rs @@ -395,8 +395,7 @@ pub mod busy_detector { let delta = now - time.load(Ordering::Relaxed); CLI_METRICS .thread_unpark_duration_microseconds - .with_label_values([]) - .observe(delta as f64); + .record(delta as f64); if delta > ALLOWED_DELAY_MICROS { emit_debug(delta, now); } diff --git a/quickwit/quickwit-cli/src/metrics.rs b/quickwit/quickwit-cli/src/metrics.rs index c51d010c9ea..fe1323fa886 100644 --- 
a/quickwit/quickwit-cli/src/metrics.rs +++ b/quickwit/quickwit-cli/src/metrics.rs @@ -14,23 +14,25 @@ use std::sync::LazyLock; -use quickwit_common::metrics::{HistogramVec, new_histogram_vec}; +use quickwit_common::metrics::{Histogram, exponential_buckets, histogram}; pub struct CliMetrics { - pub thread_unpark_duration_microseconds: HistogramVec<0>, + pub thread_unpark_duration_microseconds: Histogram, } +static THREAD_UNPARK_DURATION_MICROSECONDS: LazyLock = LazyLock::new(|| { + histogram!( + name: "thread_unpark_duration_microseconds", + description: "Duration for which a thread of the main tokio runtime is unparked.", + subsystem: "cli", + buckets: exponential_buckets(5.0, 5.0, 5).unwrap(), + ) +}); + impl Default for CliMetrics { fn default() -> Self { CliMetrics { - thread_unpark_duration_microseconds: new_histogram_vec( - "thread_unpark_duration_microseconds", - "Duration for which a thread of the main tokio runtime is unparked.", - "cli", - &[], - [], - quickwit_common::metrics::exponential_buckets(5.0, 5.0, 5).unwrap(), - ), + thread_unpark_duration_microseconds: THREAD_UNPARK_DURATION_MICROSECONDS.clone(), } } } diff --git a/quickwit/quickwit-cluster/src/grpc_gossip.rs b/quickwit/quickwit-cluster/src/grpc_gossip.rs index 0ebf9f662d2..1cc6260e78e 100644 --- a/quickwit/quickwit-cluster/src/grpc_gossip.rs +++ b/quickwit/quickwit-cluster/src/grpc_gossip.rs @@ -108,7 +108,7 @@ async fn perform_grpc_gossip_rounds( warn!("failed to fetch cluster state from node `{node_id}`"); continue; }; - CLUSTER_METRICS.grpc_gossip_rounds_total.inc(); + CLUSTER_METRICS.grpc_gossip_rounds_total.increment(1); let mut chitchat_guard = chitchat.lock().await; diff --git a/quickwit/quickwit-cluster/src/lib.rs b/quickwit/quickwit-cluster/src/lib.rs index 0f2dbebf749..af717fd3dfe 100644 --- a/quickwit/quickwit-cluster/src/lib.rs +++ b/quickwit/quickwit-cluster/src/lib.rs @@ -31,7 +31,7 @@ use chitchat::transport::{Socket, Transport, UdpSocket}; use chitchat::{ChitchatMessage, 
Serializable}; pub use chitchat::{FailureDetectorConfig, KeyChangeEvent, ListenerHandle}; pub use grpc_service::cluster_grpc_server; -use quickwit_common::metrics::IntCounter; +use quickwit_common::metrics::Counter; use quickwit_common::tower::ClientGrpcConfig; use quickwit_config::service::QuickwitService; use quickwit_config::{GrpcConfig, NodeConfig, TlsConfig}; @@ -74,10 +74,10 @@ struct CountingUdpTransport; struct CountingUdpSocket { socket: UdpSocket, - gossip_recv: IntCounter, - gossip_recv_bytes: IntCounter, - gossip_send: IntCounter, - gossip_send_bytes: IntCounter, + gossip_recv: Counter, + gossip_recv_bytes: Counter, + gossip_send: Counter, + gossip_send_bytes: Counter, } #[async_trait] @@ -85,16 +85,16 @@ impl Socket for CountingUdpSocket { async fn send(&mut self, to: SocketAddr, msg: ChitchatMessage) -> anyhow::Result<()> { let msg_len = msg.serialized_len() as u64; self.socket.send(to, msg).await?; - self.gossip_send.inc(); - self.gossip_send_bytes.inc_by(msg_len); + self.gossip_send.increment(1); + self.gossip_send_bytes.increment(msg_len); Ok(()) } async fn recv(&mut self) -> anyhow::Result<(SocketAddr, ChitchatMessage)> { let (socket_addr, msg) = self.socket.recv().await?; - self.gossip_recv.inc(); + self.gossip_recv.increment(1); let msg_len = msg.serialized_len() as u64; - self.gossip_recv_bytes.inc_by(msg_len); + self.gossip_recv_bytes.increment(msg_len); Ok((socket_addr, msg)) } } diff --git a/quickwit/quickwit-cluster/src/metrics.rs b/quickwit/quickwit-cluster/src/metrics.rs index a5ac5d4a9ef..f3a192eace8 100644 --- a/quickwit/quickwit-cluster/src/metrics.rs +++ b/quickwit/quickwit-cluster/src/metrics.rs @@ -18,101 +18,137 @@ use std::sync::{LazyLock, Weak}; use std::time::Duration; use chitchat::{Chitchat, ChitchatId}; -use quickwit_common::metrics::{IntCounter, IntGauge, new_counter, new_gauge}; +use quickwit_common::metrics::{Counter, Gauge, counter, gauge}; use tokio::sync::Mutex; use crate::member::NodeStateExt; pub struct ClusterMetrics 
{ - pub live_nodes: IntGauge, - pub ready_nodes: IntGauge, - pub zombie_nodes: IntGauge, - pub dead_nodes: IntGauge, - pub cluster_state_size_bytes: IntGauge, - pub node_state_size_bytes: IntGauge, - pub node_state_keys: IntGauge, - pub gossip_recv_messages_total: IntCounter, - pub gossip_recv_bytes_total: IntCounter, - pub gossip_sent_messages_total: IntCounter, - pub gossip_sent_bytes_total: IntCounter, - pub grpc_gossip_rounds_total: IntCounter, + pub live_nodes: Gauge, + pub ready_nodes: Gauge, + pub zombie_nodes: Gauge, + pub dead_nodes: Gauge, + pub cluster_state_size_bytes: Gauge, + pub node_state_size_bytes: Gauge, + pub node_state_keys: Gauge, + pub gossip_recv_messages_total: Counter, + pub gossip_recv_bytes_total: Counter, + pub gossip_sent_messages_total: Counter, + pub gossip_sent_bytes_total: Counter, + pub grpc_gossip_rounds_total: Counter, } +static LIVE_NODES: LazyLock = LazyLock::new(|| { + gauge!( + name: "live_nodes", + description: "The number of live nodes observed locally.", + subsystem: "cluster", + ) +}); + +static READY_NODES: LazyLock = LazyLock::new(|| { + gauge!( + name: "ready_nodes", + description: "The number of ready nodes observed locally.", + subsystem: "cluster", + ) +}); + +static ZOMBIE_NODES: LazyLock = LazyLock::new(|| { + gauge!( + name: "zombie_nodes", + description: "The number of zombie nodes observed locally.", + subsystem: "cluster", + ) +}); + +static DEAD_NODES: LazyLock = LazyLock::new(|| { + gauge!( + name: "dead_nodes", + description: "The number of dead nodes observed locally.", + subsystem: "cluster", + ) +}); + +static CLUSTER_STATE_SIZE_BYTES: LazyLock = LazyLock::new(|| { + gauge!( + name: "cluster_state_size_bytes", + description: "The size of the cluster state in bytes.", + subsystem: "cluster", + ) +}); + +static NODE_STATE_KEYS: LazyLock = LazyLock::new(|| { + gauge!( + name: "node_state_keys", + description: "The number of keys in the node state.", + subsystem: "cluster", + ) +}); + +static 
NODE_STATE_SIZE_BYTES: LazyLock = LazyLock::new(|| { + gauge!( + name: "node_state_size_bytes", + description: "The size of the node state in bytes.", + subsystem: "cluster", + ) +}); + +static GOSSIP_RECV_MESSAGES_TOTAL: LazyLock = LazyLock::new(|| { + counter!( + name: "gossip_recv_messages_total", + description: "Total number of gossip messages received.", + subsystem: "cluster", + ) +}); + +static GOSSIP_RECV_BYTES_TOTAL: LazyLock = LazyLock::new(|| { + counter!( + name: "gossip_recv_bytes_total", + description: "Total amount of gossip data received in bytes.", + subsystem: "cluster", + ) +}); + +static GOSSIP_SENT_MESSAGES_TOTAL: LazyLock = LazyLock::new(|| { + counter!( + name: "gossip_sent_messages_total", + description: "Total number of gossip messages sent.", + subsystem: "cluster", + ) +}); + +static GOSSIP_SENT_BYTES_TOTAL: LazyLock = LazyLock::new(|| { + counter!( + name: "gossip_sent_bytes_total", + description: "Total amount of gossip data sent in bytes.", + subsystem: "cluster", + ) +}); + +static GRPC_GOSSIP_ROUNDS_TOTAL: LazyLock = LazyLock::new(|| { + counter!( + name: "grpc_gossip_rounds_total", + description: "Total number of gRPC gossip rounds performed with peer nodes.", + subsystem: "cluster", + ) +}); + impl Default for ClusterMetrics { fn default() -> Self { ClusterMetrics { - live_nodes: new_gauge( - "live_nodes", - "The number of live nodes observed locally.", - "cluster", - &[], - ), - ready_nodes: new_gauge( - "ready_nodes", - "The number of ready nodes observed locally.", - "cluster", - &[], - ), - zombie_nodes: new_gauge( - "zombie_nodes", - "The number of zombie nodes observed locally.", - "cluster", - &[], - ), - dead_nodes: new_gauge( - "dead_nodes", - "The number of dead nodes observed locally.", - "cluster", - &[], - ), - cluster_state_size_bytes: new_gauge( - "cluster_state_size_bytes", - "The size of the cluster state in bytes.", - "cluster", - &[], - ), - node_state_keys: new_gauge( - "node_state_keys", - "The number of keys 
in the node state.", - "cluster", - &[], - ), - node_state_size_bytes: new_gauge( - "node_state_size_bytes", - "The size of the node state in bytes.", - "cluster", - &[], - ), - gossip_recv_messages_total: new_counter( - "gossip_recv_messages_total", - "Total number of gossip messages received.", - "cluster", - &[], - ), - gossip_recv_bytes_total: new_counter( - "gossip_recv_bytes_total", - "Total amount of gossip data received in bytes.", - "cluster", - &[], - ), - gossip_sent_messages_total: new_counter( - "gossip_sent_messages_total", - "Total number of gossip messages sent.", - "cluster", - &[], - ), - gossip_sent_bytes_total: new_counter( - "gossip_sent_bytes_total", - "Total amount of gossip data sent in bytes.", - "cluster", - &[], - ), - grpc_gossip_rounds_total: new_counter( - "grpc_gossip_rounds_total", - "Total number of gRPC gossip rounds performed with peer nodes.", - "cluster", - &[], - ), + live_nodes: LIVE_NODES.clone(), + ready_nodes: READY_NODES.clone(), + zombie_nodes: ZOMBIE_NODES.clone(), + dead_nodes: DEAD_NODES.clone(), + cluster_state_size_bytes: CLUSTER_STATE_SIZE_BYTES.clone(), + node_state_keys: NODE_STATE_KEYS.clone(), + node_state_size_bytes: NODE_STATE_SIZE_BYTES.clone(), + gossip_recv_messages_total: GOSSIP_RECV_MESSAGES_TOTAL.clone(), + gossip_recv_bytes_total: GOSSIP_RECV_BYTES_TOTAL.clone(), + gossip_sent_messages_total: GOSSIP_SENT_MESSAGES_TOTAL.clone(), + gossip_sent_bytes_total: GOSSIP_SENT_BYTES_TOTAL.clone(), + grpc_gossip_rounds_total: GRPC_GOSSIP_ROUNDS_TOTAL.clone(), } } } @@ -157,22 +193,22 @@ pub(crate) fn spawn_metrics_task( if *chitchat_id == self_chitchat_id { CLUSTER_METRICS .node_state_keys - .set(node_state.num_key_values() as i64); + .set(node_state.num_key_values() as f64); CLUSTER_METRICS .node_state_size_bytes - .set(node_state_size_bytes as i64); + .set(node_state_size_bytes as f64); } } drop(chitchat_guard); - CLUSTER_METRICS.live_nodes.set(num_live_nodes as i64); - 
CLUSTER_METRICS.ready_nodes.set(num_ready_nodes as i64); - CLUSTER_METRICS.zombie_nodes.set(num_zombie_nodes as i64); - CLUSTER_METRICS.dead_nodes.set(num_dead_nodes as i64); + CLUSTER_METRICS.live_nodes.set(num_live_nodes as f64); + CLUSTER_METRICS.ready_nodes.set(num_ready_nodes as f64); + CLUSTER_METRICS.zombie_nodes.set(num_zombie_nodes as f64); + CLUSTER_METRICS.dead_nodes.set(num_dead_nodes as f64); CLUSTER_METRICS .cluster_state_size_bytes - .set(cluster_state_size_bytes as i64); + .set(cluster_state_size_bytes as f64); } }; tokio::spawn(future); diff --git a/quickwit/quickwit-common/src/metrics/histogram.rs b/quickwit/quickwit-common/src/metrics/histogram.rs index 65d9ac9b30e..2b37acf1c8f 100644 --- a/quickwit/quickwit-common/src/metrics/histogram.rs +++ b/quickwit/quickwit-common/src/metrics/histogram.rs @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::time::Instant; + use metrics::HistogramFn; use super::MetricInfo; @@ -60,6 +62,10 @@ impl Histogram { pub fn record(&self, value: f64) { self.inner.record(value); } + + pub fn start_timer(&self) -> HistogramTimer { + HistogramTimer::__new(self.clone()) + } } impl HistogramFn for Histogram { @@ -68,6 +74,37 @@ impl HistogramFn for Histogram { } } +#[derive(Debug)] +pub struct HistogramTimer { + histogram: Histogram, + start: Instant, + observed: bool, +} + +impl HistogramTimer { + fn __new(histogram: Histogram) -> Self { + Self { + histogram, + start: Instant::now(), + observed: false, + } + } + + pub fn observe_duration(self) { + let mut timer = self; + timer.observed = true; + timer.histogram.record(timer.start.elapsed().as_secs_f64()); + } +} + +impl Drop for HistogramTimer { + fn drop(&mut self) { + if !self.observed { + self.histogram.record(self.start.elapsed().as_secs_f64()); + } + } +} + #[macro_export] macro_rules! 
histogram { ( diff --git a/quickwit/quickwit-common/src/metrics/mod.rs b/quickwit/quickwit-common/src/metrics/mod.rs index b74c856afee..18c9db29bb9 100644 --- a/quickwit/quickwit-common/src/metrics/mod.rs +++ b/quickwit/quickwit-common/src/metrics/mod.rs @@ -35,7 +35,7 @@ pub use counter::CounterShadow; #[doc(hidden)] pub use gauge::GaugeShadow; pub use gauge::{Gauge, GaugeGuard}; -pub use histogram::{Histogram, HistogramConfig}; +pub use histogram::{Histogram, HistogramConfig, HistogramTimer}; pub use quickwit::{ InFlightDataGauges, MEMORY_METRICS, MemoryMetrics, index_label, metrics_text_payload, register_info, diff --git a/quickwit/quickwit-common/src/metrics/tests.rs b/quickwit/quickwit-common/src/metrics/tests.rs index a3f96102028..9ec338f76c0 100644 --- a/quickwit/quickwit-common/src/metrics/tests.rs +++ b/quickwit/quickwit-common/src/metrics/tests.rs @@ -115,6 +115,57 @@ fn histogram_records_value() { } } +#[test] +fn histogram_timer_records_value_on_drop() { + let entries = with_recorder(|| { + let histogram = histogram!( + name: "test_histogram_timer_drop", + description: "test histogram timer drop", + subsystem: "metrics_tests", + buckets: vec![1.0, 5.0, 10.0], + ); + let _timer = histogram.start_timer(); + }); + + let (name, labels, value) = &entries[0]; + assert_eq!(name, "quickwit_metrics_tests_test_histogram_timer_drop"); + assert!(labels.is_empty()); + match value { + DebugValue::Histogram(values) => { + assert_eq!(values.len(), 1); + assert!(values[0].into_inner() >= 0.0); + } + other => panic!("expected histogram, got {other:?}"), + } +} + +#[test] +fn histogram_timer_observe_duration_records_once() { + let entries = with_recorder(|| { + let histogram = histogram!( + name: "test_histogram_timer_observe_duration", + description: "test histogram timer observe duration", + subsystem: "metrics_tests", + buckets: vec![1.0, 5.0, 10.0], + ); + histogram.start_timer().observe_duration(); + }); + + let (name, labels, value) = &entries[0]; + assert_eq!( + 
name, + "quickwit_metrics_tests_test_histogram_timer_observe_duration" + ); + assert!(labels.is_empty()); + match value { + DebugValue::Histogram(values) => { + assert_eq!(values.len(), 1); + assert!(values[0].into_inner() >= 0.0); + } + other => panic!("expected histogram, got {other:?}"), + } +} + #[test] fn empty_subsystem_omits_double_underscore() { let entries = with_recorder(|| { diff --git a/quickwit/quickwit-control-plane/src/control_plane.rs b/quickwit/quickwit-control-plane/src/control_plane.rs index 1056aba6eb8..4df822181a1 100644 --- a/quickwit/quickwit-control-plane/src/control_plane.rs +++ b/quickwit/quickwit-control-plane/src/control_plane.rs @@ -219,7 +219,9 @@ impl Actor for ControlPlane { } async fn initialize(&mut self, ctx: &ActorContext) -> Result<(), ActorExitStatus> { - crate::metrics::CONTROL_PLANE_METRICS.restart_total.inc(); + crate::metrics::CONTROL_PLANE_METRICS + .restart_total + .increment(1); self.model .load_from_metastore(&mut self.metastore, ctx.progress()) @@ -570,7 +572,7 @@ fn convert_metastore_error( } crate::metrics::CONTROL_PLANE_METRICS .metastore_error_aborted - .inc(); + .increment(1); Ok(Err(ControlPlaneError::Metastore(metastore_error))) } else { // If the metastore transaction may have been executed, we need to restart the control plane @@ -578,7 +580,7 @@ fn convert_metastore_error( error!(error=?metastore_error, transaction_outcome="maybe-executed", "metastore error"); crate::metrics::CONTROL_PLANE_METRICS .metastore_error_maybe_executed - .inc(); + .increment(1); Err(ActorExitStatus::from(anyhow::anyhow!(metastore_error))) } } diff --git a/quickwit/quickwit-control-plane/src/indexing_scheduler/mod.rs b/quickwit/quickwit-control-plane/src/indexing_scheduler/mod.rs index 300f6a9d151..e260e5444a3 100644 --- a/quickwit/quickwit-control-plane/src/indexing_scheduler/mod.rs +++ b/quickwit/quickwit-control-plane/src/indexing_scheduler/mod.rs @@ -295,7 +295,9 @@ impl IndexingScheduler { // Prefer not calling this method 
directly, and instead call // `ControlPlane::rebuild_indexing_plan_debounced`. pub(crate) fn rebuild_plan(&mut self, model: &ControlPlaneModel) { - crate::metrics::CONTROL_PLANE_METRICS.schedule_total.inc(); + crate::metrics::CONTROL_PLANE_METRICS + .schedule_total + .increment(1); let notify_on_drop = self.next_rebuild_tracker.start_rebuild(); @@ -397,7 +399,9 @@ impl IndexingScheduler { notify_on_drop: Option>, ) { debug!(new_physical_plan=?new_physical_plan, "apply physical indexing plan"); - crate::metrics::CONTROL_PLANE_METRICS.apply_plan_total.inc(); + crate::metrics::CONTROL_PLANE_METRICS + .apply_plan_total + .increment(1); for (node_id, indexing_tasks) in new_physical_plan.indexing_tasks_per_indexer() { // We don't want to block on a slow indexer so we apply this change asynchronously // TODO not blocking is cool, but we need to make sure there is not accumulation diff --git a/quickwit/quickwit-control-plane/src/ingest/ingest_controller.rs b/quickwit/quickwit-control-plane/src/ingest/ingest_controller.rs index 6a64c183361..47f16bad3aa 100644 --- a/quickwit/quickwit-control-plane/src/ingest/ingest_controller.rs +++ b/quickwit/quickwit-control-plane/src/ingest/ingest_controller.rs @@ -1026,7 +1026,7 @@ impl IngestController { crate::metrics::CONTROL_PLANE_METRICS .rebalance_shards - .set(shards_to_rebalance.len() as i64); + .set(shards_to_rebalance.len() as f64); if shards_to_rebalance.is_empty() { debug!("skipping rebalance: no shards to rebalance"); @@ -1051,14 +1051,14 @@ impl IngestController { error!(%error, "failed to open shards during rebalance"); crate::metrics::CONTROL_PLANE_METRICS .rebalance_shards - .set(0); + .set(0.0); })?; let num_opened_shards: usize = per_source_num_opened_shards.values().sum(); crate::metrics::CONTROL_PLANE_METRICS .rebalance_shards - .set(num_opened_shards as i64); + .set(num_opened_shards as f64); for source_uid in per_source_num_opened_shards.keys() { // We temporarily disable the ability the scale down the number of 
shards for diff --git a/quickwit/quickwit-control-plane/src/metrics.rs b/quickwit/quickwit-control-plane/src/metrics.rs index 7935f18a1e8..90ed921785e 100644 --- a/quickwit/quickwit-control-plane/src/metrics.rs +++ b/quickwit/quickwit-control-plane/src/metrics.rs @@ -14,9 +14,7 @@ use std::sync::LazyLock; -use quickwit_common::metrics::{ - IntCounter, IntGauge, IntGaugeVec, new_counter, new_gauge, new_gauge_vec, -}; +use quickwit_common::metrics::{Counter, Gauge, counter, gauge}; #[derive(Debug, Clone, Copy)] pub struct ShardLocalityMetrics { @@ -26,107 +24,123 @@ pub struct ShardLocalityMetrics { pub struct ControlPlaneMetrics { // Indexes and shards tracked by the control plane. - pub indexes_total: IntGauge, - pub open_shards: IntGaugeVec<1>, - pub closed_shards: IntGaugeVec<1>, + pub indexes_total: Gauge, + pub open_shards: Gauge, + pub closed_shards: Gauge, // Operations performed by the control plane. - pub apply_plan_total: IntCounter, - pub rebalance_shards: IntGauge, - pub restart_total: IntCounter, - pub schedule_total: IntCounter, + pub apply_plan_total: Counter, + pub rebalance_shards: Gauge, + pub restart_total: Counter, + pub schedule_total: Counter, // Metastore errors. - pub metastore_error_aborted: IntCounter, - pub metastore_error_maybe_executed: IntCounter, + pub metastore_error_aborted: Counter, + pub metastore_error_maybe_executed: Counter, // Indexing plan metrics. 
- pub local_shards: IntGauge, - pub remote_shards: IntGauge, + pub local_shards: Gauge, + pub remote_shards: Gauge, } impl ControlPlaneMetrics { pub fn set_shard_locality_metrics(&self, shard_locality_metrics: ShardLocalityMetrics) { self.local_shards - .set(shard_locality_metrics.num_local_shards as i64); + .set(shard_locality_metrics.num_local_shards as f64); self.remote_shards - .set(shard_locality_metrics.num_remote_shards as i64); + .set(shard_locality_metrics.num_remote_shards as f64); } } +static INDEXES_TOTAL: LazyLock = LazyLock::new(|| { + gauge!( + name: "indexes_total", + description: "Number of indexes tracked by the control plane.", + subsystem: "control_plane", + ) +}); + +static SHARDS: LazyLock = LazyLock::new(|| { + gauge!( + name: "shards", + description: "Number of open and closed shards tracked by the ingest controller", + subsystem: "control_plane", + ) +}); + +static INDEXED_SHARDS: LazyLock = LazyLock::new(|| { + gauge!( + name: "indexed_shards", + description: "Number of (remote/local) shards in the indexing plan", + subsystem: "control_plane", + ) +}); + +static APPLY_PLAN_TOTAL: LazyLock = LazyLock::new(|| { + counter!( + name: "apply_plan_total", + description: "Number of control plane `apply plan` operations.", + subsystem: "control_plane", + ) +}); + +static REBALANCE_SHARDS: LazyLock = LazyLock::new(|| { + gauge!( + name: "rebalance_shards", + description: "Number of shards rebalanced by the control plane.", + subsystem: "control_plane", + ) +}); + +static RESTART_TOTAL: LazyLock = LazyLock::new(|| { + counter!( + name: "restart_total", + description: "Number of control plane restarts.", + subsystem: "control_plane", + ) +}); + +static SCHEDULE_TOTAL: LazyLock = LazyLock::new(|| { + counter!( + name: "schedule_total", + description: "Number of control plane `schedule` operations.", + subsystem: "control_plane", + ) +}); + +static METASTORE_ERROR_ABORTED: LazyLock = LazyLock::new(|| { + counter!( + name: "metastore_error_aborted", + 
description: "Number of aborted metastore transaction (= do not trigger a control plane restart)", + subsystem: "control_plane", + ) +}); + +static METASTORE_ERROR_MAYBE_EXECUTED: LazyLock = LazyLock::new(|| { + counter!( + name: "metastore_error_maybe_executed", + description: "Number of metastore transaction with an uncertain outcome (= do trigger a control plane restart)", + subsystem: "control_plane", + ) +}); + impl Default for ControlPlaneMetrics { fn default() -> Self { - let open_shards = new_gauge_vec( - "shards", - "Number of open and closed shards tracked by the ingest controller", - "control_plane", - &[("state", "open")], - ["index_id"], - ); - let closed_shards = new_gauge_vec( - "shards", - "Number of open and closed shards tracked by the ingest controller", - "control_plane", - &[("state", "closed")], - ["index_id"], - ); - let indexed_shards = new_gauge_vec( - "indexed_shards", - "Number of (remote/local) shards in the indexing plan", - "control_plane", - &[], - ["locality"], - ); - let local_shards = indexed_shards.with_label_values(["local"]); - let remote_shards = indexed_shards.with_label_values(["remote"]); + let open_shards = gauge!(parent: &*SHARDS, "state" => "open"); + let closed_shards = gauge!(parent: &*SHARDS, "state" => "closed"); + let local_shards = gauge!(parent: &*INDEXED_SHARDS, "locality" => "local"); + let remote_shards = gauge!(parent: &*INDEXED_SHARDS, "locality" => "remote"); ControlPlaneMetrics { - indexes_total: new_gauge( - "indexes_total", - "Number of indexes tracked by the control plane.", - "control_plane", - &[], - ), + indexes_total: INDEXES_TOTAL.clone(), open_shards, closed_shards, - apply_plan_total: new_counter( - "apply_plan_total", - "Number of control plane `apply plan` operations.", - "control_plane", - &[], - ), - rebalance_shards: new_gauge( - "rebalance_shards", - "Number of shards rebalanced by the control plane.", - "control_plane", - &[], - ), - restart_total: new_counter( - "restart_total", - "Number 
of control plane restarts.", - "control_plane", - &[], - ), - schedule_total: new_counter( - "schedule_total", - "Number of control plane `schedule` operations.", - "control_plane", - &[], - ), - metastore_error_aborted: new_counter( - "metastore_error_aborted", - "Number of aborted metastore transaction (= do not trigger a control plane \ - restart)", - "control_plane", - &[], - ), - metastore_error_maybe_executed: new_counter( - "metastore_error_maybe_executed", - "Number of metastore transaction with an uncertain outcome (= do trigger a \ - control plane restart)", - "control_plane", - &[], - ), + apply_plan_total: APPLY_PLAN_TOTAL.clone(), + rebalance_shards: REBALANCE_SHARDS.clone(), + restart_total: RESTART_TOTAL.clone(), + schedule_total: SCHEDULE_TOTAL.clone(), + metastore_error_aborted: METASTORE_ERROR_ABORTED.clone(), + metastore_error_maybe_executed: METASTORE_ERROR_MAYBE_EXECUTED.clone(), local_shards, remote_shards, } diff --git a/quickwit/quickwit-control-plane/src/model/mod.rs b/quickwit/quickwit-control-plane/src/model/mod.rs index 0d0431a67ce..1a1007ffe89 100644 --- a/quickwit/quickwit-control-plane/src/model/mod.rs +++ b/quickwit/quickwit-control-plane/src/model/mod.rs @@ -169,7 +169,7 @@ impl ControlPlaneModel { fn update_metrics(&self) { crate::metrics::CONTROL_PLANE_METRICS .indexes_total - .set(self.index_table.len() as i64); + .set(self.index_table.len() as f64); } pub(crate) fn source_configs(&self) -> impl Iterator + '_ { diff --git a/quickwit/quickwit-control-plane/src/model/shard_table.rs b/quickwit/quickwit-control-plane/src/model/shard_table.rs index 623ae3e6224..09a5d42e591 100644 --- a/quickwit/quickwit-control-plane/src/model/shard_table.rs +++ b/quickwit/quickwit-control-plane/src/model/shard_table.rs @@ -461,14 +461,16 @@ impl ShardTable { // can update the metrics for this specific index. 
if index_label == index_id { let shard_stats = table_entry.shards_stats(); - crate::metrics::CONTROL_PLANE_METRICS - .open_shards - .with_label_values([index_label]) - .set(shard_stats.num_open_shards as i64); - crate::metrics::CONTROL_PLANE_METRICS - .closed_shards - .with_label_values([index_label]) - .set(shard_stats.num_closed_shards as i64); + quickwit_common::metrics::gauge!( + parent: &crate::metrics::CONTROL_PLANE_METRICS.open_shards, + "index_id" => index_label.to_string(), + ) + .set(shard_stats.num_open_shards as f64); + quickwit_common::metrics::gauge!( + parent: &crate::metrics::CONTROL_PLANE_METRICS.closed_shards, + "index_id" => index_label.to_string(), + ) + .set(shard_stats.num_closed_shards as f64); return; } // Per-index metrics are disabled, so we update the metrics for all sources. @@ -482,14 +484,16 @@ impl ShardTable { num_closed_shards += 1; } } - crate::metrics::CONTROL_PLANE_METRICS - .open_shards - .with_label_values([index_label]) - .set(num_open_shards as i64); - crate::metrics::CONTROL_PLANE_METRICS - .closed_shards - .with_label_values([index_label]) - .set(num_closed_shards as i64); + quickwit_common::metrics::gauge!( + parent: &crate::metrics::CONTROL_PLANE_METRICS.open_shards, + "index_id" => index_label.to_string(), + ) + .set(num_open_shards as f64); + quickwit_common::metrics::gauge!( + parent: &crate::metrics::CONTROL_PLANE_METRICS.closed_shards, + "index_id" => index_label.to_string(), + ) + .set(num_closed_shards as f64); } pub fn update_shards( diff --git a/quickwit/quickwit-index-management/src/garbage_collection.rs b/quickwit/quickwit-index-management/src/garbage_collection.rs index dc2655dbfcd..692a716578c 100644 --- a/quickwit/quickwit-index-management/src/garbage_collection.rs +++ b/quickwit/quickwit-index-management/src/garbage_collection.rs @@ -20,7 +20,7 @@ use std::time::Duration; use anyhow::Context; use futures::{Future, StreamExt}; use itertools::Itertools; -use quickwit_common::metrics::IntCounter; +use 
quickwit_common::metrics::Counter; use quickwit_common::pretty::PrettySample; use quickwit_common::{Progress, rate_limited_info}; use quickwit_metastore::{ @@ -41,9 +41,9 @@ use tracing::{error, instrument}; const DELETE_SPLITS_BATCH_SIZE: usize = 10_000; pub struct GcMetrics { - pub deleted_splits: IntCounter, - pub deleted_bytes: IntCounter, - pub failed_splits: IntCounter, + pub deleted_splits: Counter, + pub deleted_bytes: Counter, + pub failed_splits: Counter, } pub(crate) trait RecordGcMetrics { @@ -53,9 +53,9 @@ pub(crate) trait RecordGcMetrics { impl RecordGcMetrics for Option { fn record(&self, num_deleted_splits: usize, num_deleted_bytes: u64, num_failed_splits: usize) { if let Some(metrics) = self { - metrics.deleted_splits.inc_by(num_deleted_splits as u64); - metrics.deleted_bytes.inc_by(num_deleted_bytes); - metrics.failed_splits.inc_by(num_failed_splits as u64); + metrics.deleted_splits.increment(num_deleted_splits as u64); + metrics.deleted_bytes.increment(num_deleted_bytes); + metrics.failed_splits.increment(num_failed_splits as u64); } } } diff --git a/quickwit/quickwit-indexing/src/actors/doc_processor.rs b/quickwit/quickwit-indexing/src/actors/doc_processor.rs index 407c55ff526..e67ebbe71ba 100644 --- a/quickwit/quickwit-indexing/src/actors/doc_processor.rs +++ b/quickwit/quickwit-indexing/src/actors/doc_processor.rs @@ -20,7 +20,7 @@ use anyhow::{Context, bail}; use async_trait::async_trait; use bytes::Bytes; use quickwit_actors::{Actor, ActorContext, ActorExitStatus, Handler, Mailbox, QueueCapacity}; -use quickwit_common::metrics::IntCounter; +use quickwit_common::metrics::{Counter, counter}; use quickwit_common::rate_limited_tracing::rate_limited_warn; use quickwit_common::runtimes::RuntimeType; use quickwit_config::{SourceInputFormat, TransformConfig}; @@ -270,8 +270,8 @@ impl From> for JsonDocIterator { #[derive(Debug)] pub struct DocProcessorCounter { pub num_docs: AtomicU64, - pub num_docs_metric: IntCounter, - pub num_bytes_metric: 
IntCounter, + pub num_docs_metric: Counter, + pub num_bytes_metric: Counter, } impl Serialize for DocProcessorCounter { @@ -284,15 +284,18 @@ impl Serialize for DocProcessorCounter { impl DocProcessorCounter { fn for_index_and_doc_processor_outcome(index: &str, outcome: &str) -> DocProcessorCounter { let index_label = quickwit_common::metrics::index_label(index); - let labels = [index_label, outcome]; DocProcessorCounter { num_docs: Default::default(), - num_docs_metric: crate::metrics::INDEXER_METRICS - .processed_docs_total - .with_label_values(labels), - num_bytes_metric: crate::metrics::INDEXER_METRICS - .processed_bytes - .with_label_values(labels), + num_docs_metric: counter!( + parent: &crate::metrics::INDEXER_METRICS.processed_docs_total, + "index" => index_label.to_string(), + "docs_processed_status" => outcome.to_string(), + ), + num_bytes_metric: counter!( + parent: &crate::metrics::INDEXER_METRICS.processed_bytes, + "index" => index_label.to_string(), + "docs_processed_status" => outcome.to_string(), + ), } } @@ -303,8 +306,8 @@ impl DocProcessorCounter { fn record_doc(&self, num_bytes: u64) { self.num_docs.fetch_add(1, Ordering::Relaxed); - self.num_docs_metric.inc(); - self.num_bytes_metric.inc_by(num_bytes); + self.num_docs_metric.increment(1); + self.num_bytes_metric.increment(num_bytes); } } diff --git a/quickwit/quickwit-indexing/src/actors/indexer.rs b/quickwit/quickwit-indexing/src/actors/indexer.rs index b2257ed203f..a0f968672aa 100644 --- a/quickwit/quickwit-indexing/src/actors/indexer.rs +++ b/quickwit/quickwit-indexing/src/actors/indexer.rs @@ -358,8 +358,8 @@ struct IndexingWorkbench { // We use this value to set the `delete_opstamp` of the workbench splits. last_delete_opstamp: u64, // Number of bytes declared as used by tantivy. 
- memory_usage: GaugeGuard<'static>, - split_builders_guard: GaugeGuard<'static>, + memory_usage: GaugeGuard, + split_builders_guard: GaugeGuard, cooperative_indexing_period: Option, } diff --git a/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs b/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs index 3b43e47c105..43fb9e5d69d 100644 --- a/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs +++ b/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs @@ -23,7 +23,7 @@ use quickwit_actors::{ QueueCapacity, Supervisable, }; use quickwit_common::KillSwitch; -use quickwit_common::metrics::OwnedGaugeGuard; +use quickwit_common::metrics::{GaugeGuard, counter, gauge}; use quickwit_common::pubsub::EventBroker; use quickwit_common::temp_dir::TempDirectory; use quickwit_config::{IndexingSettings, RetentionPolicy, SourceConfig}; @@ -88,7 +88,7 @@ pub struct IndexingPipeline { // requiring a respawn of the pipeline. // We keep the list of shards here however, to reassign them after a respawn. 
shard_ids: BTreeSet, - _indexing_pipelines_gauge_guard: OwnedGaugeGuard, + _indexing_pipelines_gauge_guard: GaugeGuard, } #[async_trait] @@ -123,10 +123,12 @@ impl Actor for IndexingPipeline { impl IndexingPipeline { pub fn new(params: IndexingPipelineParams) -> Self { - let indexing_pipelines_gauge = crate::metrics::INDEXER_METRICS - .indexing_pipelines - .with_label_values([¶ms.pipeline_id.index_uid.index_id]); - let indexing_pipelines_gauge_guard = OwnedGaugeGuard::from_gauge(indexing_pipelines_gauge); + let indexing_pipelines_gauge = gauge!( + parent: &crate::metrics::INDEXER_METRICS.indexing_pipelines, + "index" => params.pipeline_id.index_uid.index_id.clone(), + ); + let mut indexing_pipelines_gauge_guard = GaugeGuard::from_gauge(&indexing_pipelines_gauge); + indexing_pipelines_gauge_guard.add(1); let params_fingerprint = params.params_fingerprint; IndexingPipeline { params, @@ -311,21 +313,19 @@ impl IndexingPipeline { let (publisher_mailbox, publisher_handle) = ctx .spawn_actor() .set_kill_switch(self.kill_switch.clone()) - .set_backpressure_micros_counter( - crate::metrics::INDEXER_METRICS - .backpressure_micros - .with_label_values(["publisher"]), - ) + .set_backpressure_micros_counter(counter!( + parent: &crate::metrics::INDEXER_METRICS.backpressure_micros, + "actor_name" => "publisher", + )) .spawn(publisher); let sequencer = Sequencer::new(publisher_mailbox); let (sequencer_mailbox, sequencer_handle) = ctx .spawn_actor() - .set_backpressure_micros_counter( - crate::metrics::INDEXER_METRICS - .backpressure_micros - .with_label_values(["sequencer"]), - ) + .set_backpressure_micros_counter(counter!( + parent: &crate::metrics::INDEXER_METRICS.backpressure_micros, + "actor_name" => "sequencer", + )) .set_kill_switch(self.kill_switch.clone()) .spawn(sequencer); @@ -342,11 +342,10 @@ impl IndexingPipeline { ); let (uploader_mailbox, uploader_handle) = ctx .spawn_actor() - .set_backpressure_micros_counter( - crate::metrics::INDEXER_METRICS - 
.backpressure_micros - .with_label_values(["uploader"]), - ) + .set_backpressure_micros_counter(counter!( + parent: &crate::metrics::INDEXER_METRICS.backpressure_micros, + "actor_name" => "uploader", + )) .set_kill_switch(self.kill_switch.clone()) .spawn(uploader); @@ -377,11 +376,10 @@ impl IndexingPipeline { ); let (indexer_mailbox, indexer_handle) = ctx .spawn_actor() - .set_backpressure_micros_counter( - crate::metrics::INDEXER_METRICS - .backpressure_micros - .with_label_values(["indexer"]), - ) + .set_backpressure_micros_counter(counter!( + parent: &crate::metrics::INDEXER_METRICS.backpressure_micros, + "actor_name" => "indexer", + )) .set_kill_switch(self.kill_switch.clone()) .spawn(indexer); @@ -395,11 +393,10 @@ impl IndexingPipeline { )?; let (doc_processor_mailbox, doc_processor_handle) = ctx .spawn_actor() - .set_backpressure_micros_counter( - crate::metrics::INDEXER_METRICS - .backpressure_micros - .with_label_values(["doc_processor"]), - ) + .set_backpressure_micros_counter(counter!( + parent: &crate::metrics::INDEXER_METRICS.backpressure_micros, + "actor_name" => "doc_processor", + )) .set_kill_switch(self.kill_switch.clone()) .spawn(doc_processor); let source_runtime = SourceRuntime { diff --git a/quickwit/quickwit-indexing/src/actors/merge_pipeline.rs b/quickwit/quickwit-indexing/src/actors/merge_pipeline.rs index b901d9f804a..1205f3e9162 100644 --- a/quickwit/quickwit-indexing/src/actors/merge_pipeline.rs +++ b/quickwit/quickwit-indexing/src/actors/merge_pipeline.rs @@ -22,6 +22,7 @@ use quickwit_actors::{ }; use quickwit_common::KillSwitch; use quickwit_common::io::{IoControls, Limiter}; +use quickwit_common::metrics::counter; use quickwit_common::pubsub::EventBroker; use quickwit_common::temp_dir::TempDirectory; use quickwit_config::RetentionPolicy; @@ -272,11 +273,10 @@ impl MergePipeline { let (merge_publisher_mailbox, merge_publisher_handle) = ctx .spawn_actor() .set_kill_switch(self.kill_switch.clone()) - .set_backpressure_micros_counter( - 
crate::metrics::INDEXER_METRICS - .backpressure_micros - .with_label_values(["merge_publisher"]), - ) + .set_backpressure_micros_counter(counter!( + parent: &crate::metrics::INDEXER_METRICS.backpressure_micros, + "actor_name" => "merge_publisher", + )) .spawn(merge_publisher); // Merge uploader @@ -322,11 +322,10 @@ impl MergePipeline { let (merge_executor_mailbox, merge_executor_handle) = ctx .spawn_actor() .set_kill_switch(self.kill_switch.clone()) - .set_backpressure_micros_counter( - crate::metrics::INDEXER_METRICS - .backpressure_micros - .with_label_values(["merge_executor"]), - ) + .set_backpressure_micros_counter(counter!( + parent: &crate::metrics::INDEXER_METRICS.backpressure_micros, + "actor_name" => "merge_executor", + )) .spawn(merge_executor); let merge_split_downloader = MergeSplitDownloader { @@ -338,11 +337,10 @@ impl MergePipeline { let (merge_split_downloader_mailbox, merge_split_downloader_handle) = ctx .spawn_actor() .set_kill_switch(self.kill_switch.clone()) - .set_backpressure_micros_counter( - crate::metrics::INDEXER_METRICS - .backpressure_micros - .with_label_values(["merge_split_downloader"]), - ) + .set_backpressure_micros_counter(counter!( + parent: &crate::metrics::INDEXER_METRICS.backpressure_micros, + "actor_name" => "merge_split_downloader", + )) .spawn(merge_split_downloader); // Merge planner @@ -409,7 +407,7 @@ impl MergePipeline { ) .set_generation(self.statistics.generation) .set_num_spawn_attempts(self.statistics.num_spawn_attempts) - .set_ongoing_merges(usize::try_from(num_ongoing_merges).unwrap_or(0)); + .set_ongoing_merges(num_ongoing_merges.max(0.0) as usize); } async fn perform_health_check( diff --git a/quickwit/quickwit-indexing/src/actors/merge_scheduler_service.rs b/quickwit/quickwit-indexing/src/actors/merge_scheduler_service.rs index 70fe17c621b..695e0eb7128 100644 --- a/quickwit/quickwit-indexing/src/actors/merge_scheduler_service.rs +++ b/quickwit/quickwit-indexing/src/actors/merge_scheduler_service.rs @@ -228,10 
+228,10 @@ impl MergeSchedulerService { self.pending_merge_bytes -= merge_task.merge_operation.total_num_bytes(); crate::metrics::INDEXER_METRICS .pending_merge_operations - .set(self.pending_merge_queue.len() as i64); + .set(self.pending_merge_queue.len() as f64); crate::metrics::INDEXER_METRICS .pending_merge_bytes - .set(self.pending_merge_bytes as i64); + .set(self.pending_merge_bytes as f64); match split_downloader_mailbox.try_send_message(merge_task) { Ok(_) => {} Err(quickwit_actors::TrySendError::Full(_)) => { @@ -297,7 +297,7 @@ impl MergeSchedulerService { self.merge_concurrency as i64 - self.merge_semaphore.available_permits() as i64; crate::metrics::INDEXER_METRICS .ongoing_merge_operations - .set(num_merges); + .set(num_merges as f64); } } @@ -383,10 +383,10 @@ impl Handler for MergeSchedulerService { self.pending_merge_queue.push(scheduled_merge); crate::metrics::INDEXER_METRICS .pending_merge_operations - .set(self.pending_merge_queue.len() as i64); + .set(self.pending_merge_queue.len() as f64); crate::metrics::INDEXER_METRICS .pending_merge_bytes - .set(self.pending_merge_bytes as i64); + .set(self.pending_merge_bytes as f64); self.schedule_pending_merges(ctx); Ok(()) } diff --git a/quickwit/quickwit-indexing/src/actors/metrics_pipeline/parquet_uploader.rs b/quickwit/quickwit-indexing/src/actors/metrics_pipeline/parquet_uploader.rs index 3702f727f93..695d957f9d5 100644 --- a/quickwit/quickwit-indexing/src/actors/metrics_pipeline/parquet_uploader.rs +++ b/quickwit/quickwit-indexing/src/actors/metrics_pipeline/parquet_uploader.rs @@ -25,6 +25,7 @@ use std::sync::{Arc, OnceLock}; use anyhow::Context; use async_trait::async_trait; use quickwit_actors::{Actor, ActorContext, ActorExitStatus, Handler, Mailbox, QueueCapacity}; +use quickwit_common::metrics::gauge; use quickwit_common::spawn_named_task; use quickwit_metastore::StageParquetSplitsRequestExt; use quickwit_parquet_engine::split::{ParquetSplitKind, ParquetSplitMetadata}; @@ -122,10 +123,11 @@ 
impl ParquetUploader { let _guard = ctx.protect_zone(); let concurrent_upload_permits = CONCURRENT_UPLOAD_PERMITS_METRICS .get_or_init(|| Semaphore::const_new(self.max_concurrent_uploads)); - let gauge = INDEXER_METRICS - .available_concurrent_upload_permits - .with_label_values(["metrics"]); - gauge.set(concurrent_upload_permits.available_permits() as i64); + let gauge = gauge!( + parent: &INDEXER_METRICS.available_concurrent_upload_permits, + "component" => "metrics", + ); + gauge.set(concurrent_upload_permits.available_permits() as f64); concurrent_upload_permits .acquire() .await diff --git a/quickwit/quickwit-indexing/src/actors/metrics_pipeline/pipeline.rs b/quickwit/quickwit-indexing/src/actors/metrics_pipeline/pipeline.rs index bf596995b89..3379c456911 100644 --- a/quickwit/quickwit-indexing/src/actors/metrics_pipeline/pipeline.rs +++ b/quickwit/quickwit-indexing/src/actors/metrics_pipeline/pipeline.rs @@ -33,7 +33,7 @@ use quickwit_actors::{ QueueCapacity, Supervisable, }; use quickwit_common::KillSwitch; -use quickwit_common::metrics::OwnedGaugeGuard; +use quickwit_common::metrics::{GaugeGuard, gauge}; use quickwit_common::pubsub::EventBroker; use quickwit_common::temp_dir::TempDirectory; use quickwit_config::{IndexingSettings, SourceConfig}; @@ -111,7 +111,7 @@ pub struct MetricsPipeline { handles_opt: Option, kill_switch: KillSwitch, shard_ids: BTreeSet, - _indexing_pipelines_gauge_guard: OwnedGaugeGuard, + _indexing_pipelines_gauge_guard: GaugeGuard, } #[async_trait] @@ -144,10 +144,12 @@ impl Actor for MetricsPipeline { impl MetricsPipeline { pub fn new(params: MetricsPipelineParams) -> Self { - let indexing_pipelines_gauge = crate::metrics::INDEXER_METRICS - .indexing_pipelines - .with_label_values([¶ms.pipeline_id.index_uid.index_id]); - let indexing_pipelines_gauge_guard = OwnedGaugeGuard::from_gauge(indexing_pipelines_gauge); + let indexing_pipelines_gauge = gauge!( + parent: &crate::metrics::INDEXER_METRICS.indexing_pipelines, + "index" => 
params.pipeline_id.index_uid.index_id.clone(), + ); + let mut indexing_pipelines_gauge_guard = GaugeGuard::from_gauge(&indexing_pipelines_gauge); + indexing_pipelines_gauge_guard.add(1); let params_fingerprint = params.params_fingerprint; MetricsPipeline { params, diff --git a/quickwit/quickwit-indexing/src/actors/metrics_pipeline/processed_parquet_batch.rs b/quickwit/quickwit-indexing/src/actors/metrics_pipeline/processed_parquet_batch.rs index 0599e03bf2c..cd6a3ea6f9d 100644 --- a/quickwit/quickwit-indexing/src/actors/metrics_pipeline/processed_parquet_batch.rs +++ b/quickwit/quickwit-indexing/src/actors/metrics_pipeline/processed_parquet_batch.rs @@ -35,7 +35,7 @@ pub struct ProcessedParquetBatch { /// Force commit flag - when true, accumulator should flush immediately. pub force_commit: bool, /// Memory tracking gauge guard. - _gauge_guard: GaugeGuard<'static>, + _gauge_guard: GaugeGuard, } impl ProcessedParquetBatch { diff --git a/quickwit/quickwit-indexing/src/actors/uploader.rs b/quickwit/quickwit-indexing/src/actors/uploader.rs index 8e1c0d56afb..e876d8f04a7 100644 --- a/quickwit/quickwit-indexing/src/actors/uploader.rs +++ b/quickwit/quickwit-indexing/src/actors/uploader.rs @@ -23,6 +23,7 @@ use async_trait::async_trait; use fail::fail_point; use itertools::Itertools; use quickwit_actors::{Actor, ActorContext, ActorExitStatus, Handler, Mailbox, QueueCapacity}; +use quickwit_common::metrics::gauge; use quickwit_common::pubsub::EventBroker; use quickwit_common::spawn_named_task; use quickwit_config::RetentionPolicy; @@ -203,26 +204,29 @@ impl Uploader { match self.uploader_type { UploaderType::IndexUploader => ( &CONCURRENT_UPLOAD_PERMITS_INDEX, - INDEXER_METRICS - .available_concurrent_upload_permits - .with_label_values(["indexer"]), + gauge!( + parent: &INDEXER_METRICS.available_concurrent_upload_permits, + "component" => "indexer", + ), ), UploaderType::MergeUploader => ( &CONCURRENT_UPLOAD_PERMITS_MERGE, - INDEXER_METRICS - 
.available_concurrent_upload_permits - .with_label_values(["merger"]), + gauge!( + parent: &INDEXER_METRICS.available_concurrent_upload_permits, + "component" => "merger", + ), ), UploaderType::DeleteUploader => ( &CONCURRENT_UPLOAD_PERMITS_MERGE, - INDEXER_METRICS - .available_concurrent_upload_permits - .with_label_values(["merger"]), + gauge!( + parent: &INDEXER_METRICS.available_concurrent_upload_permits, + "component" => "merger", + ), ), }; let concurrent_upload_permits = concurrent_upload_permits_once_cell .get_or_init(|| Semaphore::const_new(self.max_concurrent_split_uploads)); - concurrent_upload_permits_gauge.set(concurrent_upload_permits.available_permits() as i64); + concurrent_upload_permits_gauge.set(concurrent_upload_permits.available_permits() as f64); concurrent_upload_permits .acquire() .await diff --git a/quickwit/quickwit-indexing/src/metrics.rs b/quickwit/quickwit-indexing/src/metrics.rs index 98ca19636a2..a8350c794c7 100644 --- a/quickwit/quickwit-indexing/src/metrics.rs +++ b/quickwit/quickwit-indexing/src/metrics.rs @@ -14,99 +14,117 @@ use std::sync::LazyLock; -use quickwit_common::metrics::{ - IntCounter, IntCounterVec, IntGauge, IntGaugeVec, new_counter, new_counter_vec, new_gauge, - new_gauge_vec, -}; +use quickwit_common::metrics::{Counter, Gauge, counter, gauge}; pub struct IndexerMetrics { - pub processed_docs_total: IntCounterVec<2>, - pub processed_bytes: IntCounterVec<2>, - pub indexing_pipelines: IntGaugeVec<1>, - pub backpressure_micros: IntCounterVec<1>, - pub available_concurrent_upload_permits: IntGaugeVec<1>, - pub split_builders: IntGauge, - pub ongoing_merge_operations: IntGauge, - pub pending_merge_operations: IntGauge, - pub pending_merge_bytes: IntGauge, + pub processed_docs_total: Counter, + pub processed_bytes: Counter, + pub indexing_pipelines: Gauge, + pub backpressure_micros: Counter, + pub available_concurrent_upload_permits: Gauge, + pub split_builders: Gauge, + pub ongoing_merge_operations: Gauge, + pub 
pending_merge_operations: Gauge, + pub pending_merge_bytes: Gauge, // We use a lazy counter, as most users do not use Kafka. #[cfg_attr(not(feature = "kafka"), allow(dead_code))] - pub kafka_rebalance_total: LazyLock, + pub kafka_rebalance_total: LazyLock, } +static PROCESSED_DOCS_TOTAL: LazyLock = LazyLock::new(|| { + counter!( + name: "processed_docs_total", + description: "Number of processed docs by index, source and processed status in [valid, schema_error, parse_error, transform_error]", + subsystem: "indexing", + ) +}); + +static PROCESSED_BYTES: LazyLock = LazyLock::new(|| { + counter!( + name: "processed_bytes", + description: "Number of bytes of processed documents by index, source and processed status in [valid, schema_error, parse_error, transform_error]", + subsystem: "indexing", + ) +}); + +static INDEXING_PIPELINES: LazyLock = LazyLock::new(|| { + gauge!( + name: "indexing_pipelines", + description: "Number of running indexing pipelines", + subsystem: "indexing", + ) +}); + +static BACKPRESSURE_MICROS: LazyLock = LazyLock::new(|| { + counter!( + name: "backpressure_micros", + description: "Amount of time spent in backpressure (in micros). 
This time only includes the amount of time spent waiting for a place in the queue of another actor.", + subsystem: "indexing", + ) +}); + +static AVAILABLE_CONCURRENT_UPLOAD_PERMITS: LazyLock = LazyLock::new(|| { + gauge!( + name: "concurrent_upload_available_permits_num", + description: "Number of available concurrent upload permits by component in [merger, indexer]", + subsystem: "indexing", + ) +}); + +static SPLIT_BUILDERS: LazyLock = LazyLock::new(|| { + gauge!( + name: "split_builders", + description: "Number of existing index writer instances.", + subsystem: "indexing", + ) +}); + +static ONGOING_MERGE_OPERATIONS: LazyLock = LazyLock::new(|| { + gauge!( + name: "ongoing_merge_operations", + description: "Number of ongoing merge operations", + subsystem: "indexing", + observable: true, + ) +}); + +static PENDING_MERGE_OPERATIONS: LazyLock = LazyLock::new(|| { + gauge!( + name: "pending_merge_operations", + description: "Number of pending merge operations", + subsystem: "indexing", + ) +}); + +static PENDING_MERGE_BYTES: LazyLock = LazyLock::new(|| { + gauge!( + name: "pending_merge_bytes", + description: "Number of pending merge bytes", + subsystem: "indexing", + ) +}); + +static KAFKA_REBALANCE_TOTAL: LazyLock = LazyLock::new(|| { + counter!( + name: "kafka_rebalance_total", + description: "Number of kafka rebalances", + subsystem: "indexing", + ) +}); + impl Default for IndexerMetrics { fn default() -> Self { IndexerMetrics { - processed_docs_total: new_counter_vec( - "processed_docs_total", - "Number of processed docs by index, source and processed status in [valid, \ - schema_error, parse_error, transform_error]", - "indexing", - &[], - ["index", "docs_processed_status"], - ), - processed_bytes: new_counter_vec( - "processed_bytes", - "Number of bytes of processed documents by index, source and processed status in \ - [valid, schema_error, parse_error, transform_error]", - "indexing", - &[], - ["index", "docs_processed_status"], - ), - indexing_pipelines: 
new_gauge_vec( - "indexing_pipelines", - "Number of running indexing pipelines", - "indexing", - &[], - ["index"], - ), - backpressure_micros: new_counter_vec( - "backpressure_micros", - "Amount of time spent in backpressure (in micros). This time only includes the \ - amount of time spent waiting for a place in the queue of another actor.", - "indexing", - &[], - ["actor_name"], - ), - available_concurrent_upload_permits: new_gauge_vec( - "concurrent_upload_available_permits_num", - "Number of available concurrent upload permits by component in [merger, indexer]", - "indexing", - &[], - ["component"], - ), - split_builders: new_gauge( - "split_builders", - "Number of existing index writer instances.", - "indexing", - &[], - ), - ongoing_merge_operations: new_gauge( - "ongoing_merge_operations", - "Number of ongoing merge operations", - "indexing", - &[], - ), - pending_merge_operations: new_gauge( - "pending_merge_operations", - "Number of pending merge operations", - "indexing", - &[], - ), - pending_merge_bytes: new_gauge( - "pending_merge_bytes", - "Number of pending merge bytes", - "indexing", - &[], - ), - kafka_rebalance_total: LazyLock::new(|| { - new_counter( - "kafka_rebalance_total", - "Number of kafka rebalances", - "indexing", - &[], - ) - }), + processed_docs_total: PROCESSED_DOCS_TOTAL.clone(), + processed_bytes: PROCESSED_BYTES.clone(), + indexing_pipelines: INDEXING_PIPELINES.clone(), + backpressure_micros: BACKPRESSURE_MICROS.clone(), + available_concurrent_upload_permits: AVAILABLE_CONCURRENT_UPLOAD_PERMITS.clone(), + split_builders: SPLIT_BUILDERS.clone(), + ongoing_merge_operations: ONGOING_MERGE_OPERATIONS.clone(), + pending_merge_operations: PENDING_MERGE_OPERATIONS.clone(), + pending_merge_bytes: PENDING_MERGE_BYTES.clone(), + kafka_rebalance_total: LazyLock::new(|| KAFKA_REBALANCE_TOTAL.clone()), } } } diff --git a/quickwit/quickwit-indexing/src/models/indexed_split.rs b/quickwit/quickwit-indexing/src/models/indexed_split.rs index 
cd272bdc34c..4453802959f 100644 --- a/quickwit/quickwit-indexing/src/models/indexed_split.rs +++ b/quickwit/quickwit-indexing/src/models/indexed_split.rs @@ -182,8 +182,8 @@ pub struct IndexedSplitBatchBuilder { pub publish_token_opt: Option, pub commit_trigger: CommitTrigger, pub batch_parent_span: Span, - pub memory_usage: GaugeGuard<'static>, - pub _split_builders_guard: GaugeGuard<'static>, + pub memory_usage: GaugeGuard, + pub _split_builders_guard: GaugeGuard, } /// Sends notifications to the Publisher that the last batch of splits was empty. diff --git a/quickwit/quickwit-indexing/src/models/processed_doc.rs b/quickwit/quickwit-indexing/src/models/processed_doc.rs index bed695aa1d4..d71545eb036 100644 --- a/quickwit/quickwit-indexing/src/models/processed_doc.rs +++ b/quickwit/quickwit-indexing/src/models/processed_doc.rs @@ -41,7 +41,7 @@ pub struct ProcessedDocBatch { pub docs: Vec, pub checkpoint_delta: SourceCheckpointDelta, pub force_commit: bool, - _gauge_guard: GaugeGuard<'static>, + _gauge_guard: GaugeGuard, } impl ProcessedDocBatch { diff --git a/quickwit/quickwit-indexing/src/models/raw_doc_batch.rs b/quickwit/quickwit-indexing/src/models/raw_doc_batch.rs index f88d9fcac2b..fe270e378c5 100644 --- a/quickwit/quickwit-indexing/src/models/raw_doc_batch.rs +++ b/quickwit/quickwit-indexing/src/models/raw_doc_batch.rs @@ -24,7 +24,7 @@ pub struct RawDocBatch { pub docs: Vec, pub checkpoint_delta: SourceCheckpointDelta, pub force_commit: bool, - _gauge_guard: GaugeGuard<'static>, + _gauge_guard: GaugeGuard, } impl RawDocBatch { diff --git a/quickwit/quickwit-indexing/src/source/kafka_source.rs b/quickwit/quickwit-indexing/src/source/kafka_source.rs index 93ce5b3dc37..7e9ab3056b8 100644 --- a/quickwit/quickwit-indexing/src/source/kafka_source.rs +++ b/quickwit/quickwit-indexing/src/source/kafka_source.rs @@ -127,7 +127,9 @@ macro_rules! 
return_if_err { /// impl ConsumerContext for RdKafkaContext { fn pre_rebalance(&self, _consumer: &BaseConsumer, rebalance: &Rebalance) { - crate::metrics::INDEXER_METRICS.kafka_rebalance_total.inc(); + crate::metrics::INDEXER_METRICS + .kafka_rebalance_total + .increment(1); quickwit_common::rate_limited_info!(limit_per_min = 3, topic = self.topic, "rebalance"); if let Rebalance::Revoke(tpl) = rebalance { let partitions = collect_partitions(tpl, &self.topic); diff --git a/quickwit/quickwit-indexing/src/source/mod.rs b/quickwit/quickwit-indexing/src/source/mod.rs index 0e696eaea0f..7a500cd9c98 100644 --- a/quickwit/quickwit-indexing/src/source/mod.rs +++ b/quickwit/quickwit-indexing/src/source/mod.rs @@ -519,7 +519,7 @@ pub(super) struct BatchBuilder { num_bytes: u64, checkpoint_delta: SourceCheckpointDelta, force_commit: bool, - gauge_guard: GaugeGuard<'static>, + gauge_guard: GaugeGuard, } impl BatchBuilder { diff --git a/quickwit/quickwit-ingest/src/ingest_api_service.rs b/quickwit/quickwit-ingest/src/ingest_api_service.rs index 7ee8d0b232b..f7f16174538 100644 --- a/quickwit/quickwit-ingest/src/ingest_api_service.rs +++ b/quickwit/quickwit-ingest/src/ingest_api_service.rs @@ -20,6 +20,7 @@ use bytes::Bytes; use quickwit_actors::{ Actor, ActorContext, ActorExitStatus, DeferableReplyHandler, Handler, QueueCapacity, }; +use quickwit_common::metrics::counter; use quickwit_common::runtimes::RuntimeType; use quickwit_common::tower::Cost; use quickwit_proto::ingest::RateLimitingCause; @@ -201,12 +202,13 @@ impl IngestApiService { } num_docs += batch_num_docs; - INGEST_METRICS - .ingested_docs_bytes_valid - .inc_by(batch_num_bytes as u64); - INGEST_METRICS - .ingested_docs_valid - .inc_by(batch_num_docs as u64); + counter!( + parent: &INGEST_METRICS.docs_bytes_total, + "validity" => "valid", + ) + .increment(batch_num_bytes as u64); + counter!(parent: &INGEST_METRICS.docs_total, "validity" => "valid") + .increment(batch_num_docs as u64); } // TODO we could fsync here and 
disable autosync to have better i/o perfs. Ok(( diff --git a/quickwit/quickwit-ingest/src/ingest_v2/broadcast/local_shards.rs b/quickwit/quickwit-ingest/src/ingest_v2/broadcast/local_shards.rs index 6531c893c9f..5fb95f5ea98 100644 --- a/quickwit/quickwit-ingest/src/ingest_v2/broadcast/local_shards.rs +++ b/quickwit/quickwit-ingest/src/ingest_v2/broadcast/local_shards.rs @@ -197,10 +197,10 @@ impl ShardThroughputTimeSeriesMap { .div_ceil(ONE_MIB.as_u64()); INGEST_V2_METRICS .shard_st_throughput_mib - .observe(short_term_ingestion_rate_mib_per_sec_u64 as f64); + .record(short_term_ingestion_rate_mib_per_sec_u64 as f64); INGEST_V2_METRICS .shard_lt_throughput_mib - .observe(long_term_ingestion_rate_mib_per_sec_u64 as f64); + .record(long_term_ingestion_rate_mib_per_sec_u64 as f64); let short_term_ingestion_rate = RateMibPerSec(short_term_ingestion_rate_mib_per_sec_u64 as u16); @@ -300,10 +300,10 @@ impl BroadcastLocalShardsTask { } } } - INGEST_V2_METRICS.open_shards.set(num_open_shards as i64); + INGEST_V2_METRICS.open_shards.set(num_open_shards as f64); INGEST_V2_METRICS .closed_shards - .set(num_closed_shards as i64); + .set(num_closed_shards as f64); let snapshot = LocalShardsSnapshot { per_source_shard_infos, diff --git a/quickwit/quickwit-ingest/src/ingest_v2/ingester.rs b/quickwit/quickwit-ingest/src/ingest_v2/ingester.rs index c176f3d9313..61dd11e2ad9 100644 --- a/quickwit/quickwit-ingest/src/ingest_v2/ingester.rs +++ b/quickwit/quickwit-ingest/src/ingest_v2/ingester.rs @@ -25,7 +25,7 @@ use futures::StreamExt; use futures::stream::FuturesUnordered; use mrecordlog::error::CreateQueueError; use quickwit_cluster::Cluster; -use quickwit_common::metrics::{GaugeGuard, MEMORY_METRICS}; +use quickwit_common::metrics::{GaugeGuard, MEMORY_METRICS, counter}; use quickwit_common::pretty::PrettyDisplay; use quickwit_common::pubsub::{EventBroker, EventSubscriber}; use quickwit_common::rate_limiter::{RateLimiter, RateLimiterSettings}; @@ -332,10 +332,11 @@ impl Ingester { 
advise_reset_shards_response.shards_to_truncate.len(), now.elapsed().pretty_display() ); - INGEST_V2_METRICS - .reset_shards_operations_total - .with_label_values(["success"]) - .inc(); + counter!( + parent: &INGEST_V2_METRICS.reset_shards_operations_total, + "status" => "success", + ) + .increment(1); let wal_usage = state_guard.mrecordlog.resource_usage(); report_wal_usage(wal_usage); @@ -343,18 +344,20 @@ impl Ingester { Ok(Err(error)) => { warn!("advise reset shards request failed: {error}"); - INGEST_V2_METRICS - .reset_shards_operations_total - .with_label_values(["error"]) - .inc(); + counter!( + parent: &INGEST_V2_METRICS.reset_shards_operations_total, + "status" => "error", + ) + .increment(1); } Err(_) => { warn!("advise reset shards request timed out"); - INGEST_V2_METRICS - .reset_shards_operations_total - .with_label_values(["timeout"]) - .inc(); + counter!( + parent: &INGEST_V2_METRICS.reset_shards_operations_total, + "status" => "timeout", + ) + .increment(1); } }; // We still hold the permit while sleeping so we effectively rate limit the reset shards @@ -567,12 +570,16 @@ impl Ingester { }; if valid_doc_batch.is_empty() { - crate::metrics::INGEST_METRICS - .ingested_docs_invalid - .inc_by(parse_failures.len() as u64); - crate::metrics::INGEST_METRICS - .ingested_docs_bytes_invalid - .inc_by(original_batch_num_bytes); + counter!( + parent: &crate::metrics::INGEST_METRICS.docs_total, + "validity" => "invalid", + ) + .increment(parse_failures.len() as u64); + counter!( + parent: &crate::metrics::INGEST_METRICS.docs_bytes_total, + "validity" => "invalid", + ) + .increment(original_batch_num_bytes); let persist_success = PersistSuccess { subrequest_id: subrequest.subrequest_id, index_uid: subrequest.index_uid, @@ -586,19 +593,27 @@ impl Ingester { continue; }; - crate::metrics::INGEST_METRICS - .ingested_docs_valid - .inc_by(valid_doc_batch.num_docs() as u64); - crate::metrics::INGEST_METRICS - .ingested_docs_bytes_valid - 
.inc_by(valid_doc_batch.num_bytes() as u64); + counter!( + parent: &crate::metrics::INGEST_METRICS.docs_total, + "validity" => "valid", + ) + .increment(valid_doc_batch.num_docs() as u64); + counter!( + parent: &crate::metrics::INGEST_METRICS.docs_bytes_total, + "validity" => "valid", + ) + .increment(valid_doc_batch.num_bytes() as u64); if !parse_failures.is_empty() { - crate::metrics::INGEST_METRICS - .ingested_docs_invalid - .inc_by(parse_failures.len() as u64); - crate::metrics::INGEST_METRICS - .ingested_docs_bytes_invalid - .inc_by(original_batch_num_bytes - valid_doc_batch.num_bytes() as u64); + counter!( + parent: &crate::metrics::INGEST_METRICS.docs_total, + "validity" => "invalid", + ) + .increment(parse_failures.len() as u64); + counter!( + parent: &crate::metrics::INGEST_METRICS.docs_bytes_total, + "validity" => "invalid", + ) + .increment(original_batch_num_bytes - valid_doc_batch.num_bytes() as u64); } let valid_batch_num_bytes = valid_doc_batch.num_bytes() as u64; shard.rate_meter.update(valid_batch_num_bytes); diff --git a/quickwit/quickwit-ingest/src/ingest_v2/metrics.rs b/quickwit/quickwit-ingest/src/ingest_v2/metrics.rs index 5e034f1bd36..62ed66372c2 100644 --- a/quickwit/quickwit-ingest/src/ingest_v2/metrics.rs +++ b/quickwit/quickwit-ingest/src/ingest_v2/metrics.rs @@ -16,8 +16,7 @@ use std::sync::LazyLock; use mrecordlog::ResourceUsage; use quickwit_common::metrics::{ - Histogram, HistogramVec, IntCounter, IntCounterVec, IntGauge, IntGaugeVec, exponential_buckets, - linear_buckets, new_counter_vec, new_gauge, new_gauge_vec, new_histogram, new_histogram_vec, + Counter, Gauge, Histogram, counter, exponential_buckets, gauge, histogram, linear_buckets, }; // Counter vec counting the different outcomes of ingest requests as @@ -25,136 +24,168 @@ use quickwit_common::metrics::{ // // The counter are counting persist subrequests. 
pub(crate) struct IngestResultMetrics { - pub success: IntCounter, - pub circuit_breaker: IntCounter, - pub unspecified: IntCounter, - pub index_not_found: IntCounter, - pub source_not_found: IntCounter, - pub internal: IntCounter, - pub no_shards_available: IntCounter, - pub shard_rate_limited: IntCounter, - pub wal_full: IntCounter, - pub timeout: IntCounter, - pub router_timeout: IntCounter, - pub router_load_shedding: IntCounter, - pub load_shedding: IntCounter, - pub shard_not_found: IntCounter, - pub unavailable: IntCounter, + pub success: Counter, + pub circuit_breaker: Counter, + pub unspecified: Counter, + pub index_not_found: Counter, + pub source_not_found: Counter, + pub internal: Counter, + pub no_shards_available: Counter, + pub shard_rate_limited: Counter, + pub wal_full: Counter, + pub timeout: Counter, + pub router_timeout: Counter, + pub router_load_shedding: Counter, + pub load_shedding: Counter, + pub shard_not_found: Counter, + pub unavailable: Counter, } +static INGEST_RESULT_TOTAL: LazyLock = LazyLock::new(|| { + counter!( + name: "ingest_result_total", + description: "Number of ingest requests by result", + subsystem: "ingest", + ) +}); + impl Default for IngestResultMetrics { fn default() -> Self { - let ingest_result_total_vec = new_counter_vec::<1>( - "ingest_result_total", - "Number of ingest requests by result", - "ingest", - &[], - ["result"], - ); Self { - success: ingest_result_total_vec.with_label_values(["success"]), - circuit_breaker: ingest_result_total_vec.with_label_values(["circuit_breaker"]), - unspecified: ingest_result_total_vec.with_label_values(["unspecified"]), - index_not_found: ingest_result_total_vec.with_label_values(["index_not_found"]), - source_not_found: ingest_result_total_vec.with_label_values(["source_not_found"]), - internal: ingest_result_total_vec.with_label_values(["internal"]), - no_shards_available: ingest_result_total_vec.with_label_values(["no_shards_available"]), - shard_rate_limited: 
ingest_result_total_vec.with_label_values(["shard_rate_limited"]), - wal_full: ingest_result_total_vec.with_label_values(["wal_full"]), - timeout: ingest_result_total_vec.with_label_values(["timeout"]), - router_timeout: ingest_result_total_vec.with_label_values(["router_timeout"]), - router_load_shedding: ingest_result_total_vec - .with_label_values(["router_load_shedding"]), - load_shedding: ingest_result_total_vec.with_label_values(["load_shedding"]), - unavailable: ingest_result_total_vec.with_label_values(["unavailable"]), - shard_not_found: ingest_result_total_vec.with_label_values(["shard_not_found"]), + success: counter!(parent: &*INGEST_RESULT_TOTAL, "result" => "success"), + circuit_breaker: counter!(parent: &*INGEST_RESULT_TOTAL, "result" => "circuit_breaker"), + unspecified: counter!(parent: &*INGEST_RESULT_TOTAL, "result" => "unspecified"), + index_not_found: counter!(parent: &*INGEST_RESULT_TOTAL, "result" => "index_not_found"), + source_not_found: counter!( + parent: &*INGEST_RESULT_TOTAL, + "result" => "source_not_found", + ), + internal: counter!(parent: &*INGEST_RESULT_TOTAL, "result" => "internal"), + no_shards_available: counter!( + parent: &*INGEST_RESULT_TOTAL, + "result" => "no_shards_available", + ), + shard_rate_limited: counter!( + parent: &*INGEST_RESULT_TOTAL, + "result" => "shard_rate_limited", + ), + wal_full: counter!(parent: &*INGEST_RESULT_TOTAL, "result" => "wal_full"), + timeout: counter!(parent: &*INGEST_RESULT_TOTAL, "result" => "timeout"), + router_timeout: counter!(parent: &*INGEST_RESULT_TOTAL, "result" => "router_timeout"), + router_load_shedding: counter!( + parent: &*INGEST_RESULT_TOTAL, + "result" => "router_load_shedding", + ), + load_shedding: counter!(parent: &*INGEST_RESULT_TOTAL, "result" => "load_shedding"), + unavailable: counter!(parent: &*INGEST_RESULT_TOTAL, "result" => "unavailable"), + shard_not_found: counter!(parent: &*INGEST_RESULT_TOTAL, "result" => "shard_not_found"), } } } pub(super) struct 
IngestV2Metrics { - pub reset_shards_operations_total: IntCounterVec<1>, - pub open_shards: IntGauge, - pub closed_shards: IntGauge, + pub reset_shards_operations_total: Counter, + pub open_shards: Gauge, + pub closed_shards: Gauge, pub shard_lt_throughput_mib: Histogram, pub shard_st_throughput_mib: Histogram, - pub wal_acquire_lock_requests_in_flight: IntGaugeVec<2>, - pub wal_acquire_lock_request_duration_secs: HistogramVec<2>, - pub wal_disk_used_bytes: IntGauge, - pub wal_memory_used_bytes: IntGauge, + pub wal_acquire_lock_requests_in_flight: Gauge, + pub wal_acquire_lock_request_duration_secs: Histogram, + pub wal_disk_used_bytes: Gauge, + pub wal_memory_used_bytes: Gauge, pub ingest_results: IngestResultMetrics, - pub ingest_attempts: IntCounterVec<1>, + pub ingest_attempts: Counter, } +static INGEST_ATTEMPTS: LazyLock = LazyLock::new(|| { + counter!( + name: "ingest_attempts", + description: "Number of routing attempts by AZ locality", + subsystem: "ingest", + ) +}); + +static RESET_SHARDS_OPERATIONS_TOTAL: LazyLock = LazyLock::new(|| { + counter!( + name: "reset_shards_operations_total", + description: "Total number of reset shards operations performed.", + subsystem: "ingest", + ) +}); + +static SHARDS: LazyLock = LazyLock::new(|| { + gauge!( + name: "shards", + description: "Number of shards hosted by the ingester.", + subsystem: "ingest", + ) +}); + +static SHARD_LT_THROUGHPUT_MIB: LazyLock = LazyLock::new(|| { + histogram!( + name: "shard_lt_throughput_mib", + description: "Shard long term throughput as reported through chitchat", + subsystem: "ingest", + buckets: linear_buckets(0.0f64, 1.0f64, 15).unwrap(), + ) +}); + +static SHARD_ST_THROUGHPUT_MIB: LazyLock = LazyLock::new(|| { + histogram!( + name: "shard_st_throughput_mib", + description: "Shard short term throughput as reported through chitchat", + subsystem: "ingest", + buckets: linear_buckets(0.0f64, 1.0f64, 15).unwrap(), + ) +}); + +static WAL_ACQUIRE_LOCK_REQUESTS_IN_FLIGHT: LazyLock = 
LazyLock::new(|| { + gauge!( + name: "wal_acquire_lock_requests_in_flight", + description: "Number of acquire lock requests in-flight.", + subsystem: "ingest", + ) +}); + +static WAL_ACQUIRE_LOCK_REQUEST_DURATION_SECS: LazyLock = LazyLock::new(|| { + histogram!( + name: "wal_acquire_lock_request_duration_secs", + description: "Duration of acquire lock requests in seconds.", + subsystem: "ingest", + buckets: exponential_buckets(0.001, 2.0, 12).unwrap(), + ) +}); + +static WAL_DISK_USED_BYTES: LazyLock = LazyLock::new(|| { + gauge!( + name: "wal_disk_used_bytes", + description: "WAL disk space used in bytes.", + subsystem: "ingest", + ) +}); + +static WAL_MEMORY_USED_BYTES: LazyLock = LazyLock::new(|| { + gauge!( + name: "wal_memory_used_bytes", + description: "WAL memory used in bytes.", + subsystem: "ingest", + ) +}); + impl Default for IngestV2Metrics { fn default() -> Self { Self { ingest_results: IngestResultMetrics::default(), - ingest_attempts: new_counter_vec::<1>( - "ingest_attempts", - "Number of routing attempts by AZ locality", - "ingest", - &[], - ["az_routing"], - ), - reset_shards_operations_total: new_counter_vec( - "reset_shards_operations_total", - "Total number of reset shards operations performed.", - "ingest", - &[], - ["status"], - ), - open_shards: new_gauge( - "shards", - "Number of shards hosted by the ingester.", - "ingest", - &[("state", "open")], - ), - closed_shards: new_gauge( - "shards", - "Number of shards hosted by the ingester.", - "ingest", - &[("state", "closed")], - ), - shard_lt_throughput_mib: new_histogram( - "shard_lt_throughput_mib", - "Shard long term throughput as reported through chitchat", - "ingest", - linear_buckets(0.0f64, 1.0f64, 15).unwrap(), - ), - shard_st_throughput_mib: new_histogram( - "shard_st_throughput_mib", - "Shard short term throughput as reported through chitchat", - "ingest", - linear_buckets(0.0f64, 1.0f64, 15).unwrap(), - ), - wal_acquire_lock_requests_in_flight: new_gauge_vec( - 
"wal_acquire_lock_requests_in_flight", - "Number of acquire lock requests in-flight.", - "ingest", - &[], - ["operation", "type"], - ), - wal_acquire_lock_request_duration_secs: new_histogram_vec( - "wal_acquire_lock_request_duration_secs", - "Duration of acquire lock requests in seconds.", - "ingest", - &[], - ["operation", "type"], - exponential_buckets(0.001, 2.0, 12).unwrap(), - ), - wal_disk_used_bytes: new_gauge( - "wal_disk_used_bytes", - "WAL disk space used in bytes.", - "ingest", - &[], - ), - wal_memory_used_bytes: new_gauge( - "wal_memory_used_bytes", - "WAL memory used in bytes.", - "ingest", - &[], - ), + ingest_attempts: INGEST_ATTEMPTS.clone(), + reset_shards_operations_total: RESET_SHARDS_OPERATIONS_TOTAL.clone(), + open_shards: gauge!(parent: &*SHARDS, "state" => "open"), + closed_shards: gauge!(parent: &*SHARDS, "state" => "closed"), + shard_lt_throughput_mib: SHARD_LT_THROUGHPUT_MIB.clone(), + shard_st_throughput_mib: SHARD_ST_THROUGHPUT_MIB.clone(), + wal_acquire_lock_requests_in_flight: WAL_ACQUIRE_LOCK_REQUESTS_IN_FLIGHT.clone(), + wal_acquire_lock_request_duration_secs: WAL_ACQUIRE_LOCK_REQUEST_DURATION_SECS.clone(), + wal_disk_used_bytes: WAL_DISK_USED_BYTES.clone(), + wal_memory_used_bytes: WAL_MEMORY_USED_BYTES.clone(), } } } @@ -162,14 +193,14 @@ impl Default for IngestV2Metrics { pub(super) fn report_wal_usage(wal_usage: ResourceUsage) { INGEST_V2_METRICS .wal_disk_used_bytes - .set(wal_usage.disk_used_bytes as i64); + .set(wal_usage.disk_used_bytes as f64); quickwit_common::metrics::MEMORY_METRICS .in_flight .wal - .set(wal_usage.memory_allocated_bytes as i64); + .set(wal_usage.memory_allocated_bytes as f64); INGEST_V2_METRICS .wal_memory_used_bytes - .set(wal_usage.memory_used_bytes as i64); + .set(wal_usage.memory_used_bytes as f64); } pub(super) static INGEST_V2_METRICS: LazyLock = diff --git a/quickwit/quickwit-ingest/src/ingest_v2/replication.rs b/quickwit/quickwit-ingest/src/ingest_v2/replication.rs index bbf0cd037c5..fe2bd941aff 
100644 --- a/quickwit/quickwit-ingest/src/ingest_v2/replication.rs +++ b/quickwit/quickwit-ingest/src/ingest_v2/replication.rs @@ -669,10 +669,10 @@ impl ReplicationTask { INGEST_METRICS .replicated_num_bytes_total - .inc_by(batch_num_bytes); + .increment(batch_num_bytes); INGEST_METRICS .replicated_num_docs_total - .inc_by(batch_num_docs); + .increment(batch_num_docs); let replicate_success = ReplicateSuccess { subrequest_id: subrequest.subrequest_id, diff --git a/quickwit/quickwit-ingest/src/ingest_v2/router.rs b/quickwit/quickwit-ingest/src/ingest_v2/router.rs index e249dd1e0fe..7830a72889e 100644 --- a/quickwit/quickwit-ingest/src/ingest_v2/router.rs +++ b/quickwit/quickwit-ingest/src/ingest_v2/router.rs @@ -20,7 +20,7 @@ use std::time::Duration; use async_trait::async_trait; use futures::stream::FuturesUnordered; use futures::{Future, StreamExt}; -use quickwit_common::metrics::{GaugeGuard, MEMORY_METRICS}; +use quickwit_common::metrics::{GaugeGuard, MEMORY_METRICS, counter}; use quickwit_common::pubsub::{EventBroker, EventSubscriber}; use quickwit_common::{rate_limited_error, rate_limited_warn}; use quickwit_proto::control_plane::{ @@ -371,10 +371,11 @@ impl IngestRouter { let az_locality = state_guard .routing_table .classify_az_locality(&ingester_node.node_id, &self.ingester_pool); - INGEST_V2_METRICS - .ingest_attempts - .with_label_values([az_locality]) - .inc(); + counter!( + parent: &INGEST_V2_METRICS.ingest_attempts, + "az_routing" => az_locality, + ) + .increment(1); let persist_subrequest = PersistSubrequest { subrequest_id: subrequest.subrequest_id, index_uid: Some(ingester_node.index_uid.clone()), @@ -497,32 +498,36 @@ fn update_ingest_metrics(ingest_result: &IngestV2Result, num_s Ok(ingest_response) => { ingest_results_metrics .success - .inc_by(ingest_response.successes.len() as u64); + .increment(ingest_response.successes.len() as u64); for ingest_failure in &ingest_response.failures { match ingest_failure.reason() { 
IngestFailureReason::CircuitBreaker => { - ingest_results_metrics.circuit_breaker.inc(); + ingest_results_metrics.circuit_breaker.increment(1); + } + IngestFailureReason::Unspecified => { + ingest_results_metrics.unspecified.increment(1) } - IngestFailureReason::Unspecified => ingest_results_metrics.unspecified.inc(), IngestFailureReason::IndexNotFound => { - ingest_results_metrics.index_not_found.inc() + ingest_results_metrics.index_not_found.increment(1) } IngestFailureReason::SourceNotFound => { - ingest_results_metrics.source_not_found.inc() + ingest_results_metrics.source_not_found.increment(1) } - IngestFailureReason::Internal => ingest_results_metrics.internal.inc(), + IngestFailureReason::Internal => ingest_results_metrics.internal.increment(1), IngestFailureReason::NoShardsAvailable => { - ingest_results_metrics.no_shards_available.inc() + ingest_results_metrics.no_shards_available.increment(1) } IngestFailureReason::ShardRateLimited => { - ingest_results_metrics.shard_rate_limited.inc() + ingest_results_metrics.shard_rate_limited.increment(1) } - IngestFailureReason::WalFull => ingest_results_metrics.wal_full.inc(), - IngestFailureReason::Timeout => ingest_results_metrics.timeout.inc(), + IngestFailureReason::WalFull => ingest_results_metrics.wal_full.increment(1), + IngestFailureReason::Timeout => ingest_results_metrics.timeout.increment(1), IngestFailureReason::RouterLoadShedding => { - ingest_results_metrics.router_load_shedding.inc() + ingest_results_metrics.router_load_shedding.increment(1) + } + IngestFailureReason::LoadShedding => { + ingest_results_metrics.load_shedding.increment(1) } - IngestFailureReason::LoadShedding => ingest_results_metrics.load_shedding.inc(), } } } @@ -531,43 +536,47 @@ fn update_ingest_metrics(ingest_result: &IngestV2Result, num_s RateLimitingCause::RouterLoadShedding => { ingest_results_metrics .router_load_shedding - .inc_by(num_subrequests); - } - RateLimitingCause::LoadShedding => { - 
ingest_results_metrics.load_shedding.inc_by(num_subrequests) + .increment(num_subrequests); } + RateLimitingCause::LoadShedding => ingest_results_metrics + .load_shedding + .increment(num_subrequests), RateLimitingCause::WalFull => { - ingest_results_metrics.wal_full.inc_by(num_subrequests); + ingest_results_metrics.wal_full.increment(num_subrequests); } RateLimitingCause::CircuitBreaker => { ingest_results_metrics .circuit_breaker - .inc_by(num_subrequests); + .increment(num_subrequests); } RateLimitingCause::ShardRateLimiting => { ingest_results_metrics .shard_rate_limited - .inc_by(num_subrequests); + .increment(num_subrequests); } RateLimitingCause::Unknown => { - ingest_results_metrics.unspecified.inc_by(num_subrequests); + ingest_results_metrics + .unspecified + .increment(num_subrequests); } }, IngestV2Error::Timeout(_) => { ingest_results_metrics .router_timeout - .inc_by(num_subrequests); + .increment(num_subrequests); } IngestV2Error::ShardNotFound { .. } => { ingest_results_metrics .shard_not_found - .inc_by(num_subrequests); + .increment(num_subrequests); } IngestV2Error::Unavailable(_) => { - ingest_results_metrics.unavailable.inc_by(num_subrequests); + ingest_results_metrics + .unavailable + .increment(num_subrequests); } IngestV2Error::Internal(_) => { - ingest_results_metrics.internal.inc_by(num_subrequests); + ingest_results_metrics.internal.increment(num_subrequests); } }, } diff --git a/quickwit/quickwit-ingest/src/lib.rs b/quickwit/quickwit-ingest/src/lib.rs index f021f4888c1..734dade2e83 100644 --- a/quickwit/quickwit-ingest/src/lib.rs +++ b/quickwit/quickwit-ingest/src/lib.rs @@ -107,12 +107,15 @@ pub async fn start_ingest_api_service( #[macro_export] macro_rules! 
with_lock_metrics { - ($future:expr, $($label:tt),*) => { + ($future:expr, $operation:expr, $kind:expr) => { { - $crate::ingest_v2::metrics::INGEST_V2_METRICS - .wal_acquire_lock_requests_in_flight - .with_label_values([$($label),*]) - .inc(); + quickwit_common::metrics::gauge!( + parent: &$crate::ingest_v2::metrics::INGEST_V2_METRICS + .wal_acquire_lock_requests_in_flight, + "operation" => $operation, + "type" => $kind, + ) + .increment(1.0); let now = std::time::Instant::now(); let guard = $future; @@ -124,14 +127,20 @@ macro_rules! with_lock_metrics { "lock acquisition took {}ms", elapsed.as_millis() ); } - $crate::ingest_v2::metrics::INGEST_V2_METRICS - .wal_acquire_lock_requests_in_flight - .with_label_values([$($label),*]) - .dec(); - $crate::ingest_v2::metrics::INGEST_V2_METRICS - .wal_acquire_lock_request_duration_secs - .with_label_values([$($label),*]) - .observe(elapsed.as_secs_f64()); + quickwit_common::metrics::gauge!( + parent: &$crate::ingest_v2::metrics::INGEST_V2_METRICS + .wal_acquire_lock_requests_in_flight, + "operation" => $operation, + "type" => $kind, + ) + .decrement(1.0); + quickwit_common::metrics::histogram!( + parent: &$crate::ingest_v2::metrics::INGEST_V2_METRICS + .wal_acquire_lock_request_duration_secs, + "operation" => $operation, + "type" => $kind, + ) + .record(elapsed.as_secs_f64()); guard } diff --git a/quickwit/quickwit-ingest/src/metrics.rs b/quickwit/quickwit-ingest/src/metrics.rs index 7b6888243e5..badcd9689e9 100644 --- a/quickwit/quickwit-ingest/src/metrics.rs +++ b/quickwit/quickwit-ingest/src/metrics.rs @@ -14,67 +14,66 @@ use std::sync::LazyLock; -use quickwit_common::metrics::{IntCounter, IntGauge, new_counter, new_counter_vec, new_gauge}; +use quickwit_common::metrics::{Counter, Gauge, counter, gauge}; pub struct IngestMetrics { - pub ingested_docs_bytes_valid: IntCounter, - pub ingested_docs_bytes_invalid: IntCounter, - pub ingested_docs_invalid: IntCounter, - pub ingested_docs_valid: IntCounter, + pub 
docs_bytes_total: Counter, + pub docs_total: Counter, - pub replicated_num_bytes_total: IntCounter, - pub replicated_num_docs_total: IntCounter, + pub replicated_num_bytes_total: Counter, + pub replicated_num_docs_total: Counter, #[allow(dead_code)] // this really shouldn't be dead, it needs to be used somewhere - pub queue_count: IntGauge, + pub queue_count: Gauge, } -impl Default for IngestMetrics { - fn default() -> Self { - let ingest_docs_bytes_total = new_counter_vec( - "docs_bytes_total", - "Total size of the docs ingested, measured in ingester's leader, after validation and \ - before persistence/replication", - "ingest", - &[], - ["validity"], - ); - let ingested_docs_bytes_valid = ingest_docs_bytes_total.with_label_values(["valid"]); - let ingested_docs_bytes_invalid = ingest_docs_bytes_total.with_label_values(["invalid"]); +static DOCS_BYTES_TOTAL: LazyLock = LazyLock::new(|| { + counter!( + name: "docs_bytes_total", + description: "Total size of the docs ingested, measured in ingester's leader, after validation and before persistence/replication", + subsystem: "ingest", + ) +}); + +static DOCS_TOTAL: LazyLock = LazyLock::new(|| { + counter!( + name: "docs_total", + description: "Total number of the docs ingested, measured in ingester's leader, after validation and before persistence/replication", + subsystem: "ingest", + ) +}); + +static REPLICATED_NUM_BYTES_TOTAL: LazyLock = LazyLock::new(|| { + counter!( + name: "replicated_num_bytes_total", + description: "Total size in bytes of the replicated docs.", + subsystem: "ingest", + ) +}); - let ingest_docs_total = new_counter_vec( - "docs_total", - "Total number of the docs ingested, measured in ingester's leader, after validation \ - and before persistence/replication", - "ingest", - &[], - ["validity"], - ); - let ingested_docs_valid = ingest_docs_total.with_label_values(["valid"]); - let ingested_docs_invalid = ingest_docs_total.with_label_values(["invalid"]); +static REPLICATED_NUM_DOCS_TOTAL: LazyLock 
= LazyLock::new(|| { + counter!( + name: "replicated_num_docs_total", + description: "Total number of docs replicated.", + subsystem: "ingest", + ) +}); +static QUEUE_COUNT: LazyLock = LazyLock::new(|| { + gauge!( + name: "queue_count", + description: "Number of queues currently active", + subsystem: "ingest", + ) +}); + +impl Default for IngestMetrics { + fn default() -> Self { IngestMetrics { - ingested_docs_bytes_valid, - ingested_docs_bytes_invalid, - ingested_docs_valid, - ingested_docs_invalid, - replicated_num_bytes_total: new_counter( - "replicated_num_bytes_total", - "Total size in bytes of the replicated docs.", - "ingest", - &[], - ), - replicated_num_docs_total: new_counter( - "replicated_num_docs_total", - "Total number of docs replicated.", - "ingest", - &[], - ), - queue_count: new_gauge( - "queue_count", - "Number of queues currently active", - "ingest", - &[], - ), + docs_bytes_total: DOCS_BYTES_TOTAL.clone(), + docs_total: DOCS_TOTAL.clone(), + replicated_num_bytes_total: REPLICATED_NUM_BYTES_TOTAL.clone(), + replicated_num_docs_total: REPLICATED_NUM_DOCS_TOTAL.clone(), + queue_count: QUEUE_COUNT.clone(), } } } diff --git a/quickwit/quickwit-jaeger/src/lib.rs b/quickwit/quickwit-jaeger/src/lib.rs index 1b6dfc27d0c..7fba262ee55 100644 --- a/quickwit/quickwit-jaeger/src/lib.rs +++ b/quickwit/quickwit-jaeger/src/lib.rs @@ -21,6 +21,7 @@ use std::time::Instant; use itertools::{Either, Itertools}; use prost::Message; use prost_types::{Duration as WellKnownDuration, Timestamp as WellKnownTimestamp}; +use quickwit_common::metrics::{counter, histogram}; use quickwit_config::JaegerConfig; use quickwit_opentelemetry::otlp::{ Event as QwEvent, Link as QwLink, OTEL_TRACES_INDEX_ID, Span as QwSpan, SpanFingerprint, @@ -415,43 +416,57 @@ impl JaegerService { current_span.record("num_spans", num_spans_total); current_span.record("num_bytes", num_bytes_total); - JAEGER_SERVICE_METRICS - .fetched_traces_total - .with_label_values([operation_name, 
OTEL_TRACES_INDEX_ID]) - .inc_by(num_traces); + counter!( + parent: &JAEGER_SERVICE_METRICS.fetched_traces_total, + "operation" => operation_name, + "index" => OTEL_TRACES_INDEX_ID, + ) + .increment(num_traces); let elapsed = request_start.elapsed().as_secs_f64(); - JAEGER_SERVICE_METRICS - .request_duration_seconds - .with_label_values([operation_name, OTEL_TRACES_INDEX_ID, "false"]) - .observe(elapsed); + histogram!( + parent: &JAEGER_SERVICE_METRICS.request_duration_seconds, + "operation" => operation_name, + "index" => OTEL_TRACES_INDEX_ID, + "error" => "false", + ) + .record(elapsed); }); Ok(ReceiverStream::new(rx)) } } pub(crate) fn record_error(operation_name: &'static str, request_start: Instant) { - JAEGER_SERVICE_METRICS - .request_errors_total - .with_label_values([operation_name, OTEL_TRACES_INDEX_ID]) - .inc(); + counter!( + parent: &JAEGER_SERVICE_METRICS.request_errors_total, + "operation" => operation_name, + "index" => OTEL_TRACES_INDEX_ID, + ) + .increment(1); let elapsed = request_start.elapsed().as_secs_f64(); - JAEGER_SERVICE_METRICS - .request_duration_seconds - .with_label_values([operation_name, OTEL_TRACES_INDEX_ID, "true"]) - .observe(elapsed); + histogram!( + parent: &JAEGER_SERVICE_METRICS.request_duration_seconds, + "operation" => operation_name, + "index" => OTEL_TRACES_INDEX_ID, + "error" => "true", + ) + .record(elapsed); } pub(crate) fn record_send(operation_name: &'static str, num_spans: usize, num_bytes: usize) { - JAEGER_SERVICE_METRICS - .fetched_spans_total - .with_label_values([operation_name, OTEL_TRACES_INDEX_ID]) - .inc_by(num_spans as u64); - JAEGER_SERVICE_METRICS - .transferred_bytes_total - .with_label_values([operation_name, OTEL_TRACES_INDEX_ID]) - .inc_by(num_bytes as u64); + counter!( + parent: &JAEGER_SERVICE_METRICS.fetched_spans_total, + "operation" => operation_name, + "index" => OTEL_TRACES_INDEX_ID, + ) + .increment(num_spans as u64); + counter!( + parent: &JAEGER_SERVICE_METRICS.transferred_bytes_total, + 
"operation" => operation_name, + "index" => OTEL_TRACES_INDEX_ID, + ) + .increment(num_bytes as u64); } #[allow(deprecated)] diff --git a/quickwit/quickwit-jaeger/src/metrics.rs b/quickwit/quickwit-jaeger/src/metrics.rs index 3095b68b59f..0761d4ab018 100644 --- a/quickwit/quickwit-jaeger/src/metrics.rs +++ b/quickwit/quickwit-jaeger/src/metrics.rs @@ -14,65 +14,75 @@ use std::sync::LazyLock; -use quickwit_common::metrics::{ - HistogramVec, IntCounterVec, exponential_buckets, new_counter_vec, new_histogram_vec, -}; +use quickwit_common::metrics::{Counter, Histogram, counter, exponential_buckets, histogram}; pub struct JaegerServiceMetrics { - pub requests_total: IntCounterVec<2>, - pub request_errors_total: IntCounterVec<2>, - pub request_duration_seconds: HistogramVec<3>, - pub fetched_traces_total: IntCounterVec<2>, - pub fetched_spans_total: IntCounterVec<2>, - pub transferred_bytes_total: IntCounterVec<2>, + pub requests_total: Counter, + pub request_errors_total: Counter, + pub request_duration_seconds: Histogram, + pub fetched_traces_total: Counter, + pub fetched_spans_total: Counter, + pub transferred_bytes_total: Counter, } +static REQUESTS_TOTAL: LazyLock = LazyLock::new(|| { + counter!( + name: "requests_total", + description: "Number of requests", + subsystem: "jaeger", + ) +}); + +static REQUEST_ERRORS_TOTAL: LazyLock = LazyLock::new(|| { + counter!( + name: "request_errors_total", + description: "Number of failed requests", + subsystem: "jaeger", + ) +}); + +static REQUEST_DURATION_SECONDS: LazyLock = LazyLock::new(|| { + histogram!( + name: "request_duration_seconds", + description: "Duration of requests", + subsystem: "jaeger", + buckets: exponential_buckets(0.02, 2.0, 8).unwrap(), + ) +}); + +static FETCHED_TRACES_TOTAL: LazyLock = LazyLock::new(|| { + counter!( + name: "fetched_traces_total", + description: "Number of traces retrieved from storage", + subsystem: "jaeger", + ) +}); + +static FETCHED_SPANS_TOTAL: LazyLock = LazyLock::new(|| { + 
counter!( + name: "fetched_spans_total", + description: "Number of spans retrieved from storage", + subsystem: "jaeger", + ) +}); + +static TRANSFERRED_BYTES_TOTAL: LazyLock = LazyLock::new(|| { + counter!( + name: "transferred_bytes_total", + description: "Number of bytes transferred", + subsystem: "jaeger", + ) +}); + impl Default for JaegerServiceMetrics { fn default() -> Self { Self { - requests_total: new_counter_vec( - "requests_total", - "Number of requests", - "jaeger", - &[], - ["operation", "index"], - ), - request_errors_total: new_counter_vec( - "request_errors_total", - "Number of failed requests", - "jaeger", - &[], - ["operation", "index"], - ), - request_duration_seconds: new_histogram_vec( - "request_duration_seconds", - "Duration of requests", - "jaeger", - &[], - ["operation", "index", "error"], - exponential_buckets(0.02, 2.0, 8).unwrap(), - ), - fetched_traces_total: new_counter_vec( - "fetched_traces_total", - "Number of traces retrieved from storage", - "jaeger", - &[], - ["operation", "index"], - ), - fetched_spans_total: new_counter_vec( - "fetched_spans_total", - "Number of spans retrieved from storage", - "jaeger", - &[], - ["operation", "index"], - ), - transferred_bytes_total: new_counter_vec( - "transferred_bytes_total", - "Number of bytes transferred", - "jaeger", - &[], - ["operation", "index"], - ), + requests_total: REQUESTS_TOTAL.clone(), + request_errors_total: REQUEST_ERRORS_TOTAL.clone(), + request_duration_seconds: REQUEST_DURATION_SECONDS.clone(), + fetched_traces_total: FETCHED_TRACES_TOTAL.clone(), + fetched_spans_total: FETCHED_SPANS_TOTAL.clone(), + transferred_bytes_total: TRANSFERRED_BYTES_TOTAL.clone(), } } } diff --git a/quickwit/quickwit-jaeger/src/v1.rs b/quickwit/quickwit-jaeger/src/v1.rs index 11d6935db4e..9634d58857e 100644 --- a/quickwit/quickwit-jaeger/src/v1.rs +++ b/quickwit/quickwit-jaeger/src/v1.rs @@ -17,6 +17,7 @@ use std::time::Instant; use async_trait::async_trait; +use 
quickwit_common::metrics::{counter, histogram}; use quickwit_opentelemetry::otlp::{ OTEL_TRACES_INDEX_ID, extract_otel_traces_index_id_patterns_from_metadata, }; @@ -31,22 +32,38 @@ use crate::metrics::JAEGER_SERVICE_METRICS; use crate::{JaegerService, SpanStream}; macro_rules! metrics { - ($expr:expr, [$operation:ident, $($label:expr),*]) => { + ($expr:expr, [$operation:ident, $index:expr]) => { let start = std::time::Instant::now(); - let labels = [stringify!($operation), $($label,)*]; - JAEGER_SERVICE_METRICS.requests_total.with_label_values(labels).inc(); + let operation = stringify!($operation); + let index = $index; + counter!( + parent: &JAEGER_SERVICE_METRICS.requests_total, + "operation" => operation, + "index" => index, + ) + .increment(1); let (res, is_error) = match $expr { ok @ Ok(_) => { (ok, "false") }, err @ Err(_) => { - JAEGER_SERVICE_METRICS.request_errors_total.with_label_values(labels).inc(); + counter!( + parent: &JAEGER_SERVICE_METRICS.request_errors_total, + "operation" => operation, + "index" => index, + ) + .increment(1); (err, "true") }, }; let elapsed = start.elapsed().as_secs_f64(); - let labels = [stringify!($operation), $($label,)* is_error]; - JAEGER_SERVICE_METRICS.request_duration_seconds.with_label_values(labels).observe(elapsed); + histogram!( + parent: &JAEGER_SERVICE_METRICS.request_duration_seconds, + "operation" => operation, + "index" => index, + "error" => is_error, + ) + .record(elapsed); return res.map(Response::new); }; diff --git a/quickwit/quickwit-jaeger/src/v2.rs b/quickwit/quickwit-jaeger/src/v2.rs index e355c18a8c3..1a9af33a702 100644 --- a/quickwit/quickwit-jaeger/src/v2.rs +++ b/quickwit/quickwit-jaeger/src/v2.rs @@ -19,6 +19,7 @@ use std::time::Instant; use async_trait::async_trait; use prost_types::Timestamp as WellKnownTimestamp; +use quickwit_common::metrics::{counter, histogram}; use quickwit_opentelemetry::otlp::{ OTEL_TRACES_INDEX_ID, Span as QwSpan, TraceId, 
extract_otel_traces_index_id_patterns_from_metadata, @@ -57,22 +58,38 @@ use crate::{ }; macro_rules! metrics { - ($expr:expr, [$operation:ident, $($label:expr),*]) => { + ($expr:expr, [$operation:ident, $index:expr]) => { let start = std::time::Instant::now(); - let labels = [stringify!($operation), $($label,)*]; - JAEGER_SERVICE_METRICS.requests_total.with_label_values(labels).inc(); + let operation = stringify!($operation); + let index = $index; + counter!( + parent: &JAEGER_SERVICE_METRICS.requests_total, + "operation" => operation, + "index" => index, + ) + .increment(1); let (res, is_error) = match $expr { ok @ Ok(_) => { (ok, "false") }, err @ Err(_) => { - JAEGER_SERVICE_METRICS.request_errors_total.with_label_values(labels).inc(); + counter!( + parent: &JAEGER_SERVICE_METRICS.request_errors_total, + "operation" => operation, + "index" => index, + ) + .increment(1); (err, "true") }, }; let elapsed = start.elapsed().as_secs_f64(); - let labels = [stringify!($operation), $($label,)* is_error]; - JAEGER_SERVICE_METRICS.request_duration_seconds.with_label_values(labels).observe(elapsed); + histogram!( + parent: &JAEGER_SERVICE_METRICS.request_duration_seconds, + "operation" => operation, + "index" => index, + "error" => is_error, + ) + .record(elapsed); return res.map(Response::new); }; @@ -426,16 +443,21 @@ async fn stream_otel_spans_impl( record_send(operation_name, num_spans, num_bytes); - JAEGER_SERVICE_METRICS - .fetched_traces_total - .with_label_values([operation_name, OTEL_TRACES_INDEX_ID]) - .inc_by(trace_ids.len() as u64); + counter!( + parent: &JAEGER_SERVICE_METRICS.fetched_traces_total, + "operation" => operation_name, + "index" => OTEL_TRACES_INDEX_ID, + ) + .increment(trace_ids.len() as u64); let elapsed = request_start.elapsed().as_secs_f64(); - JAEGER_SERVICE_METRICS - .request_duration_seconds - .with_label_values([operation_name, OTEL_TRACES_INDEX_ID, "false"]) - .observe(elapsed); + histogram!( + parent: 
&JAEGER_SERVICE_METRICS.request_duration_seconds, + "operation" => operation_name, + "index" => OTEL_TRACES_INDEX_ID, + "error" => "false", + ) + .record(elapsed); Ok(qw_spans) } diff --git a/quickwit/quickwit-janitor/src/actors/delete_task_planner.rs b/quickwit/quickwit-janitor/src/actors/delete_task_planner.rs index 5e08b7773e6..1372b6c7eca 100644 --- a/quickwit/quickwit-janitor/src/actors/delete_task_planner.rs +++ b/quickwit/quickwit-janitor/src/actors/delete_task_planner.rs @@ -21,6 +21,7 @@ use async_trait::async_trait; use itertools::Itertools; use quickwit_actors::{Actor, ActorContext, ActorExitStatus, Handler, Mailbox, QueueCapacity}; use quickwit_common::extract_time_range; +use quickwit_common::metrics::gauge; use quickwit_common::uri::Uri; use quickwit_doc_mapper::tag_pruning::extract_tags_from_query; use quickwit_indexing::actors::{MergeSchedulerService, MergeSplitDownloader, schedule_merge}; @@ -205,11 +206,13 @@ impl DeleteTaskPlanner { ) .await?; let index_label = - quickwit_common::metrics::index_label(self.index_uid.index_id.as_str()); - JANITOR_METRICS - .ongoing_num_delete_operations_total - .with_label_values([index_label]) - .set(self.ongoing_delete_operations_inventory.list().len() as i64); + quickwit_common::metrics::index_label(self.index_uid.index_id.as_str()) + .to_string(); + gauge!( + parent: &JANITOR_METRICS.ongoing_num_delete_operations_total, + "index" => index_label, + ) + .set(self.ongoing_delete_operations_inventory.list().len() as f64); } } diff --git a/quickwit/quickwit-janitor/src/actors/garbage_collector.rs b/quickwit/quickwit-janitor/src/actors/garbage_collector.rs index 21411bb0192..1a5fedbb51f 100644 --- a/quickwit/quickwit-janitor/src/actors/garbage_collector.rs +++ b/quickwit/quickwit-janitor/src/actors/garbage_collector.rs @@ -20,6 +20,7 @@ use async_trait::async_trait; use futures::{StreamExt, stream}; use quickwit_actors::{Actor, ActorContext, Handler}; use quickwit_common::is_parquet_pipeline_index; +use 
quickwit_common::metrics::counter; use quickwit_common::shared_consts::split_deletion_grace_period; use quickwit_index_management::{GcMetrics, run_garbage_collect, run_parquet_garbage_collect}; use quickwit_metastore::ListIndexesMetadataResponseExt; @@ -55,19 +56,22 @@ impl GcRunResult { } fn gc_metrics(split_type: &str) -> GcMetrics { + let split_type = split_type.to_string(); GcMetrics { - deleted_splits: JANITOR_METRICS - .gc_deleted_splits - .with_label_values(["success", split_type]) - .clone(), - deleted_bytes: JANITOR_METRICS - .gc_deleted_bytes - .with_label_values([split_type]) - .clone(), - failed_splits: JANITOR_METRICS - .gc_deleted_splits - .with_label_values(["error", split_type]) - .clone(), + deleted_splits: counter!( + parent: &JANITOR_METRICS.gc_deleted_splits, + "result" => "success", + "split_type" => split_type.clone(), + ), + deleted_bytes: counter!( + parent: &JANITOR_METRICS.gc_deleted_bytes, + "split_type" => split_type.clone(), + ), + failed_splits: counter!( + parent: &JANITOR_METRICS.gc_deleted_splits, + "result" => "error", + "split_type" => split_type, + ), } } @@ -202,18 +206,21 @@ impl GarbageCollector { .await; let tantivy_run_duration = tantivy_start.elapsed().as_secs(); - JANITOR_METRICS - .gc_seconds_total - .with_label_values(["tantivy"]) - .inc_by(tantivy_run_duration); + counter!( + parent: &JANITOR_METRICS.gc_seconds_total, + "split_type" => "tantivy", + ) + .increment(tantivy_run_duration); let result = match gc_res { Ok(removal_info) => { self.counters.num_successful_gc_run += 1; - JANITOR_METRICS - .gc_runs - .with_label_values(["success", "tantivy"]) - .inc(); + counter!( + parent: &JANITOR_METRICS.gc_runs, + "result" => "success", + "split_type" => "tantivy", + ) + .increment(1); GcRunResult { num_deleted_splits: removal_info.removed_split_entries.len(), num_deleted_bytes: removal_info @@ -232,10 +239,12 @@ impl GarbageCollector { } Err(error) => { self.counters.num_failed_gc_run += 1; - JANITOR_METRICS - .gc_runs - 
.with_label_values(["error", "tantivy"]) - .inc(); + counter!( + parent: &JANITOR_METRICS.gc_runs, + "result" => "error", + "split_type" => "tantivy", + ) + .increment(1); error!(error=?error, "failed to run garbage collection"); GcRunResult::failed() } @@ -258,18 +267,21 @@ impl GarbageCollector { .await; let parquet_run_duration = parquet_start.elapsed().as_secs(); - JANITOR_METRICS - .gc_seconds_total - .with_label_values(["parquet"]) - .inc_by(parquet_run_duration); + counter!( + parent: &JANITOR_METRICS.gc_seconds_total, + "split_type" => "parquet", + ) + .increment(parquet_run_duration); let result = match gc_res { Ok(removal_info) => { self.counters.num_successful_gc_run += 1; - JANITOR_METRICS - .gc_runs - .with_label_values(["success", "parquet"]) - .inc(); + counter!( + parent: &JANITOR_METRICS.gc_runs, + "result" => "success", + "split_type" => "parquet", + ) + .increment(1); GcRunResult { num_deleted_splits: removal_info.removed_split_count(), num_deleted_bytes: removal_info.removed_bytes() as usize, @@ -284,10 +296,12 @@ impl GarbageCollector { } Err(error) => { self.counters.num_failed_gc_run += 1; - JANITOR_METRICS - .gc_runs - .with_label_values(["error", "parquet"]) - .inc(); + counter!( + parent: &JANITOR_METRICS.gc_runs, + "result" => "error", + "split_type" => "parquet", + ) + .increment(1); error!(error=?error, "failed to run parquet garbage collection"); GcRunResult::failed() } diff --git a/quickwit/quickwit-janitor/src/metrics.rs b/quickwit/quickwit-janitor/src/metrics.rs index aeea26c2674..55327747868 100644 --- a/quickwit/quickwit-janitor/src/metrics.rs +++ b/quickwit/quickwit-janitor/src/metrics.rs @@ -14,54 +14,64 @@ use std::sync::LazyLock; -use quickwit_common::metrics::{IntCounterVec, IntGaugeVec, new_counter_vec, new_gauge_vec}; +use quickwit_common::metrics::{Counter, Gauge, counter, gauge}; pub struct JanitorMetrics { - pub ongoing_num_delete_operations_total: IntGaugeVec<1>, - pub gc_deleted_splits: IntCounterVec<2>, - pub 
gc_deleted_bytes: IntCounterVec<1>, - pub gc_runs: IntCounterVec<2>, - pub gc_seconds_total: IntCounterVec<1>, + pub ongoing_num_delete_operations_total: Gauge, + pub gc_deleted_splits: Counter, + pub gc_deleted_bytes: Counter, + pub gc_runs: Counter, + pub gc_seconds_total: Counter, } +static ONGOING_NUM_DELETE_OPERATIONS_TOTAL: LazyLock = LazyLock::new(|| { + gauge!( + name: "ongoing_num_delete_operations_total", + description: "Num of ongoing delete operations (per index).", + subsystem: "quickwit_janitor", + ) +}); + +static GC_DELETED_SPLITS: LazyLock = LazyLock::new(|| { + counter!( + name: "gc_deleted_splits_total", + description: "Total number of splits deleted by the garbage collector.", + subsystem: "quickwit_janitor", + ) +}); + +static GC_DELETED_BYTES: LazyLock = LazyLock::new(|| { + counter!( + name: "gc_deleted_bytes_total", + description: "Total number of bytes deleted by the garbage collector.", + subsystem: "quickwit_janitor", + ) +}); + +static GC_RUNS: LazyLock = LazyLock::new(|| { + counter!( + name: "gc_runs_total", + description: "Total number of garbage collector executions.", + subsystem: "quickwit_janitor", + ) +}); + +static GC_SECONDS_TOTAL: LazyLock = LazyLock::new(|| { + counter!( + name: "gc_seconds_total", + description: "Total time spent running the garbage collector", + subsystem: "quickwit_janitor", + ) +}); + impl Default for JanitorMetrics { fn default() -> Self { JanitorMetrics { - ongoing_num_delete_operations_total: new_gauge_vec( - "ongoing_num_delete_operations_total", - "Num of ongoing delete operations (per index).", - "quickwit_janitor", - &[], - ["index"], - ), - gc_deleted_splits: new_counter_vec( - "gc_deleted_splits_total", - "Total number of splits deleted by the garbage collector.", - "quickwit_janitor", - &[], - ["result", "split_type"], - ), - gc_deleted_bytes: new_counter_vec( - "gc_deleted_bytes_total", - "Total number of bytes deleted by the garbage collector.", - "quickwit_janitor", - &[], - ["split_type"], -
), - gc_runs: new_counter_vec( - "gc_runs_total", - "Total number of garbage collector execition.", - "quickwit_janitor", - &[], - ["result", "split_type"], - ), - gc_seconds_total: new_counter_vec( - "gc_seconds_total", - "Total time spent running the garbage collector", - "quickwit_janitor", - &[], - ["split_type"], - ), + ongoing_num_delete_operations_total: ONGOING_NUM_DELETE_OPERATIONS_TOTAL.clone(), + gc_deleted_splits: GC_DELETED_SPLITS.clone(), + gc_deleted_bytes: GC_DELETED_BYTES.clone(), + gc_runs: GC_RUNS.clone(), + gc_seconds_total: GC_SECONDS_TOTAL.clone(), } } } diff --git a/quickwit/quickwit-lambda-client/src/invoker.rs b/quickwit/quickwit-lambda-client/src/invoker.rs index c8ffa0716a0..42cae2811f0 100644 --- a/quickwit/quickwit-lambda-client/src/invoker.rs +++ b/quickwit/quickwit-lambda-client/src/invoker.rs @@ -23,6 +23,7 @@ use aws_sdk_lambda::primitives::Blob; use aws_sdk_lambda::types::InvocationType; use base64::prelude::*; use prost::Message; +use quickwit_common::metrics::{counter, histogram}; use quickwit_common::retry::RetryParams; use quickwit_lambda_server::{LambdaSearchRequestPayload, LambdaSearchResponsePayload}; use quickwit_proto::search::{LambdaSearchResponses, LambdaSingleSplitResult, LeafSearchRequest}; @@ -171,14 +172,16 @@ impl LambdaLeafSearchInvoker for AwsLambdaInvoker { let result = self.invoke_leaf_search_with_retry(request).await; let elapsed = start.elapsed().as_secs_f64(); let status = if result.is_ok() { "success" } else { "error" }; - LAMBDA_METRICS - .leaf_search_requests_total - .with_label_values([status]) - .inc(); - LAMBDA_METRICS - .leaf_search_duration_seconds - .with_label_values([status]) - .observe(elapsed); + counter!( + parent: &LAMBDA_METRICS.leaf_search_requests_total, + "status" => status, + ) + .increment(1); + histogram!( + parent: &LAMBDA_METRICS.leaf_search_duration_seconds, + "status" => status, + ) + .record(elapsed); result } } @@ -234,7 +237,7 @@ impl AwsLambdaInvoker { LAMBDA_METRICS 
.leaf_search_request_payload_size_bytes - .observe(payload_json.len() as f64); + .record(payload_json.len() as f64); debug!( payload_size = payload_json.len(), @@ -276,7 +279,7 @@ impl AwsLambdaInvoker { LAMBDA_METRICS .leaf_search_response_payload_size_bytes - .observe(response_payload.as_ref().len() as f64); + .record(response_payload.as_ref().len() as f64); let lambda_response: LambdaSearchResponsePayload = serde_json::from_slice(response_payload.as_ref()) diff --git a/quickwit/quickwit-lambda-client/src/metrics.rs b/quickwit/quickwit-lambda-client/src/metrics.rs index f136e4249c1..300c4a560b6 100644 --- a/quickwit/quickwit-lambda-client/src/metrics.rs +++ b/quickwit/quickwit-lambda-client/src/metrics.rs @@ -16,10 +16,7 @@ use std::sync::LazyLock; -use quickwit_common::metrics::{ - Histogram, HistogramVec, IntCounterVec, exponential_buckets, new_counter_vec, new_histogram, - new_histogram_vec, -}; +use quickwit_common::metrics::{Counter, Histogram, counter, exponential_buckets, histogram}; /// From 100ms to 73s seconds fn duration_buckets() -> Vec { @@ -32,42 +29,55 @@ fn payload_size_buckets() -> Vec { } pub struct LambdaMetrics { - pub leaf_search_requests_total: IntCounterVec<1>, - pub leaf_search_duration_seconds: HistogramVec<1>, + pub leaf_search_requests_total: Counter, + pub leaf_search_duration_seconds: Histogram, pub leaf_search_request_payload_size_bytes: Histogram, pub leaf_search_response_payload_size_bytes: Histogram, } +static LEAF_SEARCH_REQUESTS_TOTAL: LazyLock = LazyLock::new(|| { + counter!( + name: "leaf_search_requests_total", + description: "Total number of Lambda leaf search invocations.", + subsystem: "lambda", + ) +}); + +static LEAF_SEARCH_DURATION_SECONDS: LazyLock = LazyLock::new(|| { + histogram!( + name: "leaf_search_duration_seconds", + description: "Duration of Lambda leaf search invocations in seconds.", + subsystem: "lambda", + buckets: duration_buckets(), + ) +}); + +static LEAF_SEARCH_REQUEST_PAYLOAD_SIZE_BYTES: LazyLock = 
LazyLock::new(|| { + histogram!( + name: "leaf_search_request_payload_size_bytes", + description: "Size of the request payload sent to Lambda in bytes.", + subsystem: "lambda", + buckets: payload_size_buckets(), + ) +}); + +static LEAF_SEARCH_RESPONSE_PAYLOAD_SIZE_BYTES: LazyLock = LazyLock::new(|| { + histogram!( + name: "leaf_search_response_payload_size_bytes", + description: "Size of the response payload received from Lambda in bytes.", + subsystem: "lambda", + buckets: payload_size_buckets(), + ) +}); + impl Default for LambdaMetrics { fn default() -> Self { LambdaMetrics { - leaf_search_requests_total: new_counter_vec( - "leaf_search_requests_total", - "Total number of Lambda leaf search invocations.", - "lambda", - &[], - ["status"], - ), - leaf_search_duration_seconds: new_histogram_vec( - "leaf_search_duration_seconds", - "Duration of Lambda leaf search invocations in seconds.", - "lambda", - &[], - ["status"], - duration_buckets(), - ), - leaf_search_request_payload_size_bytes: new_histogram( - "leaf_search_request_payload_size_bytes", - "Size of the request payload sent to Lambda in bytes.", - "lambda", - payload_size_buckets(), - ), - leaf_search_response_payload_size_bytes: new_histogram( - "leaf_search_response_payload_size_bytes", - "Size of the response payload received from Lambda in bytes.", - "lambda", - payload_size_buckets(), - ), + leaf_search_requests_total: LEAF_SEARCH_REQUESTS_TOTAL.clone(), + leaf_search_duration_seconds: LEAF_SEARCH_DURATION_SECONDS.clone(), + leaf_search_request_payload_size_bytes: LEAF_SEARCH_REQUEST_PAYLOAD_SIZE_BYTES.clone(), + leaf_search_response_payload_size_bytes: LEAF_SEARCH_RESPONSE_PAYLOAD_SIZE_BYTES + .clone(), } } } diff --git a/quickwit/quickwit-metastore/src/metastore/postgres/metrics.rs b/quickwit/quickwit-metastore/src/metastore/postgres/metrics.rs index 59cea1db805..7e540b6cbcf 100644 --- a/quickwit/quickwit-metastore/src/metastore/postgres/metrics.rs +++ 
b/quickwit/quickwit-metastore/src/metastore/postgres/metrics.rs @@ -14,36 +14,45 @@ use std::sync::LazyLock; -use quickwit_common::metrics::{IntGauge, new_gauge}; +use quickwit_common::metrics::{Gauge, gauge}; #[derive(Clone)] pub(super) struct PostgresMetrics { - pub acquire_connections: IntGauge, - pub active_connections: IntGauge, - pub idle_connections: IntGauge, + pub acquire_connections: Gauge, + pub active_connections: Gauge, + pub idle_connections: Gauge, } +static ACQUIRE_CONNECTIONS: LazyLock = LazyLock::new(|| { + gauge!( + name: "acquire_connections", + description: "Number of connections being acquired.", + subsystem: "metastore", + ) +}); + +static ACTIVE_CONNECTIONS: LazyLock = LazyLock::new(|| { + gauge!( + name: "active_connections", + description: "Number of active (used + idle) connections.", + subsystem: "metastore", + ) +}); + +static IDLE_CONNECTIONS: LazyLock = LazyLock::new(|| { + gauge!( + name: "idle_connections", + description: "Number of idle connections.", + subsystem: "metastore", + ) +}); + impl Default for PostgresMetrics { fn default() -> Self { Self { - acquire_connections: new_gauge( - "acquire_connections", - "Number of connections being acquired.", - "metastore", - &[], - ), - active_connections: new_gauge( - "active_connections", - "Number of active (used + idle) connections.", - "metastore", - &[], - ), - idle_connections: new_gauge( - "idle_connections", - "Number of idle connections.", - "metastore", - &[], - ), + acquire_connections: ACQUIRE_CONNECTIONS.clone(), + active_connections: ACTIVE_CONNECTIONS.clone(), + idle_connections: IDLE_CONNECTIONS.clone(), } } } diff --git a/quickwit/quickwit-metastore/src/metastore/postgres/pool.rs b/quickwit/quickwit-metastore/src/metastore/postgres/pool.rs index a4c1e790e5b..9142c3cf87c 100644 --- a/quickwit/quickwit-metastore/src/metastore/postgres/pool.rs +++ b/quickwit/quickwit-metastore/src/metastore/postgres/pool.rs @@ -52,10 +52,10 @@ impl<'a, DB: Database> Acquire<'a> for 
&TrackedPool { POSTGRES_METRICS .active_connections - .set(self.inner_pool.size() as i64); + .set(self.inner_pool.size() as f64); POSTGRES_METRICS .idle_connections - .set(self.inner_pool.num_idle() as i64); + .set(self.inner_pool.num_idle() as f64); Box::pin(async move { let mut gauge_guard = GaugeGuard::from_gauge(&POSTGRES_METRICS.acquire_connections); diff --git a/quickwit/quickwit-opentelemetry/src/otlp/logs.rs b/quickwit/quickwit-opentelemetry/src/otlp/logs.rs index 03e6f238e66..22b2eb747e8 100644 --- a/quickwit/quickwit-opentelemetry/src/otlp/logs.rs +++ b/quickwit/quickwit-opentelemetry/src/otlp/logs.rs @@ -16,6 +16,7 @@ use std::collections::HashMap; use async_trait::async_trait; use prost::Message; +use quickwit_common::metrics::{counter, histogram}; use quickwit_common::thread_pool::run_cpu_intensive; use quickwit_common::uri::Uri; use quickwit_config::{ConfigFormat, IndexConfig, load_index_config_from_user_config}; @@ -218,7 +219,6 @@ impl OtlpGrpcLogsService { &mut self, request: ExportLogsServiceRequest, index_id: IndexId, - labels: [&str; 4], ) -> Result { let ParsedLogRecords { doc_batch, @@ -238,16 +238,24 @@ impl OtlpGrpcLogsService { return Err(tonic::Status::internal(error_message)); } let num_bytes = doc_batch.num_bytes() as u64; - self.store_logs(index_id, doc_batch).await?; - - OTLP_SERVICE_METRICS - .ingested_log_records_total - .with_label_values(labels) - .inc_by(num_log_records); - OTLP_SERVICE_METRICS - .ingested_bytes_total - .with_label_values(labels) - .inc_by(num_bytes); + self.store_logs(index_id.clone(), doc_batch).await?; + + counter!( + parent: &OTLP_SERVICE_METRICS.ingested_log_records_total, + "service" => "logs", + "index" => index_id.clone(), + "transport" => "grpc", + "format" => "protobuf", + ) + .increment(num_log_records); + counter!( + parent: &OTLP_SERVICE_METRICS.ingested_bytes_total, + "service" => "logs", + "index" => index_id, + "transport" => "grpc", + "format" => "protobuf", + ) + .increment(num_bytes); let 
response = ExportLogsServiceResponse { // `rejected_log_records=0` and `error_message=""` is consided a "full" success. @@ -318,29 +326,38 @@ impl OtlpGrpcLogsService { ) -> Result { let start = std::time::Instant::now(); - let labels = ["logs", &index_id, "grpc", "protobuf"]; - - OTLP_SERVICE_METRICS - .requests_total - .with_label_values(labels) - .inc(); - let (export_res, is_error) = - match self.export_inner(request, index_id.clone(), labels).await { - ok @ Ok(_) => (ok, "false"), - err @ Err(_) => { - OTLP_SERVICE_METRICS - .request_errors_total - .with_label_values(labels) - .inc(); - (err, "true") - } - }; + counter!( + parent: &OTLP_SERVICE_METRICS.requests_total, + "service" => "logs", + "index" => index_id.clone(), + "transport" => "grpc", + "format" => "protobuf", + ) + .increment(1); + let (export_res, is_error) = match self.export_inner(request, index_id.clone()).await { + ok @ Ok(_) => (ok, "false"), + err @ Err(_) => { + counter!( + parent: &OTLP_SERVICE_METRICS.request_errors_total, + "service" => "logs", + "index" => index_id.clone(), + "transport" => "grpc", + "format" => "protobuf", + ) + .increment(1); + (err, "true") + } + }; let elapsed = start.elapsed().as_secs_f64(); - let labels = ["logs", &index_id, "grpc", "protobuf", is_error]; - OTLP_SERVICE_METRICS - .request_duration_seconds - .with_label_values(labels) - .observe(elapsed); + histogram!( + parent: &OTLP_SERVICE_METRICS.request_duration_seconds, + "service" => "logs", + "index" => index_id, + "transport" => "grpc", + "format" => "protobuf", + "error" => is_error, + ) + .record(elapsed); export_res } diff --git a/quickwit/quickwit-opentelemetry/src/otlp/metrics.rs b/quickwit/quickwit-opentelemetry/src/otlp/metrics.rs index 614630bfb68..a25aed52b7a 100644 --- a/quickwit/quickwit-opentelemetry/src/otlp/metrics.rs +++ b/quickwit/quickwit-opentelemetry/src/otlp/metrics.rs @@ -14,73 +14,85 @@ use std::sync::LazyLock; -use quickwit_common::metrics::{ - HistogramVec, IntCounterVec, 
exponential_buckets, new_counter_vec, new_histogram_vec, -}; +use quickwit_common::metrics::{Counter, Histogram, counter, exponential_buckets, histogram}; pub struct OtlpServiceMetrics { - pub requests_total: IntCounterVec<4>, - pub request_errors_total: IntCounterVec<4>, - pub request_duration_seconds: HistogramVec<5>, - pub ingested_log_records_total: IntCounterVec<4>, - pub ingested_spans_total: IntCounterVec<4>, - pub ingested_data_points_total: IntCounterVec<4>, - pub ingested_bytes_total: IntCounterVec<4>, + pub requests_total: Counter, + pub request_errors_total: Counter, + pub request_duration_seconds: Histogram, + pub ingested_log_records_total: Counter, + pub ingested_spans_total: Counter, + pub ingested_data_points_total: Counter, + pub ingested_bytes_total: Counter, } +static REQUESTS_TOTAL: LazyLock = LazyLock::new(|| { + counter!( + name: "requests_total", + description: "Number of requests", + subsystem: "otlp", + ) +}); + +static REQUEST_ERRORS_TOTAL: LazyLock = LazyLock::new(|| { + counter!( + name: "request_errors_total", + description: "Number of failed requests", + subsystem: "otlp", + ) +}); + +static REQUEST_DURATION_SECONDS: LazyLock = LazyLock::new(|| { + histogram!( + name: "request_duration_seconds", + description: "Duration of requests", + subsystem: "otlp", + buckets: exponential_buckets(0.02, 2.0, 8).unwrap(), + ) +}); + +static INGESTED_LOG_RECORDS_TOTAL: LazyLock = LazyLock::new(|| { + counter!( + name: "ingested_log_records_total", + description: "Number of log records ingested", + subsystem: "otlp", + ) +}); + +static INGESTED_SPANS_TOTAL: LazyLock = LazyLock::new(|| { + counter!( + name: "ingested_spans_total", + description: "Number of spans ingested", + subsystem: "otlp", + ) +}); + +static INGESTED_DATA_POINTS_TOTAL: LazyLock = LazyLock::new(|| { + counter!( + name: "ingested_data_points_total", + description: "Number of metric data points ingested", + subsystem: "otlp", + ) +}); + +static INGESTED_BYTES_TOTAL: LazyLock = 
LazyLock::new(|| { + counter!( + name: "ingested_bytes_total", + description: "Number of bytes ingested", + subsystem: "otlp", + ) +}); + impl Default for OtlpServiceMetrics { fn default() -> Self { Self { - requests_total: new_counter_vec( - "requests_total", - "Number of requests", - "otlp", - &[], - ["service", "index", "transport", "format"], - ), - request_errors_total: new_counter_vec( - "request_errors_total", - "Number of failed requests", - "otlp", - &[], - ["service", "index", "transport", "format"], - ), - request_duration_seconds: new_histogram_vec( - "request_duration_seconds", - "Duration of requests", - "otlp", - &[], - ["service", "index", "transport", "format", "error"], - exponential_buckets(0.02, 2.0, 8).unwrap(), - ), - ingested_log_records_total: new_counter_vec( - "ingested_log_records_total", - "Number of log records ingested", - "otlp", - &[], - ["service", "index", "transport", "format"], - ), - ingested_spans_total: new_counter_vec( - "ingested_spans_total", - "Number of spans ingested", - "otlp", - &[], - ["service", "index", "transport", "format"], - ), - ingested_data_points_total: new_counter_vec( - "ingested_data_points_total", - "Number of metric data points ingested", - "otlp", - &[], - ["service", "index", "transport", "format"], - ), - ingested_bytes_total: new_counter_vec( - "ingested_bytes_total", - "Number of bytes ingested", - "otlp", - &[], - ["service", "index", "transport", "format"], - ), + requests_total: REQUESTS_TOTAL.clone(), + request_errors_total: REQUEST_ERRORS_TOTAL.clone(), + request_duration_seconds: REQUEST_DURATION_SECONDS.clone(), + ingested_log_records_total: INGESTED_LOG_RECORDS_TOTAL.clone(), + ingested_spans_total: INGESTED_SPANS_TOTAL.clone(), + ingested_data_points_total: INGESTED_DATA_POINTS_TOTAL.clone(), + ingested_bytes_total: INGESTED_BYTES_TOTAL.clone(), } } } diff --git a/quickwit/quickwit-opentelemetry/src/otlp/otel_metrics.rs b/quickwit/quickwit-opentelemetry/src/otlp/otel_metrics.rs index 
6ede3ad0edf..9a0945fb8b0 100644 --- a/quickwit/quickwit-opentelemetry/src/otlp/otel_metrics.rs +++ b/quickwit/quickwit-opentelemetry/src/otlp/otel_metrics.rs @@ -15,6 +15,7 @@ use std::collections::HashMap; use async_trait::async_trait; +use quickwit_common::metrics::{counter, histogram}; use quickwit_common::thread_pool::run_cpu_intensive; use quickwit_common::uri::Uri; use quickwit_config::{ConfigFormat, IndexConfig, load_index_config_from_user_config}; @@ -204,7 +205,6 @@ impl OtlpGrpcMetricsService { &mut self, request: ExportMetricsServiceRequest, index_id: IndexId, - labels: [&str; 4], ) -> Result { let ParsedMetrics { doc_batch, @@ -234,16 +234,24 @@ impl OtlpGrpcMetricsService { } let num_bytes = doc_batch.num_bytes() as u64; - self.store_metrics(index_id, doc_batch).await?; - - OTLP_SERVICE_METRICS - .ingested_data_points_total - .with_label_values(labels) - .inc_by(num_data_points - num_parse_errors); - OTLP_SERVICE_METRICS - .ingested_bytes_total - .with_label_values(labels) - .inc_by(num_bytes); + self.store_metrics(index_id.clone(), doc_batch).await?; + + counter!( + parent: &OTLP_SERVICE_METRICS.ingested_data_points_total, + "service" => "metrics", + "index" => index_id.clone(), + "transport" => "grpc", + "format" => "protobuf", + ) + .increment(num_data_points - num_parse_errors); + counter!( + parent: &OTLP_SERVICE_METRICS.ingested_bytes_total, + "service" => "metrics", + "index" => index_id, + "transport" => "grpc", + "format" => "protobuf", + ) + .increment(num_bytes); let response = ExportMetricsServiceResponse { partial_success: Some(ExportMetricsPartialSuccess { @@ -332,31 +340,40 @@ impl OtlpGrpcMetricsService { ) -> Result { let start = std::time::Instant::now(); - let labels = ["metrics", &index_id, "grpc", "protobuf"]; - - OTLP_SERVICE_METRICS - .requests_total - .with_label_values(labels) - .inc(); - - let (export_res, is_error) = - match self.export_inner(request, index_id.clone(), labels).await { - ok @ Ok(_) => (ok, "false"), - err @ 
Err(_) => { - OTLP_SERVICE_METRICS - .request_errors_total - .with_label_values(labels) - .inc(); - (err, "true") - } - }; + counter!( + parent: &OTLP_SERVICE_METRICS.requests_total, + "service" => "metrics", + "index" => index_id.clone(), + "transport" => "grpc", + "format" => "protobuf", + ) + .increment(1); + + let (export_res, is_error) = match self.export_inner(request, index_id.clone()).await { + ok @ Ok(_) => (ok, "false"), + err @ Err(_) => { + counter!( + parent: &OTLP_SERVICE_METRICS.request_errors_total, + "service" => "metrics", + "index" => index_id.clone(), + "transport" => "grpc", + "format" => "protobuf", + ) + .increment(1); + (err, "true") + } + }; let elapsed = start.elapsed().as_secs_f64(); - let labels = ["metrics", &index_id, "grpc", "protobuf", is_error]; - OTLP_SERVICE_METRICS - .request_duration_seconds - .with_label_values(labels) - .observe(elapsed); + histogram!( + parent: &OTLP_SERVICE_METRICS.request_duration_seconds, + "service" => "metrics", + "index" => index_id, + "transport" => "grpc", + "format" => "protobuf", + "error" => is_error, + ) + .record(elapsed); export_res } diff --git a/quickwit/quickwit-opentelemetry/src/otlp/traces.rs b/quickwit/quickwit-opentelemetry/src/otlp/traces.rs index 0414ee0527f..bb0925a8465 100644 --- a/quickwit/quickwit-opentelemetry/src/otlp/traces.rs +++ b/quickwit/quickwit-opentelemetry/src/otlp/traces.rs @@ -18,6 +18,7 @@ use std::str::FromStr; use async_trait::async_trait; use prost::Message; +use quickwit_common::metrics::{counter, histogram}; use quickwit_common::thread_pool::run_cpu_intensive; use quickwit_common::uri::Uri; use quickwit_config::{ConfigFormat, IndexConfig, load_index_config_from_user_config}; @@ -677,7 +678,6 @@ impl OtlpGrpcTracesService { &mut self, request: ExportTraceServiceRequest, index_id: IndexId, - labels: [&str; 4], ) -> Result { let ParsedSpans { doc_batch, @@ -700,16 +700,24 @@ impl OtlpGrpcTracesService { return Err(tonic::Status::internal(error_message)); } let 
num_bytes = doc_batch.num_bytes() as u64; - self.store_spans(index_id, doc_batch).await?; - - OTLP_SERVICE_METRICS - .ingested_spans_total - .with_label_values(labels) - .inc_by(num_spans); - OTLP_SERVICE_METRICS - .ingested_bytes_total - .with_label_values(labels) - .inc_by(num_bytes); + self.store_spans(index_id.clone(), doc_batch).await?; + + counter!( + parent: &OTLP_SERVICE_METRICS.ingested_spans_total, + "service" => "trace", + "index" => index_id.clone(), + "transport" => "grpc", + "format" => "protobuf", + ) + .increment(num_spans); + counter!( + parent: &OTLP_SERVICE_METRICS.ingested_bytes_total, + "service" => "trace", + "index" => index_id, + "transport" => "grpc", + "format" => "protobuf", + ) + .increment(num_bytes); let response = ExportTraceServiceResponse { // `rejected_spans=0` and `error_message=""` is considered a "full" success. @@ -780,29 +788,38 @@ impl OtlpGrpcTracesService { ) -> Result { let start = std::time::Instant::now(); - let labels = ["trace", &index_id, "grpc", "protobuf"]; - - OTLP_SERVICE_METRICS - .requests_total - .with_label_values(labels) - .inc(); - let (export_res, is_error) = - match self.export_inner(request, index_id.clone(), labels).await { - ok @ Ok(_) => (ok, "false"), - err @ Err(_) => { - OTLP_SERVICE_METRICS - .request_errors_total - .with_label_values(labels) - .inc(); - (err, "true") - } - }; + counter!( + parent: &OTLP_SERVICE_METRICS.requests_total, + "service" => "trace", + "index" => index_id.clone(), + "transport" => "grpc", + "format" => "protobuf", + ) + .increment(1); + let (export_res, is_error) = match self.export_inner(request, index_id.clone()).await { + ok @ Ok(_) => (ok, "false"), + err @ Err(_) => { + counter!( + parent: &OTLP_SERVICE_METRICS.request_errors_total, + "service" => "trace", + "index" => index_id.clone(), + "transport" => "grpc", + "format" => "protobuf", + ) + .increment(1); + (err, "true") + } + }; let elapsed = start.elapsed().as_secs_f64(); - let labels = ["trace", &index_id, 
"grpc", "protobuf", is_error]; - OTLP_SERVICE_METRICS - .request_duration_seconds - .with_label_values(labels) - .observe(elapsed); + histogram!( + parent: &OTLP_SERVICE_METRICS.request_duration_seconds, + "service" => "trace", + "index" => index_id, + "transport" => "grpc", + "format" => "protobuf", + "error" => is_error, + ) + .record(elapsed); export_res } diff --git a/quickwit/quickwit-parquet-engine/src/index/accumulator.rs b/quickwit/quickwit-parquet-engine/src/index/accumulator.rs index 9a76de7caaf..fb02f5b1c4d 100644 --- a/quickwit/quickwit-parquet-engine/src/index/accumulator.rs +++ b/quickwit/quickwit-parquet-engine/src/index/accumulator.rs @@ -89,10 +89,10 @@ impl ParquetBatchAccumulator { let batch_bytes = estimate_batch_bytes(&batch); // Record index metrics - PARQUET_ENGINE_METRICS.index_batches_total.inc(); + PARQUET_ENGINE_METRICS.index_batches_total.increment(1); PARQUET_ENGINE_METRICS .index_rows_total - .inc_by(batch_rows as u64); + .increment(batch_rows as u64); // Merge fields into union schema before pushing (we need the schema reference) for field in batch.schema().fields() { @@ -129,7 +129,7 @@ impl ParquetBatchAccumulator { // Record batch processing duration PARQUET_ENGINE_METRICS .index_batch_duration_seconds - .observe(start.elapsed().as_secs_f64()); + .record(start.elapsed().as_secs_f64()); Ok(flushed) } diff --git a/quickwit/quickwit-parquet-engine/src/ingest/processor.rs b/quickwit/quickwit-parquet-engine/src/ingest/processor.rs index eff3e30c676..3edc740df2b 100644 --- a/quickwit/quickwit-parquet-engine/src/ingest/processor.rs +++ b/quickwit/quickwit-parquet-engine/src/ingest/processor.rs @@ -18,6 +18,7 @@ use std::io::Cursor; use arrow::ipc::reader::StreamReader; use arrow::record_batch::RecordBatch; +use quickwit_common::metrics::counter; use tracing::{debug, instrument, warn}; use crate::metrics::PARQUET_ENGINE_METRICS; @@ -63,27 +64,32 @@ impl ParquetIngestProcessor { #[instrument(skip(self, ipc_bytes), fields(bytes_len = 
ipc_bytes.len()))] pub fn process_ipc(&self, ipc_bytes: &[u8]) -> Result { // Record bytes ingested - PARQUET_ENGINE_METRICS - .ingest_bytes_total - .with_label_values(["points"]) - .inc_by(ipc_bytes.len() as u64); + counter!( + parent: &PARQUET_ENGINE_METRICS.ingest_bytes_total, + "kind" => "points", + ) + .increment(ipc_bytes.len() as u64); let batch = match ipc_to_record_batch(ipc_bytes) { Ok(batch) => batch, Err(e) => { - PARQUET_ENGINE_METRICS - .errors_total - .with_label_values(["ingest", "points"]) - .inc(); + counter!( + parent: &PARQUET_ENGINE_METRICS.errors_total, + "operation" => "ingest", + "kind" => "points", + ) + .increment(1); return Err(e); } }; if let Err(e) = self.validate_schema(&batch) { - PARQUET_ENGINE_METRICS - .errors_total - .with_label_values(["ingest", "points"]) - .inc(); + counter!( + parent: &PARQUET_ENGINE_METRICS.errors_total, + "operation" => "ingest", + "kind" => "points", + ) + .increment(1); return Err(e); } diff --git a/quickwit/quickwit-parquet-engine/src/ingest/sketch_processor.rs b/quickwit/quickwit-parquet-engine/src/ingest/sketch_processor.rs index ac62fb191c8..65aaf9f6bb9 100644 --- a/quickwit/quickwit-parquet-engine/src/ingest/sketch_processor.rs +++ b/quickwit/quickwit-parquet-engine/src/ingest/sketch_processor.rs @@ -16,6 +16,7 @@ use arrow::array::AsArray; use arrow::record_batch::RecordBatch; +use quickwit_common::metrics::counter; use tracing::{debug, instrument, warn}; use super::processor::IngestError; @@ -41,35 +42,42 @@ impl SketchParquetIngestProcessor { /// sketch arrays are inconsistent. 
#[instrument(skip(self, ipc_bytes), fields(bytes_len = ipc_bytes.len()))] pub fn process_ipc(&self, ipc_bytes: &[u8]) -> Result { - PARQUET_ENGINE_METRICS - .ingest_bytes_total - .with_label_values(["sketches"]) - .inc_by(ipc_bytes.len() as u64); + counter!( + parent: &PARQUET_ENGINE_METRICS.ingest_bytes_total, + "kind" => "sketches", + ) + .increment(ipc_bytes.len() as u64); let batch = match super::processor::ipc_to_record_batch(ipc_bytes) { Ok(batch) => batch, Err(err) => { - PARQUET_ENGINE_METRICS - .errors_total - .with_label_values(["ingest", "sketches"]) - .inc(); + counter!( + parent: &PARQUET_ENGINE_METRICS.errors_total, + "operation" => "ingest", + "kind" => "sketches", + ) + .increment(1); return Err(err); } }; if let Err(err) = self.validate_schema(&batch) { - PARQUET_ENGINE_METRICS - .errors_total - .with_label_values(["ingest", "sketches"]) - .inc(); + counter!( + parent: &PARQUET_ENGINE_METRICS.errors_total, + "operation" => "ingest", + "kind" => "sketches", + ) + .increment(1); return Err(err); } if let Err(err) = self.validate_sketch_arrays(&batch) { - PARQUET_ENGINE_METRICS - .errors_total - .with_label_values(["ingest", "sketches"]) - .inc(); + counter!( + parent: &PARQUET_ENGINE_METRICS.errors_total, + "operation" => "ingest", + "kind" => "sketches", + ) + .increment(1); return Err(err); } diff --git a/quickwit/quickwit-parquet-engine/src/metrics.rs b/quickwit/quickwit-parquet-engine/src/metrics.rs index 64ff447d851..8d79bbb2ca4 100644 --- a/quickwit/quickwit-parquet-engine/src/metrics.rs +++ b/quickwit/quickwit-parquet-engine/src/metrics.rs @@ -19,12 +19,7 @@ use std::sync::LazyLock; -use quickwit_common::metrics::{ - Histogram, IntCounter, IntCounterVec, new_counter, new_counter_vec, new_histogram, -}; - -/// Subsystem name for all metrics engine metrics. -const SUBSYSTEM: &str = "metrics_engine"; +use quickwit_common::metrics::{Counter, Histogram, counter, histogram}; /// Histogram buckets for duration measurements (in seconds). 
/// Covers sub-millisecond to multi-second operations. @@ -38,85 +33,112 @@ fn duration_buckets() -> Vec { #[derive(Clone)] pub struct ParquetEngineMetrics { /// Total number of batches accumulated during indexing. - pub index_batches_total: IntCounter, + pub index_batches_total: Counter, /// Total number of rows accumulated during indexing. - pub index_rows_total: IntCounter, + pub index_rows_total: Counter, /// Total number of bytes received from IPC payloads during ingestion, by kind /// (points/sketches). - pub ingest_bytes_total: IntCounterVec<1>, + pub ingest_bytes_total: Counter, /// Histogram of add_batch durations (seconds), including any triggered flush. pub index_batch_duration_seconds: Histogram, /// Total number of splits written to storage. - pub splits_written_total: IntCounter, + pub splits_written_total: Counter, /// Total bytes written to split files. - pub splits_bytes_written: IntCounter, + pub splits_bytes_written: Counter, /// Histogram of query execution durations (seconds). pub query_duration_seconds: Histogram, /// Total number of rows returned from queries. - pub query_rows_returned: IntCounter, + pub query_rows_returned: Counter, /// Errors by operation type and kind (points/sketches). 
- pub errors_total: IntCounterVec<2>, + pub errors_total: Counter, } +static INDEX_BATCHES_TOTAL: LazyLock = LazyLock::new(|| { + counter!( + name: "index_batches_total", + description: "Total number of batches accumulated during indexing.", + subsystem: "metrics_engine", + ) +}); + +static INDEX_ROWS_TOTAL: LazyLock = LazyLock::new(|| { + counter!( + name: "index_rows_total", + description: "Total number of rows accumulated during indexing.", + subsystem: "metrics_engine", + ) +}); + +static INGEST_BYTES_TOTAL: LazyLock = LazyLock::new(|| { + counter!( + name: "ingest_bytes_total", + description: "Total number of bytes received from IPC payloads during ingestion.", + subsystem: "metrics_engine", + ) +}); + +static INDEX_BATCH_DURATION_SECONDS: LazyLock = LazyLock::new(|| { + histogram!( + name: "index_batch_duration_seconds", + description: "Histogram of add_batch durations in seconds, including any triggered flush.", + subsystem: "metrics_engine", + buckets: duration_buckets(), + ) +}); + +static SPLITS_WRITTEN_TOTAL: LazyLock = LazyLock::new(|| { + counter!( + name: "splits_written_total", + description: "Total number of splits written to storage.", + subsystem: "metrics_engine", + ) +}); + +static SPLITS_BYTES_WRITTEN: LazyLock = LazyLock::new(|| { + counter!( + name: "splits_bytes_written", + description: "Total bytes written to split files.", + subsystem: "metrics_engine", + ) +}); + +static QUERY_DURATION_SECONDS: LazyLock = LazyLock::new(|| { + histogram!( + name: "query_duration_seconds", + description: "Histogram of query execution durations in seconds.", + subsystem: "metrics_engine", + buckets: duration_buckets(), + ) +}); + +static QUERY_ROWS_RETURNED: LazyLock = LazyLock::new(|| { + counter!( + name: "query_rows_returned", + description: "Total number of rows returned from queries.", + subsystem: "metrics_engine", + ) +}); + +static ERRORS_TOTAL: LazyLock = LazyLock::new(|| { + counter!( + name: "errors_total", + description: "Total errors by 
operation type and kind.", + subsystem: "metrics_engine", + ) +}); + impl Default for ParquetEngineMetrics { fn default() -> Self { Self { - index_batches_total: new_counter( - "index_batches_total", - "Total number of batches accumulated during indexing.", - SUBSYSTEM, - &[], - ), - index_rows_total: new_counter( - "index_rows_total", - "Total number of rows accumulated during indexing.", - SUBSYSTEM, - &[], - ), - ingest_bytes_total: new_counter_vec( - "ingest_bytes_total", - "Total number of bytes received from IPC payloads during ingestion.", - SUBSYSTEM, - &[], - ["kind"], - ), - index_batch_duration_seconds: new_histogram( - "index_batch_duration_seconds", - "Histogram of add_batch durations in seconds, including any triggered flush.", - SUBSYSTEM, - duration_buckets(), - ), - splits_written_total: new_counter( - "splits_written_total", - "Total number of splits written to storage.", - SUBSYSTEM, - &[], - ), - splits_bytes_written: new_counter( - "splits_bytes_written", - "Total bytes written to split files.", - SUBSYSTEM, - &[], - ), - query_duration_seconds: new_histogram( - "query_duration_seconds", - "Histogram of query execution durations in seconds.", - SUBSYSTEM, - duration_buckets(), - ), - query_rows_returned: new_counter( - "query_rows_returned", - "Total number of rows returned from queries.", - SUBSYSTEM, - &[], - ), - errors_total: new_counter_vec( - "errors_total", - "Total errors by operation type and kind.", - SUBSYSTEM, - &[], - ["operation", "kind"], - ), + index_batches_total: INDEX_BATCHES_TOTAL.clone(), + index_rows_total: INDEX_ROWS_TOTAL.clone(), + ingest_bytes_total: INGEST_BYTES_TOTAL.clone(), + index_batch_duration_seconds: INDEX_BATCH_DURATION_SECONDS.clone(), + splits_written_total: SPLITS_WRITTEN_TOTAL.clone(), + splits_bytes_written: SPLITS_BYTES_WRITTEN.clone(), + query_duration_seconds: QUERY_DURATION_SECONDS.clone(), + query_rows_returned: QUERY_ROWS_RETURNED.clone(), + errors_total: ERRORS_TOTAL.clone(), } } } diff --git 
a/quickwit/quickwit-search/src/leaf.rs b/quickwit/quickwit-search/src/leaf.rs index ba206889841..5d07c45c951 100644 --- a/quickwit/quickwit-search/src/leaf.rs +++ b/quickwit/quickwit-search/src/leaf.rs @@ -593,7 +593,7 @@ async fn leaf_search_single_split( } crate::SEARCH_METRICS .leaf_search_single_split_warmup_num_bytes - .observe(warmup_size.as_u64() as f64); + .record(warmup_size.as_u64() as f64); search_permit.update_memory_usage(warmup_size); search_permit.free_warmup_slot(); @@ -1805,16 +1805,16 @@ enum SplitSearchState { } impl SplitSearchState { - pub fn inc(self, counters: &SplitSearchOutcomeCounters) { + pub fn increment(self, counters: &SplitSearchOutcomeCounters) { match self { - SplitSearchState::Start => counters.cancel_before_warmup.inc(), - SplitSearchState::CacheHit => counters.cache_hit.inc(), - SplitSearchState::PrunedBeforeWarmup => counters.pruned_before_warmup.inc(), - SplitSearchState::WarmUp => counters.cancel_warmup.inc(), - SplitSearchState::PrunedAfterWarmup => counters.pruned_after_warmup.inc(), - SplitSearchState::CpuQueue => counters.cancel_cpu_queue.inc(), - SplitSearchState::Cpu => counters.cancel_cpu.inc(), - SplitSearchState::Success => counters.success.inc(), + SplitSearchState::Start => counters.cancel_before_warmup.increment(1), + SplitSearchState::CacheHit => counters.cache_hit.increment(1), + SplitSearchState::PrunedBeforeWarmup => counters.pruned_before_warmup.increment(1), + SplitSearchState::WarmUp => counters.cancel_warmup.increment(1), + SplitSearchState::PrunedAfterWarmup => counters.pruned_after_warmup.increment(1), + SplitSearchState::CpuQueue => counters.cancel_cpu_queue.increment(1), + SplitSearchState::Cpu => counters.cancel_cpu.increment(1), + SplitSearchState::Success => counters.success.increment(1), } } } @@ -1822,8 +1822,9 @@ impl SplitSearchState { impl Drop for SplitSearchStateGuard { fn drop(&mut self) { self.state - .inc(&crate::metrics::SEARCH_METRICS.split_search_outcome_total); - 
self.state.inc(&self.local_split_search_outcome_counters); + .increment(&crate::metrics::SEARCH_METRICS.split_search_outcome_total); + self.state + .increment(&self.local_split_search_outcome_counters); } } diff --git a/quickwit/quickwit-search/src/list_terms.rs b/quickwit/quickwit-search/src/list_terms.rs index 22e8a61f420..0b6bb51c88f 100644 --- a/quickwit/quickwit-search/src/list_terms.rs +++ b/quickwit/quickwit-search/src/list_terms.rs @@ -354,7 +354,9 @@ pub async fn leaf_list_terms( async move { let leaf_split_search_permit = search_permit_recv.await; // TODO dedicated counter and timer? - crate::SEARCH_METRICS.leaf_list_terms_splits_total.inc(); + crate::SEARCH_METRICS + .leaf_list_terms_splits_total + .increment(1); let timer = crate::SEARCH_METRICS .leaf_search_split_duration_secs .start_timer(); diff --git a/quickwit/quickwit-search/src/metrics.rs b/quickwit/quickwit-search/src/metrics.rs index 3e430d7b24b..1d974919628 100644 --- a/quickwit/quickwit-search/src/metrics.rs +++ b/quickwit/quickwit-search/src/metrics.rs @@ -15,18 +15,17 @@ // See https://prometheus.io/docs/practices/naming/ use std::fmt; -use std::sync::LazyLock; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::sync::{Arc, LazyLock}; use bytesize::ByteSize; use quickwit_common::metrics::{ - Histogram, HistogramVec, IntCounter, IntCounterVec, IntGauge, exponential_buckets, - linear_buckets, new_counter, new_counter_vec, new_gauge, new_gauge_vec, new_histogram, - new_histogram_vec, + Counter, Gauge, Histogram, counter, exponential_buckets, gauge, histogram, linear_buckets, }; fn print_if_not_null( field_name: &'static str, - counter: &IntCounter, + counter: &SplitSearchOutcomeCounter, f: &mut fmt::Formatter, ) -> fmt::Result { let val = counter.get(); @@ -36,15 +35,56 @@ fn print_if_not_null( Ok(()) } +#[derive(Clone)] +pub struct SplitSearchOutcomeCounter { + inner: SplitSearchOutcomeCounterInner, +} + +#[derive(Clone)] +enum SplitSearchOutcomeCounterInner { + Registered(Counter), + 
Local(Arc), +} + +impl SplitSearchOutcomeCounter { + fn registered(counter: Counter) -> Self { + Self { + inner: SplitSearchOutcomeCounterInner::Registered(counter), + } + } + + fn local() -> Self { + Self { + inner: SplitSearchOutcomeCounterInner::Local(Arc::new(AtomicU64::new(0))), + } + } + + pub fn increment(&self, value: u64) { + match &self.inner { + SplitSearchOutcomeCounterInner::Registered(counter) => counter.increment(value), + SplitSearchOutcomeCounterInner::Local(value_ref) => { + value_ref.fetch_add(value, Ordering::Relaxed); + } + } + } + + pub fn get(&self) -> u64 { + match &self.inner { + SplitSearchOutcomeCounterInner::Registered(counter) => counter.get(), + SplitSearchOutcomeCounterInner::Local(value_ref) => value_ref.load(Ordering::Relaxed), + } + } +} + pub struct SplitSearchOutcomeCounters { - pub cancel_before_warmup: IntCounter, - pub cache_hit: IntCounter, - pub pruned_before_warmup: IntCounter, - pub cancel_warmup: IntCounter, - pub pruned_after_warmup: IntCounter, - pub cancel_cpu_queue: IntCounter, - pub cancel_cpu: IntCounter, - pub success: IntCounter, + pub cancel_before_warmup: SplitSearchOutcomeCounter, + pub cache_hit: SplitSearchOutcomeCounter, + pub pruned_before_warmup: SplitSearchOutcomeCounter, + pub cancel_warmup: SplitSearchOutcomeCounter, + pub pruned_after_warmup: SplitSearchOutcomeCounter, + pub cancel_cpu_queue: SplitSearchOutcomeCounter, + pub cancel_cpu: SplitSearchOutcomeCounter, + pub success: SplitSearchOutcomeCounter, } impl fmt::Display for SplitSearchOutcomeCounters { @@ -64,61 +104,76 @@ impl fmt::Display for SplitSearchOutcomeCounters { impl SplitSearchOutcomeCounters { /// Create a new SplitSearchOutcomeCounters instance, registered in prometheus. 
pub fn new_registered() -> Self { - let search_split_outcome_vec = new_counter_vec( - "split_search_outcome", - "Count the state in which each leaf search split ended", - "search", - &[], - ["category"], - ); - Self::new_from_counter_vec(search_split_outcome_vec) + Self::new_registered_from_counter(&*SPLIT_SEARCH_OUTCOME) } - /// Create a new SplitSearchOutcomeCounters instance, but this one won't be reported to - /// prometheus. + /// Create a new SplitSearchOutcomeCounters instance that is not reported. pub fn new_unregistered() -> Self { - let search_split_outcome_vec = IntCounterVec::new( - "split_search_outcome", - "Count the state in which each leaf search split ended", - "search", - &[], - ["category"], - ); - Self::new_from_counter_vec(search_split_outcome_vec) + SplitSearchOutcomeCounters { + cancel_before_warmup: SplitSearchOutcomeCounter::local(), + cache_hit: SplitSearchOutcomeCounter::local(), + pruned_before_warmup: SplitSearchOutcomeCounter::local(), + cancel_warmup: SplitSearchOutcomeCounter::local(), + pruned_after_warmup: SplitSearchOutcomeCounter::local(), + cancel_cpu_queue: SplitSearchOutcomeCounter::local(), + cancel_cpu: SplitSearchOutcomeCounter::local(), + success: SplitSearchOutcomeCounter::local(), + } } - pub fn new_from_counter_vec(search_split_outcome_vec: IntCounterVec<1>) -> Self { + fn new_registered_from_counter(search_split_outcome: &Counter) -> Self { SplitSearchOutcomeCounters { - cancel_before_warmup: search_split_outcome_vec - .with_label_values(["cancel_before_warmup"]), - cache_hit: search_split_outcome_vec.with_label_values(["cache_hit"]), - pruned_before_warmup: search_split_outcome_vec - .with_label_values(["pruned_before_warmup"]), - cancel_warmup: search_split_outcome_vec.with_label_values(["cancel_warmup"]), - pruned_after_warmup: search_split_outcome_vec - .with_label_values(["pruned_after_warmup"]), - cancel_cpu_queue: search_split_outcome_vec.with_label_values(["cancel_cpu_queue"]), - cancel_cpu: 
search_split_outcome_vec.with_label_values(["cancel_cpu"]), - success: search_split_outcome_vec.with_label_values(["success"]), + cancel_before_warmup: SplitSearchOutcomeCounter::registered(counter!( + parent: search_split_outcome, + "category" => "cancel_before_warmup", + )), + cache_hit: SplitSearchOutcomeCounter::registered(counter!( + parent: search_split_outcome, + "category" => "cache_hit", + )), + pruned_before_warmup: SplitSearchOutcomeCounter::registered(counter!( + parent: search_split_outcome, + "category" => "pruned_before_warmup", + )), + cancel_warmup: SplitSearchOutcomeCounter::registered(counter!( + parent: search_split_outcome, + "category" => "cancel_warmup", + )), + pruned_after_warmup: SplitSearchOutcomeCounter::registered(counter!( + parent: search_split_outcome, + "category" => "pruned_after_warmup", + )), + cancel_cpu_queue: SplitSearchOutcomeCounter::registered(counter!( + parent: search_split_outcome, + "category" => "cancel_cpu_queue", + )), + cancel_cpu: SplitSearchOutcomeCounter::registered(counter!( + parent: search_split_outcome, + "category" => "cancel_cpu", + )), + success: SplitSearchOutcomeCounter::registered(counter!( + parent: search_split_outcome, + "category" => "success", + )), } } } pub struct SearchMetrics { - pub root_search_requests_total: IntCounterVec<1>, - pub root_search_request_duration_seconds: HistogramVec<1>, - pub root_search_targeted_splits: HistogramVec<1>, - pub leaf_search_requests_total: IntCounterVec<1>, - pub leaf_search_request_duration_seconds: HistogramVec<1>, - pub leaf_search_targeted_splits: HistogramVec<1>, - pub leaf_list_terms_splits_total: IntCounter, + pub root_search_requests_total: Counter, + pub root_search_request_duration_seconds: Histogram, + pub root_search_targeted_splits: Histogram, + pub leaf_search_requests_total: Counter, + pub leaf_search_request_duration_seconds: Histogram, + pub leaf_search_targeted_splits: Histogram, + pub leaf_list_terms_splits_total: Counter, pub 
split_search_outcome_total: SplitSearchOutcomeCounters, pub leaf_search_split_duration_secs: Histogram, - pub job_assigned_total: IntCounterVec<1>, - pub leaf_search_single_split_tasks_pending: IntGauge, - pub leaf_search_single_split_tasks_ongoing: IntGauge, + pub job_assigned_total: Counter, + pub leaf_search_single_split_tasks_pending: Gauge, + pub leaf_search_single_split_tasks_ongoing: Gauge, pub leaf_search_single_split_warmup_num_bytes: Histogram, - pub searcher_local_kv_store_size_bytes: IntGauge, + pub searcher_local_kv_store_size_bytes: Gauge, } /// From 0.008s to 131.072s @@ -126,126 +181,182 @@ fn duration_buckets() -> Vec { exponential_buckets(0.008, 2.0, 15).unwrap() } +fn targeted_splits_buckets() -> Vec { + [ + linear_buckets(0.0, 10.0, 10).unwrap(), + linear_buckets(100.0, 100.0, 9).unwrap(), + linear_buckets(1000.0, 1000.0, 9).unwrap(), + linear_buckets(10000.0, 10000.0, 10).unwrap(), + ] + .iter() + .flatten() + .copied() + .collect() +} + +fn pseudo_exponential_bytes_buckets() -> Vec { + vec![ + ByteSize::mb(10).as_u64() as f64, + ByteSize::mb(20).as_u64() as f64, + ByteSize::mb(50).as_u64() as f64, + ByteSize::mb(100).as_u64() as f64, + ByteSize::mb(200).as_u64() as f64, + ByteSize::mb(500).as_u64() as f64, + ByteSize::gb(1).as_u64() as f64, + ByteSize::gb(2).as_u64() as f64, + ByteSize::gb(5).as_u64() as f64, + ] +} + +static SPLIT_SEARCH_OUTCOME: LazyLock = LazyLock::new(|| { + counter!( + name: "split_search_outcome", + description: "Count the state in which each leaf search split ended", + subsystem: "search", + observable: true, + ) +}); + +static LEAF_SEARCH_SINGLE_SPLIT_TASKS: LazyLock = LazyLock::new(|| { + gauge!( + name: "leaf_search_single_split_tasks", + description: "Number of single split search tasks pending or ongoing", + subsystem: "search", + ) +}); + +static ROOT_SEARCH_REQUESTS_TOTAL: LazyLock = LazyLock::new(|| { + counter!( + name: "root_search_requests_total", + description: "Total number of root search gRPC requests 
processed.", + subsystem: "search", + ) +}); + +static ROOT_SEARCH_REQUEST_DURATION_SECONDS: LazyLock = LazyLock::new(|| { + histogram!( + name: "root_search_request_duration_seconds", + description: "Duration of root search gRPC requests in seconds.", + subsystem: "search", + buckets: duration_buckets(), + ) +}); + +static ROOT_SEARCH_TARGETED_SPLITS: LazyLock = LazyLock::new(|| { + histogram!( + name: "root_search_targeted_splits", + description: "Number of splits targeted per root search GRPC request.", + subsystem: "search", + buckets: targeted_splits_buckets(), + ) +}); + +static LEAF_SEARCH_REQUESTS_TOTAL: LazyLock = LazyLock::new(|| { + counter!( + name: "leaf_search_requests_total", + description: "Total number of leaf search gRPC requests processed.", + subsystem: "search", + ) +}); + +static LEAF_SEARCH_REQUEST_DURATION_SECONDS: LazyLock = LazyLock::new(|| { + histogram!( + name: "leaf_search_request_duration_seconds", + description: "Duration of leaf search gRPC requests in seconds.", + subsystem: "search", + buckets: duration_buckets(), + ) +}); + +static LEAF_SEARCH_TARGETED_SPLITS: LazyLock = LazyLock::new(|| { + histogram!( + name: "leaf_search_targeted_splits", + description: "Number of splits targeted per leaf search GRPC request.", + subsystem: "search", + buckets: targeted_splits_buckets(), + ) +}); + +static LEAF_LIST_TERMS_SPLITS_TOTAL: LazyLock = LazyLock::new(|| { + counter!( + name: "leaf_list_terms_splits_total", + description: "Number of list terms splits total", + subsystem: "search", + ) +}); + +static LEAF_SEARCH_SPLIT_DURATION_SECS: LazyLock = LazyLock::new(|| { + histogram!( + name: "leaf_search_split_duration_secs", + description: "Number of seconds required to run a leaf search over a single split. 
The timer starts after the semaphore is obtained.", + subsystem: "search", + buckets: duration_buckets(), + ) +}); + +static LEAF_SEARCH_SINGLE_SPLIT_WARMUP_NUM_BYTES: LazyLock = LazyLock::new(|| { + histogram!( + name: "leaf_search_single_split_warmup_num_bytes", + description: "Size of the short lived cache for a single split once the warmup is done.", + subsystem: "search", + buckets: pseudo_exponential_bytes_buckets(), + ) +}); + +static JOB_ASSIGNED_TOTAL: LazyLock = LazyLock::new(|| { + counter!( + name: "job_assigned_total", + description: "Number of job assigned to searchers, per affinity rank.", + subsystem: "search", + ) +}); + +static SEARCHER_LOCAL_KV_STORE_SIZE_BYTES: LazyLock = LazyLock::new(|| { + gauge!( + name: "searcher_local_kv_store_size_bytes", + description: "Size of the searcher kv store in bytes. This store is used to cache scroll contexts.", + subsystem: "search", + ) +}); + impl Default for SearchMetrics { fn default() -> Self { - let targeted_splits_buckets: Vec = [ - linear_buckets(0.0, 10.0, 10).unwrap(), - linear_buckets(100.0, 100.0, 9).unwrap(), - linear_buckets(1000.0, 1000.0, 9).unwrap(), - linear_buckets(10000.0, 10000.0, 10).unwrap(), - ] - .iter() - .flatten() - .copied() - .collect(); - - let pseudo_exponential_bytes_buckets = vec![ - ByteSize::mb(10).as_u64() as f64, - ByteSize::mb(20).as_u64() as f64, - ByteSize::mb(50).as_u64() as f64, - ByteSize::mb(100).as_u64() as f64, - ByteSize::mb(200).as_u64() as f64, - ByteSize::mb(500).as_u64() as f64, - ByteSize::gb(1).as_u64() as f64, - ByteSize::gb(2).as_u64() as f64, - ByteSize::gb(5).as_u64() as f64, - ]; - - let leaf_search_single_split_tasks = new_gauge_vec::<1>( - "leaf_search_single_split_tasks", - "Number of single split search tasks pending or ongoing", - "search", - &[], - ["status"], // takes values "ongoing" or "pending" - ); - SearchMetrics { - root_search_requests_total: new_counter_vec( - "root_search_requests_total", - "Total number of root search gRPC requests 
processed.", - "search", - &[("kind", "server")], - ["status"], + root_search_requests_total: counter!( + parent: &*ROOT_SEARCH_REQUESTS_TOTAL, + "kind" => "server", ), - root_search_request_duration_seconds: new_histogram_vec( - "root_search_request_duration_seconds", - "Duration of root search gRPC requests in seconds.", - "search", - &[("kind", "server")], - ["status"], - duration_buckets(), + root_search_request_duration_seconds: histogram!( + parent: &*ROOT_SEARCH_REQUEST_DURATION_SECONDS, + "kind" => "server", ), - root_search_targeted_splits: new_histogram_vec( - "root_search_targeted_splits", - "Number of splits targeted per root search GRPC request.", - "search", - &[], - ["status"], - targeted_splits_buckets.clone(), + root_search_targeted_splits: ROOT_SEARCH_TARGETED_SPLITS.clone(), + leaf_search_requests_total: counter!( + parent: &*LEAF_SEARCH_REQUESTS_TOTAL, + "kind" => "server", ), - leaf_search_requests_total: new_counter_vec( - "leaf_search_requests_total", - "Total number of leaf search gRPC requests processed.", - "search", - &[("kind", "server")], - ["status"], - ), - leaf_search_request_duration_seconds: new_histogram_vec( - "leaf_search_request_duration_seconds", - "Duration of leaf search gRPC requests in seconds.", - "search", - &[("kind", "server")], - ["status"], - duration_buckets(), - ), - leaf_search_targeted_splits: new_histogram_vec( - "leaf_search_targeted_splits", - "Number of splits targeted per leaf search GRPC request.", - "search", - &[], - ["status"], - targeted_splits_buckets, + leaf_search_request_duration_seconds: histogram!( + parent: &*LEAF_SEARCH_REQUEST_DURATION_SECONDS, + "kind" => "server", ), + leaf_search_targeted_splits: LEAF_SEARCH_TARGETED_SPLITS.clone(), - leaf_list_terms_splits_total: new_counter( - "leaf_list_terms_splits_total", - "Number of list terms splits total", - "search", - &[], - ), + leaf_list_terms_splits_total: LEAF_LIST_TERMS_SPLITS_TOTAL.clone(), split_search_outcome_total: 
SplitSearchOutcomeCounters::new_registered(), - leaf_search_split_duration_secs: new_histogram( - "leaf_search_split_duration_secs", - "Number of seconds required to run a leaf search over a single split. The timer \ - starts after the semaphore is obtained.", - "search", - duration_buckets(), - ), - leaf_search_single_split_tasks_ongoing: leaf_search_single_split_tasks - .with_label_values(["ongoing"]), - leaf_search_single_split_tasks_pending: leaf_search_single_split_tasks - .with_label_values(["pending"]), - leaf_search_single_split_warmup_num_bytes: new_histogram( - "leaf_search_single_split_warmup_num_bytes", - "Size of the short lived cache for a single split once the warmup is done.", - "search", - pseudo_exponential_bytes_buckets, - ), - job_assigned_total: new_counter_vec( - "job_assigned_total", - "Number of job assigned to searchers, per affinity rank.", - "search", - &[], - ["affinity"], + leaf_search_split_duration_secs: LEAF_SEARCH_SPLIT_DURATION_SECS.clone(), + leaf_search_single_split_tasks_ongoing: gauge!( + parent: &*LEAF_SEARCH_SINGLE_SPLIT_TASKS, + "status" => "ongoing", ), - searcher_local_kv_store_size_bytes: new_gauge( - "searcher_local_kv_store_size_bytes", - "Size of the searcher kv store in bytes. 
This store is used to cache scroll \ - contexts.", - "search", - &[], + leaf_search_single_split_tasks_pending: gauge!( + parent: &*LEAF_SEARCH_SINGLE_SPLIT_TASKS, + "status" => "pending", ), + leaf_search_single_split_warmup_num_bytes: LEAF_SEARCH_SINGLE_SPLIT_WARMUP_NUM_BYTES + .clone(), + job_assigned_total: JOB_ASSIGNED_TOTAL.clone(), + searcher_local_kv_store_size_bytes: SEARCHER_LOCAL_KV_STORE_SIZE_BYTES.clone(), } } } diff --git a/quickwit/quickwit-search/src/metrics_trackers.rs b/quickwit/quickwit-search/src/metrics_trackers.rs index 7f2f9fbbfb3..6e074a2648a 100644 --- a/quickwit/quickwit-search/src/metrics_trackers.rs +++ b/quickwit/quickwit-search/src/metrics_trackers.rs @@ -19,6 +19,7 @@ use std::task::{Context, Poll, ready}; use std::time::Instant; use pin_project::{pin_project, pinned_drop}; +use quickwit_common::metrics::{counter, histogram}; use quickwit_proto::search::LeafSearchResponse; use crate::SearchError; @@ -69,19 +70,21 @@ impl PinnedDrop for RootSearchMetricsFuture { ) => (*num_targeted_splits, "cancelled"), }; - let label_values = [status]; - SEARCH_METRICS - .root_search_requests_total - .with_label_values(label_values) - .inc(); - SEARCH_METRICS - .root_search_request_duration_seconds - .with_label_values(label_values) - .observe(self.start.elapsed().as_secs_f64()); - SEARCH_METRICS - .root_search_targeted_splits - .with_label_values(label_values) - .observe(num_targeted_splits as f64); + counter!( + parent: &SEARCH_METRICS.root_search_requests_total, + "status" => status, + ) + .increment(1); + histogram!( + parent: &SEARCH_METRICS.root_search_request_duration_seconds, + "status" => status, + ) + .record(self.start.elapsed().as_secs_f64()); + histogram!( + parent: &SEARCH_METRICS.root_search_targeted_splits, + "status" => status, + ) + .record(num_targeted_splits as f64); } } @@ -117,19 +120,22 @@ impl PinnedDrop for LeafSearchMetricsFuture where F: Future> { fn drop(self: Pin<&mut Self>) { - let label_values = 
[self.status.unwrap_or("cancelled")]; - SEARCH_METRICS - .leaf_search_requests_total - .with_label_values(label_values) - .inc(); - SEARCH_METRICS - .leaf_search_request_duration_seconds - .with_label_values(label_values) - .observe(self.start.elapsed().as_secs_f64()); - SEARCH_METRICS - .leaf_search_targeted_splits - .with_label_values(label_values) - .observe(self.targeted_splits as f64); + let status = self.status.unwrap_or("cancelled"); + counter!( + parent: &SEARCH_METRICS.leaf_search_requests_total, + "status" => status, + ) + .increment(1); + histogram!( + parent: &SEARCH_METRICS.leaf_search_request_duration_seconds, + "status" => status, + ) + .record(self.start.elapsed().as_secs_f64()); + histogram!( + parent: &SEARCH_METRICS.leaf_search_targeted_splits, + "status" => status, + ) + .record(self.targeted_splits as f64); } } diff --git a/quickwit/quickwit-search/src/scroll_context.rs b/quickwit/quickwit-search/src/scroll_context.rs index a4a31a856b5..f185c4079c2 100644 --- a/quickwit/quickwit-search/src/scroll_context.rs +++ b/quickwit/quickwit-search/src/scroll_context.rs @@ -123,7 +123,7 @@ impl ScrollContext { struct TrackedValue { content: Vec, - _total_size_metric_guard: GaugeGuard<'static>, + _total_size_metric_guard: GaugeGuard, } /// In memory key value store with TTL and limited size. 
diff --git a/quickwit/quickwit-search/src/search_job_placer.rs b/quickwit/quickwit-search/src/search_job_placer.rs index 8be9cced66b..b2c4e21dcea 100644 --- a/quickwit/quickwit-search/src/search_job_placer.rs +++ b/quickwit/quickwit-search/src/search_job_placer.rs @@ -21,6 +21,7 @@ use std::net::SocketAddr; use anyhow::bail; use async_trait::async_trait; use quickwit_common::SocketAddrLegacyHash; +use quickwit_common::metrics::counter; use quickwit_common::pubsub::EventSubscriber; use quickwit_common::rendezvous_hasher::{node_affinity, sort_by_rendez_vous_hash}; use quickwit_proto::search::{ReportSplit, ReportSplitsRequest}; @@ -217,10 +218,11 @@ impl SearchJobPlacer { 1 => "1", _ => "> 1", }; - SEARCH_METRICS - .job_assigned_total - .with_label_values([metric_node_idx]) - .inc(); + counter!( + parent: &SEARCH_METRICS.job_assigned_total, + "affinity" => metric_node_idx, + ) + .increment(1); chosen_node.load += job.cost(); job_assignments diff --git a/quickwit/quickwit-search/src/search_permit_provider.rs b/quickwit/quickwit-search/src/search_permit_provider.rs index 4cd5e99918f..b05a7e06235 100644 --- a/quickwit/quickwit-search/src/search_permit_provider.rs +++ b/quickwit/quickwit-search/src/search_permit_provider.rs @@ -352,12 +352,12 @@ impl SearchPermitActor { } crate::SEARCH_METRICS .leaf_search_single_split_tasks_pending - .set(self.permits_requests.len() as i64); + .set(self.permits_requests.len() as f64); } } pub struct SearchPermit { - _ongoing_gauge_guard: GaugeGuard<'static>, + _ongoing_gauge_guard: GaugeGuard, msg_sender: mpsc::WeakUnboundedSender, memory_allocation: u64, warmup_slot_freed: bool, diff --git a/quickwit/quickwit-serve/src/decompression.rs b/quickwit/quickwit-serve/src/decompression.rs index 9e2ddd28197..cf452def5e1 100644 --- a/quickwit/quickwit-serve/src/decompression.rs +++ b/quickwit/quickwit-serve/src/decompression.rs @@ -108,7 +108,7 @@ pub(crate) fn get_body_bytes() -> impl Filter, + _gauge_guard: GaugeGuard, _permit: 
LoadShieldPermit, } diff --git a/quickwit/quickwit-serve/src/load_shield.rs b/quickwit/quickwit-serve/src/load_shield.rs index 568ffabd53b..0a5e9a9d6fa 100644 --- a/quickwit/quickwit-serve/src/load_shield.rs +++ b/quickwit/quickwit-serve/src/load_shield.rs @@ -14,7 +14,7 @@ use std::time::Duration; -use quickwit_common::metrics::{GaugeGuard, IntGauge}; +use quickwit_common::metrics::{Gauge, GaugeGuard, gauge}; use tokio::sync::{Semaphore, SemaphorePermit}; use crate::rest::TooManyRequests; @@ -22,14 +22,14 @@ use crate::rest::TooManyRequests; pub struct LoadShield { in_flight_semaphore_opt: Option, // This one is doing the load shedding. concurrency_semaphore_opt: Option, - ongoing_gauge: IntGauge, - pending_gauge: IntGauge, + ongoing_gauge: Gauge, + pending_gauge: Gauge, } pub struct LoadShieldPermit { _concurrency_permit_opt: Option>, _in_flight_permit_opt: Option>, - _ongoing_gauge_guard: GaugeGuard<'static>, + _ongoing_gauge_guard: GaugeGuard, } impl LoadShield { @@ -43,12 +43,14 @@ impl LoadShield { quickwit_common::get_from_env_opt(&max_concurrency_env_key, false); let in_flight_semaphore_opt = max_in_flight_opt.map(Semaphore::new); let concurrency_semaphore_opt = max_concurrency_opt.map(Semaphore::new); - let pending_gauge = crate::metrics::SERVE_METRICS - .pending_requests - .with_label_values([endpoint_group]); - let ongoing_gauge = crate::metrics::SERVE_METRICS - .ongoing_requests - .with_label_values([endpoint_group]); + let pending_gauge = gauge!( + parent: &crate::metrics::SERVE_METRICS.pending_requests, + "endpoint_group" => endpoint_group, + ); + let ongoing_gauge = gauge!( + parent: &crate::metrics::SERVE_METRICS.ongoing_requests, + "endpoint_group" => endpoint_group, + ); LoadShield { in_flight_semaphore_opt, concurrency_semaphore_opt, diff --git a/quickwit/quickwit-serve/src/metrics.rs b/quickwit/quickwit-serve/src/metrics.rs index 7d922c7fb36..06d5a4085f8 100644 --- a/quickwit/quickwit-serve/src/metrics.rs +++ 
b/quickwit/quickwit-serve/src/metrics.rs @@ -15,58 +15,67 @@ use std::sync::LazyLock; use quickwit_common::metrics::{ - HistogramVec, IntCounter, IntCounterVec, IntGaugeVec, new_counter, new_counter_vec, - new_gauge_vec, new_histogram_vec, + Counter, Gauge, Histogram, counter, exponential_buckets, gauge, histogram, }; pub struct ServeMetrics { - pub http_requests_total: IntCounterVec<2>, - pub request_duration_secs: HistogramVec<2>, - pub ongoing_requests: IntGaugeVec<1>, - pub pending_requests: IntGaugeVec<1>, - pub circuit_break_total: IntCounter, + pub http_requests_total: Counter, + pub request_duration_secs: Histogram, + pub ongoing_requests: Gauge, + pub pending_requests: Gauge, + pub circuit_break_total: Counter, } +static HTTP_REQUESTS_TOTAL: LazyLock = LazyLock::new(|| { + counter!( + name: "http_requests_total", + description: "Total number of HTTP requests processed.", + subsystem: "", + ) +}); + +static REQUEST_DURATION_SECS: LazyLock = LazyLock::new(|| { + histogram!( + name: "request_duration_secs", + description: "Response time in seconds", + subsystem: "", + // last bucket is 163.84s + buckets: exponential_buckets(0.02, 2.0, 14).unwrap(), + ) +}); + +static ONGOING_REQUESTS: LazyLock = LazyLock::new(|| { + gauge!( + name: "ongoing_requests", + description: "Number of ongoing requests.", + subsystem: "", + ) +}); + +static PENDING_REQUESTS: LazyLock = LazyLock::new(|| { + gauge!( + name: "pending_requests", + description: "Number of pending requests.", + subsystem: "", + ) +}); + +static CIRCUIT_BREAK_TOTAL: LazyLock = LazyLock::new(|| { + counter!( + name: "circuit_break_total", + description: "Circuit breaker counter", + subsystem: "grpc", + ) +}); + impl Default for ServeMetrics { fn default() -> Self { - let circuit_break_total = new_counter( - "circuit_break_total", - "Circuit breaker counter", - "grpc", - &[], - ); ServeMetrics { - http_requests_total: new_counter_vec( - "http_requests_total", - "Total number of HTTP requests processed.", - "", 
- &[], - ["method", "status_code"], - ), - request_duration_secs: new_histogram_vec( - "request_duration_secs", - "Response time in seconds", - "", - &[], - ["method", "status_code"], - // last bucket is 163.84s - quickwit_common::metrics::exponential_buckets(0.02, 2.0, 14).unwrap(), - ), - ongoing_requests: new_gauge_vec( - "ongoing_requests", - "Number of ongoing requests.", - "", - &[], - ["endpoint_group"], - ), - pending_requests: new_gauge_vec( - "pending_requests", - "Number of pending requests.", - "", - &[], - ["endpoint_group"], - ), - circuit_break_total, + http_requests_total: HTTP_REQUESTS_TOTAL.clone(), + request_duration_secs: REQUEST_DURATION_SECS.clone(), + ongoing_requests: ONGOING_REQUESTS.clone(), + pending_requests: PENDING_REQUESTS.clone(), + circuit_break_total: CIRCUIT_BREAK_TOTAL.clone(), } } } diff --git a/quickwit/quickwit-serve/src/rest.rs b/quickwit/quickwit-serve/src/rest.rs index c8451fb23b9..a13b0bb2457 100644 --- a/quickwit/quickwit-serve/src/rest.rs +++ b/quickwit/quickwit-serve/src/rest.rs @@ -19,6 +19,7 @@ use std::sync::Arc; use hyper_util::rt::{TokioExecutor, TokioIo}; use hyper_util::server::conn::auto::Builder; use hyper_util::service::TowerToHyperService; +use quickwit_common::metrics::{counter, histogram}; use quickwit_common::tower::BoxFutureInfaillible; use quickwit_config::{disable_ingest_v1, enable_ingest_v2}; use quickwit_search::SearchService; @@ -137,15 +138,20 @@ pub(crate) async fn start_rest_server( let request_counter = warp::log::custom(|info: Info| { let elapsed = info.elapsed(); let status = info.status(); - let label_values: [&str; 2] = [info.method().as_str(), status.as_str()]; - crate::SERVE_METRICS - .request_duration_secs - .with_label_values(label_values) - .observe(elapsed.as_secs_f64()); - crate::SERVE_METRICS - .http_requests_total - .with_label_values(label_values) - .inc(); + let method = info.method().as_str().to_string(); + let status_code = status.as_str().to_string(); + histogram!( + parent: 
&crate::SERVE_METRICS.request_duration_secs, + "method" => method.clone(), + "status_code" => status_code.clone(), + ) + .record(elapsed.as_secs_f64()); + counter!( + parent: &crate::SERVE_METRICS.http_requests_total, + "method" => method, + "status_code" => status_code, + ) + .increment(1); }); // Docs routes let api_doc = warp::path("openapi.json") diff --git a/quickwit/quickwit-storage/src/cache/base_cache.rs b/quickwit/quickwit-storage/src/cache/base_cache.rs index c7a4f7faceb..8aac977bbbe 100644 --- a/quickwit/quickwit-storage/src/cache/base_cache.rs +++ b/quickwit/quickwit-storage/src/cache/base_cache.rs @@ -133,10 +133,12 @@ pub struct Lru { impl Drop for Lru { fn drop(&mut self) { // we don't count this toward evicted entries, as we are clearing the whole cache - self.cache_metrics.in_cache_count.sub(self.num_items as i64); + self.cache_metrics + .in_cache_count + .decrement(self.num_items as f64); self.cache_metrics .in_cache_num_bytes - .sub(self.num_bytes as i64); + .decrement(self.num_bytes as f64); } } @@ -158,17 +160,21 @@ impl Lru { fn record_item(&mut self, num_bytes: u64) { self.num_items += 1; self.num_bytes += num_bytes; - self.cache_metrics.in_cache_count.inc(); - self.cache_metrics.in_cache_num_bytes.add(num_bytes as i64); + self.cache_metrics.in_cache_count.increment(1.0); + self.cache_metrics + .in_cache_num_bytes + .increment(num_bytes as f64); } fn drop_item(&mut self, num_bytes: u64) { self.num_items -= 1; self.num_bytes -= num_bytes; - self.cache_metrics.in_cache_count.dec(); - self.cache_metrics.in_cache_num_bytes.sub(num_bytes as i64); - self.cache_metrics.evict_num_items.inc(); - self.cache_metrics.evict_num_bytes.inc_by(num_bytes); + self.cache_metrics.in_cache_count.decrement(1.0); + self.cache_metrics + .in_cache_num_bytes + .decrement(num_bytes as f64); + self.cache_metrics.evict_num_items.increment(1); + self.cache_metrics.evict_num_bytes.increment(num_bytes); } pub fn get(&mut self, cache_key: &Q) -> Option @@ -178,11 +184,13 @@ 
impl Lru { { let item_opt = self.lru_cache.get_mut(cache_key); if let Some(item) = item_opt { - self.cache_metrics.hits_num_items.inc(); - self.cache_metrics.hits_num_bytes.inc_by(item.len() as u64); + self.cache_metrics.hits_num_items.increment(1); + self.cache_metrics + .hits_num_bytes + .increment(item.len() as u64); Some(item.payload()) } else { - self.cache_metrics.misses_num_items.inc(); + self.cache_metrics.misses_num_items.increment(1); None } } @@ -252,10 +260,10 @@ impl Drop for S3Fifo { // we don't count this toward evicted entries, as we are clearing the whole cache self.cache_metrics .in_cache_count - .sub(self.cache.len() as i64); + .decrement(self.cache.len() as f64); self.cache_metrics .in_cache_num_bytes - .sub(self.cache.weight() as i64); + .decrement(self.cache.weight() as f64); } } @@ -307,11 +315,13 @@ impl S3Fifo { { let item_opt = self.cache.get(cache_key); if let Some(item) = item_opt { - self.cache_metrics.hits_num_items.inc(); - self.cache_metrics.hits_num_bytes.inc_by(item.len() as u64); + self.cache_metrics.hits_num_items.increment(1); + self.cache_metrics + .hits_num_bytes + .increment(item.len() as u64); Some(item.clone()) } else { - self.cache_metrics.misses_num_items.inc(); + self.cache_metrics.misses_num_items.increment(1); None } } @@ -332,17 +342,19 @@ impl S3Fifo { return; } - self.cache_metrics.in_cache_count.inc(); + self.cache_metrics.in_cache_count.increment(1.0); self.cache_metrics .in_cache_num_bytes - .add(value.len() as i64); + .increment(value.len() as f64); let evicted = self.cache.insert_with_lifecycle(key, value); - self.cache_metrics.in_cache_count.sub(evicted.count as i64); + self.cache_metrics + .in_cache_count + .decrement(evicted.count as f64); self.cache_metrics .in_cache_num_bytes - .sub(evicted.bytes as i64); - self.cache_metrics.evict_num_items.inc_by(evicted.count); - self.cache_metrics.evict_num_bytes.inc_by(evicted.bytes); + .decrement(evicted.bytes as f64); + 
self.cache_metrics.evict_num_items.increment(evicted.count); + self.cache_metrics.evict_num_bytes.increment(evicted.bytes); } } @@ -355,10 +367,14 @@ struct CapacityTracker { impl Drop for CapacityTracker { fn drop(&mut self) { if let Some(cache_metrics) = self.cache_metrics.upgrade() { - cache_metrics.in_cache_count.dec(); - cache_metrics.in_cache_num_bytes.sub(self.item.len() as i64); - cache_metrics.evict_num_items.inc(); - cache_metrics.evict_num_bytes.inc_by(self.item.len() as u64); + cache_metrics.in_cache_count.decrement(1.0); + cache_metrics + .in_cache_num_bytes + .decrement(self.item.len() as f64); + cache_metrics.evict_num_items.increment(1); + cache_metrics + .evict_num_bytes + .increment(self.item.len() as u64); } } } @@ -380,10 +396,10 @@ impl Drop for TinyLfu { // we don't count this toward evicted entries, as we are clearing the whole cache self.cache_metrics .in_cache_count - .sub(self.cache.entry_count() as i64); + .decrement(self.cache.entry_count() as f64); self.cache_metrics .in_cache_num_bytes - .sub(self.cache.weighted_size() as i64); + .decrement(self.cache.weighted_size() as f64); } } @@ -411,13 +427,13 @@ impl NeedMutByteRangeCache { } else if let Some((k, v)) = self.merge_ranges(&key, byte_range.end) { (k, v) } else { - self.cache_counters.misses_num_items.inc(); + self.cache_counters.misses_num_items.increment(1); return None; }; @@ -90,10 +90,10 @@ impl NeedMutByteRangeCache { let end = byte_range.end - k.range_start; let result = v.bytes.slice(start..end); - self.cache_counters.hits_num_items.inc(); + self.cache_counters.hits_num_items.increment(1); self.cache_counters .hits_num_bytes - .inc_by((end - start) as u64); + .increment((end - start) as u64); Some(result) } @@ -293,7 +293,9 @@ impl NeedMutByteRangeCache { self.cache.insert(new_key, new_value); self.num_items -= (part_count - 1) as u64; - self.cache_counters.in_cache_count.sub(part_count - 1); + self.cache_counters + .in_cache_count + .decrement((part_count - 1) as f64); 
self.get_block(start, range_end) } @@ -301,17 +303,23 @@ impl NeedMutByteRangeCache { fn update_counter_record_item(&mut self, num_bytes: usize) { self.num_items += 1; self.num_bytes += num_bytes as u64; - self.cache_counters.in_cache_count.inc(); - self.cache_counters.in_cache_num_bytes.add(num_bytes as i64); + self.cache_counters.in_cache_count.increment(1.0); + self.cache_counters + .in_cache_num_bytes + .increment(num_bytes as f64); } fn update_counter_drop_item(&mut self, num_bytes: usize) { self.num_items -= 1; self.num_bytes -= num_bytes as u64; - self.cache_counters.in_cache_count.dec(); - self.cache_counters.in_cache_num_bytes.sub(num_bytes as i64); - self.cache_counters.evict_num_items.inc(); - self.cache_counters.evict_num_bytes.inc_by(num_bytes as u64); + self.cache_counters.in_cache_count.decrement(1.0); + self.cache_counters + .in_cache_num_bytes + .decrement(num_bytes as f64); + self.cache_counters.evict_num_items.increment(1); + self.cache_counters + .evict_num_bytes + .increment(num_bytes as u64); } } @@ -319,10 +327,10 @@ impl Drop for NeedMutByteRangeCache { fn drop(&mut self) { self.cache_counters .in_cache_count - .sub(self.num_items as i64); + .decrement(self.num_items as f64); self.cache_counters .in_cache_num_bytes - .sub(self.num_bytes as i64); + .decrement(self.num_bytes as f64); } } @@ -542,9 +550,9 @@ mod tests { let mutable_cache = cache.inner_arc.need_mut_byte_range_cache.lock().unwrap(); assert_eq!(mutable_cache.cache.len(), 4); assert_eq!(mutable_cache.num_items, 4); - assert_eq!(mutable_cache.cache_counters.in_cache_count.get(), 4); + assert_eq!(mutable_cache.cache_counters.in_cache_count.get(), 4.0); assert_eq!(mutable_cache.num_bytes, 20); - assert_eq!(mutable_cache.cache_counters.in_cache_num_bytes.get(), 20); + assert_eq!(mutable_cache.cache_counters.in_cache_num_bytes.get(), 20.0); } cache.get_slice(&key, 3..12).unwrap(); @@ -554,9 +562,9 @@ mod tests { let mutable_cache = 
cache.inner_arc.need_mut_byte_range_cache.lock().unwrap(); assert_eq!(mutable_cache.cache.len(), 2); assert_eq!(mutable_cache.num_items, 2); - assert_eq!(mutable_cache.cache_counters.in_cache_count.get(), 2); + assert_eq!(mutable_cache.cache_counters.in_cache_count.get(), 2.0); assert_eq!(mutable_cache.num_bytes, 20); - assert_eq!(mutable_cache.cache_counters.in_cache_num_bytes.get(), 20); + assert_eq!(mutable_cache.cache_counters.in_cache_num_bytes.get(), 20.0); } } } diff --git a/quickwit/quickwit-storage/src/file_descriptor_cache.rs b/quickwit/quickwit-storage/src/file_descriptor_cache.rs index 4626e71bee9..95aa30d4ce7 100644 --- a/quickwit/quickwit-storage/src/file_descriptor_cache.rs +++ b/quickwit/quickwit-storage/src/file_descriptor_cache.rs @@ -104,7 +104,7 @@ impl FileDescriptorCache { fd_cache_lock.push(split_id, split_file); self.fd_cache_metrics .in_cache_count - .set(fd_cache_lock.len() as i64); + .set(fd_cache_lock.len() as f64); } /// Evicts the given list of split ids from the file descriptor cache. 
@@ -116,10 +116,10 @@ impl FileDescriptorCache { } self.fd_cache_metrics .in_cache_count - .set(fd_cache_lock.len() as i64); + .set(fd_cache_lock.len() as f64); self.fd_cache_metrics .evict_num_items - .inc_by(split_ids.len() as u64); + .increment(split_ids.len() as u64); } pub async fn get_or_open_split_file( @@ -129,10 +129,10 @@ impl FileDescriptorCache { num_bytes: u64, ) -> std::io::Result { if let Some(split_file) = self.get_split_file(split_id) { - self.fd_cache_metrics.hits_num_items.inc(); + self.fd_cache_metrics.hits_num_items.increment(1); return Ok(split_file); } else { - self.fd_cache_metrics.misses_num_items.inc(); + self.fd_cache_metrics.misses_num_items.increment(1); } let split_path = get_split_file_path(root_path, split_id); let fd_semaphore_guard = Semaphore::acquire_owned(self.fd_semaphore.clone()) @@ -220,7 +220,7 @@ mod tests { .await .unwrap(); } - assert_eq!(cache_metrics.in_cache_count.get(), 10); + assert_eq!(cache_metrics.in_cache_count.get(), 10.0); assert_eq!(cache_metrics.hits_num_items.get(), 20); assert_eq!(cache_metrics.misses_num_items.get(), 10); } @@ -252,7 +252,7 @@ mod tests { .unwrap(); } } - assert_eq!(cache_metrics.in_cache_count.get(), 10); + assert_eq!(cache_metrics.in_cache_count.get(), 10.0); assert_eq!(cache_metrics.hits_num_items.get(), 100 * 9); assert_eq!(cache_metrics.misses_num_items.get(), 100); } diff --git a/quickwit/quickwit-storage/src/metrics.rs b/quickwit/quickwit-storage/src/metrics.rs index 888d137cc18..da878d354e8 100644 --- a/quickwit/quickwit-storage/src/metrics.rs +++ b/quickwit/quickwit-storage/src/metrics.rs @@ -17,10 +17,7 @@ use std::collections::HashMap; use std::sync::{LazyLock, RwLock}; -use quickwit_common::metrics::{ - GaugeGuard, Histogram, IntCounter, IntCounterVec, IntGauge, new_counter, new_counter_vec, - new_gauge, new_histogram_vec, -}; +use quickwit_common::metrics::{Counter, Gauge, GaugeGuard, Histogram, counter, gauge, histogram}; use quickwit_config::CacheConfig; /// Counters 
associated to storage operations. @@ -32,65 +29,150 @@ pub struct StorageMetrics { pub fast_field_cache: CacheMetrics, pub split_footer_cache: CacheMetrics, pub searcher_split_cache: CacheMetrics, - pub get_slice_timeout_successes: [IntCounter; 3], - pub get_slice_timeout_all_timeouts: IntCounter, - pub object_storage_get_total: IntCounter, - pub object_storage_get_errors_total: IntCounterVec<1>, - pub object_storage_get_slice_in_flight_count: IntGauge, - pub object_storage_get_slice_in_flight_num_bytes: IntGauge, - pub object_storage_put_total: IntCounter, - pub object_storage_put_parts: IntCounter, - pub object_storage_download_num_bytes: IntCounter, - pub object_storage_upload_num_bytes: IntCounter, - - pub object_storage_delete_requests_total: IntCounter, - pub object_storage_bulk_delete_requests_total: IntCounter, + pub get_slice_timeout_successes: [Counter; 3], + pub get_slice_timeout_all_timeouts: Counter, + pub object_storage_get_total: Counter, + pub object_storage_get_errors_total: Counter, + pub object_storage_get_slice_in_flight_count: Gauge, + pub object_storage_get_slice_in_flight_num_bytes: Gauge, + pub object_storage_put_total: Counter, + pub object_storage_put_parts: Counter, + pub object_storage_download_num_bytes: Counter, + pub object_storage_upload_num_bytes: Counter, + + pub object_storage_delete_requests_total: Counter, + pub object_storage_bulk_delete_requests_total: Counter, pub object_storage_delete_request_duration: Histogram, pub object_storage_bulk_delete_request_duration: Histogram, } +static GET_SLICE_TIMEOUT_OUTCOME_TOTAL: LazyLock = LazyLock::new(|| { + counter!( + name: "get_slice_timeout_outcome", + description: "Outcome of get_slice operations. 
success_after_1_timeout means the operation succeeded after a retry caused by a timeout.", + subsystem: "storage", + ) +}); + +static OBJECT_STORAGE_REQUESTS_TOTAL: LazyLock = LazyLock::new(|| { + counter!( + name: "object_storage_requests_total", + description: "Total number of object storage requests performed.", + subsystem: "storage", + ) +}); + +static OBJECT_STORAGE_REQUEST_DURATION: LazyLock = LazyLock::new(|| { + histogram!( + name: "object_storage_request_duration_seconds", + description: "Duration of object storage requests in seconds.", + subsystem: "storage", + buckets: vec![0.1, 0.5, 1.0, 5.0, 10.0, 30.0, 60.0], + ) +}); + +static OBJECT_STORAGE_GET_TOTAL: LazyLock = LazyLock::new(|| { + counter!( + name: "object_storage_gets_total", + description: "Number of objects fetched. Might be lower than get_slice_timeout_outcome if queries are debounced.", + subsystem: "storage", + ) +}); + +static OBJECT_STORAGE_GET_ERRORS_TOTAL: LazyLock = LazyLock::new(|| { + counter!( + name: "object_storage_get_errors_total", + description: "Number of GetObject errors.", + subsystem: "storage", + ) +}); + +static OBJECT_STORAGE_GET_SLICE_IN_FLIGHT_COUNT: LazyLock = LazyLock::new(|| { + gauge!( + name: "object_storage_get_slice_in_flight_count", + description: "Number of GetObject for which the memory was allocated but the download is still in progress.", + subsystem: "storage", + ) +}); + +static OBJECT_STORAGE_GET_SLICE_IN_FLIGHT_NUM_BYTES: LazyLock = LazyLock::new(|| { + gauge!( + name: "object_storage_get_slice_in_flight_num_bytes", + description: "Memory allocated for GetObject requests that are still in progress.", + subsystem: "storage", + ) +}); + +static OBJECT_STORAGE_PUT_TOTAL: LazyLock = LazyLock::new(|| { + counter!( + name: "object_storage_puts_total", + description: "Number of objects uploaded. 
May differ from object_storage_requests_parts due to multipart upload.", + subsystem: "storage", + ) +}); + +static OBJECT_STORAGE_PUT_PARTS: LazyLock = LazyLock::new(|| { + counter!( + name: "object_storage_puts_parts", + description: "Number of object parts uploaded.", + subsystem: "", + ) +}); + +static OBJECT_STORAGE_DOWNLOAD_NUM_BYTES: LazyLock = LazyLock::new(|| { + counter!( + name: "object_storage_download_num_bytes", + description: "Amount of data downloaded from an object storage.", + subsystem: "storage", + ) +}); + +static OBJECT_STORAGE_UPLOAD_NUM_BYTES: LazyLock = LazyLock::new(|| { + counter!( + name: "object_storage_upload_num_bytes", + description: "Amount of data uploaded to an object storage.", + subsystem: "storage", + ) +}); + impl Default for StorageMetrics { fn default() -> Self { - let get_slice_timeout_outcome_total_vec = new_counter_vec( - "get_slice_timeout_outcome", - "Outcome of get_slice operations. success_after_1_timeout means the operation \ - succeeded after a retry caused by a timeout.", - "storage", - &[], - ["outcome"], - ); let get_slice_timeout_successes = [ - get_slice_timeout_outcome_total_vec.with_label_values(["success_after_0_timeout"]), - get_slice_timeout_outcome_total_vec.with_label_values(["success_after_1_timeout"]), - get_slice_timeout_outcome_total_vec.with_label_values(["success_after_2+_timeout"]), + counter!( + parent: &*GET_SLICE_TIMEOUT_OUTCOME_TOTAL, + "outcome" => "success_after_0_timeout", + ), + counter!( + parent: &*GET_SLICE_TIMEOUT_OUTCOME_TOTAL, + "outcome" => "success_after_1_timeout", + ), + counter!( + parent: &*GET_SLICE_TIMEOUT_OUTCOME_TOTAL, + "outcome" => "success_after_2+_timeout", + ), ]; - let get_slice_timeout_all_timeouts = - get_slice_timeout_outcome_total_vec.with_label_values(["all_timeouts"]); - - let object_storage_requests_total = new_counter_vec( - "object_storage_requests_total", - "Total number of object storage requests performed.", - "storage", - &[], - ["action"], + let 
get_slice_timeout_all_timeouts = counter!( + parent: &*GET_SLICE_TIMEOUT_OUTCOME_TOTAL, + "outcome" => "all_timeouts", ); - let object_storage_delete_requests_total = - object_storage_requests_total.with_label_values(["delete_object"]); - let object_storage_bulk_delete_requests_total = - object_storage_requests_total.with_label_values(["delete_objects"]); - - let object_storage_request_duration = new_histogram_vec( - "object_storage_request_duration_seconds", - "Duration of object storage requests in seconds.", - "storage", - &[], - ["action"], - vec![0.1, 0.5, 1.0, 5.0, 10.0, 30.0, 60.0], + + let object_storage_delete_requests_total = counter!( + parent: &*OBJECT_STORAGE_REQUESTS_TOTAL, + "action" => "delete_object", + ); + let object_storage_bulk_delete_requests_total = counter!( + parent: &*OBJECT_STORAGE_REQUESTS_TOTAL, + "action" => "delete_objects", + ); + + let object_storage_delete_request_duration = histogram!( + parent: &*OBJECT_STORAGE_REQUEST_DURATION, + "action" => "delete_object", + ); + let object_storage_bulk_delete_request_duration = histogram!( + parent: &*OBJECT_STORAGE_REQUEST_DURATION, + "action" => "delete_objects", ); - let object_storage_delete_request_duration = - object_storage_request_duration.with_label_values(["delete_object"]); - let object_storage_bulk_delete_request_duration = - object_storage_request_duration.with_label_values(["delete_objects"]); StorageMetrics { fast_field_cache: CacheMetrics::for_component("fastfields"), @@ -102,58 +184,16 @@ impl Default for StorageMetrics { split_footer_cache: CacheMetrics::for_component("splitfooter"), get_slice_timeout_successes, get_slice_timeout_all_timeouts, - object_storage_get_total: new_counter( - "object_storage_gets_total", - "Number of objects fetched. 
Might be lower than get_slice_timeout_outcome if \ - queries are debounced.", - "storage", - &[], - ), - object_storage_get_errors_total: new_counter_vec::<1>( - "object_storage_get_errors_total", - "Number of GetObject errors.", - "storage", - &[], - ["code"], - ), - object_storage_get_slice_in_flight_count: new_gauge( - "object_storage_get_slice_in_flight_count", - "Number of GetObject for which the memory was allocated but the download is still \ - in progress.", - "storage", - &[], - ), - object_storage_get_slice_in_flight_num_bytes: new_gauge( - "object_storage_get_slice_in_flight_num_bytes", - "Memory allocated for GetObject requests that are still in progress.", - "storage", - &[], - ), - object_storage_put_total: new_counter( - "object_storage_puts_total", - "Number of objects uploaded. May differ from object_storage_requests_parts due to \ - multipart upload.", - "storage", - &[], - ), - object_storage_put_parts: new_counter( - "object_storage_puts_parts", - "Number of object parts uploaded.", - "", - &[], - ), - object_storage_download_num_bytes: new_counter( - "object_storage_download_num_bytes", - "Amount of data downloaded from an object storage.", - "storage", - &[], - ), - object_storage_upload_num_bytes: new_counter( - "object_storage_upload_num_bytes", - "Amount of data uploaded to an object storage.", - "storage", - &[], - ), + object_storage_get_total: OBJECT_STORAGE_GET_TOTAL.clone(), + object_storage_get_errors_total: OBJECT_STORAGE_GET_ERRORS_TOTAL.clone(), + object_storage_get_slice_in_flight_count: OBJECT_STORAGE_GET_SLICE_IN_FLIGHT_COUNT + .clone(), + object_storage_get_slice_in_flight_num_bytes: + OBJECT_STORAGE_GET_SLICE_IN_FLIGHT_NUM_BYTES.clone(), + object_storage_put_total: OBJECT_STORAGE_PUT_TOTAL.clone(), + object_storage_put_parts: OBJECT_STORAGE_PUT_PARTS.clone(), + object_storage_download_num_bytes: OBJECT_STORAGE_DOWNLOAD_NUM_BYTES.clone(), + object_storage_upload_num_bytes: OBJECT_STORAGE_UPLOAD_NUM_BYTES.clone(), 
object_storage_delete_requests_total, object_storage_bulk_delete_requests_total, object_storage_delete_request_duration, @@ -171,63 +211,47 @@ pub struct CacheMetrics { #[derive(Clone)] pub struct SingleCacheMetrics { - pub in_cache_count: IntGauge, - pub in_cache_num_bytes: IntGauge, - pub hits_num_items: IntCounter, - pub hits_num_bytes: IntCounter, - pub misses_num_items: IntCounter, - pub evict_num_items: IntCounter, - pub evict_num_bytes: IntCounter, + pub in_cache_count: Gauge, + pub in_cache_num_bytes: Gauge, + pub hits_num_items: Counter, + pub hits_num_bytes: Counter, + pub misses_num_items: Counter, + pub evict_num_items: Counter, + pub evict_num_bytes: Counter, } impl CacheMetrics { pub fn for_component(component_name: &str) -> Self { - const CACHE_METRICS_NAMESPACE: &str = "cache"; - let labels = [("component_name", component_name)]; CacheMetrics { component_name: component_name.to_string(), cache_metrics: SingleCacheMetrics { - in_cache_count: new_gauge( - "in_cache_count", - "Count of in cache by component", - CACHE_METRICS_NAMESPACE, - &labels, + in_cache_count: gauge!( + parent: &*CACHE_IN_CACHE_COUNT, + "component_name" => component_name.to_string(), ), - in_cache_num_bytes: new_gauge( - "in_cache_num_bytes", - "Number of bytes in cache by component", - CACHE_METRICS_NAMESPACE, - &labels, + in_cache_num_bytes: gauge!( + parent: &*CACHE_IN_CACHE_NUM_BYTES, + "component_name" => component_name.to_string(), ), - hits_num_items: new_counter( - "cache_hits_total", - "Number of cache hits by component", - CACHE_METRICS_NAMESPACE, - &labels, + hits_num_items: counter!( + parent: &*CACHE_HITS_TOTAL, + "component_name" => component_name.to_string(), ), - hits_num_bytes: new_counter( - "cache_hits_bytes", - "Number of cache hits in bytes by component", - CACHE_METRICS_NAMESPACE, - &labels, + hits_num_bytes: counter!( + parent: &*CACHE_HITS_BYTES, + "component_name" => component_name.to_string(), ), - misses_num_items: new_counter( - "cache_misses_total", - 
"Number of cache misses by component", - CACHE_METRICS_NAMESPACE, - &labels, + misses_num_items: counter!( + parent: &*CACHE_MISSES_TOTAL, + "component_name" => component_name.to_string(), ), - evict_num_items: new_counter( - "cache_evict_total", - "Number of cache entry evicted by component", - CACHE_METRICS_NAMESPACE, - &labels, + evict_num_items: counter!( + parent: &*CACHE_EVICT_TOTAL, + "component_name" => component_name.to_string(), ), - evict_num_bytes: new_counter( - "cache_evict_bytes", - "Number of cache entry evicted in bytes by component", - CACHE_METRICS_NAMESPACE, - &labels, + evict_num_bytes: counter!( + parent: &*CACHE_EVICT_BYTES, + "component_name" => component_name.to_string(), ), }, virtual_caches_metrics: RwLock::default(), @@ -240,56 +264,50 @@ impl CacheMetrics { return virtual_cache_metrics.clone(); } - const CACHE_METRICS_NAMESPACE: &str = "cache"; let capacity = config.capacity().as_u64().to_string(); let policy = config.policy().to_string(); - let labels = [ - ("component_name", self.component_name.as_str()), - ("capacity", &capacity), - ("policy", &policy), - ]; let new_virtual_cache_metrics = SingleCacheMetrics { - in_cache_count: new_gauge( - "virtual_in_cache_count", - "Count of in cache by component", - CACHE_METRICS_NAMESPACE, - &labels, + in_cache_count: gauge!( + parent: &*VIRTUAL_CACHE_IN_CACHE_COUNT, + "component_name" => self.component_name.clone(), + "capacity" => capacity.clone(), + "policy" => policy.clone(), ), - in_cache_num_bytes: new_gauge( - "virtual_in_cache_num_bytes", - "Number of bytes in cache by component", - CACHE_METRICS_NAMESPACE, - &labels, + in_cache_num_bytes: gauge!( + parent: &*VIRTUAL_CACHE_IN_CACHE_NUM_BYTES, + "component_name" => self.component_name.clone(), + "capacity" => capacity.clone(), + "policy" => policy.clone(), ), - hits_num_items: new_counter( - "virtual_cache_hits_total", - "Number of cache hits by component", - CACHE_METRICS_NAMESPACE, - &labels, + hits_num_items: counter!( + parent: 
&*VIRTUAL_CACHE_HITS_TOTAL, + "component_name" => self.component_name.clone(), + "capacity" => capacity.clone(), + "policy" => policy.clone(), ), - hits_num_bytes: new_counter( - "virtual_cache_hits_bytes", - "Number of cache hits in bytes by component", - CACHE_METRICS_NAMESPACE, - &labels, + hits_num_bytes: counter!( + parent: &*VIRTUAL_CACHE_HITS_BYTES, + "component_name" => self.component_name.clone(), + "capacity" => capacity.clone(), + "policy" => policy.clone(), ), - misses_num_items: new_counter( - "virtual_cache_misses_total", - "Number of cache misses by component", - CACHE_METRICS_NAMESPACE, - &labels, + misses_num_items: counter!( + parent: &*VIRTUAL_CACHE_MISSES_TOTAL, + "component_name" => self.component_name.clone(), + "capacity" => capacity.clone(), + "policy" => policy.clone(), ), - evict_num_items: new_counter( - "virtual_cache_evict_total", - "Number of cache entry evicted by component", - CACHE_METRICS_NAMESPACE, - &labels, + evict_num_items: counter!( + parent: &*VIRTUAL_CACHE_EVICT_TOTAL, + "component_name" => self.component_name.clone(), + "capacity" => capacity.clone(), + "policy" => policy.clone(), ), - evict_num_bytes: new_counter( - "virtual_cache_evict_bytes", - "Number of cache entry evicted in bytes by component", - CACHE_METRICS_NAMESPACE, - &labels, + evict_num_bytes: counter!( + parent: &*VIRTUAL_CACHE_EVICT_BYTES, + "component_name" => self.component_name.clone(), + "capacity" => capacity, + "policy" => policy, ), }; @@ -302,6 +320,132 @@ impl CacheMetrics { } } +static CACHE_IN_CACHE_COUNT: LazyLock = LazyLock::new(|| { + gauge!( + name: "in_cache_count", + description: "Count of in cache by component", + subsystem: "cache", + observable: true, + ) +}); + +static CACHE_IN_CACHE_NUM_BYTES: LazyLock = LazyLock::new(|| { + gauge!( + name: "in_cache_num_bytes", + description: "Number of bytes in cache by component", + subsystem: "cache", + observable: true, + ) +}); + +static CACHE_HITS_TOTAL: LazyLock = LazyLock::new(|| { + counter!( 
+ name: "cache_hits_total", + description: "Number of cache hits by component", + subsystem: "cache", + observable: true, + ) +}); + +static CACHE_HITS_BYTES: LazyLock = LazyLock::new(|| { + counter!( + name: "cache_hits_bytes", + description: "Number of cache hits in bytes by component", + subsystem: "cache", + observable: true, + ) +}); + +static CACHE_MISSES_TOTAL: LazyLock = LazyLock::new(|| { + counter!( + name: "cache_misses_total", + description: "Number of cache misses by component", + subsystem: "cache", + observable: true, + ) +}); + +static CACHE_EVICT_TOTAL: LazyLock = LazyLock::new(|| { + counter!( + name: "cache_evict_total", + description: "Number of cache entry evicted by component", + subsystem: "cache", + observable: true, + ) +}); + +static CACHE_EVICT_BYTES: LazyLock = LazyLock::new(|| { + counter!( + name: "cache_evict_bytes", + description: "Number of cache entry evicted in bytes by component", + subsystem: "cache", + observable: true, + ) +}); + +static VIRTUAL_CACHE_IN_CACHE_COUNT: LazyLock = LazyLock::new(|| { + gauge!( + name: "virtual_in_cache_count", + description: "Count of in cache by component", + subsystem: "cache", + observable: true, + ) +}); + +static VIRTUAL_CACHE_IN_CACHE_NUM_BYTES: LazyLock = LazyLock::new(|| { + gauge!( + name: "virtual_in_cache_num_bytes", + description: "Number of bytes in cache by component", + subsystem: "cache", + observable: true, + ) +}); + +static VIRTUAL_CACHE_HITS_TOTAL: LazyLock = LazyLock::new(|| { + counter!( + name: "virtual_cache_hits_total", + description: "Number of cache hits by component", + subsystem: "cache", + observable: true, + ) +}); + +static VIRTUAL_CACHE_HITS_BYTES: LazyLock = LazyLock::new(|| { + counter!( + name: "virtual_cache_hits_bytes", + description: "Number of cache hits in bytes by component", + subsystem: "cache", + observable: true, + ) +}); + +static VIRTUAL_CACHE_MISSES_TOTAL: LazyLock = LazyLock::new(|| { + counter!( + name: "virtual_cache_misses_total", + description: 
"Number of cache misses by component", + subsystem: "cache", + observable: true, + ) +}); + +static VIRTUAL_CACHE_EVICT_TOTAL: LazyLock = LazyLock::new(|| { + counter!( + name: "virtual_cache_evict_total", + description: "Number of cache entry evicted by component", + subsystem: "cache", + observable: true, + ) +}); + +static VIRTUAL_CACHE_EVICT_BYTES: LazyLock = LazyLock::new(|| { + counter!( + name: "virtual_cache_evict_bytes", + description: "Number of cache entry evicted in bytes by component", + subsystem: "cache", + observable: true, + ) +}); + /// Storage counters exposes a bunch a set of storage/cache related metrics through a prometheus /// endpoint. pub static STORAGE_METRICS: LazyLock = LazyLock::new(StorageMetrics::default); @@ -312,11 +456,11 @@ pub static CACHE_METRICS_FOR_TESTS: LazyLock = pub fn object_storage_get_slice_in_flight_guards( get_request_size: usize, -) -> (GaugeGuard<'static>, GaugeGuard<'static>) { +) -> (GaugeGuard, GaugeGuard) { let mut bytes_guard = GaugeGuard::from_gauge( &crate::STORAGE_METRICS.object_storage_get_slice_in_flight_num_bytes, ); - bytes_guard.add(get_request_size as i64); + bytes_guard.add_f64(get_request_size as f64); let mut count_guard = GaugeGuard::from_gauge(&crate::STORAGE_METRICS.object_storage_get_slice_in_flight_count); count_guard.add(1); diff --git a/quickwit/quickwit-storage/src/object_storage/azure_blob_storage.rs b/quickwit/quickwit-storage/src/object_storage/azure_blob_storage.rs index e10ec20b7c6..6540e20ee91 100644 --- a/quickwit/quickwit-storage/src/object_storage/azure_blob_storage.rs +++ b/quickwit/quickwit-storage/src/object_storage/azure_blob_storage.rs @@ -240,10 +240,10 @@ impl AzureBlobStorage { name: &'a str, payload: Box, ) -> StorageResult<()> { - crate::STORAGE_METRICS.object_storage_put_parts.inc(); + crate::STORAGE_METRICS.object_storage_put_parts.increment(1); crate::STORAGE_METRICS .object_storage_upload_num_bytes - .inc_by(payload.len()); + .increment(payload.len()); 
retry(&self.retry_params, || async { let data = Bytes::from(payload.read_all().await?.to_vec()); let hash = azure_storage_blobs::prelude::Hash::from(md5::compute(&data[..]).0); @@ -276,10 +276,10 @@ impl AzureBlobStorage { .map(|(num, range)| { let moved_blob_client = blob_client.clone(); let moved_payload = payload.clone(); - crate::STORAGE_METRICS.object_storage_put_parts.inc(); + crate::STORAGE_METRICS.object_storage_put_parts.increment(1); crate::STORAGE_METRICS .object_storage_upload_num_bytes - .inc_by(range.end - range.start); + .increment(range.end - range.start); async move { retry(&self.retry_params, || async { // zero pad block ids to make them sortable as strings @@ -349,7 +349,7 @@ impl Storage for AzureBlobStorage { path: &Path, payload: Box, ) -> crate::StorageResult<()> { - crate::STORAGE_METRICS.object_storage_put_total.inc(); + crate::STORAGE_METRICS.object_storage_put_total.increment(1); let name = self.blob_name(path); let total_len = payload.len(); let part_num_bytes = self.multipart_policy.part_num_bytes(total_len); @@ -378,7 +378,7 @@ impl Storage for AzureBlobStorage { let num_bytes_copied = tokio::io::copy_buf(&mut body_stream_reader, output).await?; STORAGE_METRICS .object_storage_download_num_bytes - .inc_by(num_bytes_copied); + .increment(num_bytes_copied); } output.flush().await?; Ok(()) @@ -579,7 +579,7 @@ async fn download_all( } crate::STORAGE_METRICS .object_storage_download_num_bytes - .inc_by(total_num_bytes as u64); + .increment(total_num_bytes as u64); Ok(coalesce_segments(segments, total_num_bytes)) } diff --git a/quickwit/quickwit-storage/src/object_storage/error.rs b/quickwit/quickwit-storage/src/object_storage/error.rs index 5f60fe1f944..04ca9ec5efe 100644 --- a/quickwit/quickwit-storage/src/object_storage/error.rs +++ b/quickwit/quickwit-storage/src/object_storage/error.rs @@ -22,6 +22,7 @@ use aws_sdk_s3::operation::get_object::GetObjectError; use aws_sdk_s3::operation::head_object::HeadObjectError; use 
aws_sdk_s3::operation::put_object::PutObjectError; use aws_sdk_s3::operation::upload_part::UploadPartError; +use quickwit_common::metrics::counter; use crate::{StorageError, StorageErrorKind}; @@ -62,11 +63,12 @@ pub trait ToStorageErrorKind { impl ToStorageErrorKind for GetObjectError { fn to_storage_error_kind(&self) -> StorageErrorKind { - let error_code = self.code().unwrap_or("unknown"); - crate::STORAGE_METRICS - .object_storage_get_errors_total - .with_label_values([error_code]) - .inc(); + let error_code = self.code().unwrap_or("unknown").to_string(); + counter!( + parent: &crate::STORAGE_METRICS.object_storage_get_errors_total, + "code" => error_code, + ) + .increment(1); match self { GetObjectError::InvalidObjectState(_) => StorageErrorKind::Service, GetObjectError::NoSuchKey(_) => StorageErrorKind::NotFound, diff --git a/quickwit/quickwit-storage/src/object_storage/s3_compatible_storage.rs b/quickwit/quickwit-storage/src/object_storage/s3_compatible_storage.rs index aede14b7e21..42700456775 100644 --- a/quickwit/quickwit-storage/src/object_storage/s3_compatible_storage.rs +++ b/quickwit/quickwit-storage/src/object_storage/s3_compatible_storage.rs @@ -300,10 +300,10 @@ impl S3CompatibleObjectStorage { .await .map_err(|io_error| Retry::Permanent(StorageError::from(io_error)))?; - crate::STORAGE_METRICS.object_storage_put_parts.inc(); + crate::STORAGE_METRICS.object_storage_put_parts.increment(1); crate::STORAGE_METRICS .object_storage_upload_num_bytes - .inc_by(len); + .increment(len); self.s3_client .put_object() @@ -435,10 +435,10 @@ impl S3CompatibleObjectStorage { .map_err(Retry::Permanent)?; let md5 = BASE64_STANDARD.encode(part.md5.0); - crate::STORAGE_METRICS.object_storage_put_parts.inc(); + crate::STORAGE_METRICS.object_storage_put_parts.increment(1); crate::STORAGE_METRICS .object_storage_upload_num_bytes - .inc_by(part.len()); + .increment(part.len()); let upload_part_output = self .s3_client @@ -558,7 +558,7 @@ impl S3CompatibleObjectStorage { 
let key = self.key(path); let range_str = range_opt.map(|range| format!("bytes={}-{}", range.start, range.end - 1)); - crate::STORAGE_METRICS.object_storage_get_total.inc(); + crate::STORAGE_METRICS.object_storage_get_total.increment(1); let get_object_output = self .s3_client @@ -653,7 +653,7 @@ impl S3CompatibleObjectStorage { aws_retry(&self.retry_params, || async { crate::STORAGE_METRICS .object_storage_bulk_delete_requests_total - .inc(); + .increment(1); let _timer = crate::STORAGE_METRICS .object_storage_bulk_delete_request_duration .start_timer(); @@ -734,7 +734,7 @@ async fn download_all(byte_stream: ByteStream) -> StorageResult { let bytes = aggregated.into_bytes(); STORAGE_METRICS .object_storage_download_num_bytes - .inc_by(bytes.len() as u64); + .increment(bytes.len() as u64); Ok(bytes) } @@ -774,7 +774,7 @@ impl Storage for S3CompatibleObjectStorage { path: &Path, payload: Box, ) -> crate::StorageResult<()> { - crate::STORAGE_METRICS.object_storage_put_total.inc(); + crate::STORAGE_METRICS.object_storage_put_total.increment(1); let _permit = REQUEST_SEMAPHORE.acquire().await; let key = self.key(path); let total_len = payload.len(); @@ -796,7 +796,7 @@ impl Storage for S3CompatibleObjectStorage { let num_bytes_copied = tokio::io::copy_buf(&mut body_read, output).await?; STORAGE_METRICS .object_storage_download_num_bytes - .inc_by(num_bytes_copied); + .increment(num_bytes_copied); output.flush().await?; Ok(()) } @@ -808,7 +808,7 @@ impl Storage for S3CompatibleObjectStorage { let delete_res = aws_retry(&self.retry_params, || async { crate::STORAGE_METRICS .object_storage_delete_requests_total - .inc(); + .increment(1); let _timer = crate::STORAGE_METRICS .object_storage_delete_request_duration .start_timer(); diff --git a/quickwit/quickwit-storage/src/opendal_storage/base.rs b/quickwit/quickwit-storage/src/opendal_storage/base.rs index 9bdb21378c9..e1e9f2eeba8 100644 --- a/quickwit/quickwit-storage/src/opendal_storage/base.rs +++ 
b/quickwit/quickwit-storage/src/opendal_storage/base.rs @@ -80,7 +80,7 @@ impl Storage for OpendalStorage { } async fn put(&self, path: &Path, payload: Box) -> StorageResult<()> { - crate::STORAGE_METRICS.object_storage_put_total.inc(); + crate::STORAGE_METRICS.object_storage_put_total.increment(1); let path = path.as_os_str().to_string_lossy(); let mut payload_reader = payload.byte_stream().await?.into_async_read(); @@ -95,7 +95,7 @@ impl Storage for OpendalStorage { storage_writer.get_mut().close().await?; crate::STORAGE_METRICS .object_storage_upload_num_bytes - .inc_by(payload.len()); + .increment(payload.len()); Ok(()) } @@ -111,7 +111,7 @@ impl Storage for OpendalStorage { let num_bytes_copied = tokio::io::copy(&mut storage_reader, output).await?; crate::STORAGE_METRICS .object_storage_download_num_bytes - .inc_by(num_bytes_copied); + .increment(num_bytes_copied); output.flush().await?; Ok(()) } @@ -123,7 +123,7 @@ impl Storage for OpendalStorage { // Unlike other object store implementations, in flight requests are // recorded before issuing the query to the object store. let _inflight_guards = object_storage_get_slice_in_flight_guards(size); - crate::STORAGE_METRICS.object_storage_get_total.inc(); + crate::STORAGE_METRICS.object_storage_get_total.increment(1); // `Buffer::to_bytes` is zero-copy when the underlying buffer is contiguous, and coalesces // into a single `Bytes` otherwise — avoiding the extra `Vec` round-trip `to_vec` would // perform. 
@@ -158,7 +158,7 @@ impl Storage for OpendalStorage { let path = path.as_os_str().to_string_lossy(); crate::STORAGE_METRICS .object_storage_delete_requests_total - .inc(); + .increment(1); let _timer = crate::STORAGE_METRICS .object_storage_delete_request_duration .start_timer(); @@ -177,7 +177,7 @@ impl Storage for OpendalStorage { for (index, path) in paths.iter().enumerate() { crate::STORAGE_METRICS .object_storage_bulk_delete_requests_total - .inc(); + .increment(1); let _timer = crate::STORAGE_METRICS .object_storage_bulk_delete_request_duration .start_timer(); diff --git a/quickwit/quickwit-storage/src/split_cache/mod.rs b/quickwit/quickwit-storage/src/split_cache/mod.rs index 7417471af58..5979ef258d2 100644 --- a/quickwit/quickwit-storage/src/split_cache/mod.rs +++ b/quickwit/quickwit-storage/src/split_cache/mod.rs @@ -219,10 +219,10 @@ impl SplitCacheBackingStorage { fn record_hit_metrics(&self, result_opt: Option<&OwnedBytes>) { let split_metrics = &crate::STORAGE_METRICS.searcher_split_cache.cache_metrics; if let Some(result) = result_opt { - split_metrics.hits_num_items.inc(); - split_metrics.hits_num_bytes.inc_by(result.len() as u64); + split_metrics.hits_num_items.increment(1); + split_metrics.hits_num_bytes.increment(result.len() as u64); } else { - split_metrics.misses_num_items.inc(); + split_metrics.misses_num_items.increment(1); } } } diff --git a/quickwit/quickwit-storage/src/split_cache/split_table.rs b/quickwit/quickwit-storage/src/split_cache/split_table.rs index 3546aeff23f..943a5101acf 100644 --- a/quickwit/quickwit-storage/src/split_cache/split_table.rs +++ b/quickwit/quickwit-storage/src/split_cache/split_table.rs @@ -156,22 +156,22 @@ impl SplitTable { .searcher_split_cache .cache_metrics .in_cache_count - .dec(); + .decrement(1.0); crate::metrics::STORAGE_METRICS .searcher_split_cache .cache_metrics .in_cache_num_bytes - .sub(num_bytes as i64); + .decrement(num_bytes as f64); crate::metrics::STORAGE_METRICS .searcher_split_cache 
.cache_metrics .evict_num_items - .inc(); + .increment(1); crate::metrics::STORAGE_METRICS .searcher_split_cache .cache_metrics .evict_num_bytes - .inc_by(num_bytes); + .increment(num_bytes); &mut self.on_disk_splits } }; @@ -224,12 +224,12 @@ impl SplitTable { .searcher_split_cache .cache_metrics .in_cache_count - .inc(); + .increment(1.0); crate::metrics::STORAGE_METRICS .searcher_split_cache .cache_metrics .in_cache_num_bytes - .add(num_bytes as i64); + .increment(num_bytes as f64); self.on_disk_splits.insert(split_info.split_key) } }; diff --git a/quickwit/quickwit-storage/src/timeout_and_retry_storage.rs b/quickwit/quickwit-storage/src/timeout_and_retry_storage.rs index f8c665217c5..a51a74a04e9 100644 --- a/quickwit/quickwit-storage/src/timeout_and_retry_storage.rs +++ b/quickwit/quickwit-storage/src/timeout_and_retry_storage.rs @@ -102,7 +102,7 @@ impl Storage for TimeoutAndRetryStorage { .get(attempt_id) .or(crate::STORAGE_METRICS.get_slice_timeout_successes.last()) .unwrap() - .inc(); + .increment(1); return result; } Err(_elapsed) => { @@ -112,7 +112,9 @@ impl Storage for TimeoutAndRetryStorage { } } rate_limited_warn!(limit_per_min=60, num_bytes=num_bytes, path=%path.display(), "all get_slice attempts timeouted"); - crate::STORAGE_METRICS.get_slice_timeout_all_timeouts.inc(); + crate::STORAGE_METRICS + .get_slice_timeout_all_timeouts + .increment(1); return Err( StorageErrorKind::Timeout.with_error(anyhow::anyhow!("internal timeout on get_slice")) ); From 9d57dcf177931138ed0fdc9658cf0bc936b72ab9 Mon Sep 17 00:00:00 2001 From: Shuhei Kitagawa Date: Thu, 30 Apr 2026 10:36:02 +0200 Subject: [PATCH 04/54] Add Prometheus metrics recorder --- quickwit/Cargo.lock | 2 + quickwit/quickwit-cli/Cargo.toml | 1 + quickwit/quickwit-cli/src/logger.rs | 49 ++++++++++++++-- quickwit/quickwit-cli/src/main.rs | 8 ++- quickwit/quickwit-common/Cargo.toml | 1 + quickwit/quickwit-common/src/metrics/mod.rs | 9 ++- .../quickwit-common/src/metrics/prometheus.rs | 56 
+++++++++++++++++++ .../quickwit-common/src/metrics/quickwit.rs | 4 -- quickwit/quickwit-common/src/metrics/tests.rs | 21 +++++++ 9 files changed, 134 insertions(+), 17 deletions(-) create mode 100644 quickwit/quickwit-common/src/metrics/prometheus.rs diff --git a/quickwit/Cargo.lock b/quickwit/Cargo.lock index 298ca6cc003..c41d3f86b76 100644 --- a/quickwit/Cargo.lock +++ b/quickwit/Cargo.lock @@ -8348,6 +8348,7 @@ dependencies = [ "metrics-exporter-dogstatsd", "metrics-exporter-otel", "metrics-exporter-prometheus", + "metrics-util", "numfmt", "openssl-probe 0.1.6", "opentelemetry", @@ -8478,6 +8479,7 @@ dependencies = [ "inventory", "itertools 0.14.0", "metrics", + "metrics-exporter-prometheus", "metrics-util", "pin-project", "pnet", diff --git a/quickwit/quickwit-cli/Cargo.toml b/quickwit/quickwit-cli/Cargo.toml index 17a8db55bf9..5b68573e2a9 100644 --- a/quickwit/quickwit-cli/Cargo.toml +++ b/quickwit/quickwit-cli/Cargo.toml @@ -59,6 +59,7 @@ metrics = { workspace = true } metrics-exporter-dogstatsd = { workspace = true } metrics-exporter-otel = { workspace = true } metrics-exporter-prometheus = { workspace = true } +metrics-util = { workspace = true } quickwit-actors = { workspace = true } quickwit-cluster = { workspace = true } diff --git a/quickwit/quickwit-cli/src/logger.rs b/quickwit/quickwit-cli/src/logger.rs index d1c994e893b..880757f894f 100644 --- a/quickwit/quickwit-cli/src/logger.rs +++ b/quickwit/quickwit-cli/src/logger.rs @@ -17,6 +17,12 @@ use std::sync::Arc; use std::{env, fmt}; use anyhow::Context; +#[cfg(not(test))] +use metrics_exporter_dogstatsd::DogStatsDRecorder; +#[cfg(not(test))] +use metrics_exporter_prometheus::{Matcher, PrometheusBuilder, PrometheusRecorder}; +#[cfg(not(test))] +use metrics_util::{MetricKindMask, layers::RouterBuilder}; use opentelemetry::trace::TracerProvider; use opentelemetry::{KeyValue, global}; use opentelemetry_appender_tracing::layer::OpenTelemetryTracingBridge; @@ -238,9 +244,42 @@ pub fn 
setup_logging_and_tracing( )) } -/// Set up DogStatsD metrics exporter and invariant recorder. +/// Set up the global metrics recorder and invariant recorder. #[cfg(not(test))] pub fn setup_metrics(build_info: &BuildInfo) -> anyhow::Result<()> { + let prometheus_recorder = install_prometheus_recorder()?; + let dogstatsd_recorder = install_dogstatsd_recorder(build_info)?; + + let mut router = RouterBuilder::from_recorder(metrics::NoopRecorder); + router + .add_route(MetricKindMask::ALL, "quickwit_", prometheus_recorder) + .add_route(MetricKindMask::ALL, "pomsky.invariant.", dogstatsd_recorder); + let recorder = router.build(); + metrics::set_global_recorder(recorder) + .map_err(|_| anyhow::anyhow!("failed to install global metrics recorder"))?; + quickwit_common::metrics::describe_metrics(); + Ok(()) +} + +#[cfg(not(test))] +fn install_prometheus_recorder() -> anyhow::Result { + let mut prometheus_builder = PrometheusBuilder::new(); + for (name, buckets) in quickwit_common::metrics::histogram_buckets() { + prometheus_builder = prometheus_builder + .set_buckets_for_metric(Matcher::Full(name.to_string()), &buckets) + .with_context(|| { + format!("failed to configure Prometheus histogram buckets for `{name}`") + })?; + } + let prometheus_recorder = prometheus_builder.build_recorder(); + let prometheus_handle = prometheus_recorder.handle(); + quickwit_common::metrics::set_prometheus_handle(prometheus_handle.clone()) + .map_err(anyhow::Error::msg)?; + Ok(prometheus_recorder) +} + +#[cfg(not(test))] +fn install_dogstatsd_recorder(build_info: &BuildInfo) -> anyhow::Result { // Reading both `CLOUDPREM_*` and `CP_*` env vars for backward compatibility. The former is // deprecated and can be removed after 2026-04-01. 
let host: String = quickwit_common::get_from_env_opt("CLOUDPREM_DOGSTATSD_SERVER_HOST", false) @@ -270,15 +309,15 @@ pub fn setup_metrics(build_info: &BuildInfo) -> anyhow::Result<()> { global_labels.push(::metrics::Label::new(label_key, label_val)); } } - metrics_exporter_dogstatsd::DogStatsDBuilder::default() + let recorder = metrics_exporter_dogstatsd::DogStatsDBuilder::default() .set_global_prefix("cloudprem") .with_global_labels(global_labels) .with_remote_address(addr) .context("failed to parse DogStatsD server address")? - .install() - .context("failed to register DogStatsD exporter")?; + .build() + .context("failed to build DogStatsD exporter")?; quickwit_dst::invariants::set_invariant_recorder(invariant_recorder); - Ok(()) + Ok(recorder) } #[cfg(not(test))] diff --git a/quickwit/quickwit-cli/src/main.rs b/quickwit/quickwit-cli/src/main.rs index 8828398edba..aa455a9db97 100644 --- a/quickwit/quickwit-cli/src/main.rs +++ b/quickwit/quickwit-cli/src/main.rs @@ -40,6 +40,11 @@ fn get_main_runtime_num_threads() -> usize { } fn main() -> anyhow::Result<()> { + #[cfg(not(test))] + let build_info = BuildInfo::get(); + #[cfg(not(test))] + quickwit_cli::logger::setup_metrics(build_info)?; + let main_runtime_num_threads: usize = get_main_runtime_num_threads(); let rt = tokio::runtime::Builder::new_multi_thread() .enable_all() @@ -101,9 +106,6 @@ async fn main_impl() -> anyhow::Result<()> { let (env_filter_reload_fn, tracer_provider_opt) = setup_logging_and_tracing(command.default_log_level(), ansi_colors, build_info)?; - #[cfg(not(test))] - quickwit_cli::logger::setup_metrics(build_info)?; - let return_code: i32 = if let Err(command_error) = command.execute(env_filter_reload_fn).await { error!(error=%command_error, "command failed"); eprintln!( diff --git a/quickwit/quickwit-common/Cargo.toml b/quickwit/quickwit-common/Cargo.toml index 9ed6850b695..5c3609f27b2 100644 --- a/quickwit/quickwit-common/Cargo.toml +++ b/quickwit/quickwit-common/Cargo.toml @@ -32,6 +32,7 @@ 
hyper-util = { workspace = true, optional = true } itertools = { workspace = true } inventory = { workspace = true } metrics = { workspace = true } +metrics-exporter-prometheus = { workspace = true } metrics-util = { workspace = true } pin-project = { workspace = true } pnet = { workspace = true } diff --git a/quickwit/quickwit-common/src/metrics/mod.rs b/quickwit/quickwit-common/src/metrics/mod.rs index 18c9db29bb9..195b6bd06ac 100644 --- a/quickwit/quickwit-common/src/metrics/mod.rs +++ b/quickwit/quickwit-common/src/metrics/mod.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +pub use ::prometheus::{exponential_buckets, linear_buckets}; #[doc(hidden)] pub use atomic_float as __atomic_float; #[doc(hidden)] @@ -22,11 +23,11 @@ pub use inventory as __inventory; pub use metrics as __metrics; pub use metrics::{CounterFn, GaugeFn, HistogramFn}; pub use metrics_util::MetricKind; -pub use prometheus::{exponential_buckets, linear_buckets}; mod counter; mod gauge; mod histogram; +mod prometheus; mod quickwit; pub use counter::Counter; @@ -36,10 +37,8 @@ pub use counter::CounterShadow; pub use gauge::GaugeShadow; pub use gauge::{Gauge, GaugeGuard}; pub use histogram::{Histogram, HistogramConfig, HistogramTimer}; -pub use quickwit::{ - InFlightDataGauges, MEMORY_METRICS, MemoryMetrics, index_label, metrics_text_payload, - register_info, -}; +pub use prometheus::{metrics_text_payload, set_prometheus_handle}; +pub use quickwit::{InFlightDataGauges, MEMORY_METRICS, MemoryMetrics, index_label, register_info}; #[cfg(test)] mod tests; diff --git a/quickwit/quickwit-common/src/metrics/prometheus.rs b/quickwit/quickwit-common/src/metrics/prometheus.rs new file mode 100644 index 00000000000..5b45bd29138 --- /dev/null +++ b/quickwit/quickwit-common/src/metrics/prometheus.rs @@ -0,0 +1,56 @@ +// Copyright 2021-Present Datadog, Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::sync::OnceLock; +#[cfg(not(test))] +use std::time::Duration; + +use metrics_exporter_prometheus::PrometheusHandle; + +static PROMETHEUS_HANDLE: OnceLock = OnceLock::new(); + +pub fn set_prometheus_handle(handle: PrometheusHandle) -> Result<(), String> { + #[cfg(not(test))] + let upkeep_handle = handle.clone(); + PROMETHEUS_HANDLE + .set(handle) + .map_err(|_| "Prometheus metrics renderer is already installed".to_string())?; + #[cfg(not(test))] + spawn_prometheus_upkeep(upkeep_handle)?; + Ok(()) +} + +pub fn metrics_text_payload() -> Result { + let handle = PROMETHEUS_HANDLE + .get() + .ok_or_else(|| "Prometheus metrics rendering is not installed yet".to_string())?; + Ok(handle.render()) +} + +#[cfg(not(test))] +fn spawn_prometheus_upkeep(handle: PrometheusHandle) -> Result<(), String> { + // Quickwit serves the existing `/metrics` route itself, so we build only the + // Prometheus recorder instead of using the exporter's HTTP listener. That lower-level + // API does not spawn the upkeep task that periodically drains histogram buffers. 
+ std::thread::Builder::new() + .name("metrics-exporter-prometheus-upkeep".to_string()) + .spawn(move || { + loop { + std::thread::sleep(Duration::from_secs(5)); + handle.run_upkeep(); + } + }) + .map(|_| ()) + .map_err(|error| format!("failed to spawn Prometheus metrics upkeep thread: {error}")) +} diff --git a/quickwit/quickwit-common/src/metrics/quickwit.rs b/quickwit/quickwit-common/src/metrics/quickwit.rs index 9254a819be8..df522935d6d 100644 --- a/quickwit/quickwit-common/src/metrics/quickwit.rs +++ b/quickwit/quickwit-common/src/metrics/quickwit.rs @@ -32,10 +32,6 @@ pub fn register_info(name: &'static str, help: &'static str, kvs: BTreeMap<&'sta }); } -pub fn metrics_text_payload() -> Result { - Err("Prometheus metrics rendering is not installed yet".to_string()) -} - #[derive(Clone)] pub struct MemoryMetrics { pub active_bytes: Gauge, diff --git a/quickwit/quickwit-common/src/metrics/tests.rs b/quickwit/quickwit-common/src/metrics/tests.rs index 9ec338f76c0..76d590f9f60 100644 --- a/quickwit/quickwit-common/src/metrics/tests.rs +++ b/quickwit/quickwit-common/src/metrics/tests.rs @@ -13,6 +13,7 @@ // limitations under the License. 
use metrics::with_local_recorder; +use metrics_exporter_prometheus::PrometheusBuilder; use metrics_util::debugging::{DebugValue, DebuggingRecorder}; use super::*; @@ -366,3 +367,23 @@ fn describe_metrics_sets_debugging_recorder_description() { .expect("described counter should be recorded"); assert_eq!(description.as_deref(), Some("described counter")); } + +#[test] +fn metrics_text_payload_renders_prometheus_handle() { + let recorder = PrometheusBuilder::new().build_recorder(); + set_prometheus_handle(recorder.handle()).expect("Prometheus handle should be set once"); + + with_local_recorder(&recorder, || { + let counter = counter!( + name: "prometheus_payload_counter", + description: "prometheus payload counter", + subsystem: "metrics_tests", + ); + describe_metrics(); + counter.increment(1); + }); + + let payload = metrics_text_payload().expect("Prometheus payload should render"); + assert!(payload.contains("# HELP quickwit_metrics_tests_prometheus_payload_counter")); + assert!(payload.contains("quickwit_metrics_tests_prometheus_payload_counter 1")); +} From 029187187c37d0e31f6fd8497fcd7db0b63d92a8 Mon Sep 17 00:00:00 2001 From: Shuhei Kitagawa Date: Thu, 30 Apr 2026 13:10:08 +0200 Subject: [PATCH 05/54] Fix CLI help before metrics setup --- quickwit/quickwit-cli/src/main.rs | 39 +++++++++++++++++-------------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/quickwit/quickwit-cli/src/main.rs b/quickwit/quickwit-cli/src/main.rs index aa455a9db97..dd2299ec0b2 100644 --- a/quickwit/quickwit-cli/src/main.rs +++ b/quickwit/quickwit-cli/src/main.rs @@ -40,6 +40,8 @@ fn get_main_runtime_num_threads() -> usize { } fn main() -> anyhow::Result<()> { + let (command, ansi_colors) = parse_cli_command(); + #[cfg(not(test))] let build_info = BuildInfo::get(); #[cfg(not(test))] @@ -57,7 +59,25 @@ fn main() -> anyhow::Result<()> { scrape_tokio_runtime_metrics(rt.handle(), "main"); - rt.block_on(main_impl()) + rt.block_on(main_impl(command, ansi_colors)) +} + +fn 
parse_cli_command() -> (CliCommand, bool) { + let about_text = about_text(); + let version_text = BuildInfo::get_version_text(); + + let app = build_cli().about(about_text).version(version_text); + let matches = app.get_matches(); + let ansi_colors = !matches.get_flag("no-color"); + + let command = match CliCommand::parse_cli_args(matches) { + Ok(command) => command, + Err(error) => { + eprintln!("failed to parse command line arguments: {error:?}"); + std::process::exit(1); + } + }; + (command, ansi_colors) } fn register_build_info_metric() { @@ -75,28 +95,13 @@ fn register_build_info_metric() { quickwit_common::metrics::register_info("build_info", "Quickwit's build info", build_kvs); } -async fn main_impl() -> anyhow::Result<()> { +async fn main_impl(command: CliCommand, ansi_colors: bool) -> anyhow::Result<()> { #[cfg(feature = "openssl-support")] unsafe { openssl_probe::init_openssl_env_vars() }; register_build_info_metric(); - let about_text = about_text(); - let version_text = BuildInfo::get_version_text(); - - let app = build_cli().about(about_text).version(version_text); - let matches = app.get_matches(); - let ansi_colors = !matches.get_flag("no-color"); - - let command = match CliCommand::parse_cli_args(matches) { - Ok(command) => command, - Err(error) => { - eprintln!("failed to parse command line arguments: {error:?}"); - std::process::exit(1); - } - }; - install_default_crypto_ring_provider(); #[cfg(feature = "jemalloc")] From 073d744f658fd87d7588276cddabbcdc7975bbf6 Mon Sep 17 00:00:00 2001 From: Luca Cominardi Date: Thu, 30 Apr 2026 16:18:21 +0200 Subject: [PATCH 06/54] Add quickwit-metrics crate Port the metricspp library into the quickwit workspace as a single crate with type-safe, zero-allocation metric declarations built on the metrics crate. 
Includes two-level caching (thread-local + global DashMap), observable counters/gauges with shadow atomics, RAII GaugeGuard, Labels templates, inventory-based metric discovery, integration tests, property-based hash tests, criterion benchmarks, the http_service example, and the inventory binary. Made-with: Cursor --- quickwit/Cargo.lock | 43 + quickwit/Cargo.toml | 5 + quickwit/quickwit-metrics/Cargo.toml | 47 + quickwit/quickwit-metrics/benches/baseline.rs | 314 ++++++ quickwit/quickwit-metrics/benches/cache.rs | 463 +++++++++ .../benches/quickwit_metrics.rs | 902 ++++++++++++++++++ quickwit/quickwit-metrics/bin/inventory.rs | 40 + .../quickwit-metrics/examples/http_service.rs | 218 +++++ .../quickwit-metrics/scripts/run_index.sh | 33 + quickwit/quickwit-metrics/src/counter.rs | 338 +++++++ quickwit/quickwit-metrics/src/gauge.rs | 378 ++++++++ quickwit/quickwit-metrics/src/histogram.rs | 289 ++++++ quickwit/quickwit-metrics/src/inner.rs | 311 ++++++ quickwit/quickwit-metrics/src/labels.rs | 117 +++ quickwit/quickwit-metrics/src/lib.rs | 373 ++++++++ quickwit/quickwit-metrics/tests/common.rs | 41 + quickwit/quickwit-metrics/tests/counter.rs | 242 +++++ quickwit/quickwit-metrics/tests/gauge.rs | 239 +++++ quickwit/quickwit-metrics/tests/histogram.rs | 118 +++ 19 files changed, 4511 insertions(+) create mode 100644 quickwit/quickwit-metrics/Cargo.toml create mode 100644 quickwit/quickwit-metrics/benches/baseline.rs create mode 100644 quickwit/quickwit-metrics/benches/cache.rs create mode 100644 quickwit/quickwit-metrics/benches/quickwit_metrics.rs create mode 100644 quickwit/quickwit-metrics/bin/inventory.rs create mode 100644 quickwit/quickwit-metrics/examples/http_service.rs create mode 100755 quickwit/quickwit-metrics/scripts/run_index.sh create mode 100644 quickwit/quickwit-metrics/src/counter.rs create mode 100644 quickwit/quickwit-metrics/src/gauge.rs create mode 100644 quickwit/quickwit-metrics/src/histogram.rs create mode 100644 
quickwit/quickwit-metrics/src/inner.rs create mode 100644 quickwit/quickwit-metrics/src/labels.rs create mode 100644 quickwit/quickwit-metrics/src/lib.rs create mode 100644 quickwit/quickwit-metrics/tests/common.rs create mode 100644 quickwit/quickwit-metrics/tests/counter.rs create mode 100644 quickwit/quickwit-metrics/tests/gauge.rs create mode 100644 quickwit/quickwit-metrics/tests/histogram.rs diff --git a/quickwit/Cargo.lock b/quickwit/Cargo.lock index c41d3f86b76..3d604ea3aaa 100644 --- a/quickwit/Cargo.lock +++ b/quickwit/Cargo.lock @@ -6133,11 +6133,17 @@ checksum = "5c0ca2990f7f78a72c4000ddce186db7d1b700477426563ee851c95ea3c0d0c4" dependencies = [ "base64 0.22.1", "evmap", + "http-body-util", + "hyper 1.9.0", + "hyper-util", "indexmap 2.14.0", + "ipnet", "metrics", "metrics-util", "quanta", "thiserror 2.0.18", + "tokio", + "tracing", ] [[package]] @@ -7098,6 +7104,16 @@ dependencies = [ "winapi 0.3.9", ] +[[package]] +name = "papaya" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "997ee03cd38c01469a7046643714f0ad28880bcb9e6679ff0666e24817ca19b7" +dependencies = [ + "equivalent", + "seize", +] + [[package]] name = "papergrid" version = "0.17.0" @@ -9025,6 +9041,23 @@ dependencies = [ "uuid", ] +[[package]] +name = "quickwit-metrics" +version = "0.8.0" +dependencies = [ + "atomic_float", + "const_format", + "criterion", + "dashmap 6.1.0", + "inventory", + "metrics", + "metrics-exporter-prometheus", + "metrics-util", + "papaya", + "proptest", + "rustc-hash", +] + [[package]] name = "quickwit-opentelemetry" version = "0.8.0" @@ -10473,6 +10506,16 @@ dependencies = [ "libc", ] +[[package]] +name = "seize" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b55fb86dfd3a2f5f76ea78310a88f96c4ea21a3031f8d212443d56123fd0521" +dependencies = [ + "libc", + "windows-sys 0.61.2", +] + [[package]] name = "semver" version = "1.0.28" diff --git a/quickwit/Cargo.toml 
b/quickwit/Cargo.toml index 59d3ce85d44..3d933943f72 100644 --- a/quickwit/Cargo.toml +++ b/quickwit/Cargo.toml @@ -26,6 +26,7 @@ members = [ "quickwit-lambda-server", "quickwit-macros", "quickwit-metastore", + "quickwit-metrics", # Disabling metastore-utils from the quickwit projects to ease build/deps. # We can reenable it when we need it. @@ -69,6 +70,7 @@ default-members = [ "quickwit-lambda-server", "quickwit-macros", "quickwit-metastore", + "quickwit-metrics", "quickwit-opentelemetry", "quickwit-parquet-engine", "quickwit-proto", @@ -90,6 +92,7 @@ authors = ["Quickwit, Inc. "] license = "Apache-2.0" [workspace.dependencies] +ahash = "0.8" anyhow = "1" arc-swap = "1.8" arrow = { version = "58", default-features = false, features = ["ipc"] } @@ -117,6 +120,7 @@ console-subscriber = "0.5" const_format = "0.2" criterion = { version = "0.8", features = ["async_tokio"] } cron = "0.16" +dashmap = "6.1" dialoguer = { version = "0.12", default-features = false } dotenvy = "0.15" dyn-clone = "1.0" @@ -382,6 +386,7 @@ quickwit-lambda-client = { path = "quickwit-lambda-client" } quickwit-lambda-server = { path = "quickwit-lambda-server" } quickwit-macros = { path = "quickwit-macros" } quickwit-metastore = { path = "quickwit-metastore" } +quickwit-metrics = { path = "quickwit-metrics" } quickwit-opentelemetry = { path = "quickwit-opentelemetry" } quickwit-parquet-engine = { path = "quickwit-parquet-engine" } quickwit-proto = { path = "quickwit-proto" } diff --git a/quickwit/quickwit-metrics/Cargo.toml b/quickwit/quickwit-metrics/Cargo.toml new file mode 100644 index 00000000000..52bb71cfddc --- /dev/null +++ b/quickwit/quickwit-metrics/Cargo.toml @@ -0,0 +1,47 @@ +[package] +name = "quickwit-metrics" +version = { workspace = true } +edition = { workspace = true } +homepage = { workspace = true } +documentation = { workspace = true } +repository = { workspace = true } +authors = { workspace = true } +license = { workspace = true } +description = "Type-safe, zero-allocation 
metric declarations built on the metrics crate" +categories = ["development-tools::profiling"] + +[dependencies] +metrics = { workspace = true } +metrics-util = { workspace = true } +inventory = { workspace = true } +const_format = { workspace = true } +atomic_float = { workspace = true } +dashmap = { workspace = true } +rustc-hash = { workspace = true } + +[dev-dependencies] +criterion = { workspace = true } +metrics-exporter-prometheus = { workspace = true, features = ["http-listener"] } +metrics-util = { workspace = true } +papaya = "0.2" +proptest = { workspace = true } + +[[bench]] +name = "baseline" +harness = false + +[[bench]] +name = "quickwit_metrics" +harness = false + +[[bench]] +name = "cache" +harness = false + +[[bin]] +name = "inventory" +path = "bin/inventory.rs" + +[[example]] +name = "http_service" +path = "examples/http_service.rs" diff --git a/quickwit/quickwit-metrics/benches/baseline.rs b/quickwit/quickwit-metrics/benches/baseline.rs new file mode 100644 index 00000000000..e486606ba46 --- /dev/null +++ b/quickwit/quickwit-metrics/benches/baseline.rs @@ -0,0 +1,314 @@ +// Copyright 2021-Present Datadog, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use std::sync::{LazyLock, OnceLock}; + +use criterion::{Criterion, criterion_group, criterion_main}; +use metrics::{Counter, Gauge, Histogram, Label, counter, gauge, histogram}; + +// --------------------------------------------------------------------------- +// Recorders +// --------------------------------------------------------------------------- + +struct NoopRecorder; + +impl metrics::Recorder for NoopRecorder { + fn describe_counter( + &self, + _key: metrics::KeyName, + _unit: Option, + _desc: metrics::SharedString, + ) { + } + fn describe_gauge( + &self, + _key: metrics::KeyName, + _unit: Option, + _desc: metrics::SharedString, + ) { + } + fn describe_histogram( + &self, + _key: metrics::KeyName, + _unit: Option, + _desc: metrics::SharedString, + ) { + } + fn register_counter(&self, _key: &metrics::Key, _metadata: &metrics::Metadata<'_>) -> Counter { + Counter::noop() + } + fn register_gauge(&self, _key: &metrics::Key, _metadata: &metrics::Metadata<'_>) -> Gauge { + Gauge::noop() + } + fn register_histogram( + &self, + _key: &metrics::Key, + _metadata: &metrics::Metadata<'_>, + ) -> Histogram { + Histogram::noop() + } +} + +// --------------------------------------------------------------------------- +// Recorder setup — RECORDER env-var is mandatory. 
+// +// RECORDER=noop cargo bench --bench baseline # noop recorder +// RECORDER=prometheus cargo bench --bench baseline # prometheus +// --------------------------------------------------------------------------- + +static INSTALL_RECORDER: OnceLock<()> = OnceLock::new(); + +fn install_recorder() { + INSTALL_RECORDER.get_or_init(|| { + let recorder = std::env::var("RECORDER") + .expect("RECORDER env var is required (set to \"noop\" or \"prometheus\")"); + + match recorder.to_ascii_lowercase().as_str() { + "noop" => { + eprintln!("[bench] Using noop recorder"); + metrics::set_global_recorder(NoopRecorder) + .expect("failed to install noop recorder"); + } + "prometheus" => { + eprintln!("[bench] Using prometheus recorder"); + let _handle = metrics_exporter_prometheus::PrometheusBuilder::new() + .install_recorder() + .expect("failed to install prometheus recorder"); + } + other => { + panic!("unknown RECORDER value \"{other}\", expected \"noop\" or \"prometheus\"") + } + } + }); +} + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +fn make_labels(n: usize) -> Vec { fn get_actor_inboxes_count_gauge_guard() -> GaugeGuard { let mut gauge_guard = GaugeGuard::from_gauge(&INBOX_GAUGE); - gauge_guard.add(1); + gauge_guard.increment(1.0); gauge_guard } @@ -452,7 +452,7 @@ mod tests { use std::mem; use std::time::Duration; - use quickwit_common::metrics::counter; + use quickwit_metrics::counter; use super::*; use crate::tests::{Ping, PingReceiverActor}; diff --git a/quickwit/quickwit-actors/src/spawn_builder.rs b/quickwit/quickwit-actors/src/spawn_builder.rs index f8359c3f32c..922cfc4d71d 100644 --- a/quickwit/quickwit-actors/src/spawn_builder.rs +++ b/quickwit/quickwit-actors/src/spawn_builder.rs @@ -16,7 +16,7 @@ use std::fmt; use std::time::Duration; use anyhow::Context; -use quickwit_common::metrics::Counter; +use quickwit_metrics::Counter; use 
sync_wrapper::SyncWrapper; use tokio::sync::watch; use tracing::{debug, error, info}; diff --git a/quickwit/quickwit-cli/Cargo.toml b/quickwit/quickwit-cli/Cargo.toml index 5b68573e2a9..b9d04fefc10 100644 --- a/quickwit/quickwit-cli/Cargo.toml +++ b/quickwit/quickwit-cli/Cargo.toml @@ -64,6 +64,7 @@ metrics-util = { workspace = true } quickwit-actors = { workspace = true } quickwit-cluster = { workspace = true } quickwit-common = { workspace = true } +quickwit-metrics = { workspace = true } quickwit-config = { workspace = true } quickwit-dst = { workspace = true } quickwit-index-management = { workspace = true } diff --git a/quickwit/quickwit-cli/src/logger.rs b/quickwit/quickwit-cli/src/logger.rs index a913c85f0c1..558e168f575 100644 --- a/quickwit/quickwit-cli/src/logger.rs +++ b/quickwit/quickwit-cli/src/logger.rs @@ -447,13 +447,13 @@ fn init_metrics_provider( let recorder = router.build(); metrics::set_global_recorder(recorder) .map_err(|_| anyhow::anyhow!("failed to install global metrics recorder"))?; - quickwit_common::metrics::describe_metrics(); + quickwit_metrics::describe_metrics(); Ok(meter_provider) } fn install_prometheus_recorder() -> anyhow::Result { let mut prometheus_builder = PrometheusBuilder::new(); - for (name, buckets) in quickwit_common::metrics::histogram_buckets() { + for (name, buckets) in quickwit_metrics::histogram_buckets() { prometheus_builder = prometheus_builder .set_buckets_for_metric(Matcher::Full(name.to_string()), &buckets) .with_context(|| { @@ -485,7 +485,7 @@ fn install_otlp_metrics_recorder( let meter = metrics_provider.meter("quickwit"); let recorder = OpenTelemetryRecorder::new(meter); - for (name, buckets) in quickwit_common::metrics::histogram_buckets() { + for (name, buckets) in quickwit_metrics::histogram_buckets() { recorder.set_histogram_bounds(&metrics::KeyName::from(name), buckets); } Ok((recorder, metrics_provider)) diff --git a/quickwit/quickwit-cli/src/metrics.rs b/quickwit/quickwit-cli/src/metrics.rs index 
fe1323fa886..572ffbe4f92 100644 --- a/quickwit/quickwit-cli/src/metrics.rs +++ b/quickwit/quickwit-cli/src/metrics.rs @@ -14,7 +14,8 @@ use std::sync::LazyLock; -use quickwit_common::metrics::{Histogram, exponential_buckets, histogram}; +use quickwit_common::metrics::exponential_buckets; +use quickwit_metrics::{Histogram, histogram}; pub struct CliMetrics { pub thread_unpark_duration_microseconds: Histogram, diff --git a/quickwit/quickwit-cluster/Cargo.toml b/quickwit/quickwit-cluster/Cargo.toml index ab01f587cb9..758b3af3cdb 100644 --- a/quickwit/quickwit-cluster/Cargo.toml +++ b/quickwit/quickwit-cluster/Cargo.toml @@ -29,6 +29,7 @@ tracing = { workspace = true } utoipa = { workspace = true } quickwit-common = { workspace = true } +quickwit-metrics = { workspace = true } quickwit-config = { workspace = true } quickwit-proto = { workspace = true } diff --git a/quickwit/quickwit-cluster/src/lib.rs b/quickwit/quickwit-cluster/src/lib.rs index af717fd3dfe..f52585dddc4 100644 --- a/quickwit/quickwit-cluster/src/lib.rs +++ b/quickwit/quickwit-cluster/src/lib.rs @@ -31,10 +31,10 @@ use chitchat::transport::{Socket, Transport, UdpSocket}; use chitchat::{ChitchatMessage, Serializable}; pub use chitchat::{FailureDetectorConfig, KeyChangeEvent, ListenerHandle}; pub use grpc_service::cluster_grpc_server; -use quickwit_common::metrics::Counter; use quickwit_common::tower::ClientGrpcConfig; use quickwit_config::service::QuickwitService; use quickwit_config::{GrpcConfig, NodeConfig, TlsConfig}; +use quickwit_metrics::Counter; use quickwit_proto::indexing::CpuCapacity; use quickwit_proto::ingest::ingester::IngesterStatus; use quickwit_proto::tonic::transport::{Certificate, ClientTlsConfig, Identity}; diff --git a/quickwit/quickwit-cluster/src/metrics.rs b/quickwit/quickwit-cluster/src/metrics.rs index f3a192eace8..2a21c4da0fd 100644 --- a/quickwit/quickwit-cluster/src/metrics.rs +++ b/quickwit/quickwit-cluster/src/metrics.rs @@ -18,7 +18,7 @@ use std::sync::{LazyLock, Weak}; use 
std::time::Duration; use chitchat::{Chitchat, ChitchatId}; -use quickwit_common::metrics::{Counter, Gauge, counter, gauge}; +use quickwit_metrics::{Counter, Gauge, counter, gauge}; use tokio::sync::Mutex; use crate::member::NodeStateExt; diff --git a/quickwit/quickwit-common/Cargo.toml b/quickwit/quickwit-common/Cargo.toml index 5c3609f27b2..7c7dc3cfbef 100644 --- a/quickwit/quickwit-common/Cargo.toml +++ b/quickwit/quickwit-common/Cargo.toml @@ -14,11 +14,9 @@ license.workspace = true anyhow = { workspace = true } async-speed-limit = { workspace = true } async-trait = { workspace = true } -atomic_float = { workspace = true } backtrace = { workspace = true, optional = true } bytesize = { workspace = true } coarsetime = { workspace = true } -const_format = { workspace = true } dyn-clone = { workspace = true } env_logger = { workspace = true } fnv = { workspace = true } @@ -30,13 +28,12 @@ http = { workspace = true } hyper = { workspace = true } hyper-util = { workspace = true, optional = true } itertools = { workspace = true } -inventory = { workspace = true } metrics = { workspace = true } metrics-exporter-prometheus = { workspace = true } -metrics-util = { workspace = true } pin-project = { workspace = true } pnet = { workspace = true } prometheus = { workspace = true } +quickwit-metrics = { workspace = true } rand = { workspace = true } rayon = { workspace = true } regex = { workspace = true } @@ -68,6 +65,7 @@ jemalloc-profiled = [ [dev-dependencies] hyper-util = { workspace = true } +metrics-util = { workspace = true } proptest = { workspace = true } serde_json = { workspace = true } serial_test = { workspace = true } diff --git a/quickwit/quickwit-common/src/io.rs b/quickwit/quickwit-common/src/io.rs index 466c16d5fd1..195fd205a52 100644 --- a/quickwit/quickwit-common/src/io.rs +++ b/quickwit/quickwit-common/src/io.rs @@ -34,9 +34,9 @@ use async_speed_limit::clock::StandardClock; use async_speed_limit::limiter::Consume; use bytesize::ByteSize; use 
pin_project::pin_project; +use quickwit_metrics::{Counter, counter}; use tokio::io::AsyncWrite; -use crate::metrics::{Counter, counter}; use crate::{KillSwitch, Progress, ProtectedZoneGuard}; // Max 1MB at a time. @@ -341,6 +341,7 @@ mod tests { use std::time::Duration; use bytesize::ByteSize; + use quickwit_metrics::counter; use tokio::io::{AsyncWriteExt, sink}; use tokio::time::Instant; @@ -348,7 +349,14 @@ mod tests { #[tokio::test] async fn test_controlled_writer_limited_async() { - let io_controls = IoControls::default().set_throughput_limit(ByteSize::mb(2)); + let io_controls = IoControls::default() + .set_bytes_counter(counter!( + name: "test_controlled_writer_limited_async_num_bytes", + description: "Test bytes counter.", + subsystem: "", + observable: true, + )) + .set_throughput_limit(ByteSize::mb(2)); let mut controlled_write = io_controls.clone().wrap_write(sink()); let buf = vec![44u8; 1_000]; let start = Instant::now(); @@ -365,7 +373,12 @@ mod tests { #[tokio::test] async fn test_controlled_writer_no_limit_async() { - let io_controls = IoControls::default(); + let io_controls = IoControls::default().set_bytes_counter(counter!( + name: "test_controlled_writer_no_limit_async_num_bytes", + description: "Test bytes counter.", + subsystem: "", + observable: true, + )); let mut controlled_write = io_controls.clone().wrap_write(sink()); let buf = vec![44u8; 1_000]; let start = Instant::now(); @@ -381,7 +394,14 @@ mod tests { #[test] fn test_controlled_writer_limited_sync() { - let io_controls = IoControls::default().set_throughput_limit(ByteSize::mb(2)); + let io_controls = IoControls::default() + .set_bytes_counter(counter!( + name: "test_controlled_writer_limited_sync_num_bytes", + description: "Test bytes counter.", + subsystem: "", + observable: true, + )) + .set_throughput_limit(ByteSize::mb(2)); let mut controlled_write = io_controls.clone().wrap_write(std::io::sink()); let buf = vec![44u8; 1_000]; let start = Instant::now(); @@ -398,7 +418,12 @@ mod 
tests { #[test] fn test_controlled_writer_no_limit_sync() { - let io_controls = IoControls::default(); + let io_controls = IoControls::default().set_bytes_counter(counter!( + name: "test_controlled_writer_no_limit_sync_num_bytes", + description: "Test bytes counter.", + subsystem: "", + observable: true, + )); let mut controlled_write = io_controls.clone().wrap_write(std::io::sink()); let buf = vec![44u8; 1_000]; let start = Instant::now(); diff --git a/quickwit/quickwit-common/src/metrics/quickwit.rs b/quickwit/quickwit-common/src/metrics.rs similarity index 61% rename from quickwit/quickwit-common/src/metrics/quickwit.rs rename to quickwit/quickwit-common/src/metrics.rs index df522935d6d..b32d21bf330 100644 --- a/quickwit/quickwit-common/src/metrics/quickwit.rs +++ b/quickwit/quickwit-common/src/metrics.rs @@ -14,9 +14,51 @@ use std::collections::BTreeMap; use std::sync::{LazyLock, OnceLock}; +#[cfg(not(test))] +use std::time::Duration; -use super::{Gauge, SYSTEM}; -use crate::gauge; +use metrics_exporter_prometheus::PrometheusHandle; +pub use prometheus::{exponential_buckets, linear_buckets}; +use quickwit_metrics::{Gauge, gauge}; + +const SYSTEM: &str = "quickwit"; + +static PROMETHEUS_HANDLE: OnceLock = OnceLock::new(); + +pub fn set_prometheus_handle(handle: PrometheusHandle) -> Result<(), String> { + #[cfg(not(test))] + let upkeep_handle = handle.clone(); + PROMETHEUS_HANDLE + .set(handle) + .map_err(|_| "Prometheus metrics renderer is already installed".to_string())?; + #[cfg(not(test))] + spawn_prometheus_upkeep(upkeep_handle)?; + Ok(()) +} + +pub fn metrics_text_payload() -> Result { + let handle = PROMETHEUS_HANDLE + .get() + .ok_or_else(|| "Prometheus metrics rendering is not installed yet".to_string())?; + Ok(handle.render()) +} + +#[cfg(not(test))] +fn spawn_prometheus_upkeep(handle: PrometheusHandle) -> Result<(), String> { + // Quickwit serves the existing `/metrics` route itself, so we build only the + // Prometheus recorder instead of using the 
exporter's HTTP listener. That lower-level + // API does not spawn the upkeep task that periodically drains histogram buffers. + std::thread::Builder::new() + .name("metrics-exporter-prometheus-upkeep".to_string()) + .spawn(move || { + loop { + std::thread::sleep(Duration::from_secs(5)); + handle.run_upkeep(); + } + }) + .map(|_| ()) + .map_err(|error| format!("failed to spawn Prometheus metrics upkeep thread: {error}")) +} pub fn register_info(name: &'static str, help: &'static str, kvs: BTreeMap<&'static str, String>) { let key_name = metric_key_name("", name); @@ -182,3 +224,63 @@ fn metric_key_name(subsystem: &str, name: &str) -> String { format!("{SYSTEM}_{subsystem}_{name}") } } + +#[cfg(test)] +mod tests { + use metrics::with_local_recorder; + use metrics_exporter_prometheus::PrometheusBuilder; + use metrics_util::debugging::{DebugValue, DebuggingRecorder}; + + use super::*; + + #[test] + fn metrics_text_payload_renders_prometheus_handle() { + let recorder = PrometheusBuilder::new().build_recorder(); + set_prometheus_handle(recorder.handle()).expect("Prometheus handle should be set once"); + + with_local_recorder(&recorder, || { + register_info( + "prometheus_payload_info", + "prometheus payload info", + BTreeMap::new(), + ); + }); + + let payload = metrics_text_payload().expect("Prometheus payload should render"); + assert!(payload.contains("# HELP quickwit_prometheus_payload_info")); + assert!(payload.contains("quickwit_prometheus_payload_info 1")); + } + + #[test] + fn register_info_records_labeled_counter() { + let recorder = DebuggingRecorder::new(); + let snapshotter = recorder.snapshotter(); + with_local_recorder(&recorder, || { + let labels = BTreeMap::from([("version", "test".to_string())]); + register_info("build_info_test", "build info test", labels); + }); + + let snapshot = snapshotter.snapshot().into_vec(); + let (_, _, description, value) = snapshot + .into_iter() + .find(|(composite_key, _, _, _)| { + let (_, key) = 
composite_key.clone().into_parts(); + key.name() == "quickwit_build_info_test" + && key + .labels() + .any(|label| label.key() == "version" && label.value() == "test") + }) + .expect("build info metric should be recorded"); + assert_eq!(description.as_deref(), Some("build info test")); + assert_eq!(value, DebugValue::Counter(1)); + } + + #[test] + fn bucket_helpers_are_reexported() { + assert_eq!(linear_buckets(0.0, 1.0, 3).unwrap(), vec![0.0, 1.0, 2.0]); + assert_eq!( + exponential_buckets(1.0, 2.0, 3).unwrap(), + vec![1.0, 2.0, 4.0] + ); + } +} diff --git a/quickwit/quickwit-common/src/metrics/counter.rs b/quickwit/quickwit-common/src/metrics/counter.rs deleted file mode 100644 index 3c93f91e431..00000000000 --- a/quickwit/quickwit-common/src/metrics/counter.rs +++ /dev/null @@ -1,267 +0,0 @@ -// Copyright 2021-Present Datadog, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -use std::sync::Arc; -use std::sync::atomic::{AtomicU64, Ordering}; - -use metrics::CounterFn; - -use super::MetricInfo; - -#[doc(hidden)] -pub enum CounterShadow { - Noop, - Ref(&'static AtomicU64), - Arc(Arc), -} - -impl Clone for CounterShadow { - fn clone(&self) -> Self { - match self { - Self::Noop => Self::Noop, - Self::Ref(value) => Self::Ref(value), - Self::Arc(value) => Self::Arc(Arc::clone(value)), - } - } -} - -impl CounterShadow { - fn increment(&self, value: u64) { - match self { - Self::Noop => {} - Self::Ref(atomic) => { - atomic.fetch_add(value, Ordering::Relaxed); - } - Self::Arc(atomic) => { - atomic.fetch_add(value, Ordering::Relaxed); - } - } - } - - fn absolute(&self, value: u64) { - match self { - Self::Noop => {} - Self::Ref(atomic) => atomic.store(value, Ordering::Relaxed), - Self::Arc(atomic) => atomic.store(value, Ordering::Relaxed), - } - } - - fn get(&self) -> u64 { - match self { - Self::Noop => u64::MAX, - Self::Ref(atomic) => atomic.load(Ordering::Relaxed), - Self::Arc(atomic) => atomic.load(Ordering::Relaxed), - } - } -} - -#[derive(Clone)] -pub struct Counter { - pub(crate) info: &'static MetricInfo, - pub(crate) key: metrics::Key, - pub(crate) inner: metrics::Counter, - pub(crate) shadow: CounterShadow, -} - -impl std::fmt::Debug for Counter { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("Counter").field("key", &self.key).finish() - } -} - -impl Counter { - #[doc(hidden)] - pub fn __new( - info: &'static MetricInfo, - key: metrics::Key, - inner: metrics::Counter, - shadow: CounterShadow, - ) -> Self { - Self { - info, - key, - inner, - shadow, - } - } - - #[doc(hidden)] - pub const fn __info(&self) -> &'static MetricInfo { - self.info - } - - pub const fn key(&self) -> &metrics::Key { - &self.key - } - - pub fn increment(&self, value: u64) { - self.shadow.increment(value); - self.inner.increment(value); - } - - pub fn absolute(&self, value: u64) { - self.shadow.absolute(value); - 
self.inner.absolute(value); - } - - pub fn get(&self) -> u64 { - self.shadow.get() - } -} - -impl CounterFn for Counter { - fn increment(&self, value: u64) { - Self::increment(self, value); - } - - fn absolute(&self, value: u64) { - Self::absolute(self, value); - } -} - -#[macro_export] -macro_rules! counter { - ( - name: $name:literal, - description: $description:literal, - subsystem: "" - $(, $label:literal => $value:literal)* $(,)? - ) => {{ - $crate::key_info_metadata!( - kind: $crate::metrics::MetricKind::Counter, - observable: false, - name: $name, - description: $description, - subsystem: "" - $(, $label => $value)* - ); - let inner = $crate::metrics::__metrics::with_recorder(|recorder| { - recorder.register_counter(&KEY, &METADATA) - }); - $crate::metrics::Counter::__new( - &INFO, - KEY.clone(), - inner, - $crate::metrics::CounterShadow::Noop, - ) - }}; - - ( - name: $name:literal, - description: $description:literal, - subsystem: "", - observable: true - $(, $label:literal => $value:literal)* $(,)? - ) => {{ - $crate::key_info_metadata!( - kind: $crate::metrics::MetricKind::Counter, - observable: true, - name: $name, - description: $description, - subsystem: "" - $(, $label => $value)* - ); - static SHADOW: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0); - let inner = $crate::metrics::__metrics::with_recorder(|recorder| { - recorder.register_counter(&KEY, &METADATA) - }); - $crate::metrics::Counter::__new( - &INFO, - KEY.clone(), - inner, - $crate::metrics::CounterShadow::Ref(&SHADOW), - ) - }}; - - ( - name: $name:literal, - description: $description:literal, - subsystem: $subsystem:literal - $(, $label:literal => $value:literal)* $(,)? 
- ) => {{ - $crate::key_info_metadata!( - kind: $crate::metrics::MetricKind::Counter, - observable: false, - name: $name, - description: $description, - subsystem: $subsystem - $(, $label => $value)* - ); - let inner = $crate::metrics::__metrics::with_recorder(|recorder| { - recorder.register_counter(&KEY, &METADATA) - }); - $crate::metrics::Counter::__new( - &INFO, - KEY.clone(), - inner, - $crate::metrics::CounterShadow::Noop, - ) - }}; - - ( - name: $name:literal, - description: $description:literal, - subsystem: $subsystem:literal, - observable: true - $(, $label:literal => $value:literal)* $(,)? - ) => {{ - $crate::key_info_metadata!( - kind: $crate::metrics::MetricKind::Counter, - observable: true, - name: $name, - description: $description, - subsystem: $subsystem - $(, $label => $value)* - ); - static SHADOW: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0); - let inner = $crate::metrics::__metrics::with_recorder(|recorder| { - recorder.register_counter(&KEY, &METADATA) - }); - $crate::metrics::Counter::__new( - &INFO, - KEY.clone(), - inner, - $crate::metrics::CounterShadow::Ref(&SHADOW), - ) - }}; - - ( - parent: $parent:expr, - $($label:literal => $value:expr),+ $(,)? 
- ) => {{ - let parent_key = $parent.key(); - let mut labels = - Vec::with_capacity(parent_key.labels().len() + $crate::count!($($label)*)); - labels.extend(parent_key.labels().cloned()); - $(labels.push($crate::metrics::__metrics::Label::new($label, $value));)+ - - let info = $parent.__info(); - let key = $crate::metrics::__metrics::Key::from_parts(info.key_name, labels); - let metadata = $crate::metadata!(info.subsystem); - - let inner = $crate::metrics::__metrics::with_recorder(|recorder| { - recorder.register_counter(&key, &metadata) - }); - - let shadow = if info.observable { - $crate::metrics::CounterShadow::Arc(std::sync::Arc::new( - std::sync::atomic::AtomicU64::new(0), - )) - } else { - $crate::metrics::CounterShadow::Noop - }; - - $crate::metrics::Counter::__new(info, key, inner, shadow) - }}; -} diff --git a/quickwit/quickwit-common/src/metrics/gauge.rs b/quickwit/quickwit-common/src/metrics/gauge.rs deleted file mode 100644 index 635413eff6d..00000000000 --- a/quickwit/quickwit-common/src/metrics/gauge.rs +++ /dev/null @@ -1,344 +0,0 @@ -// Copyright 2021-Present Datadog, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -use std::sync::Arc; -use std::sync::atomic::Ordering; - -use atomic_float::AtomicF64; -use metrics::GaugeFn; - -use super::MetricInfo; - -#[doc(hidden)] -pub enum GaugeShadow { - Noop, - Ref(&'static AtomicF64), - Arc(Arc), -} - -impl Clone for GaugeShadow { - fn clone(&self) -> Self { - match self { - Self::Noop => Self::Noop, - Self::Ref(value) => Self::Ref(value), - Self::Arc(value) => Self::Arc(Arc::clone(value)), - } - } -} - -impl GaugeShadow { - fn increment(&self, value: f64) { - match self { - Self::Noop => {} - Self::Ref(atomic) => { - atomic.fetch_add(value, Ordering::Relaxed); - } - Self::Arc(atomic) => { - atomic.fetch_add(value, Ordering::Relaxed); - } - } - } - - fn decrement(&self, value: f64) { - match self { - Self::Noop => {} - Self::Ref(atomic) => { - atomic.fetch_sub(value, Ordering::Relaxed); - } - Self::Arc(atomic) => { - atomic.fetch_sub(value, Ordering::Relaxed); - } - } - } - - fn set(&self, value: f64) { - match self { - Self::Noop => {} - Self::Ref(atomic) => atomic.store(value, Ordering::Relaxed), - Self::Arc(atomic) => atomic.store(value, Ordering::Relaxed), - } - } - - fn get(&self) -> f64 { - match self { - Self::Noop => f64::NAN, - Self::Ref(atomic) => atomic.load(Ordering::Relaxed), - Self::Arc(atomic) => atomic.load(Ordering::Relaxed), - } - } -} - -#[derive(Clone)] -pub struct Gauge { - pub(crate) info: &'static MetricInfo, - pub(crate) key: metrics::Key, - pub(crate) inner: metrics::Gauge, - pub(crate) shadow: GaugeShadow, -} - -impl std::fmt::Debug for Gauge { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("Gauge").field("key", &self.key).finish() - } -} - -impl Gauge { - #[doc(hidden)] - pub fn __new( - info: &'static MetricInfo, - key: metrics::Key, - inner: metrics::Gauge, - shadow: GaugeShadow, - ) -> Self { - Self { - info, - key, - inner, - shadow, - } - } - - #[doc(hidden)] - pub const fn __info(&self) -> &'static MetricInfo { - self.info - } - - pub const fn key(&self) -> 
&metrics::Key { - &self.key - } - - pub fn increment(&self, value: f64) { - self.shadow.increment(value); - self.inner.increment(value); - } - - pub fn decrement(&self, value: f64) { - self.shadow.decrement(value); - self.inner.decrement(value); - } - - pub fn set(&self, value: f64) { - self.shadow.set(value); - self.inner.set(value); - } - - pub fn get(&self) -> f64 { - self.shadow.get() - } -} - -impl GaugeFn for Gauge { - fn increment(&self, value: f64) { - Self::increment(self, value); - } - - fn decrement(&self, value: f64) { - Self::decrement(self, value); - } - - fn set(&self, value: f64) { - Self::set(self, value); - } -} - -#[derive(Debug)] -pub struct GaugeGuard { - gauge: Gauge, - delta: f64, -} - -impl GaugeGuard { - pub fn from_gauge(gauge: &Gauge) -> Self { - Self { - gauge: gauge.clone(), - delta: 0.0, - } - } - - pub fn increment(gauge: &Gauge, value: f64) -> Self { - let mut guard = Self::from_gauge(gauge); - guard.add_f64(value); - guard - } - - pub fn add(&mut self, delta: i64) { - self.add_f64(delta as f64); - } - - pub fn sub(&mut self, delta: i64) { - self.sub_f64(delta as f64); - } - - pub fn add_f64(&mut self, delta: f64) { - self.gauge.increment(delta); - self.delta += delta; - } - - pub fn sub_f64(&mut self, delta: f64) { - self.gauge.decrement(delta); - self.delta -= delta; - } - - pub fn get(&self) -> i64 { - self.delta as i64 - } - - pub fn value(&self) -> f64 { - self.delta - } -} - -impl Drop for GaugeGuard { - fn drop(&mut self) { - self.gauge.decrement(self.delta); - } -} - -#[macro_export] -macro_rules! gauge { - ( - name: $name:literal, - description: $description:literal, - subsystem: "" - $(, $label:literal => $value:literal)* $(,)? 
- ) => {{ - $crate::key_info_metadata!( - kind: $crate::metrics::MetricKind::Gauge, - observable: false, - name: $name, - description: $description, - subsystem: "" - $(, $label => $value)* - ); - let inner = $crate::metrics::__metrics::with_recorder(|recorder| { - recorder.register_gauge(&KEY, &METADATA) - }); - $crate::metrics::Gauge::__new( - &INFO, - KEY.clone(), - inner, - $crate::metrics::GaugeShadow::Noop, - ) - }}; - - ( - name: $name:literal, - description: $description:literal, - subsystem: "", - observable: true - $(, $label:literal => $value:literal)* $(,)? - ) => {{ - $crate::key_info_metadata!( - kind: $crate::metrics::MetricKind::Gauge, - observable: true, - name: $name, - description: $description, - subsystem: "" - $(, $label => $value)* - ); - static SHADOW: $crate::metrics::__atomic_float::AtomicF64 = - $crate::metrics::__atomic_float::AtomicF64::new(0.0); - let inner = $crate::metrics::__metrics::with_recorder(|recorder| { - recorder.register_gauge(&KEY, &METADATA) - }); - $crate::metrics::Gauge::__new( - &INFO, - KEY.clone(), - inner, - $crate::metrics::GaugeShadow::Ref(&SHADOW), - ) - }}; - - ( - name: $name:literal, - description: $description:literal, - subsystem: $subsystem:literal - $(, $label:literal => $value:literal)* $(,)? - ) => {{ - $crate::key_info_metadata!( - kind: $crate::metrics::MetricKind::Gauge, - observable: false, - name: $name, - description: $description, - subsystem: $subsystem - $(, $label => $value)* - ); - let inner = $crate::metrics::__metrics::with_recorder(|recorder| { - recorder.register_gauge(&KEY, &METADATA) - }); - $crate::metrics::Gauge::__new( - &INFO, - KEY.clone(), - inner, - $crate::metrics::GaugeShadow::Noop, - ) - }}; - - ( - name: $name:literal, - description: $description:literal, - subsystem: $subsystem:literal, - observable: true - $(, $label:literal => $value:literal)* $(,)? 
- ) => {{ - $crate::key_info_metadata!( - kind: $crate::metrics::MetricKind::Gauge, - observable: true, - name: $name, - description: $description, - subsystem: $subsystem - $(, $label => $value)* - ); - static SHADOW: $crate::metrics::__atomic_float::AtomicF64 = - $crate::metrics::__atomic_float::AtomicF64::new(0.0); - let inner = $crate::metrics::__metrics::with_recorder(|recorder| { - recorder.register_gauge(&KEY, &METADATA) - }); - $crate::metrics::Gauge::__new( - &INFO, - KEY.clone(), - inner, - $crate::metrics::GaugeShadow::Ref(&SHADOW), - ) - }}; - - ( - parent: $parent:expr, - $($label:literal => $value:expr),+ $(,)? - ) => {{ - let parent_key = $parent.key(); - let mut labels = - Vec::with_capacity(parent_key.labels().len() + $crate::count!($($label)*)); - labels.extend(parent_key.labels().cloned()); - $(labels.push($crate::metrics::__metrics::Label::new($label, $value));)+ - - let info = $parent.__info(); - let key = $crate::metrics::__metrics::Key::from_parts(info.key_name, labels); - let metadata = $crate::metadata!(info.subsystem); - - let inner = $crate::metrics::__metrics::with_recorder(|recorder| { - recorder.register_gauge(&key, &metadata) - }); - - let shadow = if info.observable { - $crate::metrics::GaugeShadow::Arc(std::sync::Arc::new( - $crate::metrics::__atomic_float::AtomicF64::new(0.0), - )) - } else { - $crate::metrics::GaugeShadow::Noop - }; - - $crate::metrics::Gauge::__new(info, key, inner, shadow) - }}; -} diff --git a/quickwit/quickwit-common/src/metrics/histogram.rs b/quickwit/quickwit-common/src/metrics/histogram.rs deleted file mode 100644 index 2b37acf1c8f..00000000000 --- a/quickwit/quickwit-common/src/metrics/histogram.rs +++ /dev/null @@ -1,188 +0,0 @@ -// Copyright 2021-Present Datadog, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use std::time::Instant; - -use metrics::HistogramFn; - -use super::MetricInfo; - -#[doc(hidden)] -#[derive(Clone, Copy)] -pub struct HistogramConfig { - pub info: &'static MetricInfo, - pub buckets_fn: fn() -> Vec, -} - -inventory::collect!(HistogramConfig); - -#[derive(Clone)] -pub struct Histogram { - pub(crate) info: &'static HistogramConfig, - pub(crate) key: metrics::Key, - pub(crate) inner: metrics::Histogram, -} - -impl std::fmt::Debug for Histogram { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("Histogram").field("key", &self.key).finish() - } -} - -impl Histogram { - #[doc(hidden)] - pub fn __new( - info: &'static HistogramConfig, - key: metrics::Key, - inner: metrics::Histogram, - ) -> Self { - Self { info, key, inner } - } - - #[doc(hidden)] - pub const fn __info(&self) -> &'static HistogramConfig { - self.info - } - - pub const fn key(&self) -> &metrics::Key { - &self.key - } - - pub fn record(&self, value: f64) { - self.inner.record(value); - } - - pub fn start_timer(&self) -> HistogramTimer { - HistogramTimer::__new(self.clone()) - } -} - -impl HistogramFn for Histogram { - fn record(&self, value: f64) { - Self::record(self, value); - } -} - -#[derive(Debug)] -pub struct HistogramTimer { - histogram: Histogram, - start: Instant, - observed: bool, -} - -impl HistogramTimer { - fn __new(histogram: Histogram) -> Self { - Self { - histogram, - start: Instant::now(), - observed: false, - } - } - - pub fn observe_duration(self) { - let mut timer = self; - timer.observed = true; - 
timer.histogram.record(timer.start.elapsed().as_secs_f64()); - } -} - -impl Drop for HistogramTimer { - fn drop(&mut self) { - if !self.observed { - self.histogram.record(self.start.elapsed().as_secs_f64()); - } - } -} - -#[macro_export] -macro_rules! histogram { - ( - name: $name:literal, - description: $description:literal, - subsystem: "", - buckets: $buckets:expr - $(, $label:literal => $value:literal)* $(,)? - ) => {{ - $crate::key_info_metadata!( - kind: $crate::metrics::MetricKind::Histogram, - observable: false, - name: $name, - description: $description, - subsystem: "" - $(, $label => $value)* - ); - - static HISTOGRAM_CONFIG: $crate::metrics::HistogramConfig = - $crate::metrics::HistogramConfig { - info: &INFO, - buckets_fn: || $buckets, - }; - $crate::metrics::__inventory::submit!(HISTOGRAM_CONFIG); - - let inner = $crate::metrics::__metrics::with_recorder(|recorder| { - recorder.register_histogram(&KEY, &METADATA) - }); - $crate::metrics::Histogram::__new(&HISTOGRAM_CONFIG, KEY.clone(), inner) - }}; - - ( - name: $name:literal, - description: $description:literal, - subsystem: $subsystem:literal, - buckets: $buckets:expr - $(, $label:literal => $value:literal)* $(,)? - ) => {{ - $crate::key_info_metadata!( - kind: $crate::metrics::MetricKind::Histogram, - observable: false, - name: $name, - description: $description, - subsystem: $subsystem - $(, $label => $value)* - ); - - static HISTOGRAM_CONFIG: $crate::metrics::HistogramConfig = - $crate::metrics::HistogramConfig { - info: &INFO, - buckets_fn: || $buckets, - }; - $crate::metrics::__inventory::submit!(HISTOGRAM_CONFIG); - - let inner = $crate::metrics::__metrics::with_recorder(|recorder| { - recorder.register_histogram(&KEY, &METADATA) - }); - $crate::metrics::Histogram::__new(&HISTOGRAM_CONFIG, KEY.clone(), inner) - }}; - - ( - parent: $parent:expr, - $($label:literal => $value:expr),+ $(,)? 
- ) => {{ - let parent_key = $parent.key(); - let mut labels = - Vec::with_capacity(parent_key.labels().len() + $crate::count!($($label)*)); - labels.extend(parent_key.labels().cloned()); - $(labels.push($crate::metrics::__metrics::Label::new($label, $value));)+ - - let info = $parent.__info(); - let key = $crate::metrics::__metrics::Key::from_parts(info.info.key_name, labels); - let metadata = $crate::metadata!(info.info.subsystem); - - let inner = $crate::metrics::__metrics::with_recorder(|recorder| { - recorder.register_histogram(&key, &metadata) - }); - - $crate::metrics::Histogram::__new(info, key, inner) - }}; -} diff --git a/quickwit/quickwit-common/src/metrics/mod.rs b/quickwit/quickwit-common/src/metrics/mod.rs deleted file mode 100644 index 195b6bd06ac..00000000000 --- a/quickwit/quickwit-common/src/metrics/mod.rs +++ /dev/null @@ -1,180 +0,0 @@ -// Copyright 2021-Present Datadog, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -pub use ::prometheus::{exponential_buckets, linear_buckets}; -#[doc(hidden)] -pub use atomic_float as __atomic_float; -#[doc(hidden)] -pub use const_format::concatcp as __concatcp; -#[doc(hidden)] -pub use inventory as __inventory; -#[doc(hidden)] -pub use metrics as __metrics; -pub use metrics::{CounterFn, GaugeFn, HistogramFn}; -pub use metrics_util::MetricKind; - -mod counter; -mod gauge; -mod histogram; -mod prometheus; -mod quickwit; - -pub use counter::Counter; -#[doc(hidden)] -pub use counter::CounterShadow; -#[doc(hidden)] -pub use gauge::GaugeShadow; -pub use gauge::{Gauge, GaugeGuard}; -pub use histogram::{Histogram, HistogramConfig, HistogramTimer}; -pub use prometheus::{metrics_text_payload, set_prometheus_handle}; -pub use quickwit::{InFlightDataGauges, MEMORY_METRICS, MemoryMetrics, index_label, register_info}; - -#[cfg(test)] -mod tests; - -/// System-level prefix prepended to every metric name. -pub const SYSTEM: &str = "quickwit"; - -#[doc(hidden)] -#[derive(Clone, Copy)] -pub struct MetricInfo { - pub name: &'static str, - pub subsystem: &'static str, - pub key_name: &'static str, - pub description: &'static str, - pub kind: MetricKind, - pub observable: bool, -} - -inventory::collect!(MetricInfo); - -pub fn describe_metrics() { - metrics::with_recorder(|recorder| { - for info in inventory::iter:: { - let key_name = metrics::KeyName::from_const_str(info.key_name); - let description: metrics::SharedString = info.description.into(); - match info.kind { - MetricKind::Counter => recorder.describe_counter(key_name, None, description), - MetricKind::Gauge => recorder.describe_gauge(key_name, None, description), - MetricKind::Histogram => recorder.describe_histogram(key_name, None, description), - } - } - }); -} - -pub fn metrics_info() -> impl Iterator { - inventory::iter::.into_iter() -} - -pub fn histogram_buckets() -> impl Iterator)> { - inventory::iter:: - .into_iter() - .map(|config| (config.info.key_name, (config.buckets_fn)())) -} - 
-#[doc(hidden)] -#[macro_export] -macro_rules! key_name { - ("", $name:literal) => { - $crate::metrics::__concatcp!($crate::metrics::SYSTEM, "_", $name) - }; - ($subsystem:literal, $name:literal) => { - $crate::metrics::__concatcp!($crate::metrics::SYSTEM, "_", $subsystem, "_", $name) - }; -} - -#[doc(hidden)] -#[macro_export] -macro_rules! count { - () => { - 0usize - }; - ($head:tt $($tail:tt)*) => { - 1usize + $crate::count!($($tail)*) - }; -} - -#[doc(hidden)] -#[macro_export] -macro_rules! metadata { - ($subsystem:expr) => { - $crate::metrics::__metrics::Metadata::new( - $subsystem, - $crate::metrics::__metrics::Level::INFO, - Some(module_path!()), - ) - }; -} - -#[doc(hidden)] -#[macro_export] -macro_rules! key_info_metadata { - ( - kind: $kind:expr, - observable: $observable:expr, - name: $name:literal, - description: $description:literal, - subsystem: "" - $(, $label:literal => $value:literal)* $(,)? - ) => { - const KEY_NAME: &str = $crate::metrics::__concatcp!($crate::metrics::SYSTEM, "_", $name); - static INFO: $crate::metrics::MetricInfo = $crate::metrics::MetricInfo { - name: $name, - subsystem: "", - key_name: KEY_NAME, - description: $description, - kind: $kind, - observable: $observable, - }; - $crate::metrics::__inventory::submit!(INFO); - - static LABELS: [$crate::metrics::__metrics::Label; $crate::count!($($label)*)] = [ - $($crate::metrics::__metrics::Label::from_static_parts($label, $value)),* - ]; - static KEY: $crate::metrics::__metrics::Key = - $crate::metrics::__metrics::Key::from_static_parts(KEY_NAME, &LABELS); - static METADATA: $crate::metrics::__metrics::Metadata<'static> = - $crate::metadata!(""); - }; - - ( - kind: $kind:expr, - observable: $observable:expr, - name: $name:literal, - description: $description:literal, - subsystem: $subsystem:literal - $(, $label:literal => $value:literal)* $(,)? 
- ) => { - const KEY_NAME: &str = $crate::key_name!($subsystem, $name); - static INFO: $crate::metrics::MetricInfo = $crate::metrics::MetricInfo { - name: $name, - subsystem: $subsystem, - key_name: KEY_NAME, - description: $description, - kind: $kind, - observable: $observable, - }; - $crate::metrics::__inventory::submit!(INFO); - - static LABELS: [$crate::metrics::__metrics::Label; $crate::count!($($label)*)] = [ - $($crate::metrics::__metrics::Label::from_static_parts($label, $value)),* - ]; - static KEY: $crate::metrics::__metrics::Key = - $crate::metrics::__metrics::Key::from_static_parts(KEY_NAME, &LABELS); - static METADATA: $crate::metrics::__metrics::Metadata<'static> = - $crate::metadata!($subsystem); - }; -} - -pub use crate::{counter, gauge, histogram}; diff --git a/quickwit/quickwit-common/src/metrics/prometheus.rs b/quickwit/quickwit-common/src/metrics/prometheus.rs deleted file mode 100644 index 5b45bd29138..00000000000 --- a/quickwit/quickwit-common/src/metrics/prometheus.rs +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright 2021-Present Datadog, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -use std::sync::OnceLock; -#[cfg(not(test))] -use std::time::Duration; - -use metrics_exporter_prometheus::PrometheusHandle; - -static PROMETHEUS_HANDLE: OnceLock = OnceLock::new(); - -pub fn set_prometheus_handle(handle: PrometheusHandle) -> Result<(), String> { - #[cfg(not(test))] - let upkeep_handle = handle.clone(); - PROMETHEUS_HANDLE - .set(handle) - .map_err(|_| "Prometheus metrics renderer is already installed".to_string())?; - #[cfg(not(test))] - spawn_prometheus_upkeep(upkeep_handle)?; - Ok(()) -} - -pub fn metrics_text_payload() -> Result { - let handle = PROMETHEUS_HANDLE - .get() - .ok_or_else(|| "Prometheus metrics rendering is not installed yet".to_string())?; - Ok(handle.render()) -} - -#[cfg(not(test))] -fn spawn_prometheus_upkeep(handle: PrometheusHandle) -> Result<(), String> { - // Quickwit serves the existing `/metrics` route itself, so we build only the - // Prometheus recorder instead of using the exporter's HTTP listener. That lower-level - // API does not spawn the upkeep task that periodically drains histogram buffers. - std::thread::Builder::new() - .name("metrics-exporter-prometheus-upkeep".to_string()) - .spawn(move || { - loop { - std::thread::sleep(Duration::from_secs(5)); - handle.run_upkeep(); - } - }) - .map(|_| ()) - .map_err(|error| format!("failed to spawn Prometheus metrics upkeep thread: {error}")) -} diff --git a/quickwit/quickwit-common/src/metrics/tests.rs b/quickwit/quickwit-common/src/metrics/tests.rs deleted file mode 100644 index 76d590f9f60..00000000000 --- a/quickwit/quickwit-common/src/metrics/tests.rs +++ /dev/null @@ -1,389 +0,0 @@ -// Copyright 2021-Present Datadog, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use metrics::with_local_recorder; -use metrics_exporter_prometheus::PrometheusBuilder; -use metrics_util::debugging::{DebugValue, DebuggingRecorder}; - -use super::*; - -type MetricEntry = (String, Vec<(String, String)>, DebugValue); - -fn with_recorder(f: impl FnOnce()) -> Vec { - let recorder = DebuggingRecorder::new(); - let snapshotter = recorder.snapshotter(); - with_local_recorder(&recorder, f); - snapshotter - .snapshot() - .into_vec() - .into_iter() - .map(|(composite_key, _unit, _description, value)| { - let (_, key) = composite_key.into_parts(); - let labels = key - .labels() - .map(|label| (label.key().to_string(), label.value().to_string())) - .collect(); - (key.name().to_string(), labels, value) - }) - .collect() -} - -#[test] -fn counter_increment_and_absolute_values() { - let entries = with_recorder(|| { - let counter = counter!( - name: "test_counter_increment", - description: "test counter", - subsystem: "metrics_tests", - ); - counter.increment(5); - - let absolute = counter!( - name: "test_counter_absolute", - description: "absolute counter", - subsystem: "metrics_tests", - ); - absolute.absolute(42); - }); - - assert!(entries.contains(&( - "quickwit_metrics_tests_test_counter_increment".to_string(), - Vec::new(), - DebugValue::Counter(5), - ))); - assert!(entries.contains(&( - "quickwit_metrics_tests_test_counter_absolute".to_string(), - Vec::new(), - DebugValue::Counter(42), - ))); -} - -#[test] -fn gauge_set_increment_and_decrement() { - let entries = with_recorder(|| { - let gauge = gauge!( - name: "test_gauge", - 
description: "test gauge", - subsystem: "metrics_tests", - ); - gauge.set(10.0); - gauge.increment(5.0); - gauge.decrement(3.0); - }); - - assert_eq!( - entries[0], - ( - "quickwit_metrics_tests_test_gauge".to_string(), - Vec::new(), - DebugValue::Gauge(12.0.into()), - ) - ); -} - -#[test] -fn histogram_records_value() { - let entries = with_recorder(|| { - let histogram = histogram!( - name: "test_histogram", - description: "test histogram", - subsystem: "metrics_tests", - buckets: vec![1.0, 5.0, 10.0], - ); - histogram.record(3.5); - }); - - let (name, labels, value) = &entries[0]; - assert_eq!(name, "quickwit_metrics_tests_test_histogram"); - assert!(labels.is_empty()); - match value { - DebugValue::Histogram(values) => { - assert_eq!(values.len(), 1); - assert_eq!(values[0].into_inner(), 3.5); - } - other => panic!("expected histogram, got {other:?}"), - } -} - -#[test] -fn histogram_timer_records_value_on_drop() { - let entries = with_recorder(|| { - let histogram = histogram!( - name: "test_histogram_timer_drop", - description: "test histogram timer drop", - subsystem: "metrics_tests", - buckets: vec![1.0, 5.0, 10.0], - ); - let _timer = histogram.start_timer(); - }); - - let (name, labels, value) = &entries[0]; - assert_eq!(name, "quickwit_metrics_tests_test_histogram_timer_drop"); - assert!(labels.is_empty()); - match value { - DebugValue::Histogram(values) => { - assert_eq!(values.len(), 1); - assert!(values[0].into_inner() >= 0.0); - } - other => panic!("expected histogram, got {other:?}"), - } -} - -#[test] -fn histogram_timer_observe_duration_records_once() { - let entries = with_recorder(|| { - let histogram = histogram!( - name: "test_histogram_timer_observe_duration", - description: "test histogram timer observe duration", - subsystem: "metrics_tests", - buckets: vec![1.0, 5.0, 10.0], - ); - histogram.start_timer().observe_duration(); - }); - - let (name, labels, value) = &entries[0]; - assert_eq!( - name, - 
"quickwit_metrics_tests_test_histogram_timer_observe_duration" - ); - assert!(labels.is_empty()); - match value { - DebugValue::Histogram(values) => { - assert_eq!(values.len(), 1); - assert!(values[0].into_inner() >= 0.0); - } - other => panic!("expected histogram, got {other:?}"), - } -} - -#[test] -fn empty_subsystem_omits_double_underscore() { - let entries = with_recorder(|| { - let counter = counter!( - name: "empty_subsystem_counter", - description: "empty subsystem counter", - subsystem: "", - ); - counter.increment(1); - }); - - assert_eq!(entries[0].0, "quickwit_empty_subsystem_counter"); -} - -#[test] -fn static_labels_are_preserved() { - let entries = with_recorder(|| { - let counter = counter!( - name: "static_labels_counter", - description: "static labels counter", - subsystem: "metrics_tests", - "env" => "prod", - "region" => "eu", - ); - counter.increment(1); - }); - - assert_eq!( - entries[0].1, - vec![ - ("env".to_string(), "prod".to_string()), - ("region".to_string(), "eu".to_string()), - ] - ); -} - -#[test] -fn parent_labels_dynamic_values_and_nested_extension() { - let entries = with_recorder(|| { - let base = counter!( - name: "nested_counter", - description: "nested counter", - subsystem: "metrics_tests", - "env" => "prod", - ); - let region = String::from("us-east"); - let child = counter!(parent: base, "region" => region); - let grandchild = counter!(parent: child, "az" => "use1-a"); - grandchild.increment(7); - }); - - let grandchild = entries - .iter() - .find(|(name, labels, _)| { - name == "quickwit_metrics_tests_nested_counter" && labels.len() == 3 - }) - .expect("grandchild metric should be recorded"); - assert_eq!( - grandchild.1, - vec![ - ("env".to_string(), "prod".to_string()), - ("region".to_string(), "us-east".to_string()), - ("az".to_string(), "use1-a".to_string()), - ] - ); - assert_eq!(grandchild.2, DebugValue::Counter(7)); -} - -#[test] -fn observable_counter_and_gauge_get_values() { - with_recorder(|| { - let counter = 
counter!( - name: "observable_counter", - description: "observable counter", - subsystem: "metrics_tests", - observable: true, - ); - counter.increment(3); - counter.absolute(11); - assert_eq!(counter.get(), 11); - - let gauge = gauge!( - name: "observable_gauge", - description: "observable gauge", - subsystem: "metrics_tests", - observable: true, - ); - gauge.set(10.0); - gauge.increment(2.0); - gauge.decrement(1.0); - assert_eq!(gauge.get(), 11.0); - }); -} - -#[test] -fn non_observable_metrics_return_sentinel_values() { - with_recorder(|| { - let counter = counter!( - name: "non_observable_counter", - description: "non observable counter", - subsystem: "metrics_tests", - ); - counter.increment(1); - assert_eq!(counter.get(), u64::MAX); - - let gauge = gauge!( - name: "non_observable_gauge", - description: "non observable gauge", - subsystem: "metrics_tests", - ); - gauge.set(1.0); - assert!(gauge.get().is_nan()); - }); -} - -#[test] -fn gauge_guard_balances_variable_delta_on_drop() { - let entries = with_recorder(|| { - let gauge = gauge!( - name: "guarded_gauge", - description: "guarded gauge", - subsystem: "metrics_tests", - observable: true, - ); - gauge.set(10.0); - { - let mut guard = GaugeGuard::from_gauge(&gauge); - guard.add(5); - guard.sub(2); - assert_eq!(guard.get(), 3); - assert_eq!(guard.value(), 3.0); - assert_eq!(gauge.get(), 13.0); - } - assert_eq!(gauge.get(), 10.0); - }); - - assert_eq!(entries[0].2, DebugValue::Gauge(10.0.into())); -} - -#[test] -fn histogram_bucket_inventory_contains_declared_buckets() { - with_recorder(|| { - let _ = histogram!( - name: "bucketed_histogram", - description: "bucketed histogram", - subsystem: "metrics_tests", - buckets: vec![0.1, 1.0, 10.0], - ); - }); - - assert!(histogram_buckets().any(|(name, buckets)| { - name == "quickwit_metrics_tests_bucketed_histogram" && buckets == vec![0.1, 1.0, 10.0] - })); -} - -#[test] -fn metrics_info_contains_declared_metadata() { - with_recorder(|| { - let _ = counter!( - name: 
"metadata_counter", - description: "metadata counter", - subsystem: "metrics_tests", - observable: true, - ); - }); - - let info = metrics_info() - .find(|info| info.key_name == "quickwit_metrics_tests_metadata_counter") - .expect("metadata counter info should be registered"); - assert_eq!(info.name, "metadata_counter"); - assert_eq!(info.subsystem, "metrics_tests"); - assert_eq!(info.description, "metadata counter"); - assert_eq!(info.kind, MetricKind::Counter); - assert!(info.observable); -} - -#[test] -fn describe_metrics_sets_debugging_recorder_description() { - let recorder = DebuggingRecorder::new(); - let snapshotter = recorder.snapshotter(); - with_local_recorder(&recorder, || { - let counter = counter!( - name: "described_counter", - description: "described counter", - subsystem: "metrics_tests", - ); - describe_metrics(); - counter.increment(1); - }); - - let snapshot = snapshotter.snapshot().into_vec(); - let (_, _, description, _) = snapshot - .into_iter() - .find(|(composite_key, _, _, _)| { - let (_, key) = composite_key.clone().into_parts(); - key.name() == "quickwit_metrics_tests_described_counter" - }) - .expect("described counter should be recorded"); - assert_eq!(description.as_deref(), Some("described counter")); -} - -#[test] -fn metrics_text_payload_renders_prometheus_handle() { - let recorder = PrometheusBuilder::new().build_recorder(); - set_prometheus_handle(recorder.handle()).expect("Prometheus handle should be set once"); - - with_local_recorder(&recorder, || { - let counter = counter!( - name: "prometheus_payload_counter", - description: "prometheus payload counter", - subsystem: "metrics_tests", - ); - describe_metrics(); - counter.increment(1); - }); - - let payload = metrics_text_payload().expect("Prometheus payload should render"); - assert!(payload.contains("# HELP quickwit_metrics_tests_prometheus_payload_counter")); - assert!(payload.contains("quickwit_metrics_tests_prometheus_payload_counter 1")); -} diff --git 
a/quickwit/quickwit-common/src/runtimes.rs b/quickwit/quickwit-common/src/runtimes.rs index 082098c710e..98fb33975de 100644 --- a/quickwit/quickwit-common/src/runtimes.rs +++ b/quickwit/quickwit-common/src/runtimes.rs @@ -17,11 +17,10 @@ use std::sync::OnceLock; use std::sync::atomic::{AtomicUsize, Ordering}; use std::time::Duration; +use quickwit_metrics::{Counter, Gauge, counter, gauge}; use tokio::runtime::Runtime; use tokio_metrics::{RuntimeMetrics, RuntimeMonitor}; -use crate::metrics::{Counter, Gauge, counter, gauge}; - static RUNTIMES: OnceLock> = OnceLock::new(); static TOKIO_SCHEDULED_TASKS: std::sync::LazyLock = std::sync::LazyLock::new(|| { diff --git a/quickwit/quickwit-common/src/stream_utils.rs b/quickwit/quickwit-common/src/stream_utils.rs index ad060fb5af2..9c955388d1c 100644 --- a/quickwit/quickwit-common/src/stream_utils.rs +++ b/quickwit/quickwit-common/src/stream_utils.rs @@ -18,11 +18,11 @@ use std::pin::Pin; use bytesize::ByteSize; use futures::{Stream, StreamExt, TryStreamExt, stream}; +use quickwit_metrics::{Gauge, GaugeGuard}; use tokio::sync::{mpsc, watch}; use tokio_stream::wrappers::{ReceiverStream, UnboundedReceiverStream, WatchStream}; use tracing::warn; -use crate::metrics::{Gauge, GaugeGuard}; use crate::tower::RpcName; pub type BoxStream = Pin + Send + Unpin + 'static>>; @@ -240,8 +240,7 @@ where T: fmt::Debug impl InFlightValue { pub fn new(value: T, value_size: ByteSize, gauge: &'static Gauge) -> Self { let mut gauge_guard = GaugeGuard::from_gauge(gauge); - gauge_guard.add(value_size.as_u64() as i64); - + gauge_guard.increment(value_size.as_u64() as f64); Self(value, gauge_guard) } @@ -285,8 +284,9 @@ impl TrackedUnboundedSender { mod tests { use std::sync::LazyLock; + use quickwit_metrics::{Gauge, gauge}; + use super::*; - use crate::metrics::{Gauge, gauge}; #[tokio::test] async fn test_service_stream_map() { diff --git a/quickwit/quickwit-common/src/thread_pool.rs b/quickwit/quickwit-common/src/thread_pool.rs index 
25a8e74a4a8..fef231b91b7 100644 --- a/quickwit/quickwit-common/src/thread_pool.rs +++ b/quickwit/quickwit-common/src/thread_pool.rs @@ -16,11 +16,10 @@ use std::fmt; use std::sync::Arc; use futures::{Future, TryFutureExt}; +use quickwit_metrics::{Gauge, GaugeGuard, gauge}; use tokio::sync::oneshot; use tracing::error; -use crate::metrics::{Gauge, GaugeGuard, gauge}; - static THREAD_POOL_ONGOING_TASKS: std::sync::LazyLock = std::sync::LazyLock::new(|| { gauge!( name: "ongoing_tasks", @@ -100,7 +99,7 @@ impl ThreadPool { let span = tracing::Span::current(); let ongoing_tasks = self.ongoing_tasks.clone(); let mut pending_tasks_guard = GaugeGuard::from_gauge(&self.pending_tasks); - pending_tasks_guard.add(1i64); + pending_tasks_guard.increment(1.0); let (tx, rx) = oneshot::channel(); self.thread_pool.spawn(move || { drop(pending_tasks_guard); @@ -108,8 +107,8 @@ impl ThreadPool { return; } let _guard = span.enter(); - let mut ongoing_task_guard = GaugeGuard::from_gauge(&ongoing_tasks); - ongoing_task_guard.add(1i64); + let mut _ongoing_task_guard = GaugeGuard::from_gauge(&ongoing_tasks); + _ongoing_task_guard.increment(1.0); let result = cpu_intensive_fn(); let _ = tx.send(result); }); diff --git a/quickwit/quickwit-common/src/tower/circuit_breaker.rs b/quickwit/quickwit-common/src/tower/circuit_breaker.rs index aaf98cd1c3d..c9e54750882 100644 --- a/quickwit/quickwit-common/src/tower/circuit_breaker.rs +++ b/quickwit/quickwit-common/src/tower/circuit_breaker.rs @@ -19,11 +19,10 @@ use std::task::{Context, Poll}; use std::time::Duration; use pin_project::pin_project; +use quickwit_metrics::Counter; use tokio::time::Instant; use tower::{Layer, Service}; -use crate::metrics::Counter; - /// The circuit breaker layer implements the [circuit breaker pattern](https://martinfowler.com/bliki/CircuitBreaker.html). 
/// /// It counts the errors emitted by the inner service, and if the number of errors exceeds a certain @@ -302,7 +301,7 @@ mod tests { const TIMEOUT: Duration = Duration::from_millis(500); - let int_counter = crate::metrics::counter!( + let int_counter = quickwit_metrics::counter!( name: "circuit_break_total_test", description: "test circuit breaker counter", subsystem: "", diff --git a/quickwit/quickwit-common/src/tower/metrics.rs b/quickwit/quickwit-common/src/tower/metrics.rs index 23e8845d1b2..77607d2c5dc 100644 --- a/quickwit/quickwit-common/src/tower/metrics.rs +++ b/quickwit/quickwit-common/src/tower/metrics.rs @@ -19,9 +19,10 @@ use std::time::Instant; use futures::{Future, ready}; use pin_project::{pin_project, pinned_drop}; +use quickwit_metrics::{Counter, Gauge, Histogram, counter, gauge, histogram}; use tower::{Layer, Service}; -use crate::metrics::{Counter, Gauge, Histogram, counter, exponential_buckets, gauge, histogram}; +use crate::metrics::exponential_buckets; pub trait RpcName { fn rpc_name() -> &'static str; diff --git a/quickwit/quickwit-control-plane/Cargo.toml b/quickwit/quickwit-control-plane/Cargo.toml index e7d9d012dd0..e0c8e951e98 100644 --- a/quickwit/quickwit-control-plane/Cargo.toml +++ b/quickwit/quickwit-control-plane/Cargo.toml @@ -30,6 +30,7 @@ ulid = { workspace = true } quickwit-actors = { workspace = true } quickwit-cluster = { workspace = true } quickwit-common = { workspace = true } +quickwit-metrics = { workspace = true } quickwit-config = { workspace = true } quickwit-ingest = { workspace = true } quickwit-metastore = { workspace = true } diff --git a/quickwit/quickwit-control-plane/src/metrics.rs b/quickwit/quickwit-control-plane/src/metrics.rs index 90ed921785e..0c6b9aa50e3 100644 --- a/quickwit/quickwit-control-plane/src/metrics.rs +++ b/quickwit/quickwit-control-plane/src/metrics.rs @@ -14,7 +14,7 @@ use std::sync::LazyLock; -use quickwit_common::metrics::{Counter, Gauge, counter, gauge}; +use 
quickwit_metrics::{Counter, Gauge, counter, gauge}; #[derive(Debug, Clone, Copy)] pub struct ShardLocalityMetrics { diff --git a/quickwit/quickwit-control-plane/src/model/shard_table.rs b/quickwit/quickwit-control-plane/src/model/shard_table.rs index 09a5d42e591..630718687f8 100644 --- a/quickwit/quickwit-control-plane/src/model/shard_table.rs +++ b/quickwit/quickwit-control-plane/src/model/shard_table.rs @@ -461,12 +461,12 @@ impl ShardTable { // can update the metrics for this specific index. if index_label == index_id { let shard_stats = table_entry.shards_stats(); - quickwit_common::metrics::gauge!( + quickwit_metrics::gauge!( parent: &crate::metrics::CONTROL_PLANE_METRICS.open_shards, "index_id" => index_label.to_string(), ) .set(shard_stats.num_open_shards as f64); - quickwit_common::metrics::gauge!( + quickwit_metrics::gauge!( parent: &crate::metrics::CONTROL_PLANE_METRICS.closed_shards, "index_id" => index_label.to_string(), ) @@ -484,12 +484,12 @@ impl ShardTable { num_closed_shards += 1; } } - quickwit_common::metrics::gauge!( + quickwit_metrics::gauge!( parent: &crate::metrics::CONTROL_PLANE_METRICS.open_shards, "index_id" => index_label.to_string(), ) .set(num_open_shards as f64); - quickwit_common::metrics::gauge!( + quickwit_metrics::gauge!( parent: &crate::metrics::CONTROL_PLANE_METRICS.closed_shards, "index_id" => index_label.to_string(), ) diff --git a/quickwit/quickwit-index-management/Cargo.toml b/quickwit/quickwit-index-management/Cargo.toml index d303125f65f..35978b9f5e7 100644 --- a/quickwit/quickwit-index-management/Cargo.toml +++ b/quickwit/quickwit-index-management/Cargo.toml @@ -21,6 +21,7 @@ tokio = { workspace = true } tracing = { workspace = true } quickwit-common = { workspace = true } +quickwit-metrics = { workspace = true } quickwit-config = { workspace = true } quickwit-indexing = { workspace = true } quickwit-metastore = { workspace = true } diff --git a/quickwit/quickwit-index-management/src/garbage_collection.rs 
b/quickwit/quickwit-index-management/src/garbage_collection.rs index 692a716578c..2b964b0a4e4 100644 --- a/quickwit/quickwit-index-management/src/garbage_collection.rs +++ b/quickwit/quickwit-index-management/src/garbage_collection.rs @@ -20,13 +20,13 @@ use std::time::Duration; use anyhow::Context; use futures::{Future, StreamExt}; use itertools::Itertools; -use quickwit_common::metrics::Counter; use quickwit_common::pretty::PrettySample; use quickwit_common::{Progress, rate_limited_info}; use quickwit_metastore::{ ListSplitsQuery, ListSplitsRequestExt, MetastoreServiceStreamSplitsExt, SplitInfo, SplitMetadata, SplitState, }; +use quickwit_metrics::Counter; use quickwit_proto::metastore::{ DeleteSplitsRequest, ListSplitsRequest, MarkSplitsForDeletionRequest, MetastoreError, MetastoreService, MetastoreServiceClient, diff --git a/quickwit/quickwit-indexing/Cargo.toml b/quickwit/quickwit-indexing/Cargo.toml index 2a0d581797d..10a0ef98b75 100644 --- a/quickwit/quickwit-indexing/Cargo.toml +++ b/quickwit/quickwit-indexing/Cargo.toml @@ -55,6 +55,7 @@ quickwit-actors = { workspace = true } quickwit-aws = { workspace = true } quickwit-cluster = { workspace = true } quickwit-common = { workspace = true } +quickwit-metrics = { workspace = true } quickwit-config = { workspace = true } quickwit-directories = { workspace = true } quickwit-doc-mapper = { workspace = true } diff --git a/quickwit/quickwit-indexing/src/actors/doc_processor.rs b/quickwit/quickwit-indexing/src/actors/doc_processor.rs index e67ebbe71ba..6881a2f9aa1 100644 --- a/quickwit/quickwit-indexing/src/actors/doc_processor.rs +++ b/quickwit/quickwit-indexing/src/actors/doc_processor.rs @@ -20,11 +20,11 @@ use anyhow::{Context, bail}; use async_trait::async_trait; use bytes::Bytes; use quickwit_actors::{Actor, ActorContext, ActorExitStatus, Handler, Mailbox, QueueCapacity}; -use quickwit_common::metrics::{Counter, counter}; use quickwit_common::rate_limited_tracing::rate_limited_warn; use 
quickwit_common::runtimes::RuntimeType; use quickwit_config::{SourceInputFormat, TransformConfig}; use quickwit_doc_mapper::{DocMapper, DocParsingError, JsonObject}; +use quickwit_metrics::{Counter, counter}; use quickwit_opentelemetry::otlp::{ JsonLogIterator, JsonSpanIterator, OtlpLogsError, OtlpTracesError, parse_otlp_logs_json, parse_otlp_logs_protobuf, parse_otlp_spans_json, parse_otlp_spans_protobuf, diff --git a/quickwit/quickwit-indexing/src/actors/indexer.rs b/quickwit/quickwit-indexing/src/actors/indexer.rs index a0f968672aa..4f5f1b98040 100644 --- a/quickwit/quickwit-indexing/src/actors/indexer.rs +++ b/quickwit/quickwit-indexing/src/actors/indexer.rs @@ -27,12 +27,13 @@ use quickwit_actors::{ Actor, ActorContext, ActorExitStatus, Command, Handler, Mailbox, QueueCapacity, }; use quickwit_common::io::IoControls; -use quickwit_common::metrics::GaugeGuard; +use quickwit_common::metrics::MEMORY_METRICS; use quickwit_common::runtimes::RuntimeType; use quickwit_common::temp_dir::TempDirectory; use quickwit_config::IndexingSettings; use quickwit_doc_mapper::DocMapper; use quickwit_metastore::checkpoint::{IndexCheckpointDelta, SourceCheckpointDelta}; +use quickwit_metrics::GaugeGuard; use quickwit_proto::indexing::{IndexingPipelineId, PipelineMetrics}; use quickwit_proto::metastore::{ LastDeleteOpstampRequest, MetastoreService, MetastoreServiceClient, @@ -221,7 +222,7 @@ impl IndexerState { let mut split_builders_guard = GaugeGuard::from_gauge(&crate::metrics::INDEXER_METRICS.split_builders); - split_builders_guard.add(1); + split_builders_guard.increment(1.0); let workbench = IndexingWorkbench { workbench_id, @@ -233,11 +234,7 @@ impl IndexerState { publish_lock, publish_token_opt, last_delete_opstamp, - memory_usage: GaugeGuard::from_gauge( - &quickwit_common::metrics::MEMORY_METRICS - .in_flight - .index_writer, - ), + memory_usage: GaugeGuard::from_gauge(&MEMORY_METRICS.in_flight.index_writer), cooperative_indexing_period, split_builders_guard, }; @@ -335,7 
+332,7 @@ impl IndexerState { memory_usage_delta += mem_usage_after as i64 - mem_usage_before as i64; ctx.record_progress(); } - memory_usage.add(memory_usage_delta); + memory_usage.increment(memory_usage_delta as f64); Ok(()) } } diff --git a/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs b/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs index 43fb9e5d69d..0ddb8b4843b 100644 --- a/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs +++ b/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs @@ -23,12 +23,12 @@ use quickwit_actors::{ QueueCapacity, Supervisable, }; use quickwit_common::KillSwitch; -use quickwit_common::metrics::{GaugeGuard, counter, gauge}; use quickwit_common::pubsub::EventBroker; use quickwit_common::temp_dir::TempDirectory; use quickwit_config::{IndexingSettings, RetentionPolicy, SourceConfig}; use quickwit_doc_mapper::DocMapper; use quickwit_ingest::IngesterPool; +use quickwit_metrics::{GaugeGuard, counter, gauge}; use quickwit_proto::indexing::IndexingPipelineId; use quickwit_proto::metastore::{MetastoreError, MetastoreServiceClient}; use quickwit_proto::types::ShardId; @@ -128,7 +128,7 @@ impl IndexingPipeline { "index" => params.pipeline_id.index_uid.index_id.clone(), ); let mut indexing_pipelines_gauge_guard = GaugeGuard::from_gauge(&indexing_pipelines_gauge); - indexing_pipelines_gauge_guard.add(1); + indexing_pipelines_gauge_guard.increment(1.0); let params_fingerprint = params.params_fingerprint; IndexingPipeline { params, diff --git a/quickwit/quickwit-indexing/src/actors/merge_pipeline.rs b/quickwit/quickwit-indexing/src/actors/merge_pipeline.rs index 1205f3e9162..a4e263c50cb 100644 --- a/quickwit/quickwit-indexing/src/actors/merge_pipeline.rs +++ b/quickwit/quickwit-indexing/src/actors/merge_pipeline.rs @@ -22,7 +22,6 @@ use quickwit_actors::{ }; use quickwit_common::KillSwitch; use quickwit_common::io::{IoControls, Limiter}; -use quickwit_common::metrics::counter; use 
quickwit_common::pubsub::EventBroker; use quickwit_common::temp_dir::TempDirectory; use quickwit_config::RetentionPolicy; @@ -31,6 +30,7 @@ use quickwit_metastore::{ ListSplitsQuery, ListSplitsRequestExt, MetastoreServiceStreamSplitsExt, SplitMetadata, SplitState, }; +use quickwit_metrics::counter; use quickwit_proto::indexing::MergePipelineId; use quickwit_proto::metastore::{ ListSplitsRequest, MetastoreError, MetastoreResult, MetastoreService, MetastoreServiceClient, diff --git a/quickwit/quickwit-indexing/src/actors/metrics_pipeline/parquet_uploader.rs b/quickwit/quickwit-indexing/src/actors/metrics_pipeline/parquet_uploader.rs index 695d957f9d5..0b1638aa794 100644 --- a/quickwit/quickwit-indexing/src/actors/metrics_pipeline/parquet_uploader.rs +++ b/quickwit/quickwit-indexing/src/actors/metrics_pipeline/parquet_uploader.rs @@ -25,9 +25,9 @@ use std::sync::{Arc, OnceLock}; use anyhow::Context; use async_trait::async_trait; use quickwit_actors::{Actor, ActorContext, ActorExitStatus, Handler, Mailbox, QueueCapacity}; -use quickwit_common::metrics::gauge; use quickwit_common::spawn_named_task; use quickwit_metastore::StageParquetSplitsRequestExt; +use quickwit_metrics::gauge; use quickwit_parquet_engine::split::{ParquetSplitKind, ParquetSplitMetadata}; use quickwit_proto::metastore::{MetastoreService, MetastoreServiceClient}; use quickwit_storage::Storage; diff --git a/quickwit/quickwit-indexing/src/actors/metrics_pipeline/pipeline.rs b/quickwit/quickwit-indexing/src/actors/metrics_pipeline/pipeline.rs index 3379c456911..966ca94cd0d 100644 --- a/quickwit/quickwit-indexing/src/actors/metrics_pipeline/pipeline.rs +++ b/quickwit/quickwit-indexing/src/actors/metrics_pipeline/pipeline.rs @@ -33,11 +33,11 @@ use quickwit_actors::{ QueueCapacity, Supervisable, }; use quickwit_common::KillSwitch; -use quickwit_common::metrics::{GaugeGuard, gauge}; use quickwit_common::pubsub::EventBroker; use quickwit_common::temp_dir::TempDirectory; use quickwit_config::{IndexingSettings, 
SourceConfig}; use quickwit_ingest::IngesterPool; +use quickwit_metrics::{GaugeGuard, gauge}; use quickwit_proto::indexing::IndexingPipelineId; use quickwit_proto::metastore::{MetastoreError, MetastoreServiceClient}; use quickwit_proto::types::ShardId; @@ -149,7 +149,7 @@ impl MetricsPipeline { "index" => params.pipeline_id.index_uid.index_id.clone(), ); let mut indexing_pipelines_gauge_guard = GaugeGuard::from_gauge(&indexing_pipelines_gauge); - indexing_pipelines_gauge_guard.add(1); + indexing_pipelines_gauge_guard.increment(1.0); let params_fingerprint = params.params_fingerprint; MetricsPipeline { params, diff --git a/quickwit/quickwit-indexing/src/actors/metrics_pipeline/processed_parquet_batch.rs b/quickwit/quickwit-indexing/src/actors/metrics_pipeline/processed_parquet_batch.rs index cd6a3ea6f9d..1ff1fb0b905 100644 --- a/quickwit/quickwit-indexing/src/actors/metrics_pipeline/processed_parquet_batch.rs +++ b/quickwit/quickwit-indexing/src/actors/metrics_pipeline/processed_parquet_batch.rs @@ -20,8 +20,9 @@ use std::fmt; use arrow::record_batch::RecordBatch; -use quickwit_common::metrics::{GaugeGuard, MEMORY_METRICS}; +use quickwit_common::metrics::MEMORY_METRICS; use quickwit_metastore::checkpoint::SourceCheckpointDelta; +use quickwit_metrics::GaugeGuard; /// Batch of parquet data as Arrow RecordBatch for the parquet indexing pipeline. 
/// @@ -66,7 +67,7 @@ impl ProcessedParquetBatch { .sum(); let mut gauge_guard = GaugeGuard::from_gauge(&MEMORY_METRICS.in_flight.indexer_mailbox); - gauge_guard.add(memory_size); + gauge_guard.increment(memory_size as f64); Self { batches, diff --git a/quickwit/quickwit-indexing/src/actors/uploader.rs b/quickwit/quickwit-indexing/src/actors/uploader.rs index e876d8f04a7..c0bf4a43fdf 100644 --- a/quickwit/quickwit-indexing/src/actors/uploader.rs +++ b/quickwit/quickwit-indexing/src/actors/uploader.rs @@ -23,12 +23,12 @@ use async_trait::async_trait; use fail::fail_point; use itertools::Itertools; use quickwit_actors::{Actor, ActorContext, ActorExitStatus, Handler, Mailbox, QueueCapacity}; -use quickwit_common::metrics::gauge; use quickwit_common::pubsub::EventBroker; use quickwit_common::spawn_named_task; use quickwit_config::RetentionPolicy; use quickwit_metastore::checkpoint::IndexCheckpointDelta; use quickwit_metastore::{SplitMetadata, StageSplitsRequestExt}; +use quickwit_metrics::gauge; use quickwit_proto::metastore::{MetastoreService, MetastoreServiceClient, StageSplitsRequest}; use quickwit_proto::search::{ReportSplit, ReportSplitsRequest}; use quickwit_proto::types::{IndexUid, PublishToken}; diff --git a/quickwit/quickwit-indexing/src/metrics.rs b/quickwit/quickwit-indexing/src/metrics.rs index a8350c794c7..4694be00dbf 100644 --- a/quickwit/quickwit-indexing/src/metrics.rs +++ b/quickwit/quickwit-indexing/src/metrics.rs @@ -14,7 +14,7 @@ use std::sync::LazyLock; -use quickwit_common::metrics::{Counter, Gauge, counter, gauge}; +use quickwit_metrics::{Counter, Gauge, counter, gauge}; pub struct IndexerMetrics { pub processed_docs_total: Counter, diff --git a/quickwit/quickwit-indexing/src/models/indexed_split.rs b/quickwit/quickwit-indexing/src/models/indexed_split.rs index 4453802959f..03728fe2f6a 100644 --- a/quickwit/quickwit-indexing/src/models/indexed_split.rs +++ b/quickwit/quickwit-indexing/src/models/indexed_split.rs @@ -16,9 +16,9 @@ use std::fmt; 
use std::path::Path; use quickwit_common::io::IoControls; -use quickwit_common::metrics::GaugeGuard; use quickwit_common::temp_dir::TempDirectory; use quickwit_metastore::checkpoint::IndexCheckpointDelta; +use quickwit_metrics::GaugeGuard; use quickwit_proto::indexing::IndexingPipelineId; use quickwit_proto::types::{DocMappingUid, IndexUid, PublishToken}; use tantivy::IndexBuilder; diff --git a/quickwit/quickwit-indexing/src/models/processed_doc.rs b/quickwit/quickwit-indexing/src/models/processed_doc.rs index d71545eb036..0575cbc0de4 100644 --- a/quickwit/quickwit-indexing/src/models/processed_doc.rs +++ b/quickwit/quickwit-indexing/src/models/processed_doc.rs @@ -14,8 +14,9 @@ use std::fmt; -use quickwit_common::metrics::{GaugeGuard, MEMORY_METRICS}; +use quickwit_common::metrics::MEMORY_METRICS; use quickwit_metastore::checkpoint::SourceCheckpointDelta; +use quickwit_metrics::GaugeGuard; use tantivy::{DateTime, TantivyDocument}; pub struct ProcessedDoc { @@ -52,7 +53,7 @@ impl ProcessedDocBatch { ) -> Self { let delta = docs.iter().map(|doc| doc.num_bytes as i64).sum::(); let mut gauge_guard = GaugeGuard::from_gauge(&MEMORY_METRICS.in_flight.indexer_mailbox); - gauge_guard.add(delta); + gauge_guard.increment(delta as f64); Self { docs, checkpoint_delta, diff --git a/quickwit/quickwit-indexing/src/models/raw_doc_batch.rs b/quickwit/quickwit-indexing/src/models/raw_doc_batch.rs index fe270e378c5..441e7b01dbf 100644 --- a/quickwit/quickwit-indexing/src/models/raw_doc_batch.rs +++ b/quickwit/quickwit-indexing/src/models/raw_doc_batch.rs @@ -15,8 +15,9 @@ use std::fmt; use bytes::Bytes; -use quickwit_common::metrics::{GaugeGuard, MEMORY_METRICS}; +use quickwit_common::metrics::MEMORY_METRICS; use quickwit_metastore::checkpoint::SourceCheckpointDelta; +use quickwit_metrics::GaugeGuard; pub struct RawDocBatch { // Do not directly append documents to this vector; otherwise, in-flight metrics will be @@ -36,7 +37,7 @@ impl RawDocBatch { let delta = docs.iter().map(|doc| 
doc.len() as i64).sum::(); let mut gauge_guard = GaugeGuard::from_gauge(&MEMORY_METRICS.in_flight.doc_processor_mailbox); - gauge_guard.add(delta); + gauge_guard.increment(delta as f64); Self { docs, diff --git a/quickwit/quickwit-indexing/src/source/mod.rs b/quickwit/quickwit-indexing/src/source/mod.rs index 7a500cd9c98..162ad12262e 100644 --- a/quickwit/quickwit-indexing/src/source/mod.rs +++ b/quickwit/quickwit-indexing/src/source/mod.rs @@ -92,7 +92,7 @@ pub use pulsar_source::{PulsarSource, PulsarSourceFactory}; #[cfg(feature = "sqs")] pub use queue_sources::sqs_queue; use quickwit_actors::{Actor, ActorContext, ActorExitStatus, Handler}; -use quickwit_common::metrics::{GaugeGuard, MEMORY_METRICS}; +use quickwit_common::metrics::MEMORY_METRICS; use quickwit_common::pubsub::EventBroker; use quickwit_common::runtimes::RuntimeType; use quickwit_config::{ @@ -101,6 +101,7 @@ use quickwit_config::{ use quickwit_ingest::IngesterPool; use quickwit_metastore::IndexMetadataResponseExt; use quickwit_metastore::checkpoint::{SourceCheckpoint, SourceCheckpointDelta}; +use quickwit_metrics::GaugeGuard; use quickwit_proto::indexing::IndexingPipelineId; use quickwit_proto::metastore::{ IndexMetadataRequest, MetastoreError, MetastoreResult, MetastoreService, @@ -551,8 +552,8 @@ impl BatchBuilder { pub fn add_doc(&mut self, doc: Bytes) { let num_bytes = doc.len(); self.docs.push(doc); - self.gauge_guard.add(num_bytes as i64); self.num_bytes += num_bytes as u64; + self.gauge_guard.increment(num_bytes as f64); } pub fn force_commit(&mut self) { @@ -567,7 +568,7 @@ impl BatchBuilder { pub fn clear(&mut self) { self.docs.clear(); self.checkpoint_delta = SourceCheckpointDelta::default(); - self.gauge_guard.sub(self.num_bytes as i64); + self.gauge_guard.increment(-(self.num_bytes as f64)); self.num_bytes = 0; } } diff --git a/quickwit/quickwit-ingest/Cargo.toml b/quickwit/quickwit-ingest/Cargo.toml index 1bf15d76fd4..03121cf4cc6 100644 --- a/quickwit/quickwit-ingest/Cargo.toml +++ 
b/quickwit/quickwit-ingest/Cargo.toml @@ -38,6 +38,7 @@ utoipa = { workspace = true } quickwit-actors = { workspace = true } quickwit-cluster = { workspace = true } quickwit-common = { workspace = true, features = ["testsuite"] } +quickwit-metrics = { workspace = true } quickwit-config = { workspace = true } quickwit-doc-mapper = { workspace = true, features = ["testsuite"] } quickwit-proto = { workspace = true } diff --git a/quickwit/quickwit-ingest/src/ingest_api_service.rs b/quickwit/quickwit-ingest/src/ingest_api_service.rs index f7f16174538..1a61c28235d 100644 --- a/quickwit/quickwit-ingest/src/ingest_api_service.rs +++ b/quickwit/quickwit-ingest/src/ingest_api_service.rs @@ -20,9 +20,9 @@ use bytes::Bytes; use quickwit_actors::{ Actor, ActorContext, ActorExitStatus, DeferableReplyHandler, Handler, QueueCapacity, }; -use quickwit_common::metrics::counter; use quickwit_common::runtimes::RuntimeType; use quickwit_common::tower::Cost; +use quickwit_metrics::counter; use quickwit_proto::ingest::RateLimitingCause; use tracing::{error, info}; use ulid::Ulid; diff --git a/quickwit/quickwit-ingest/src/ingest_v2/ingester.rs b/quickwit/quickwit-ingest/src/ingest_v2/ingester.rs index 61dd11e2ad9..c3dc9190591 100644 --- a/quickwit/quickwit-ingest/src/ingest_v2/ingester.rs +++ b/quickwit/quickwit-ingest/src/ingest_v2/ingester.rs @@ -25,11 +25,12 @@ use futures::StreamExt; use futures::stream::FuturesUnordered; use mrecordlog::error::CreateQueueError; use quickwit_cluster::Cluster; -use quickwit_common::metrics::{GaugeGuard, MEMORY_METRICS, counter}; +use quickwit_common::metrics::MEMORY_METRICS; use quickwit_common::pretty::PrettyDisplay; use quickwit_common::pubsub::{EventBroker, EventSubscriber}; use quickwit_common::rate_limiter::{RateLimiter, RateLimiterSettings}; use quickwit_common::{ServiceStream, rate_limited_error, rate_limited_warn}; +use quickwit_metrics::{GaugeGuard, counter}; use quickwit_proto::control_plane::{ AdviseResetShardsRequest, ControlPlaneService, 
ControlPlaneServiceClient, }; @@ -1126,8 +1127,8 @@ impl IngesterService for Ingester { _ => None, }) .sum::(); - let mut gauge_guard = GaugeGuard::from_gauge(&MEMORY_METRICS.in_flight.ingester_persist); - gauge_guard.add(request_size_bytes as i64); + let mut _gauge_guard = GaugeGuard::from_gauge(&MEMORY_METRICS.in_flight.ingester_persist); + _gauge_guard.increment(request_size_bytes as f64); self.persist_inner(persist_request).await } diff --git a/quickwit/quickwit-ingest/src/ingest_v2/metrics.rs b/quickwit/quickwit-ingest/src/ingest_v2/metrics.rs index 62ed66372c2..2c6497a5504 100644 --- a/quickwit/quickwit-ingest/src/ingest_v2/metrics.rs +++ b/quickwit/quickwit-ingest/src/ingest_v2/metrics.rs @@ -15,9 +15,8 @@ use std::sync::LazyLock; use mrecordlog::ResourceUsage; -use quickwit_common::metrics::{ - Counter, Gauge, Histogram, counter, exponential_buckets, gauge, histogram, linear_buckets, -}; +use quickwit_common::metrics::{exponential_buckets, linear_buckets}; +use quickwit_metrics::{Counter, Gauge, Histogram, counter, gauge, histogram}; // Counter vec counting the different outcomes of ingest requests as // measure at the end of the router work. 
diff --git a/quickwit/quickwit-ingest/src/ingest_v2/replication.rs b/quickwit/quickwit-ingest/src/ingest_v2/replication.rs index fe2bd941aff..2f22daa3220 100644 --- a/quickwit/quickwit-ingest/src/ingest_v2/replication.rs +++ b/quickwit/quickwit-ingest/src/ingest_v2/replication.rs @@ -18,8 +18,9 @@ use std::time::{Duration, Instant}; use bytesize::ByteSize; use futures::{Future, StreamExt}; use mrecordlog::error::CreateQueueError; -use quickwit_common::metrics::{GaugeGuard, MEMORY_METRICS}; +use quickwit_common::metrics::MEMORY_METRICS; use quickwit_common::{ServiceStream, rate_limited_warn}; +use quickwit_metrics::GaugeGuard; use quickwit_proto::ingest::ingester::{ AckReplicationMessage, IngesterStatus, InitReplicaRequest, InitReplicaResponse, ReplicateFailure, ReplicateFailureReason, ReplicateRequest, ReplicateResponse, @@ -504,8 +505,8 @@ impl ReplicationTask { ))); } let request_size_bytes = replicate_request.num_bytes(); - let mut gauge_guard = GaugeGuard::from_gauge(&MEMORY_METRICS.in_flight.ingester_replicate); - gauge_guard.add(request_size_bytes as i64); + let mut _gauge_guard = GaugeGuard::from_gauge(&MEMORY_METRICS.in_flight.ingester_replicate); + _gauge_guard.increment(request_size_bytes as f64); self.current_replication_seqno += 1; diff --git a/quickwit/quickwit-ingest/src/ingest_v2/router.rs b/quickwit/quickwit-ingest/src/ingest_v2/router.rs index 7830a72889e..f5b2b0b1a6a 100644 --- a/quickwit/quickwit-ingest/src/ingest_v2/router.rs +++ b/quickwit/quickwit-ingest/src/ingest_v2/router.rs @@ -20,9 +20,10 @@ use std::time::Duration; use async_trait::async_trait; use futures::stream::FuturesUnordered; use futures::{Future, StreamExt}; -use quickwit_common::metrics::{GaugeGuard, MEMORY_METRICS, counter}; +use quickwit_common::metrics::MEMORY_METRICS; use quickwit_common::pubsub::{EventBroker, EventSubscriber}; use quickwit_common::{rate_limited_error, rate_limited_warn}; +use quickwit_metrics::{GaugeGuard, counter}; use quickwit_proto::control_plane::{ 
ControlPlaneService, ControlPlaneServiceClient, GetOrCreateOpenShardsRequest, GetOrCreateOpenShardsSubrequest, @@ -587,8 +588,8 @@ impl IngestRouterService for IngestRouter { async fn ingest(&self, ingest_request: IngestRequestV2) -> IngestV2Result { let request_size_bytes = ingest_request.num_bytes(); - let mut gauge_guard = GaugeGuard::from_gauge(&MEMORY_METRICS.in_flight.ingest_router); - gauge_guard.add(request_size_bytes as i64); + let mut _gauge_guard = GaugeGuard::from_gauge(&MEMORY_METRICS.in_flight.ingest_router); + _gauge_guard.increment(request_size_bytes as f64); let num_subrequests = ingest_request.subrequests.len(); let _permit = self diff --git a/quickwit/quickwit-ingest/src/lib.rs b/quickwit/quickwit-ingest/src/lib.rs index 734dade2e83..e8fcaa313fb 100644 --- a/quickwit/quickwit-ingest/src/lib.rs +++ b/quickwit/quickwit-ingest/src/lib.rs @@ -109,7 +109,7 @@ pub async fn start_ingest_api_service( macro_rules! with_lock_metrics { ($future:expr, $operation:expr, $kind:expr) => { { - quickwit_common::metrics::gauge!( + quickwit_metrics::gauge!( parent: &$crate::ingest_v2::metrics::INGEST_V2_METRICS .wal_acquire_lock_requests_in_flight, "operation" => $operation, @@ -127,14 +127,14 @@ macro_rules! 
with_lock_metrics { "lock acquisition took {}ms", elapsed.as_millis() ); } - quickwit_common::metrics::gauge!( + quickwit_metrics::gauge!( parent: &$crate::ingest_v2::metrics::INGEST_V2_METRICS .wal_acquire_lock_requests_in_flight, "operation" => $operation, "type" => $kind, ) .decrement(1.0); - quickwit_common::metrics::histogram!( + quickwit_metrics::histogram!( parent: &$crate::ingest_v2::metrics::INGEST_V2_METRICS .wal_acquire_lock_request_duration_secs, "operation" => $operation, diff --git a/quickwit/quickwit-ingest/src/metrics.rs b/quickwit/quickwit-ingest/src/metrics.rs index badcd9689e9..ed4e1b012a2 100644 --- a/quickwit/quickwit-ingest/src/metrics.rs +++ b/quickwit/quickwit-ingest/src/metrics.rs @@ -14,7 +14,7 @@ use std::sync::LazyLock; -use quickwit_common::metrics::{Counter, Gauge, counter, gauge}; +use quickwit_metrics::{Counter, Gauge, counter, gauge}; pub struct IngestMetrics { pub docs_bytes_total: Counter, diff --git a/quickwit/quickwit-jaeger/Cargo.toml b/quickwit/quickwit-jaeger/Cargo.toml index 1ebebc8dbfb..a2d686c3f7a 100644 --- a/quickwit/quickwit-jaeger/Cargo.toml +++ b/quickwit/quickwit-jaeger/Cargo.toml @@ -26,6 +26,7 @@ tonic = { workspace = true } tracing = { workspace = true } quickwit-common = { workspace = true } +quickwit-metrics = { workspace = true } quickwit-config = { workspace = true } quickwit-opentelemetry = { workspace = true } quickwit-proto = { workspace = true } diff --git a/quickwit/quickwit-jaeger/src/lib.rs b/quickwit/quickwit-jaeger/src/lib.rs index 7fba262ee55..e4b68943762 100644 --- a/quickwit/quickwit-jaeger/src/lib.rs +++ b/quickwit/quickwit-jaeger/src/lib.rs @@ -21,8 +21,8 @@ use std::time::Instant; use itertools::{Either, Itertools}; use prost::Message; use prost_types::{Duration as WellKnownDuration, Timestamp as WellKnownTimestamp}; -use quickwit_common::metrics::{counter, histogram}; use quickwit_config::JaegerConfig; +use quickwit_metrics::{counter, histogram}; use quickwit_opentelemetry::otlp::{ Event as 
QwEvent, Link as QwLink, OTEL_TRACES_INDEX_ID, Span as QwSpan, SpanFingerprint, SpanId, SpanKind as QwSpanKind, SpanStatus as QwSpanStatus, TraceId, diff --git a/quickwit/quickwit-jaeger/src/metrics.rs b/quickwit/quickwit-jaeger/src/metrics.rs index 0761d4ab018..d986f3c98cc 100644 --- a/quickwit/quickwit-jaeger/src/metrics.rs +++ b/quickwit/quickwit-jaeger/src/metrics.rs @@ -14,7 +14,8 @@ use std::sync::LazyLock; -use quickwit_common::metrics::{Counter, Histogram, counter, exponential_buckets, histogram}; +use quickwit_common::metrics::exponential_buckets; +use quickwit_metrics::{Counter, Histogram, counter, histogram}; pub struct JaegerServiceMetrics { pub requests_total: Counter, diff --git a/quickwit/quickwit-jaeger/src/v1.rs b/quickwit/quickwit-jaeger/src/v1.rs index 9634d58857e..6a96b1aa38b 100644 --- a/quickwit/quickwit-jaeger/src/v1.rs +++ b/quickwit/quickwit-jaeger/src/v1.rs @@ -17,7 +17,7 @@ use std::time::Instant; use async_trait::async_trait; -use quickwit_common::metrics::{counter, histogram}; +use quickwit_metrics::{counter, histogram}; use quickwit_opentelemetry::otlp::{ OTEL_TRACES_INDEX_ID, extract_otel_traces_index_id_patterns_from_metadata, }; diff --git a/quickwit/quickwit-jaeger/src/v2.rs b/quickwit/quickwit-jaeger/src/v2.rs index 1a9af33a702..c7905e35689 100644 --- a/quickwit/quickwit-jaeger/src/v2.rs +++ b/quickwit/quickwit-jaeger/src/v2.rs @@ -19,7 +19,7 @@ use std::time::Instant; use async_trait::async_trait; use prost_types::Timestamp as WellKnownTimestamp; -use quickwit_common::metrics::{counter, histogram}; +use quickwit_metrics::{counter, histogram}; use quickwit_opentelemetry::otlp::{ OTEL_TRACES_INDEX_ID, Span as QwSpan, TraceId, extract_otel_traces_index_id_patterns_from_metadata, diff --git a/quickwit/quickwit-janitor/Cargo.toml b/quickwit/quickwit-janitor/Cargo.toml index ecb243a9990..e8063895f24 100644 --- a/quickwit/quickwit-janitor/Cargo.toml +++ b/quickwit/quickwit-janitor/Cargo.toml @@ -26,6 +26,7 @@ utoipa = { workspace = true 
} quickwit-actors = { workspace = true } quickwit-common = { workspace = true } +quickwit-metrics = { workspace = true } quickwit-config = { workspace = true } quickwit-doc-mapper = { workspace = true } quickwit-index-management = { workspace = true } diff --git a/quickwit/quickwit-janitor/src/actors/delete_task_planner.rs b/quickwit/quickwit-janitor/src/actors/delete_task_planner.rs index 1372b6c7eca..ea7ab7c631e 100644 --- a/quickwit/quickwit-janitor/src/actors/delete_task_planner.rs +++ b/quickwit/quickwit-janitor/src/actors/delete_task_planner.rs @@ -21,12 +21,12 @@ use async_trait::async_trait; use itertools::Itertools; use quickwit_actors::{Actor, ActorContext, ActorExitStatus, Handler, Mailbox, QueueCapacity}; use quickwit_common::extract_time_range; -use quickwit_common::metrics::gauge; use quickwit_common::uri::Uri; use quickwit_doc_mapper::tag_pruning::extract_tags_from_query; use quickwit_indexing::actors::{MergeSchedulerService, MergeSplitDownloader, schedule_merge}; use quickwit_indexing::merge_policy::MergeOperation; use quickwit_metastore::{ListSplitsResponseExt, Split, split_tag_filter, split_time_range_filter}; +use quickwit_metrics::gauge; use quickwit_proto::metastore::{ DeleteTask, LastDeleteOpstampRequest, ListDeleteTasksRequest, ListStaleSplitsRequest, MetastoreResult, MetastoreService, MetastoreServiceClient, UpdateSplitsDeleteOpstampRequest, diff --git a/quickwit/quickwit-janitor/src/actors/garbage_collector.rs b/quickwit/quickwit-janitor/src/actors/garbage_collector.rs index 1a5fedbb51f..5aa86999ca9 100644 --- a/quickwit/quickwit-janitor/src/actors/garbage_collector.rs +++ b/quickwit/quickwit-janitor/src/actors/garbage_collector.rs @@ -20,10 +20,10 @@ use async_trait::async_trait; use futures::{StreamExt, stream}; use quickwit_actors::{Actor, ActorContext, Handler}; use quickwit_common::is_parquet_pipeline_index; -use quickwit_common::metrics::counter; use quickwit_common::shared_consts::split_deletion_grace_period; use 
quickwit_index_management::{GcMetrics, run_garbage_collect, run_parquet_garbage_collect}; use quickwit_metastore::ListIndexesMetadataResponseExt; +use quickwit_metrics::counter; use quickwit_proto::metastore::{ ListIndexesMetadataRequest, MetastoreService, MetastoreServiceClient, }; diff --git a/quickwit/quickwit-janitor/src/metrics.rs b/quickwit/quickwit-janitor/src/metrics.rs index 55327747868..713da1a937b 100644 --- a/quickwit/quickwit-janitor/src/metrics.rs +++ b/quickwit/quickwit-janitor/src/metrics.rs @@ -14,7 +14,7 @@ use std::sync::LazyLock; -use quickwit_common::metrics::{Counter, Gauge, counter, gauge}; +use quickwit_metrics::{Counter, Gauge, counter, gauge}; pub struct JanitorMetrics { pub ongoing_num_delete_operations_total: Gauge, diff --git a/quickwit/quickwit-lambda-client/Cargo.toml b/quickwit/quickwit-lambda-client/Cargo.toml index 9f8318e7c15..1d33060be2e 100644 --- a/quickwit/quickwit-lambda-client/Cargo.toml +++ b/quickwit/quickwit-lambda-client/Cargo.toml @@ -23,6 +23,7 @@ tokio = { workspace = true } tracing = { workspace = true } quickwit-common = { workspace = true } +quickwit-metrics = { workspace = true } quickwit-config = { workspace = true } quickwit-lambda-server = { workspace = true } quickwit-proto = { workspace = true } diff --git a/quickwit/quickwit-lambda-client/src/invoker.rs b/quickwit/quickwit-lambda-client/src/invoker.rs index 42cae2811f0..8fb32b642d5 100644 --- a/quickwit/quickwit-lambda-client/src/invoker.rs +++ b/quickwit/quickwit-lambda-client/src/invoker.rs @@ -23,9 +23,9 @@ use aws_sdk_lambda::primitives::Blob; use aws_sdk_lambda::types::InvocationType; use base64::prelude::*; use prost::Message; -use quickwit_common::metrics::{counter, histogram}; use quickwit_common::retry::RetryParams; use quickwit_lambda_server::{LambdaSearchRequestPayload, LambdaSearchResponsePayload}; +use quickwit_metrics::{counter, histogram}; use quickwit_proto::search::{LambdaSearchResponses, LambdaSingleSplitResult, LeafSearchRequest}; use 
quickwit_search::{LambdaLeafSearchInvoker, SearchError}; use tracing::{debug, info, instrument, warn}; diff --git a/quickwit/quickwit-lambda-client/src/metrics.rs b/quickwit/quickwit-lambda-client/src/metrics.rs index 300c4a560b6..3bab535d813 100644 --- a/quickwit/quickwit-lambda-client/src/metrics.rs +++ b/quickwit/quickwit-lambda-client/src/metrics.rs @@ -16,7 +16,8 @@ use std::sync::LazyLock; -use quickwit_common::metrics::{Counter, Histogram, counter, exponential_buckets, histogram}; +use quickwit_common::metrics::exponential_buckets; +use quickwit_metrics::{Counter, Histogram, counter, histogram}; /// From 100ms to 73s seconds fn duration_buckets() -> Vec { diff --git a/quickwit/quickwit-metastore/Cargo.toml b/quickwit/quickwit-metastore/Cargo.toml index 8a8a4755feb..ac3e8d5406f 100644 --- a/quickwit/quickwit-metastore/Cargo.toml +++ b/quickwit/quickwit-metastore/Cargo.toml @@ -40,6 +40,7 @@ uuid = { workspace = true } utoipa = { workspace = true } quickwit-common = { workspace = true } +quickwit-metrics = { workspace = true } quickwit-config = { workspace = true } quickwit-doc-mapper = { workspace = true } quickwit-parquet-engine = { workspace = true } diff --git a/quickwit/quickwit-metastore/src/metastore/postgres/metrics.rs b/quickwit/quickwit-metastore/src/metastore/postgres/metrics.rs index 7e540b6cbcf..7f0a6feec04 100644 --- a/quickwit/quickwit-metastore/src/metastore/postgres/metrics.rs +++ b/quickwit/quickwit-metastore/src/metastore/postgres/metrics.rs @@ -14,7 +14,7 @@ use std::sync::LazyLock; -use quickwit_common::metrics::{Gauge, gauge}; +use quickwit_metrics::{Gauge, gauge}; #[derive(Clone)] pub(super) struct PostgresMetrics { diff --git a/quickwit/quickwit-metastore/src/metastore/postgres/pool.rs b/quickwit/quickwit-metastore/src/metastore/postgres/pool.rs index 9142c3cf87c..455f7e0e848 100644 --- a/quickwit/quickwit-metastore/src/metastore/postgres/pool.rs +++ b/quickwit/quickwit-metastore/src/metastore/postgres/pool.rs @@ -14,7 +14,7 @@ use 
futures::future::BoxFuture; use futures::stream::BoxStream; -use quickwit_common::metrics::GaugeGuard; +use quickwit_metrics::GaugeGuard; use sqlx::pool::PoolConnection; use sqlx::pool::maybe::MaybePoolConnection; use sqlx::{ @@ -58,8 +58,8 @@ impl<'a, DB: Database> Acquire<'a> for &TrackedPool { .set(self.inner_pool.num_idle() as f64); Box::pin(async move { - let mut gauge_guard = GaugeGuard::from_gauge(&POSTGRES_METRICS.acquire_connections); - gauge_guard.add(1); + let mut _gauge_guard = GaugeGuard::from_gauge(&POSTGRES_METRICS.acquire_connections); + _gauge_guard.increment(1.0); let conn = acquire_conn_fut.await?; Ok(conn) diff --git a/quickwit/quickwit-metrics/examples/http_service.rs b/quickwit/quickwit-metrics/examples/http_service.rs index 778b567c0b1..1fac4097f30 100644 --- a/quickwit/quickwit-metrics/examples/http_service.rs +++ b/quickwit/quickwit-metrics/examples/http_service.rs @@ -102,7 +102,9 @@ const REGION_LABEL: Labels<1> = Labels::new(["region"]); fn track_connection(region: &'static str) -> GaugeGuard { let lv = REGION_LABEL.with_values([region]); let g = gauge!(parent: HTTP_ACTIVE_CONNECTIONS, labels: &lv); - GaugeGuard::increment(&g, 1.0) + let mut guard = GaugeGuard::from_gauge(&g); + guard.increment(1.0); + guard } // ─── Prometheus setup ─── @@ -158,7 +160,8 @@ fn handle_request(method: &'static str, path: &'static str, region: &'static str "method" => method, ); { - let _guard = GaugeGuard::increment(&conn_gauge, 1.0); + let mut _guard = GaugeGuard::from_gauge(&conn_gauge); + _guard.increment(1.0); } println!(" [{region}] {method} {path} -> {status} ({duration_ms:.3}s)"); diff --git a/quickwit/quickwit-metrics/src/counter.rs b/quickwit/quickwit-metrics/src/counter.rs index 3b2ed1504fa..e56e978c061 100644 --- a/quickwit/quickwit-metrics/src/counter.rs +++ b/quickwit/quickwit-metrics/src/counter.rs @@ -261,7 +261,7 @@ macro_rules! 
counter { ( name: $name:literal, description: $description:literal, - subsystem: $subsystem:literal + subsystem: $subsystem:tt $(, $label:literal => $value:literal)* $(,)? ) => {{ $crate::counter!( @@ -277,7 +277,7 @@ macro_rules! counter { ( name: $name:literal, description: $description:literal, - subsystem: $subsystem:literal, + subsystem: $subsystem:tt, observable: $observable:expr $(, $label:literal => $value:literal)* $(,)? ) => {{ diff --git a/quickwit/quickwit-metrics/src/gauge.rs b/quickwit/quickwit-metrics/src/gauge.rs index 74e658298e4..c9eb69f3430 100644 --- a/quickwit/quickwit-metrics/src/gauge.rs +++ b/quickwit/quickwit-metrics/src/gauge.rs @@ -224,45 +224,50 @@ impl GaugeFn for Gauge { } } -/// RAII guard that increments a [`Gauge`] on creation and decrements it -/// by the same amount when dropped. +/// RAII guard that tracks increments to a [`Gauge`] and decrements the +/// tracked amount when dropped. /// /// Useful for tracking in-flight work (connections, requests, etc.) /// with automatic cleanup on scope exit — even via `?`, `return`, or /// a panic. /// /// ```ignore -/// let _guard = GaugeGuard::increment(&gauge, 1.0); +/// let mut guard = GaugeGuard::from_gauge(&gauge); +/// guard.increment(1.0); /// // gauge is incremented by 1.0 /// // ... do work ... -/// // gauge is decremented by 1.0 when _guard drops +/// // gauge is decremented by 1.0 when guard drops /// ``` #[derive(Debug)] pub struct GaugeGuard { gauge: Gauge, - value: f64, + delta: f64, } impl GaugeGuard { - /// Increments `gauge` by `value` and returns a guard that will - /// decrement it back when dropped. - pub fn increment(gauge: &Gauge, value: f64) -> Self { - gauge.increment(value); + /// Creates a guard that tracks `gauge` without changing its value. + pub fn from_gauge(gauge: &Gauge) -> Self { Self { gauge: gauge.clone(), - value, + delta: 0.0, } } + /// Adds `delta` to the gauge and to the value this guard tracks. 
+ pub fn increment(&mut self, delta: f64) { + self.delta += delta; + self.gauge.increment(delta); + } + /// Returns the value this guard is tracking. - pub fn value(&self) -> f64 { - self.value + pub fn get(&self) -> f64 { + self.delta } } impl Drop for GaugeGuard { fn drop(&mut self) { - self.gauge.decrement(self.value); + self.gauge.decrement(self.delta); } } @@ -291,7 +296,8 @@ impl Drop for GaugeGuard { /// /// ```ignore /// let child = gauge!(parent: base, "method" => method); -/// let _guard = GaugeGuard::increment(&child, 1.0); +/// let mut guard = GaugeGuard::from_gauge(&child); +/// guard.increment(1.0); /// ``` #[macro_export] macro_rules! gauge { @@ -300,7 +306,7 @@ macro_rules! gauge { ( name: $name:literal, description: $description:literal, - subsystem: $subsystem:literal + subsystem: $subsystem:tt $(, $label:literal => $value:literal)* $(,)? ) => {{ $crate::gauge!( @@ -316,7 +322,7 @@ macro_rules! gauge { ( name: $name:literal, description: $description:literal, - subsystem: $subsystem:literal, + subsystem: $subsystem:tt, observable: $observable:expr $(, $label:literal => $value:literal)* $(,)? ) => {{ diff --git a/quickwit/quickwit-metrics/src/histogram.rs b/quickwit/quickwit-metrics/src/histogram.rs index 191b1558568..b904d7522f0 100644 --- a/quickwit/quickwit-metrics/src/histogram.rs +++ b/quickwit/quickwit-metrics/src/histogram.rs @@ -13,6 +13,7 @@ // limitations under the License. use std::sync::{Arc, LazyLock}; +use std::time::Instant; use dashmap::DashMap; use metrics::HistogramFn; @@ -174,6 +175,11 @@ impl Histogram { pub fn record(&self, value: f64) { self.0.inner.record(value); } + + /// Starts a timer that records the elapsed time in seconds when dropped. 
+ pub fn start_timer(&self) -> HistogramTimer { + HistogramTimer::new(self.clone()) + } } /// Bridges `Histogram` into the `metrics` recorder trait so it can be @@ -184,6 +190,41 @@ impl HistogramFn for Histogram { } } +/// RAII timer that records elapsed wall-clock time into a [`Histogram`]. +#[derive(Debug)] +pub struct HistogramTimer { + histogram: Histogram, + start: Instant, + observed: bool, +} + +impl HistogramTimer { + fn new(histogram: Histogram) -> Self { + Self { + histogram, + start: Instant::now(), + observed: false, + } + } + + /// Records the elapsed duration immediately. + /// + /// The timer is consumed so the duration is not recorded again on drop. + pub fn observe_duration(self) { + let mut timer = self; + timer.observed = true; + timer.histogram.record(timer.start.elapsed().as_secs_f64()); + } +} + +impl Drop for HistogramTimer { + fn drop(&mut self) { + if !self.observed { + self.histogram.record(self.start.elapsed().as_secs_f64()); + } + } +} + /// Declares or extends a histogram metric. /// /// # Base declaration @@ -218,7 +259,7 @@ macro_rules! histogram { ( name: $name:literal, description: $description:literal, - subsystem: $subsystem:literal, + subsystem: $subsystem:tt, buckets: $buckets:expr $(, $label:literal => $value:literal)* $(,)? ) => {{ diff --git a/quickwit/quickwit-metrics/src/inner.rs b/quickwit/quickwit-metrics/src/inner.rs index e6aae4d7587..2c41ab87248 100644 --- a/quickwit/quickwit-metrics/src/inner.rs +++ b/quickwit/quickwit-metrics/src/inner.rs @@ -76,7 +76,7 @@ macro_rules! __key_info_metadata { observable: $observable:expr, name: $name:literal, description: $description:literal, - subsystem: $subsystem:literal + subsystem: $subsystem:tt $(, $label:literal => $value:literal)* $(,)? 
) => { const KEY_NAME: &str = $crate::__key_name!($subsystem, $name); diff --git a/quickwit/quickwit-metrics/src/lib.rs b/quickwit/quickwit-metrics/src/lib.rs index 6d406a8ae89..651aa9ff71b 100644 --- a/quickwit/quickwit-metrics/src/lib.rs +++ b/quickwit/quickwit-metrics/src/lib.rs @@ -136,7 +136,8 @@ //! //! ```rust,ignore //! { -//! let _guard = GaugeGuard::increment(&ACTIVE_CONNS, 1.0); +//! let mut _guard = GaugeGuard::from_gauge(&ACTIVE_CONNS); +//! _guard.increment(1.0); //! // ... connection is alive here ... //! } //! // gauge decremented automatically on drop @@ -310,7 +311,7 @@ pub mod __inventory { // ─── Public types ─── pub use counter::Counter; pub use gauge::{Gauge, GaugeGuard}; -pub use histogram::{Histogram, HistogramConfig}; +pub use histogram::{Histogram, HistogramConfig, HistogramTimer}; pub use labels::{LabelValues, Labels}; // ─── metrics-rs re-exports ─── pub use metrics::{CounterFn, GaugeFn, HistogramFn}; diff --git a/quickwit/quickwit-metrics/tests/gauge.rs b/quickwit/quickwit-metrics/tests/gauge.rs index ae0f0da6c43..5be23bce11b 100644 --- a/quickwit/quickwit-metrics/tests/gauge.rs +++ b/quickwit/quickwit-metrics/tests/gauge.rs @@ -91,7 +91,8 @@ fn guard_decrements_on_drop() { ); g.set(0.0); { - let _guard = GaugeGuard::increment(&g, 5.0); + let mut _guard = GaugeGuard::from_gauge(&g); + _guard.increment(5.0); } }); @@ -109,8 +110,9 @@ fn guard_after_set() { ); g.set(10.0); { - let guard = GaugeGuard::increment(&g, 3.0); - assert_eq!(guard.value(), 3.0); + let mut guard = GaugeGuard::from_gauge(&g); + guard.increment(3.0); + assert_eq!(guard.get(), 3.0); } }); @@ -118,6 +120,30 @@ fn guard_after_set() { assert_eq!(entries[0].2, DebugValue::Gauge(10.0.into())); } +#[test] +fn mutable_guard_tracks_delta() { + let entries = with_recorder(|| { + let g = gauge!( + name: "g_mutable_guard", + description: "mutable guard", + subsystem: "test", + ); + g.set(0.0); + { + let mut guard = GaugeGuard::from_gauge(&g); + assert_eq!(guard.get(), 0.0); + 
guard.increment(5.0); + guard.increment(-2.0); + guard.increment(0.5); + guard.increment(-1.5); + assert_eq!(guard.get(), 2.0); + } + }); + + assert_eq!(entries.len(), 1); + assert_eq!(entries[0].2, DebugValue::Gauge(0.0.into())); +} + #[test] fn multiple_guards() { let entries = with_recorder(|| { @@ -127,8 +153,10 @@ fn multiple_guards() { subsystem: "test", ); g.set(0.0); - let guard_a = GaugeGuard::increment(&g, 2.0); - let guard_b = GaugeGuard::increment(&g, 5.0); + let mut guard_a = GaugeGuard::from_gauge(&g); + guard_a.increment(2.0); + let mut guard_b = GaugeGuard::from_gauge(&g); + guard_b.increment(5.0); drop(guard_b); drop(guard_a); }); @@ -194,7 +222,8 @@ fn observable_guard_matches_recorder() { ); g.set(0.0); { - let _guard = GaugeGuard::increment(&g, 5.0); + let mut _guard = GaugeGuard::from_gauge(&g); + _guard.increment(5.0); assert_eq!(g.get(), 5.0); } g diff --git a/quickwit/quickwit-metrics/tests/histogram.rs b/quickwit/quickwit-metrics/tests/histogram.rs index 28235064611..747e420422d 100644 --- a/quickwit/quickwit-metrics/tests/histogram.rs +++ b/quickwit/quickwit-metrics/tests/histogram.rs @@ -115,3 +115,51 @@ fn config_stored() { assert_eq!(config.info.metadata.target(), "sub"); assert_eq!((config.buckets_fn)(), vec![1.0, 2.0]); } + +#[test] +fn timer_records_value_on_drop() { + let entries = with_recorder(|| { + let h = histogram!( + name: "h_timer_drop", + description: "timer histogram", + subsystem: "test", + buckets: vec![1.0, 5.0, 10.0] + ); + let _timer = h.start_timer(); + }); + + let (name, labels, value) = &entries[0]; + assert_eq!(name, "quickwit_test_h_timer_drop"); + assert!(labels.is_empty()); + match value { + DebugValue::Histogram(vals) => { + assert_eq!(vals.len(), 1); + assert!(vals[0].into_inner() >= 0.0); + } + other => panic!("expected Histogram, got {other:?}"), + } +} + +#[test] +fn timer_observe_duration_records_once() { + let entries = with_recorder(|| { + let h = histogram!( + name: "h_timer_observe_duration", + 
description: "timer histogram", + subsystem: "test", + buckets: vec![1.0, 5.0, 10.0] + ); + h.start_timer().observe_duration(); + }); + + let (name, labels, value) = &entries[0]; + assert_eq!(name, "quickwit_test_h_timer_observe_duration"); + assert!(labels.is_empty()); + match value { + DebugValue::Histogram(vals) => { + assert_eq!(vals.len(), 1); + assert!(vals[0].into_inner() >= 0.0); + } + other => panic!("expected Histogram, got {other:?}"), + } +} diff --git a/quickwit/quickwit-opentelemetry/Cargo.toml b/quickwit/quickwit-opentelemetry/Cargo.toml index 6f3d012d01f..535b8659e1a 100644 --- a/quickwit/quickwit-opentelemetry/Cargo.toml +++ b/quickwit/quickwit-opentelemetry/Cargo.toml @@ -24,6 +24,7 @@ tonic = { workspace = true } tracing = { workspace = true } quickwit-common = { workspace = true } +quickwit-metrics = { workspace = true } quickwit-config = { workspace = true } quickwit-ingest = { workspace = true } quickwit-parquet-engine = { workspace = true } diff --git a/quickwit/quickwit-opentelemetry/src/otlp/logs.rs b/quickwit/quickwit-opentelemetry/src/otlp/logs.rs index 22b2eb747e8..1e26f8eb650 100644 --- a/quickwit/quickwit-opentelemetry/src/otlp/logs.rs +++ b/quickwit/quickwit-opentelemetry/src/otlp/logs.rs @@ -16,11 +16,11 @@ use std::collections::HashMap; use async_trait::async_trait; use prost::Message; -use quickwit_common::metrics::{counter, histogram}; use quickwit_common::thread_pool::run_cpu_intensive; use quickwit_common::uri::Uri; use quickwit_config::{ConfigFormat, IndexConfig, load_index_config_from_user_config}; use quickwit_ingest::{CommitType, JsonDocBatchV2Builder}; +use quickwit_metrics::{counter, histogram}; use quickwit_proto::ingest::DocBatchV2; use quickwit_proto::ingest::router::IngestRouterServiceClient; use quickwit_proto::opentelemetry::proto::collector::logs::v1::logs_service_server::LogsService; diff --git a/quickwit/quickwit-opentelemetry/src/otlp/metrics.rs b/quickwit/quickwit-opentelemetry/src/otlp/metrics.rs index 
a25aed52b7a..9cff954dedc 100644 --- a/quickwit/quickwit-opentelemetry/src/otlp/metrics.rs +++ b/quickwit/quickwit-opentelemetry/src/otlp/metrics.rs @@ -14,7 +14,8 @@ use std::sync::LazyLock; -use quickwit_common::metrics::{Counter, Histogram, counter, exponential_buckets, histogram}; +use quickwit_common::metrics::exponential_buckets; +use quickwit_metrics::{Counter, Histogram, counter, histogram}; pub struct OtlpServiceMetrics { pub requests_total: Counter, diff --git a/quickwit/quickwit-opentelemetry/src/otlp/otel_metrics.rs b/quickwit/quickwit-opentelemetry/src/otlp/otel_metrics.rs index 9a0945fb8b0..b88d08aff94 100644 --- a/quickwit/quickwit-opentelemetry/src/otlp/otel_metrics.rs +++ b/quickwit/quickwit-opentelemetry/src/otlp/otel_metrics.rs @@ -15,11 +15,11 @@ use std::collections::HashMap; use async_trait::async_trait; -use quickwit_common::metrics::{counter, histogram}; use quickwit_common::thread_pool::run_cpu_intensive; use quickwit_common::uri::Uri; use quickwit_config::{ConfigFormat, IndexConfig, load_index_config_from_user_config}; use quickwit_ingest::CommitType; +use quickwit_metrics::{counter, histogram}; use quickwit_parquet_engine::schema::REQUIRED_FIELDS; use quickwit_proto::ingest::DocBatchV2; use quickwit_proto::ingest::router::IngestRouterServiceClient; diff --git a/quickwit/quickwit-opentelemetry/src/otlp/traces.rs b/quickwit/quickwit-opentelemetry/src/otlp/traces.rs index bb0925a8465..964e69777d7 100644 --- a/quickwit/quickwit-opentelemetry/src/otlp/traces.rs +++ b/quickwit/quickwit-opentelemetry/src/otlp/traces.rs @@ -18,11 +18,11 @@ use std::str::FromStr; use async_trait::async_trait; use prost::Message; -use quickwit_common::metrics::{counter, histogram}; use quickwit_common::thread_pool::run_cpu_intensive; use quickwit_common::uri::Uri; use quickwit_config::{ConfigFormat, IndexConfig, load_index_config_from_user_config}; use quickwit_ingest::{CommitType, JsonDocBatchV2Builder}; +use quickwit_metrics::{counter, histogram}; use 
quickwit_proto::ingest::DocBatchV2; use quickwit_proto::ingest::router::IngestRouterServiceClient; use quickwit_proto::opentelemetry::proto::collector::trace::v1::trace_service_server::TraceService; diff --git a/quickwit/quickwit-parquet-engine/Cargo.toml b/quickwit/quickwit-parquet-engine/Cargo.toml index d4893c8287e..b692d479825 100644 --- a/quickwit/quickwit-parquet-engine/Cargo.toml +++ b/quickwit/quickwit-parquet-engine/Cargo.toml @@ -18,6 +18,7 @@ chrono = { workspace = true } parquet = { workspace = true } prost = { workspace = true } quickwit-common = { workspace = true } +quickwit-metrics = { workspace = true } quickwit-dst = { workspace = true } quickwit-proto = { workspace = true } siphasher = { workspace = true } diff --git a/quickwit/quickwit-parquet-engine/src/ingest/processor.rs b/quickwit/quickwit-parquet-engine/src/ingest/processor.rs index 3edc740df2b..e23c894ed60 100644 --- a/quickwit/quickwit-parquet-engine/src/ingest/processor.rs +++ b/quickwit/quickwit-parquet-engine/src/ingest/processor.rs @@ -18,7 +18,7 @@ use std::io::Cursor; use arrow::ipc::reader::StreamReader; use arrow::record_batch::RecordBatch; -use quickwit_common::metrics::counter; +use quickwit_metrics::counter; use tracing::{debug, instrument, warn}; use crate::metrics::PARQUET_ENGINE_METRICS; diff --git a/quickwit/quickwit-parquet-engine/src/ingest/sketch_processor.rs b/quickwit/quickwit-parquet-engine/src/ingest/sketch_processor.rs index 65aaf9f6bb9..ec599001051 100644 --- a/quickwit/quickwit-parquet-engine/src/ingest/sketch_processor.rs +++ b/quickwit/quickwit-parquet-engine/src/ingest/sketch_processor.rs @@ -16,7 +16,7 @@ use arrow::array::AsArray; use arrow::record_batch::RecordBatch; -use quickwit_common::metrics::counter; +use quickwit_metrics::counter; use tracing::{debug, instrument, warn}; use super::processor::IngestError; diff --git a/quickwit/quickwit-parquet-engine/src/metrics.rs b/quickwit/quickwit-parquet-engine/src/metrics.rs index 8d79bbb2ca4..d2ce2f37d45 100644 
--- a/quickwit/quickwit-parquet-engine/src/metrics.rs +++ b/quickwit/quickwit-parquet-engine/src/metrics.rs @@ -19,7 +19,7 @@ use std::sync::LazyLock; -use quickwit_common::metrics::{Counter, Histogram, counter, histogram}; +use quickwit_metrics::{Counter, Histogram, counter, histogram}; /// Histogram buckets for duration measurements (in seconds). /// Covers sub-millisecond to multi-second operations. diff --git a/quickwit/quickwit-search/Cargo.toml b/quickwit/quickwit-search/Cargo.toml index dcf2bd5a774..fbb4bee56a8 100644 --- a/quickwit/quickwit-search/Cargo.toml +++ b/quickwit/quickwit-search/Cargo.toml @@ -37,6 +37,7 @@ ulid = { workspace = true } utoipa = { workspace = true } quickwit-common = { workspace = true } +quickwit-metrics = { workspace = true } quickwit-config = { workspace = true } quickwit-directories = { workspace = true } quickwit-doc-mapper = { workspace = true } diff --git a/quickwit/quickwit-search/src/metrics.rs b/quickwit/quickwit-search/src/metrics.rs index 1d974919628..f7da31bd6c6 100644 --- a/quickwit/quickwit-search/src/metrics.rs +++ b/quickwit/quickwit-search/src/metrics.rs @@ -19,9 +19,8 @@ use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::{Arc, LazyLock}; use bytesize::ByteSize; -use quickwit_common::metrics::{ - Counter, Gauge, Histogram, counter, exponential_buckets, gauge, histogram, linear_buckets, -}; +use quickwit_common::metrics::{exponential_buckets, linear_buckets}; +use quickwit_metrics::{Counter, Gauge, Histogram, counter, gauge, histogram}; fn print_if_not_null( field_name: &'static str, diff --git a/quickwit/quickwit-search/src/metrics_trackers.rs b/quickwit/quickwit-search/src/metrics_trackers.rs index 6e074a2648a..32f22b78086 100644 --- a/quickwit/quickwit-search/src/metrics_trackers.rs +++ b/quickwit/quickwit-search/src/metrics_trackers.rs @@ -19,7 +19,7 @@ use std::task::{Context, Poll, ready}; use std::time::Instant; use pin_project::{pin_project, pinned_drop}; -use quickwit_common::metrics::{counter, 
histogram}; +use quickwit_metrics::{counter, histogram}; use quickwit_proto::search::LeafSearchResponse; use crate::SearchError; diff --git a/quickwit/quickwit-search/src/scroll_context.rs b/quickwit/quickwit-search/src/scroll_context.rs index f185c4079c2..786e3e4c7eb 100644 --- a/quickwit/quickwit-search/src/scroll_context.rs +++ b/quickwit/quickwit-search/src/scroll_context.rs @@ -22,9 +22,9 @@ use std::time::Duration; use anyhow::Context; use base64::Engine; use base64::prelude::BASE64_STANDARD; -use quickwit_common::metrics::GaugeGuard; use quickwit_common::shared_consts::SCROLL_BATCH_LEN; use quickwit_metastore::SplitMetadata; +use quickwit_metrics::GaugeGuard; use quickwit_proto::search::{LeafSearchResponse, PartialHit, SearchRequest, SplitSearchError}; use quickwit_proto::types::IndexUid; use serde::{Deserialize, Serialize}; @@ -150,7 +150,7 @@ impl MiniKV { pub async fn put(&self, key: Vec, payload: Vec, ttl: Duration) { let mut metric_guard = GaugeGuard::from_gauge(&crate::SEARCH_METRICS.searcher_local_kv_store_size_bytes); - metric_guard.add(payload.len() as i64); + metric_guard.increment(payload.len() as f64); let mut cache_lock = self.ttl_with_cache.write().await; cache_lock.insert( key, diff --git a/quickwit/quickwit-search/src/search_job_placer.rs b/quickwit/quickwit-search/src/search_job_placer.rs index b2c4e21dcea..d5d6961f7f4 100644 --- a/quickwit/quickwit-search/src/search_job_placer.rs +++ b/quickwit/quickwit-search/src/search_job_placer.rs @@ -21,9 +21,9 @@ use std::net::SocketAddr; use anyhow::bail; use async_trait::async_trait; use quickwit_common::SocketAddrLegacyHash; -use quickwit_common::metrics::counter; use quickwit_common::pubsub::EventSubscriber; use quickwit_common::rendezvous_hasher::{node_affinity, sort_by_rendez_vous_hash}; +use quickwit_metrics::counter; use quickwit_proto::search::{ReportSplit, ReportSplitsRequest}; use tracing::{info, warn}; diff --git a/quickwit/quickwit-search/src/search_permit_provider.rs 
b/quickwit/quickwit-search/src/search_permit_provider.rs index b05a7e06235..33729cfb7c9 100644 --- a/quickwit/quickwit-search/src/search_permit_provider.rs +++ b/quickwit/quickwit-search/src/search_permit_provider.rs @@ -20,7 +20,7 @@ use std::sync::Arc; use std::task::{Context, Poll}; use bytesize::ByteSize; -use quickwit_common::metrics::GaugeGuard; +use quickwit_metrics::GaugeGuard; use quickwit_proto::search::SplitIdAndFooterOffsets; use tokio::sync::{mpsc, oneshot}; @@ -335,7 +335,7 @@ impl SearchPermitActor { let mut ongoing_gauge_guard = GaugeGuard::from_gauge( &crate::SEARCH_METRICS.leaf_search_single_split_tasks_ongoing, ); - ongoing_gauge_guard.add(1); + ongoing_gauge_guard.increment(1.0); self.total_memory_allocated += permit_request.permit_size; self.num_warmup_slots_available -= 1; permit_request diff --git a/quickwit/quickwit-serve/Cargo.toml b/quickwit/quickwit-serve/Cargo.toml index 79c4307d682..d473ec93d18 100644 --- a/quickwit/quickwit-serve/Cargo.toml +++ b/quickwit/quickwit-serve/Cargo.toml @@ -61,6 +61,7 @@ zstd = { workspace = true } quickwit-actors = { workspace = true } quickwit-cluster = { workspace = true } quickwit-common = { workspace = true } +quickwit-metrics = { workspace = true } quickwit-config = { workspace = true } quickwit-control-plane = { workspace = true } quickwit-datafusion = { workspace = true, optional = true } diff --git a/quickwit/quickwit-serve/src/decompression.rs b/quickwit/quickwit-serve/src/decompression.rs index cf452def5e1..f63ea806922 100644 --- a/quickwit/quickwit-serve/src/decompression.rs +++ b/quickwit/quickwit-serve/src/decompression.rs @@ -17,8 +17,9 @@ use std::sync::LazyLock; use bytes::Bytes; use flate2::read::{MultiGzDecoder, ZlibDecoder}; -use quickwit_common::metrics::{GaugeGuard, MEMORY_METRICS}; +use quickwit_common::metrics::MEMORY_METRICS; use quickwit_common::thread_pool::run_cpu_intensive; +use quickwit_metrics::GaugeGuard; use thiserror::Error; use warp::Filter; use warp::reject::Reject; @@ 
-115,7 +116,7 @@ pub(crate) struct Body { impl Body { pub fn new(content: Bytes, load_shield_permit: LoadShieldPermit) -> Body { let mut gauge_guard = GaugeGuard::from_gauge(&MEMORY_METRICS.in_flight.rest_server); - gauge_guard.add(content.len() as i64); + gauge_guard.increment(content.len() as f64); Body { content, _gauge_guard: gauge_guard, diff --git a/quickwit/quickwit-serve/src/load_shield.rs b/quickwit/quickwit-serve/src/load_shield.rs index 0a5e9a9d6fa..03e0154e26d 100644 --- a/quickwit/quickwit-serve/src/load_shield.rs +++ b/quickwit/quickwit-serve/src/load_shield.rs @@ -14,7 +14,7 @@ use std::time::Duration; -use quickwit_common::metrics::{Gauge, GaugeGuard, gauge}; +use quickwit_metrics::{Gauge, GaugeGuard, gauge}; use tokio::sync::{Semaphore, SemaphorePermit}; use crate::rest::TooManyRequests; @@ -81,12 +81,12 @@ impl LoadShield { pub async fn acquire_permit(&'static self) -> Result { let mut pending_gauge_guard = GaugeGuard::from_gauge(&self.pending_gauge); - pending_gauge_guard.add(1); + pending_gauge_guard.increment(1.0); let in_flight_permit_opt = self.acquire_in_flight_permit().await?; let concurrency_permit_opt = self.acquire_concurrency_permit().await; drop(pending_gauge_guard); let mut ongoing_gauge_guard = GaugeGuard::from_gauge(&self.ongoing_gauge); - ongoing_gauge_guard.add(1); + ongoing_gauge_guard.increment(1.0); Ok(LoadShieldPermit { _in_flight_permit_opt: in_flight_permit_opt, _concurrency_permit_opt: concurrency_permit_opt, diff --git a/quickwit/quickwit-serve/src/metrics.rs b/quickwit/quickwit-serve/src/metrics.rs index 06d5a4085f8..9757439289c 100644 --- a/quickwit/quickwit-serve/src/metrics.rs +++ b/quickwit/quickwit-serve/src/metrics.rs @@ -14,9 +14,8 @@ use std::sync::LazyLock; -use quickwit_common::metrics::{ - Counter, Gauge, Histogram, counter, exponential_buckets, gauge, histogram, -}; +use quickwit_common::metrics::exponential_buckets; +use quickwit_metrics::{Counter, Gauge, Histogram, counter, gauge, histogram}; pub struct 
ServeMetrics { pub http_requests_total: Counter, diff --git a/quickwit/quickwit-serve/src/rest.rs b/quickwit/quickwit-serve/src/rest.rs index a13b0bb2457..21c5edde9f7 100644 --- a/quickwit/quickwit-serve/src/rest.rs +++ b/quickwit/quickwit-serve/src/rest.rs @@ -19,9 +19,9 @@ use std::sync::Arc; use hyper_util::rt::{TokioExecutor, TokioIo}; use hyper_util::server::conn::auto::Builder; use hyper_util::service::TowerToHyperService; -use quickwit_common::metrics::{counter, histogram}; use quickwit_common::tower::BoxFutureInfaillible; use quickwit_config::{disable_ingest_v1, enable_ingest_v2}; +use quickwit_metrics::{counter, histogram}; use quickwit_search::SearchService; use tokio::io::{AsyncRead, AsyncWrite}; use tokio::net::{TcpListener, TcpStream}; diff --git a/quickwit/quickwit-storage/Cargo.toml b/quickwit/quickwit-storage/Cargo.toml index 8f735e44db4..8fe4701f631 100644 --- a/quickwit/quickwit-storage/Cargo.toml +++ b/quickwit/quickwit-storage/Cargo.toml @@ -51,6 +51,7 @@ azure_storage_blobs = { workspace = true, optional = true } quickwit-aws = { workspace = true } quickwit-common = { workspace = true } +quickwit-metrics = { workspace = true } quickwit-config = { workspace = true } quickwit-proto = { workspace = true } diff --git a/quickwit/quickwit-storage/src/metrics.rs b/quickwit/quickwit-storage/src/metrics.rs index da878d354e8..4e3f5986b11 100644 --- a/quickwit/quickwit-storage/src/metrics.rs +++ b/quickwit/quickwit-storage/src/metrics.rs @@ -17,8 +17,8 @@ use std::collections::HashMap; use std::sync::{LazyLock, RwLock}; -use quickwit_common::metrics::{Counter, Gauge, GaugeGuard, Histogram, counter, gauge, histogram}; use quickwit_config::CacheConfig; +use quickwit_metrics::{Counter, Gauge, GaugeGuard, Histogram, counter, gauge, histogram}; /// Counters associated to storage operations. 
pub struct StorageMetrics { @@ -460,9 +460,9 @@ pub fn object_storage_get_slice_in_flight_guards( let mut bytes_guard = GaugeGuard::from_gauge( &crate::STORAGE_METRICS.object_storage_get_slice_in_flight_num_bytes, ); - bytes_guard.add_f64(get_request_size as f64); + bytes_guard.increment(get_request_size as f64); let mut count_guard = GaugeGuard::from_gauge(&crate::STORAGE_METRICS.object_storage_get_slice_in_flight_count); - count_guard.add(1); + count_guard.increment(1.0); (bytes_guard, count_guard) } diff --git a/quickwit/quickwit-storage/src/object_storage/error.rs b/quickwit/quickwit-storage/src/object_storage/error.rs index 04ca9ec5efe..ca6ef0396b8 100644 --- a/quickwit/quickwit-storage/src/object_storage/error.rs +++ b/quickwit/quickwit-storage/src/object_storage/error.rs @@ -22,7 +22,7 @@ use aws_sdk_s3::operation::get_object::GetObjectError; use aws_sdk_s3::operation::head_object::HeadObjectError; use aws_sdk_s3::operation::put_object::PutObjectError; use aws_sdk_s3::operation::upload_part::UploadPartError; -use quickwit_common::metrics::counter; +use quickwit_metrics::counter; use crate::{StorageError, StorageErrorKind}; From 5644b8da78bda9b2efd5e1e2f901b2536efe1c2c Mon Sep 17 00:00:00 2001 From: Shuhei Kitagawa Date: Sat, 2 May 2026 12:53:02 +0200 Subject: [PATCH 12/54] refactor: remove aggregate metric holder structs --- quickwit/quickwit-cli/src/jemalloc.rs | 9 +- quickwit/quickwit-cli/src/lib.rs | 6 +- quickwit/quickwit-cli/src/metrics.rs | 17 +- quickwit/quickwit-cluster/src/grpc_gossip.rs | 3 +- quickwit/quickwit-cluster/src/lib.rs | 16 +- quickwit/quickwit-cluster/src/metrics.rs | 80 ++----- quickwit/quickwit-common/src/metrics.rs | 152 +++++--------- .../src/control_plane.rs | 12 +- .../src/indexing_scheduler/mod.rs | 10 +- .../src/ingest/ingest_controller.rs | 12 +- .../quickwit-control-plane/src/metrics.rs | 83 ++------ .../quickwit-control-plane/src/model/mod.rs | 4 +- .../src/model/shard_table.rs | 8 +- .../src/caching_directory.rs | 5 +- 
.../src/actors/doc_processor.rs | 4 +- .../quickwit-indexing/src/actors/indexer.rs | 6 +- .../src/actors/indexing_pipeline.rs | 12 +- .../src/actors/merge_pipeline.rs | 10 +- .../src/actors/merge_scheduler_service.rs | 20 +- .../metrics_pipeline/parquet_uploader.rs | 3 +- .../src/actors/metrics_pipeline/pipeline.rs | 2 +- .../processed_parquet_batch.rs | 4 +- .../quickwit-indexing/src/actors/uploader.rs | 7 +- quickwit/quickwit-indexing/src/metrics.rs | 58 ++---- .../src/models/processed_doc.rs | 4 +- .../src/models/raw_doc_batch.rs | 6 +- .../src/source/ingest/mod.rs | 9 +- .../src/source/kafka_source.rs | 4 +- quickwit/quickwit-indexing/src/source/mod.rs | 15 +- .../quickwit-ingest/src/ingest_api_service.rs | 5 +- .../src/ingest_v2/broadcast/local_shards.rs | 13 +- .../quickwit-ingest/src/ingest_v2/fetch.rs | 11 +- .../quickwit-ingest/src/ingest_v2/ingester.rs | 23 +- .../quickwit-ingest/src/ingest_v2/metrics.rs | 184 ++++++---------- .../src/ingest_v2/replication.rs | 13 +- .../quickwit-ingest/src/ingest_v2/router.rs | 75 +++---- quickwit/quickwit-ingest/src/lib.rs | 9 +- quickwit/quickwit-ingest/src/metrics.rs | 35 +--- quickwit/quickwit-jaeger/src/lib.rs | 14 +- quickwit/quickwit-jaeger/src/metrics.rs | 37 +--- quickwit/quickwit-jaeger/src/v1.rs | 7 +- quickwit/quickwit-jaeger/src/v2.rs | 11 +- .../src/actors/delete_task_planner.rs | 4 +- .../src/actors/garbage_collector.rs | 20 +- quickwit/quickwit-janitor/src/metrics.rs | 34 +-- .../quickwit-lambda-client/src/invoker.rs | 13 +- quickwit/quickwit-lambda-client/src/lib.rs | 1 - .../quickwit-lambda-client/src/metrics.rs | 59 ++---- .../src/metastore/postgres/metrics.rs | 26 +-- .../src/metastore/postgres/pool.rs | 12 +- .../quickwit-opentelemetry/src/otlp/logs.rs | 11 +- .../src/otlp/metrics.rs | 42 +--- .../src/otlp/otel_metrics.rs | 11 +- .../quickwit-opentelemetry/src/otlp/traces.rs | 11 +- .../src/index/accumulator.rs | 11 +- .../src/ingest/processor.rs | 7 +- .../src/ingest/sketch_processor.rs | 9 +- 
.../quickwit-parquet-engine/src/metrics.rs | 66 ++---- quickwit/quickwit-search/src/leaf.rs | 12 +- quickwit/quickwit-search/src/leaf_cache.rs | 7 +- quickwit/quickwit-search/src/lib.rs | 1 - .../quickwit-search/src/list_fields_cache.rs | 2 +- quickwit/quickwit-search/src/list_terms.rs | 11 +- quickwit/quickwit-search/src/metrics.rs | 134 +++++------- .../quickwit-search/src/metrics_trackers.rs | 13 +- .../quickwit-search/src/scroll_context.rs | 2 +- .../quickwit-search/src/search_job_placer.rs | 4 +- .../src/search_permit_provider.rs | 8 +- quickwit/quickwit-search/src/service.rs | 2 +- quickwit/quickwit-serve/src/decompression.rs | 4 +- quickwit/quickwit-serve/src/lib.rs | 3 +- quickwit/quickwit-serve/src/load_shield.rs | 4 +- quickwit/quickwit-serve/src/metrics.rs | 33 +-- quickwit/quickwit-serve/src/rest.rs | 4 +- .../src/cache/quickwit_cache.rs | 3 +- .../src/file_descriptor_cache.rs | 5 +- quickwit/quickwit-storage/src/lib.rs | 13 +- quickwit/quickwit-storage/src/metrics.rs | 197 ++++++++---------- .../src/object_storage/azure_blob_storage.rs | 26 +-- .../src/object_storage/error.rs | 2 +- .../object_storage/s3_compatible_storage.rs | 44 ++-- .../src/opendal_storage/base.rs | 30 +-- .../quickwit-storage/src/split_cache/mod.rs | 2 +- .../src/split_cache/split_table.rs | 18 +- .../src/timeout_and_retry_storage.rs | 15 +- 85 files changed, 652 insertions(+), 1302 deletions(-) diff --git a/quickwit/quickwit-cli/src/jemalloc.rs b/quickwit/quickwit-cli/src/jemalloc.rs index ab7f5772e61..66ca7c8f4df 100644 --- a/quickwit/quickwit-cli/src/jemalloc.rs +++ b/quickwit/quickwit-cli/src/jemalloc.rs @@ -14,7 +14,6 @@ use std::time::Duration; -use quickwit_common::metrics::MEMORY_METRICS; use tikv_jemallocator::Jemalloc; use tracing::error; @@ -30,8 +29,6 @@ pub static GLOBAL: Jemalloc = Jemalloc; const JEMALLOC_METRICS_POLLING_INTERVAL: Duration = Duration::from_secs(1); pub async fn jemalloc_metrics_loop() -> tikv_jemalloc_ctl::Result<()> { - let memory_metrics = 
MEMORY_METRICS.clone(); - // Obtain a MIB for the `epoch`, `stats.active`, `stats.allocated`, and `stats.resident` keys: let epoch_mib = tikv_jemalloc_ctl::epoch::mib()?; let active_mib = tikv_jemalloc_ctl::stats::active::mib()?; @@ -48,13 +45,13 @@ pub async fn jemalloc_metrics_loop() -> tikv_jemalloc_ctl::Result<()> { // Read statistics using MIB keys: let active = active_mib.read()?; - memory_metrics.active_bytes.set(active as f64); + quickwit_common::metrics::MEMORY_ACTIVE_BYTES.set(active as f64); let allocated = allocated_mib.read()?; - memory_metrics.allocated_bytes.set(allocated as f64); + quickwit_common::metrics::MEMORY_ALLOCATED_BYTES.set(allocated as f64); let resident = resident_mib.read()?; - memory_metrics.resident_bytes.set(resident as f64); + quickwit_common::metrics::MEMORY_RESIDENT_BYTES.set(resident as f64); } } diff --git a/quickwit/quickwit-cli/src/lib.rs b/quickwit/quickwit-cli/src/lib.rs index 174b385145b..afdde568ac3 100644 --- a/quickwit/quickwit-cli/src/lib.rs +++ b/quickwit/quickwit-cli/src/lib.rs @@ -354,8 +354,6 @@ pub mod busy_detector { use tracing::debug; - use crate::metrics::CLI_METRICS; - // we need that time reference to use an atomic and not a mutex for LAST_UNPARK static TIME_REF: LazyLock = LazyLock::new(Instant::now); static ENABLED: AtomicBool = AtomicBool::new(false); @@ -393,9 +391,7 @@ pub mod busy_detector { .unwrap_or_default(); let now = now.as_micros() as u64; let delta = now - time.load(Ordering::Relaxed); - CLI_METRICS - .thread_unpark_duration_microseconds - .record(delta as f64); + crate::metrics::THREAD_UNPARK_DURATION_MICROSECONDS.record(delta as f64); if delta > ALLOWED_DELAY_MICROS { emit_debug(delta, now); } diff --git a/quickwit/quickwit-cli/src/metrics.rs b/quickwit/quickwit-cli/src/metrics.rs index 572ffbe4f92..ae5d9647d9d 100644 --- a/quickwit/quickwit-cli/src/metrics.rs +++ b/quickwit/quickwit-cli/src/metrics.rs @@ -17,11 +17,7 @@ use std::sync::LazyLock; use 
quickwit_common::metrics::exponential_buckets; use quickwit_metrics::{Histogram, histogram}; -pub struct CliMetrics { - pub thread_unpark_duration_microseconds: Histogram, -} - -static THREAD_UNPARK_DURATION_MICROSECONDS: LazyLock = LazyLock::new(|| { +pub(crate) static THREAD_UNPARK_DURATION_MICROSECONDS: LazyLock = LazyLock::new(|| { histogram!( name: "thread_unpark_duration_microseconds", description: "Duration for which a thread of the main tokio runtime is unparked.", @@ -29,14 +25,3 @@ static THREAD_UNPARK_DURATION_MICROSECONDS: LazyLock = LazyLock::new( buckets: exponential_buckets(5.0, 5.0, 5).unwrap(), ) }); - -impl Default for CliMetrics { - fn default() -> Self { - CliMetrics { - thread_unpark_duration_microseconds: THREAD_UNPARK_DURATION_MICROSECONDS.clone(), - } - } -} - -/// Serve counters exposes a bunch a set of metrics about the request received to quickwit. -pub static CLI_METRICS: LazyLock = LazyLock::new(CliMetrics::default); diff --git a/quickwit/quickwit-cluster/src/grpc_gossip.rs b/quickwit/quickwit-cluster/src/grpc_gossip.rs index 1cc6260e78e..ddd6f14c7bf 100644 --- a/quickwit/quickwit-cluster/src/grpc_gossip.rs +++ b/quickwit/quickwit-cluster/src/grpc_gossip.rs @@ -31,7 +31,6 @@ use tracing::{info, warn}; use crate::grpc_service::cluster_grpc_client; use crate::member::NodeStateExt; -use crate::metrics::CLUSTER_METRICS; const MAX_GOSSIP_PEERS: usize = 3; @@ -108,7 +107,7 @@ async fn perform_grpc_gossip_rounds( warn!("failed to fetch cluster state from node `{node_id}`"); continue; }; - CLUSTER_METRICS.grpc_gossip_rounds_total.increment(1); + crate::metrics::GRPC_GOSSIP_ROUNDS_TOTAL.increment(1); let mut chitchat_guard = chitchat.lock().await; diff --git a/quickwit/quickwit-cluster/src/lib.rs b/quickwit/quickwit-cluster/src/lib.rs index f52585dddc4..0387b4e5123 100644 --- a/quickwit/quickwit-cluster/src/lib.rs +++ b/quickwit/quickwit-cluster/src/lib.rs @@ -105,18 +105,10 @@ impl Transport for CountingUdpTransport { let socket = 
UdpSocket::open(listen_addr).await?; Ok(Box::new(CountingUdpSocket { socket, - gossip_recv: crate::metrics::CLUSTER_METRICS - .gossip_recv_messages_total - .clone(), - gossip_recv_bytes: crate::metrics::CLUSTER_METRICS - .gossip_recv_bytes_total - .clone(), - gossip_send: crate::metrics::CLUSTER_METRICS - .gossip_sent_messages_total - .clone(), - gossip_send_bytes: crate::metrics::CLUSTER_METRICS - .gossip_sent_bytes_total - .clone(), + gossip_recv: crate::metrics::GOSSIP_RECV_MESSAGES_TOTAL.clone(), + gossip_recv_bytes: crate::metrics::GOSSIP_RECV_BYTES_TOTAL.clone(), + gossip_send: crate::metrics::GOSSIP_SENT_MESSAGES_TOTAL.clone(), + gossip_send_bytes: crate::metrics::GOSSIP_SENT_BYTES_TOTAL.clone(), })) } } diff --git a/quickwit/quickwit-cluster/src/metrics.rs b/quickwit/quickwit-cluster/src/metrics.rs index 2a21c4da0fd..ab6fffe66af 100644 --- a/quickwit/quickwit-cluster/src/metrics.rs +++ b/quickwit/quickwit-cluster/src/metrics.rs @@ -23,22 +23,7 @@ use tokio::sync::Mutex; use crate::member::NodeStateExt; -pub struct ClusterMetrics { - pub live_nodes: Gauge, - pub ready_nodes: Gauge, - pub zombie_nodes: Gauge, - pub dead_nodes: Gauge, - pub cluster_state_size_bytes: Gauge, - pub node_state_size_bytes: Gauge, - pub node_state_keys: Gauge, - pub gossip_recv_messages_total: Counter, - pub gossip_recv_bytes_total: Counter, - pub gossip_sent_messages_total: Counter, - pub gossip_sent_bytes_total: Counter, - pub grpc_gossip_rounds_total: Counter, -} - -static LIVE_NODES: LazyLock = LazyLock::new(|| { +pub(crate) static LIVE_NODES: LazyLock = LazyLock::new(|| { gauge!( name: "live_nodes", description: "The number of live nodes observed locally.", @@ -46,7 +31,7 @@ static LIVE_NODES: LazyLock = LazyLock::new(|| { ) }); -static READY_NODES: LazyLock = LazyLock::new(|| { +pub(crate) static READY_NODES: LazyLock = LazyLock::new(|| { gauge!( name: "ready_nodes", description: "The number of ready nodes observed locally.", @@ -54,7 +39,7 @@ static READY_NODES: LazyLock = 
LazyLock::new(|| { ) }); -static ZOMBIE_NODES: LazyLock = LazyLock::new(|| { +pub(crate) static ZOMBIE_NODES: LazyLock = LazyLock::new(|| { gauge!( name: "zombie_nodes", description: "The number of zombie nodes observed locally.", @@ -62,7 +47,7 @@ static ZOMBIE_NODES: LazyLock = LazyLock::new(|| { ) }); -static DEAD_NODES: LazyLock = LazyLock::new(|| { +pub(crate) static DEAD_NODES: LazyLock = LazyLock::new(|| { gauge!( name: "dead_nodes", description: "The number of dead nodes observed locally.", @@ -70,7 +55,7 @@ static DEAD_NODES: LazyLock = LazyLock::new(|| { ) }); -static CLUSTER_STATE_SIZE_BYTES: LazyLock = LazyLock::new(|| { +pub(crate) static CLUSTER_STATE_SIZE_BYTES: LazyLock = LazyLock::new(|| { gauge!( name: "cluster_state_size_bytes", description: "The size of the cluster state in bytes.", @@ -78,7 +63,7 @@ static CLUSTER_STATE_SIZE_BYTES: LazyLock = LazyLock::new(|| { ) }); -static NODE_STATE_KEYS: LazyLock = LazyLock::new(|| { +pub(crate) static NODE_STATE_KEYS: LazyLock = LazyLock::new(|| { gauge!( name: "node_state_keys", description: "The number of keys in the node state.", @@ -86,7 +71,7 @@ static NODE_STATE_KEYS: LazyLock = LazyLock::new(|| { ) }); -static NODE_STATE_SIZE_BYTES: LazyLock = LazyLock::new(|| { +pub(crate) static NODE_STATE_SIZE_BYTES: LazyLock = LazyLock::new(|| { gauge!( name: "node_state_size_bytes", description: "The size of the node state in bytes.", @@ -94,7 +79,7 @@ static NODE_STATE_SIZE_BYTES: LazyLock = LazyLock::new(|| { ) }); -static GOSSIP_RECV_MESSAGES_TOTAL: LazyLock = LazyLock::new(|| { +pub(crate) static GOSSIP_RECV_MESSAGES_TOTAL: LazyLock = LazyLock::new(|| { counter!( name: "gossip_recv_messages_total", description: "Total number of gossip messages received.", @@ -102,7 +87,7 @@ static GOSSIP_RECV_MESSAGES_TOTAL: LazyLock = LazyLock::new(|| { ) }); -static GOSSIP_RECV_BYTES_TOTAL: LazyLock = LazyLock::new(|| { +pub(crate) static GOSSIP_RECV_BYTES_TOTAL: LazyLock = LazyLock::new(|| { counter!( name: 
"gossip_recv_bytes_total", description: "Total amount of gossip data received in bytes.", @@ -110,7 +95,7 @@ static GOSSIP_RECV_BYTES_TOTAL: LazyLock = LazyLock::new(|| { ) }); -static GOSSIP_SENT_MESSAGES_TOTAL: LazyLock = LazyLock::new(|| { +pub(crate) static GOSSIP_SENT_MESSAGES_TOTAL: LazyLock = LazyLock::new(|| { counter!( name: "gossip_sent_messages_total", description: "Total number of gossip messages sent.", @@ -118,7 +103,7 @@ static GOSSIP_SENT_MESSAGES_TOTAL: LazyLock = LazyLock::new(|| { ) }); -static GOSSIP_SENT_BYTES_TOTAL: LazyLock = LazyLock::new(|| { +pub(crate) static GOSSIP_SENT_BYTES_TOTAL: LazyLock = LazyLock::new(|| { counter!( name: "gossip_sent_bytes_total", description: "Total amount of gossip data sent in bytes.", @@ -126,7 +111,7 @@ static GOSSIP_SENT_BYTES_TOTAL: LazyLock = LazyLock::new(|| { ) }); -static GRPC_GOSSIP_ROUNDS_TOTAL: LazyLock = LazyLock::new(|| { +pub(crate) static GRPC_GOSSIP_ROUNDS_TOTAL: LazyLock = LazyLock::new(|| { counter!( name: "grpc_gossip_rounds_total", description: "Total number of gRPC gossip rounds performed with peer nodes.", @@ -134,27 +119,6 @@ static GRPC_GOSSIP_ROUNDS_TOTAL: LazyLock = LazyLock::new(|| { ) }); -impl Default for ClusterMetrics { - fn default() -> Self { - ClusterMetrics { - live_nodes: LIVE_NODES.clone(), - ready_nodes: READY_NODES.clone(), - zombie_nodes: ZOMBIE_NODES.clone(), - dead_nodes: DEAD_NODES.clone(), - cluster_state_size_bytes: CLUSTER_STATE_SIZE_BYTES.clone(), - node_state_keys: NODE_STATE_KEYS.clone(), - node_state_size_bytes: NODE_STATE_SIZE_BYTES.clone(), - gossip_recv_messages_total: GOSSIP_RECV_MESSAGES_TOTAL.clone(), - gossip_recv_bytes_total: GOSSIP_RECV_BYTES_TOTAL.clone(), - gossip_sent_messages_total: GOSSIP_SENT_MESSAGES_TOTAL.clone(), - gossip_sent_bytes_total: GOSSIP_SENT_BYTES_TOTAL.clone(), - grpc_gossip_rounds_total: GRPC_GOSSIP_ROUNDS_TOTAL.clone(), - } - } -} - -pub static CLUSTER_METRICS: LazyLock = LazyLock::new(ClusterMetrics::default); - pub(crate) fn 
spawn_metrics_task( weak_chitchat: Weak>, self_chitchat_id: ChitchatId, @@ -191,24 +155,18 @@ pub(crate) fn spawn_metrics_task( cluster_state_size_bytes += chitchat_id_size_bytes + node_state_size_bytes; if *chitchat_id == self_chitchat_id { - CLUSTER_METRICS - .node_state_keys - .set(node_state.num_key_values() as f64); - CLUSTER_METRICS - .node_state_size_bytes - .set(node_state_size_bytes as f64); + NODE_STATE_KEYS.set(node_state.num_key_values() as f64); + NODE_STATE_SIZE_BYTES.set(node_state_size_bytes as f64); } } drop(chitchat_guard); - CLUSTER_METRICS.live_nodes.set(num_live_nodes as f64); - CLUSTER_METRICS.ready_nodes.set(num_ready_nodes as f64); - CLUSTER_METRICS.zombie_nodes.set(num_zombie_nodes as f64); - CLUSTER_METRICS.dead_nodes.set(num_dead_nodes as f64); + LIVE_NODES.set(num_live_nodes as f64); + READY_NODES.set(num_ready_nodes as f64); + ZOMBIE_NODES.set(num_zombie_nodes as f64); + DEAD_NODES.set(num_dead_nodes as f64); - CLUSTER_METRICS - .cluster_state_size_bytes - .set(cluster_state_size_bytes as f64); + CLUSTER_STATE_SIZE_BYTES.set(cluster_state_size_bytes as f64); } }; tokio::spawn(future); diff --git a/quickwit/quickwit-common/src/metrics.rs b/quickwit/quickwit-common/src/metrics.rs index b32d21bf330..eb90d64a5d2 100644 --- a/quickwit/quickwit-common/src/metrics.rs +++ b/quickwit/quickwit-common/src/metrics.rs @@ -74,100 +74,6 @@ pub fn register_info(name: &'static str, help: &'static str, kvs: BTreeMap<&'sta }); } -#[derive(Clone)] -pub struct MemoryMetrics { - pub active_bytes: Gauge, - pub allocated_bytes: Gauge, - pub resident_bytes: Gauge, - pub in_flight: InFlightDataGauges, -} - -impl Default for MemoryMetrics { - fn default() -> Self { - Self { - active_bytes: MEMORY_ACTIVE_BYTES.clone(), - allocated_bytes: MEMORY_ALLOCATED_BYTES.clone(), - resident_bytes: MEMORY_RESIDENT_BYTES.clone(), - in_flight: InFlightDataGauges::default(), - } - } -} - -#[derive(Clone)] -pub struct InFlightDataGauges { - pub rest_server: Gauge, - pub 
ingest_router: Gauge, - pub ingester_persist: Gauge, - pub ingester_replicate: Gauge, - pub wal: Gauge, - pub fetch_stream: Gauge, - pub multi_fetch_stream: Gauge, - pub doc_processor_mailbox: Gauge, - pub indexer_mailbox: Gauge, - pub index_writer: Gauge, -} - -impl Default for InFlightDataGauges { - fn default() -> Self { - Self { - rest_server: in_flight_data_gauge("rest_server"), - ingest_router: in_flight_data_gauge("ingest_router"), - ingester_persist: in_flight_data_gauge("ingester_persist"), - ingester_replicate: in_flight_data_gauge("ingester_replicate"), - wal: in_flight_data_gauge("wal"), - fetch_stream: in_flight_data_gauge("fetch_stream"), - multi_fetch_stream: in_flight_data_gauge("multi_fetch_stream"), - doc_processor_mailbox: in_flight_data_gauge("doc_processor_mailbox"), - indexer_mailbox: in_flight_data_gauge("indexer_mailbox"), - index_writer: in_flight_data_gauge("index_writer"), - } - } -} - -impl InFlightDataGauges { - #[inline] - pub fn file(&self) -> &'static Gauge { - static GAUGE: OnceLock = OnceLock::new(); - GAUGE.get_or_init(|| in_flight_data_gauge("file_source")) - } - - #[inline] - pub fn ingest(&self) -> &'static Gauge { - static GAUGE: OnceLock = OnceLock::new(); - GAUGE.get_or_init(|| in_flight_data_gauge("ingest_source")) - } - - #[inline] - pub fn kafka(&self) -> &'static Gauge { - static GAUGE: OnceLock = OnceLock::new(); - GAUGE.get_or_init(|| in_flight_data_gauge("kafka_source")) - } - - #[inline] - pub fn kinesis(&self) -> &'static Gauge { - static GAUGE: OnceLock = OnceLock::new(); - GAUGE.get_or_init(|| in_flight_data_gauge("kinesis_source")) - } - - #[inline] - pub fn pubsub(&self) -> &'static Gauge { - static GAUGE: OnceLock = OnceLock::new(); - GAUGE.get_or_init(|| in_flight_data_gauge("pubsub_source")) - } - - #[inline] - pub fn pulsar(&self) -> &'static Gauge { - static GAUGE: OnceLock = OnceLock::new(); - GAUGE.get_or_init(|| in_flight_data_gauge("pulsar_source")) - } - - #[inline] - pub fn other(&self) -> &'static 
Gauge { - static GAUGE: OnceLock = OnceLock::new(); - GAUGE.get_or_init(|| in_flight_data_gauge("pulsar_source")) - } -} - pub fn index_label(index_id: &str) -> &str { static PER_INDEX_METRICS_ENABLED: LazyLock = LazyLock::new(|| !crate::get_bool_from_env("QW_DISABLE_PER_INDEX_METRICS", false)); @@ -179,9 +85,7 @@ pub fn index_label(index_id: &str) -> &str { } } -pub static MEMORY_METRICS: LazyLock = LazyLock::new(MemoryMetrics::default); - -static MEMORY_ACTIVE_BYTES: LazyLock = LazyLock::new(|| { +pub static MEMORY_ACTIVE_BYTES: LazyLock = LazyLock::new(|| { gauge!( name: "active_bytes", description: "Total number of bytes in active pages allocated by the application, as reported by jemalloc `stats.active`.", @@ -189,7 +93,7 @@ static MEMORY_ACTIVE_BYTES: LazyLock = LazyLock::new(|| { ) }); -static MEMORY_ALLOCATED_BYTES: LazyLock = LazyLock::new(|| { +pub static MEMORY_ALLOCATED_BYTES: LazyLock = LazyLock::new(|| { gauge!( name: "allocated_bytes", description: "Total number of bytes allocated by the application, as reported by jemalloc `stats.allocated`.", @@ -197,7 +101,7 @@ static MEMORY_ALLOCATED_BYTES: LazyLock = LazyLock::new(|| { ) }); -static MEMORY_RESIDENT_BYTES: LazyLock = LazyLock::new(|| { +pub static MEMORY_RESIDENT_BYTES: LazyLock = LazyLock::new(|| { gauge!( name: "resident_bytes", description: " Total number of bytes in physically resident data pages mapped by the allocator, as reported by jemalloc `stats.resident`.", @@ -213,6 +117,56 @@ static IN_FLIGHT_DATA_BYTES: LazyLock = LazyLock::new(|| { ) }); +pub static IN_FLIGHT_REST_SERVER: LazyLock = + LazyLock::new(|| in_flight_data_gauge("rest_server")); + +pub static IN_FLIGHT_INGEST_ROUTER: LazyLock = + LazyLock::new(|| in_flight_data_gauge("ingest_router")); + +pub static IN_FLIGHT_INGESTER_PERSIST: LazyLock = + LazyLock::new(|| in_flight_data_gauge("ingester_persist")); + +pub static IN_FLIGHT_INGESTER_REPLICATE: LazyLock = + LazyLock::new(|| in_flight_data_gauge("ingester_replicate")); + +pub 
static IN_FLIGHT_WAL: LazyLock = LazyLock::new(|| in_flight_data_gauge("wal")); + +pub static IN_FLIGHT_FETCH_STREAM: LazyLock = +    LazyLock::new(|| in_flight_data_gauge("fetch_stream")); + +pub static IN_FLIGHT_MULTI_FETCH_STREAM: LazyLock = +    LazyLock::new(|| in_flight_data_gauge("multi_fetch_stream")); + +pub static IN_FLIGHT_DOC_PROCESSOR_MAILBOX: LazyLock = +    LazyLock::new(|| in_flight_data_gauge("doc_processor_mailbox")); + +pub static IN_FLIGHT_INDEXER_MAILBOX: LazyLock = +    LazyLock::new(|| in_flight_data_gauge("indexer_mailbox")); + +pub static IN_FLIGHT_INDEX_WRITER: LazyLock = +    LazyLock::new(|| in_flight_data_gauge("index_writer")); + +pub static IN_FLIGHT_FILE_SOURCE: LazyLock = +    LazyLock::new(|| in_flight_data_gauge("file_source")); + +pub static IN_FLIGHT_INGEST_SOURCE: LazyLock = +    LazyLock::new(|| in_flight_data_gauge("ingest_source")); + +pub static IN_FLIGHT_KAFKA_SOURCE: LazyLock = +    LazyLock::new(|| in_flight_data_gauge("kafka_source")); + +pub static IN_FLIGHT_KINESIS_SOURCE: LazyLock = +    LazyLock::new(|| in_flight_data_gauge("kinesis_source")); + +pub static IN_FLIGHT_PUBSUB_SOURCE: LazyLock = +    LazyLock::new(|| in_flight_data_gauge("pubsub_source")); + +pub static IN_FLIGHT_PULSAR_SOURCE: LazyLock = +    LazyLock::new(|| in_flight_data_gauge("pulsar_source")); + +pub static IN_FLIGHT_OTHER_SOURCE: LazyLock = +    LazyLock::new(|| in_flight_data_gauge("other")); + fn in_flight_data_gauge(component: &'static str) -> Gauge { gauge!(parent: &*IN_FLIGHT_DATA_BYTES, "component" => component) } diff --git a/quickwit/quickwit-control-plane/src/control_plane.rs b/quickwit/quickwit-control-plane/src/control_plane.rs index 4df822181a1..4453c4596da 100644 --- a/quickwit/quickwit-control-plane/src/control_plane.rs +++ b/quickwit/quickwit-control-plane/src/control_plane.rs @@ -219,9 +219,7 @@ impl Actor for ControlPlane { } async fn initialize(&mut self, ctx: &ActorContext) -> Result<(), ActorExitStatus> { - crate::metrics::CONTROL_PLANE_METRICS - 
.restart_total - .increment(1); + crate::metrics::RESTART_TOTAL.increment(1); self.model .load_from_metastore(&mut self.metastore, ctx.progress()) @@ -570,17 +568,13 @@ fn convert_metastore_error( // It will be up to the client to decide what to do there. error!(err=?metastore_error, transaction_outcome="aborted", "metastore error"); } - crate::metrics::CONTROL_PLANE_METRICS - .metastore_error_aborted - .increment(1); + crate::metrics::METASTORE_ERROR_ABORTED.increment(1); Ok(Err(ControlPlaneError::Metastore(metastore_error))) } else { // If the metastore transaction may have been executed, we need to restart the control plane // so that it gets resynced with the metastore state. error!(error=?metastore_error, transaction_outcome="maybe-executed", "metastore error"); - crate::metrics::CONTROL_PLANE_METRICS - .metastore_error_maybe_executed - .increment(1); + crate::metrics::METASTORE_ERROR_MAYBE_EXECUTED.increment(1); Err(ActorExitStatus::from(anyhow::anyhow!(metastore_error))) } } diff --git a/quickwit/quickwit-control-plane/src/indexing_scheduler/mod.rs b/quickwit/quickwit-control-plane/src/indexing_scheduler/mod.rs index e260e5444a3..f8621f0fff8 100644 --- a/quickwit/quickwit-control-plane/src/indexing_scheduler/mod.rs +++ b/quickwit/quickwit-control-plane/src/indexing_scheduler/mod.rs @@ -295,9 +295,7 @@ impl IndexingScheduler { // Prefer not calling this method directly, and instead call // `ControlPlane::rebuild_indexing_plan_debounced`. 
pub(crate) fn rebuild_plan(&mut self, model: &ControlPlaneModel) { - crate::metrics::CONTROL_PLANE_METRICS - .schedule_total - .increment(1); + crate::metrics::SCHEDULE_TOTAL.increment(1); let notify_on_drop = self.next_rebuild_tracker.start_rebuild(); @@ -332,7 +330,7 @@ impl IndexingScheduler { ); let shard_locality_metrics = get_shard_locality_metrics(&new_physical_plan, &shard_locations); - crate::metrics::CONTROL_PLANE_METRICS.set_shard_locality_metrics(shard_locality_metrics); + crate::metrics::set_shard_locality_metrics(shard_locality_metrics); if let Some(last_applied_plan) = &self.state.last_applied_physical_plan { let plans_diff = get_indexing_plans_diff( last_applied_plan.indexing_tasks_per_indexer(), @@ -399,9 +397,7 @@ impl IndexingScheduler { notify_on_drop: Option>, ) { debug!(new_physical_plan=?new_physical_plan, "apply physical indexing plan"); - crate::metrics::CONTROL_PLANE_METRICS - .apply_plan_total - .increment(1); + crate::metrics::APPLY_PLAN_TOTAL.increment(1); for (node_id, indexing_tasks) in new_physical_plan.indexing_tasks_per_indexer() { // We don't want to block on a slow indexer so we apply this change asynchronously // TODO not blocking is cool, but we need to make sure there is not accumulation diff --git a/quickwit/quickwit-control-plane/src/ingest/ingest_controller.rs b/quickwit/quickwit-control-plane/src/ingest/ingest_controller.rs index 47f16bad3aa..5eafeb86296 100644 --- a/quickwit/quickwit-control-plane/src/ingest/ingest_controller.rs +++ b/quickwit/quickwit-control-plane/src/ingest/ingest_controller.rs @@ -1024,9 +1024,7 @@ impl IngestController { let shards_to_rebalance: Vec = self.compute_shards_to_rebalance(model); - crate::metrics::CONTROL_PLANE_METRICS - .rebalance_shards - .set(shards_to_rebalance.len() as f64); + crate::metrics::REBALANCE_SHARDS.set(shards_to_rebalance.len() as f64); if shards_to_rebalance.is_empty() { debug!("skipping rebalance: no shards to rebalance"); @@ -1049,16 +1047,12 @@ impl IngestController { 
.await .inspect_err(|error| { error!(%error, "failed to open shards during rebalance"); - crate::metrics::CONTROL_PLANE_METRICS - .rebalance_shards - .set(0.0); + crate::metrics::REBALANCE_SHARDS.set(0.0); })?; let num_opened_shards: usize = per_source_num_opened_shards.values().sum(); - crate::metrics::CONTROL_PLANE_METRICS - .rebalance_shards - .set(num_opened_shards as f64); + crate::metrics::REBALANCE_SHARDS.set(num_opened_shards as f64); for source_uid in per_source_num_opened_shards.keys() { // We temporarily disable the ability the scale down the number of shards for diff --git a/quickwit/quickwit-control-plane/src/metrics.rs b/quickwit/quickwit-control-plane/src/metrics.rs index 0c6b9aa50e3..c22d12f83b1 100644 --- a/quickwit/quickwit-control-plane/src/metrics.rs +++ b/quickwit/quickwit-control-plane/src/metrics.rs @@ -22,37 +22,12 @@ pub struct ShardLocalityMetrics { pub num_local_shards: usize, } -pub struct ControlPlaneMetrics { - // Indexes and shards tracked by the control plane. - pub indexes_total: Gauge, - pub open_shards: Gauge, - pub closed_shards: Gauge, - - // Operations performed by the control plane. - pub apply_plan_total: Counter, - pub rebalance_shards: Gauge, - pub restart_total: Counter, - pub schedule_total: Counter, - - // Metastore errors. - pub metastore_error_aborted: Counter, - pub metastore_error_maybe_executed: Counter, - - // Indexing plan metrics. 
- pub local_shards: Gauge, - pub remote_shards: Gauge, +pub fn set_shard_locality_metrics(shard_locality_metrics: ShardLocalityMetrics) { + LOCAL_SHARDS.set(shard_locality_metrics.num_local_shards as f64); + REMOTE_SHARDS.set(shard_locality_metrics.num_remote_shards as f64); } -impl ControlPlaneMetrics { - pub fn set_shard_locality_metrics(&self, shard_locality_metrics: ShardLocalityMetrics) { - self.local_shards - .set(shard_locality_metrics.num_local_shards as f64); - self.remote_shards - .set(shard_locality_metrics.num_remote_shards as f64); - } -} - -static INDEXES_TOTAL: LazyLock = LazyLock::new(|| { +pub(crate) static INDEXES_TOTAL: LazyLock = LazyLock::new(|| { gauge!( name: "indexes_total", description: "Number of indexes tracked by the control plane.", @@ -68,6 +43,12 @@ static SHARDS: LazyLock = LazyLock::new(|| { ) }); +pub(crate) static OPEN_SHARDS: LazyLock = + LazyLock::new(|| gauge!(parent: &*SHARDS, "state" => "open")); + +pub(crate) static CLOSED_SHARDS: LazyLock = + LazyLock::new(|| gauge!(parent: &*SHARDS, "state" => "closed")); + static INDEXED_SHARDS: LazyLock = LazyLock::new(|| { gauge!( name: "indexed_shards", @@ -76,7 +57,13 @@ static INDEXED_SHARDS: LazyLock = LazyLock::new(|| { ) }); -static APPLY_PLAN_TOTAL: LazyLock = LazyLock::new(|| { +pub(crate) static LOCAL_SHARDS: LazyLock = + LazyLock::new(|| gauge!(parent: &*INDEXED_SHARDS, "locality" => "local")); + +pub(crate) static REMOTE_SHARDS: LazyLock = + LazyLock::new(|| gauge!(parent: &*INDEXED_SHARDS, "locality" => "remote")); + +pub(crate) static APPLY_PLAN_TOTAL: LazyLock = LazyLock::new(|| { counter!( name: "apply_plan_total", description: "Number of control plane `apply plan` operations.", @@ -84,7 +71,7 @@ static APPLY_PLAN_TOTAL: LazyLock = LazyLock::new(|| { ) }); -static REBALANCE_SHARDS: LazyLock = LazyLock::new(|| { +pub(crate) static REBALANCE_SHARDS: LazyLock = LazyLock::new(|| { gauge!( name: "rebalance_shards", description: "Number of shards rebalanced by the control 
plane.", @@ -92,7 +79,7 @@ static REBALANCE_SHARDS: LazyLock = LazyLock::new(|| { ) }); -static RESTART_TOTAL: LazyLock = LazyLock::new(|| { +pub(crate) static RESTART_TOTAL: LazyLock = LazyLock::new(|| { counter!( name: "restart_total", description: "Number of control plane restarts.", @@ -100,7 +87,7 @@ static RESTART_TOTAL: LazyLock = LazyLock::new(|| { ) }); -static SCHEDULE_TOTAL: LazyLock = LazyLock::new(|| { +pub(crate) static SCHEDULE_TOTAL: LazyLock = LazyLock::new(|| { counter!( name: "schedule_total", description: "Number of control plane `schedule` operations.", @@ -108,7 +95,7 @@ static SCHEDULE_TOTAL: LazyLock = LazyLock::new(|| { ) }); -static METASTORE_ERROR_ABORTED: LazyLock = LazyLock::new(|| { +pub(crate) static METASTORE_ERROR_ABORTED: LazyLock = LazyLock::new(|| { counter!( name: "metastore_error_aborted", description: "Number of aborted metastore transaction (= do not trigger a control plane restart)", @@ -116,36 +103,10 @@ static METASTORE_ERROR_ABORTED: LazyLock = LazyLock::new(|| { ) }); -static METASTORE_ERROR_MAYBE_EXECUTED: LazyLock = LazyLock::new(|| { +pub(crate) static METASTORE_ERROR_MAYBE_EXECUTED: LazyLock = LazyLock::new(|| { counter!( name: "metastore_error_maybe_executed", description: "Number of metastore transaction with an uncertain outcome (= do trigger a control plane restart)", subsystem: "control_plane", ) }); - -impl Default for ControlPlaneMetrics { - fn default() -> Self { - let open_shards = gauge!(parent: &*SHARDS, "state" => "open"); - let closed_shards = gauge!(parent: &*SHARDS, "state" => "closed"); - let local_shards = gauge!(parent: &*INDEXED_SHARDS, "locality" => "local"); - let remote_shards = gauge!(parent: &*INDEXED_SHARDS, "locality" => "remote"); - - ControlPlaneMetrics { - indexes_total: INDEXES_TOTAL.clone(), - open_shards, - closed_shards, - apply_plan_total: APPLY_PLAN_TOTAL.clone(), - rebalance_shards: REBALANCE_SHARDS.clone(), - restart_total: RESTART_TOTAL.clone(), - schedule_total: 
SCHEDULE_TOTAL.clone(), - metastore_error_aborted: METASTORE_ERROR_ABORTED.clone(), - metastore_error_maybe_executed: METASTORE_ERROR_MAYBE_EXECUTED.clone(), - local_shards, - remote_shards, - } - } -} - -pub static CONTROL_PLANE_METRICS: LazyLock = - LazyLock::new(ControlPlaneMetrics::default); diff --git a/quickwit/quickwit-control-plane/src/model/mod.rs b/quickwit/quickwit-control-plane/src/model/mod.rs index 1a1007ffe89..8ffa677e315 100644 --- a/quickwit/quickwit-control-plane/src/model/mod.rs +++ b/quickwit/quickwit-control-plane/src/model/mod.rs @@ -167,9 +167,7 @@ impl ControlPlaneModel { } fn update_metrics(&self) { - crate::metrics::CONTROL_PLANE_METRICS - .indexes_total - .set(self.index_table.len() as f64); + crate::metrics::INDEXES_TOTAL.set(self.index_table.len() as f64); } pub(crate) fn source_configs(&self) -> impl Iterator + '_ { diff --git a/quickwit/quickwit-control-plane/src/model/shard_table.rs b/quickwit/quickwit-control-plane/src/model/shard_table.rs index 630718687f8..960e6d258d7 100644 --- a/quickwit/quickwit-control-plane/src/model/shard_table.rs +++ b/quickwit/quickwit-control-plane/src/model/shard_table.rs @@ -462,12 +462,12 @@ impl ShardTable { if index_label == index_id { let shard_stats = table_entry.shards_stats(); quickwit_metrics::gauge!( - parent: &crate::metrics::CONTROL_PLANE_METRICS.open_shards, + parent: &crate::metrics::OPEN_SHARDS, "index_id" => index_label.to_string(), ) .set(shard_stats.num_open_shards as f64); quickwit_metrics::gauge!( - parent: &crate::metrics::CONTROL_PLANE_METRICS.closed_shards, + parent: &crate::metrics::CLOSED_SHARDS, "index_id" => index_label.to_string(), ) .set(shard_stats.num_closed_shards as f64); @@ -485,12 +485,12 @@ impl ShardTable { } } quickwit_metrics::gauge!( - parent: &crate::metrics::CONTROL_PLANE_METRICS.open_shards, + parent: &crate::metrics::OPEN_SHARDS, "index_id" => index_label.to_string(), ) .set(num_open_shards as f64); quickwit_metrics::gauge!( - parent: 
&crate::metrics::CONTROL_PLANE_METRICS.closed_shards, + parent: &crate::metrics::CLOSED_SHARDS, "index_id" => index_label.to_string(), ) .set(num_closed_shards as f64); diff --git a/quickwit/quickwit-directories/src/caching_directory.rs b/quickwit/quickwit-directories/src/caching_directory.rs index 6e9461f5493..4da1981eed2 100644 --- a/quickwit/quickwit-directories/src/caching_directory.rs +++ b/quickwit/quickwit-directories/src/caching_directory.rs @@ -37,9 +37,8 @@ impl CachingDirectory { /// Warning: The resulting CacheDirectory will cache all information without ever /// removing any item from the cache. pub fn new_unbounded(underlying: Arc) -> CachingDirectory { - let byte_range_cache = ByteRangeCache::with_infinite_capacity( - &quickwit_storage::STORAGE_METRICS.shortlived_cache, - ); + let byte_range_cache = + ByteRangeCache::with_infinite_capacity(&quickwit_storage::SHORTLIVED_CACHE); CachingDirectory::new(underlying, byte_range_cache) } diff --git a/quickwit/quickwit-indexing/src/actors/doc_processor.rs b/quickwit/quickwit-indexing/src/actors/doc_processor.rs index 6881a2f9aa1..5b7f3573d12 100644 --- a/quickwit/quickwit-indexing/src/actors/doc_processor.rs +++ b/quickwit/quickwit-indexing/src/actors/doc_processor.rs @@ -287,12 +287,12 @@ impl DocProcessorCounter { DocProcessorCounter { num_docs: Default::default(), num_docs_metric: counter!( - parent: &crate::metrics::INDEXER_METRICS.processed_docs_total, + parent: &crate::metrics::PROCESSED_DOCS_TOTAL, "index" => index_label.to_string(), "docs_processed_status" => outcome.to_string(), ), num_bytes_metric: counter!( - parent: &crate::metrics::INDEXER_METRICS.processed_bytes, + parent: &crate::metrics::PROCESSED_BYTES, "index" => index_label.to_string(), "docs_processed_status" => outcome.to_string(), ), diff --git a/quickwit/quickwit-indexing/src/actors/indexer.rs b/quickwit/quickwit-indexing/src/actors/indexer.rs index 4f5f1b98040..932dfaf1640 100644 --- a/quickwit/quickwit-indexing/src/actors/indexer.rs 
+++ b/quickwit/quickwit-indexing/src/actors/indexer.rs @@ -27,7 +27,6 @@ use quickwit_actors::{ Actor, ActorContext, ActorExitStatus, Command, Handler, Mailbox, QueueCapacity, }; use quickwit_common::io::IoControls; -use quickwit_common::metrics::MEMORY_METRICS; use quickwit_common::runtimes::RuntimeType; use quickwit_common::temp_dir::TempDirectory; use quickwit_config::IndexingSettings; @@ -220,8 +219,7 @@ impl IndexerState { let publish_lock = self.publish_lock.clone(); let publish_token_opt = self.publish_token_opt.clone(); - let mut split_builders_guard = - GaugeGuard::from_gauge(&crate::metrics::INDEXER_METRICS.split_builders); + let mut split_builders_guard = GaugeGuard::from_gauge(&crate::metrics::SPLIT_BUILDERS); split_builders_guard.increment(1.0); let workbench = IndexingWorkbench { @@ -234,7 +232,7 @@ impl IndexerState { publish_lock, publish_token_opt, last_delete_opstamp, - memory_usage: GaugeGuard::from_gauge(&MEMORY_METRICS.in_flight.index_writer), + memory_usage: GaugeGuard::from_gauge(&quickwit_common::metrics::IN_FLIGHT_INDEX_WRITER), cooperative_indexing_period, split_builders_guard, }; diff --git a/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs b/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs index 0ddb8b4843b..c89b0d89488 100644 --- a/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs +++ b/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs @@ -124,7 +124,7 @@ impl Actor for IndexingPipeline { impl IndexingPipeline { pub fn new(params: IndexingPipelineParams) -> Self { let indexing_pipelines_gauge = gauge!( - parent: &crate::metrics::INDEXER_METRICS.indexing_pipelines, + parent: &crate::metrics::INDEXING_PIPELINES, "index" => params.pipeline_id.index_uid.index_id.clone(), ); let mut indexing_pipelines_gauge_guard = GaugeGuard::from_gauge(&indexing_pipelines_gauge); @@ -314,7 +314,7 @@ impl IndexingPipeline { .spawn_actor() .set_kill_switch(self.kill_switch.clone()) 
.set_backpressure_micros_counter(counter!( - parent: &crate::metrics::INDEXER_METRICS.backpressure_micros, + parent: &crate::metrics::BACKPRESSURE_MICROS, "actor_name" => "publisher", )) .spawn(publisher); @@ -323,7 +323,7 @@ impl IndexingPipeline { let (sequencer_mailbox, sequencer_handle) = ctx .spawn_actor() .set_backpressure_micros_counter(counter!( - parent: &crate::metrics::INDEXER_METRICS.backpressure_micros, + parent: &crate::metrics::BACKPRESSURE_MICROS, "actor_name" => "sequencer", )) .set_kill_switch(self.kill_switch.clone()) @@ -343,7 +343,7 @@ impl IndexingPipeline { let (uploader_mailbox, uploader_handle) = ctx .spawn_actor() .set_backpressure_micros_counter(counter!( - parent: &crate::metrics::INDEXER_METRICS.backpressure_micros, + parent: &crate::metrics::BACKPRESSURE_MICROS, "actor_name" => "uploader", )) .set_kill_switch(self.kill_switch.clone()) @@ -377,7 +377,7 @@ impl IndexingPipeline { let (indexer_mailbox, indexer_handle) = ctx .spawn_actor() .set_backpressure_micros_counter(counter!( - parent: &crate::metrics::INDEXER_METRICS.backpressure_micros, + parent: &crate::metrics::BACKPRESSURE_MICROS, "actor_name" => "indexer", )) .set_kill_switch(self.kill_switch.clone()) @@ -394,7 +394,7 @@ impl IndexingPipeline { let (doc_processor_mailbox, doc_processor_handle) = ctx .spawn_actor() .set_backpressure_micros_counter(counter!( - parent: &crate::metrics::INDEXER_METRICS.backpressure_micros, + parent: &crate::metrics::BACKPRESSURE_MICROS, "actor_name" => "doc_processor", )) .set_kill_switch(self.kill_switch.clone()) diff --git a/quickwit/quickwit-indexing/src/actors/merge_pipeline.rs b/quickwit/quickwit-indexing/src/actors/merge_pipeline.rs index a4e263c50cb..587b567ab30 100644 --- a/quickwit/quickwit-indexing/src/actors/merge_pipeline.rs +++ b/quickwit/quickwit-indexing/src/actors/merge_pipeline.rs @@ -274,7 +274,7 @@ impl MergePipeline { .spawn_actor() .set_kill_switch(self.kill_switch.clone()) .set_backpressure_micros_counter(counter!( - parent: 
&crate::metrics::INDEXER_METRICS.backpressure_micros, + parent: &crate::metrics::BACKPRESSURE_MICROS, "actor_name" => "merge_publisher", )) .spawn(merge_publisher); @@ -323,7 +323,7 @@ impl MergePipeline { .spawn_actor() .set_kill_switch(self.kill_switch.clone()) .set_backpressure_micros_counter(counter!( - parent: &crate::metrics::INDEXER_METRICS.backpressure_micros, + parent: &crate::metrics::BACKPRESSURE_MICROS, "actor_name" => "merge_executor", )) .spawn(merge_executor); @@ -338,7 +338,7 @@ impl MergePipeline { .spawn_actor() .set_kill_switch(self.kill_switch.clone()) .set_backpressure_micros_counter(counter!( - parent: &crate::metrics::INDEXER_METRICS.backpressure_micros, + parent: &crate::metrics::BACKPRESSURE_MICROS, "actor_name" => "merge_split_downloader", )) .spawn(merge_split_downloader); @@ -395,9 +395,7 @@ impl MergePipeline { handles.merge_planner.refresh_observe(); handles.merge_uploader.refresh_observe(); handles.merge_publisher.refresh_observe(); - let num_ongoing_merges = crate::metrics::INDEXER_METRICS - .ongoing_merge_operations - .get(); + let num_ongoing_merges = crate::metrics::ONGOING_MERGE_OPERATIONS.get(); self.statistics = self .previous_generations_statistics .clone() diff --git a/quickwit/quickwit-indexing/src/actors/merge_scheduler_service.rs b/quickwit/quickwit-indexing/src/actors/merge_scheduler_service.rs index 695e0eb7128..477d4f44027 100644 --- a/quickwit/quickwit-indexing/src/actors/merge_scheduler_service.rs +++ b/quickwit/quickwit-indexing/src/actors/merge_scheduler_service.rs @@ -226,12 +226,8 @@ impl MergeSchedulerService { _merge_permit: merge_permit, }; self.pending_merge_bytes -= merge_task.merge_operation.total_num_bytes(); - crate::metrics::INDEXER_METRICS - .pending_merge_operations - .set(self.pending_merge_queue.len() as f64); - crate::metrics::INDEXER_METRICS - .pending_merge_bytes - .set(self.pending_merge_bytes as f64); + crate::metrics::PENDING_MERGE_OPERATIONS.set(self.pending_merge_queue.len() as f64); + 
crate::metrics::PENDING_MERGE_BYTES.set(self.pending_merge_bytes as f64); match split_downloader_mailbox.try_send_message(merge_task) { Ok(_) => {} Err(quickwit_actors::TrySendError::Full(_)) => { @@ -295,9 +291,7 @@ impl MergeSchedulerService { let num_merges = self.merge_concurrency as i64 - self.merge_semaphore.available_permits() as i64; - crate::metrics::INDEXER_METRICS - .ongoing_merge_operations - .set(num_merges as f64); + crate::metrics::ONGOING_MERGE_OPERATIONS.set(num_merges as f64); } } @@ -381,12 +375,8 @@ impl Handler for MergeSchedulerService { }; self.pending_merge_bytes += scheduled_merge.merge_operation.total_num_bytes(); self.pending_merge_queue.push(scheduled_merge); - crate::metrics::INDEXER_METRICS - .pending_merge_operations - .set(self.pending_merge_queue.len() as f64); - crate::metrics::INDEXER_METRICS - .pending_merge_bytes - .set(self.pending_merge_bytes as f64); + crate::metrics::PENDING_MERGE_OPERATIONS.set(self.pending_merge_queue.len() as f64); + crate::metrics::PENDING_MERGE_BYTES.set(self.pending_merge_bytes as f64); self.schedule_pending_merges(ctx); Ok(()) } diff --git a/quickwit/quickwit-indexing/src/actors/metrics_pipeline/parquet_uploader.rs b/quickwit/quickwit-indexing/src/actors/metrics_pipeline/parquet_uploader.rs index 0b1638aa794..5e77d9a2a05 100644 --- a/quickwit/quickwit-indexing/src/actors/metrics_pipeline/parquet_uploader.rs +++ b/quickwit/quickwit-indexing/src/actors/metrics_pipeline/parquet_uploader.rs @@ -37,7 +37,6 @@ use tracing::{Instrument, Span, debug, info, instrument, warn}; use super::{ParquetSplitBatch, ParquetSplitsUpdate}; use crate::actors::sequencer::{Sequencer, SequencerCommand}; use crate::actors::{Publisher, UploaderCounters, UploaderType}; -use crate::metrics::INDEXER_METRICS; /// Concurrent upload permits for metrics uploader. /// Uses same permit pool as indexer uploads. 
@@ -124,7 +123,7 @@ impl ParquetUploader { let concurrent_upload_permits = CONCURRENT_UPLOAD_PERMITS_METRICS .get_or_init(|| Semaphore::const_new(self.max_concurrent_uploads)); let gauge = gauge!( - parent: &INDEXER_METRICS.available_concurrent_upload_permits, + parent: &crate::metrics::AVAILABLE_CONCURRENT_UPLOAD_PERMITS, "component" => "metrics", ); gauge.set(concurrent_upload_permits.available_permits() as f64); diff --git a/quickwit/quickwit-indexing/src/actors/metrics_pipeline/pipeline.rs b/quickwit/quickwit-indexing/src/actors/metrics_pipeline/pipeline.rs index 966ca94cd0d..2a8ef6b213c 100644 --- a/quickwit/quickwit-indexing/src/actors/metrics_pipeline/pipeline.rs +++ b/quickwit/quickwit-indexing/src/actors/metrics_pipeline/pipeline.rs @@ -145,7 +145,7 @@ impl Actor for MetricsPipeline { impl MetricsPipeline { pub fn new(params: MetricsPipelineParams) -> Self { let indexing_pipelines_gauge = gauge!( - parent: &crate::metrics::INDEXER_METRICS.indexing_pipelines, + parent: &crate::metrics::INDEXING_PIPELINES, "index" => params.pipeline_id.index_uid.index_id.clone(), ); let mut indexing_pipelines_gauge_guard = GaugeGuard::from_gauge(&indexing_pipelines_gauge); diff --git a/quickwit/quickwit-indexing/src/actors/metrics_pipeline/processed_parquet_batch.rs b/quickwit/quickwit-indexing/src/actors/metrics_pipeline/processed_parquet_batch.rs index 1ff1fb0b905..ebf6a0bd296 100644 --- a/quickwit/quickwit-indexing/src/actors/metrics_pipeline/processed_parquet_batch.rs +++ b/quickwit/quickwit-indexing/src/actors/metrics_pipeline/processed_parquet_batch.rs @@ -20,7 +20,6 @@ use std::fmt; use arrow::record_batch::RecordBatch; -use quickwit_common::metrics::MEMORY_METRICS; use quickwit_metastore::checkpoint::SourceCheckpointDelta; use quickwit_metrics::GaugeGuard; @@ -66,7 +65,8 @@ impl ProcessedParquetBatch { .map(|col| col.get_array_memory_size() as i64) .sum(); - let mut gauge_guard = GaugeGuard::from_gauge(&MEMORY_METRICS.in_flight.indexer_mailbox); + let mut gauge_guard 
= + GaugeGuard::from_gauge(&quickwit_common::metrics::IN_FLIGHT_INDEXER_MAILBOX); gauge_guard.increment(memory_size as f64); Self { diff --git a/quickwit/quickwit-indexing/src/actors/uploader.rs b/quickwit/quickwit-indexing/src/actors/uploader.rs index c0bf4a43fdf..e9c80e1b051 100644 --- a/quickwit/quickwit-indexing/src/actors/uploader.rs +++ b/quickwit/quickwit-indexing/src/actors/uploader.rs @@ -41,7 +41,6 @@ use tracing::{Instrument, Span, debug, info, instrument, warn}; use crate::actors::Publisher; use crate::actors::sequencer::{Sequencer, SequencerCommand}; use crate::merge_policy::{MergePolicy, MergeTask}; -use crate::metrics::INDEXER_METRICS; use crate::models::{ EmptySplit, PackagedSplit, PackagedSplitBatch, PublishLock, SplitsUpdate, create_split_metadata, }; @@ -205,21 +204,21 @@ impl Uploader { UploaderType::IndexUploader => ( &CONCURRENT_UPLOAD_PERMITS_INDEX, gauge!( - parent: &INDEXER_METRICS.available_concurrent_upload_permits, + parent: &crate::metrics::AVAILABLE_CONCURRENT_UPLOAD_PERMITS, "component" => "indexer", ), ), UploaderType::MergeUploader => ( &CONCURRENT_UPLOAD_PERMITS_MERGE, gauge!( - parent: &INDEXER_METRICS.available_concurrent_upload_permits, + parent: &crate::metrics::AVAILABLE_CONCURRENT_UPLOAD_PERMITS, "component" => "merger", ), ), UploaderType::DeleteUploader => ( &CONCURRENT_UPLOAD_PERMITS_MERGE, gauge!( - parent: &INDEXER_METRICS.available_concurrent_upload_permits, + parent: &crate::metrics::AVAILABLE_CONCURRENT_UPLOAD_PERMITS, "component" => "merger", ), ), diff --git a/quickwit/quickwit-indexing/src/metrics.rs b/quickwit/quickwit-indexing/src/metrics.rs index 4694be00dbf..94c109c8fe5 100644 --- a/quickwit/quickwit-indexing/src/metrics.rs +++ b/quickwit/quickwit-indexing/src/metrics.rs @@ -16,22 +16,7 @@ use std::sync::LazyLock; use quickwit_metrics::{Counter, Gauge, counter, gauge}; -pub struct IndexerMetrics { - pub processed_docs_total: Counter, - pub processed_bytes: Counter, - pub indexing_pipelines: Gauge, - pub 
backpressure_micros: Counter, - pub available_concurrent_upload_permits: Gauge, - pub split_builders: Gauge, - pub ongoing_merge_operations: Gauge, - pub pending_merge_operations: Gauge, - pub pending_merge_bytes: Gauge, - // We use a lazy counter, as most users do not use Kafka. - #[cfg_attr(not(feature = "kafka"), allow(dead_code))] - pub kafka_rebalance_total: LazyLock, -} - -static PROCESSED_DOCS_TOTAL: LazyLock = LazyLock::new(|| { +pub(crate) static PROCESSED_DOCS_TOTAL: LazyLock = LazyLock::new(|| { counter!( name: "processed_docs_total", description: "Number of processed docs by index, source and processed status in [valid, schema_error, parse_error, transform_error]", @@ -39,7 +24,7 @@ static PROCESSED_DOCS_TOTAL: LazyLock = LazyLock::new(|| { ) }); -static PROCESSED_BYTES: LazyLock = LazyLock::new(|| { +pub(crate) static PROCESSED_BYTES: LazyLock = LazyLock::new(|| { counter!( name: "processed_bytes", description: "Number of bytes of processed documents by index, source and processed status in [valid, schema_error, parse_error, transform_error]", @@ -47,7 +32,7 @@ static PROCESSED_BYTES: LazyLock = LazyLock::new(|| { ) }); -static INDEXING_PIPELINES: LazyLock = LazyLock::new(|| { +pub(crate) static INDEXING_PIPELINES: LazyLock = LazyLock::new(|| { gauge!( name: "indexing_pipelines", description: "Number of running indexing pipelines", @@ -55,7 +40,7 @@ static INDEXING_PIPELINES: LazyLock = LazyLock::new(|| { ) }); -static BACKPRESSURE_MICROS: LazyLock = LazyLock::new(|| { +pub(crate) static BACKPRESSURE_MICROS: LazyLock = LazyLock::new(|| { counter!( name: "backpressure_micros", description: "Amount of time spent in backpressure (in micros). 
This time only includes the amount of time spent waiting for a place in the queue of another actor.", @@ -63,7 +48,7 @@ static BACKPRESSURE_MICROS: LazyLock = LazyLock::new(|| { ) }); -static AVAILABLE_CONCURRENT_UPLOAD_PERMITS: LazyLock = LazyLock::new(|| { +pub(crate) static AVAILABLE_CONCURRENT_UPLOAD_PERMITS: LazyLock = LazyLock::new(|| { gauge!( name: "concurrent_upload_available_permits_num", description: "Number of available concurrent upload permits by component in [merger, indexer]", @@ -71,7 +56,7 @@ static AVAILABLE_CONCURRENT_UPLOAD_PERMITS: LazyLock = LazyLock::new(|| { ) }); -static SPLIT_BUILDERS: LazyLock = LazyLock::new(|| { +pub(crate) static SPLIT_BUILDERS: LazyLock = LazyLock::new(|| { gauge!( name: "split_builders", description: "Number of existing index writer instances.", @@ -79,7 +64,7 @@ static SPLIT_BUILDERS: LazyLock = LazyLock::new(|| { ) }); -static ONGOING_MERGE_OPERATIONS: LazyLock = LazyLock::new(|| { +pub(crate) static ONGOING_MERGE_OPERATIONS: LazyLock = LazyLock::new(|| { gauge!( name: "ongoing_merge_operations", description: "Number of ongoing merge operations", @@ -88,7 +73,7 @@ static ONGOING_MERGE_OPERATIONS: LazyLock = LazyLock::new(|| { ) }); -static PENDING_MERGE_OPERATIONS: LazyLock = LazyLock::new(|| { +pub(crate) static PENDING_MERGE_OPERATIONS: LazyLock = LazyLock::new(|| { gauge!( name: "pending_merge_operations", description: "Number of pending merge operations", @@ -96,7 +81,7 @@ static PENDING_MERGE_OPERATIONS: LazyLock = LazyLock::new(|| { ) }); -static PENDING_MERGE_BYTES: LazyLock = LazyLock::new(|| { +pub(crate) static PENDING_MERGE_BYTES: LazyLock = LazyLock::new(|| { gauge!( name: "pending_merge_bytes", description: "Number of pending merge bytes", @@ -104,31 +89,12 @@ static PENDING_MERGE_BYTES: LazyLock = LazyLock::new(|| { ) }); -static KAFKA_REBALANCE_TOTAL: LazyLock = LazyLock::new(|| { +// We use a lazy counter, as most users do not use Kafka. 
+#[cfg_attr(not(feature = "kafka"), allow(dead_code))] +pub(crate) static KAFKA_REBALANCE_TOTAL: LazyLock = LazyLock::new(|| { counter!( name: "kafka_rebalance_total", description: "Number of kafka rebalances", subsystem: "indexing", ) }); - -impl Default for IndexerMetrics { - fn default() -> Self { - IndexerMetrics { - processed_docs_total: PROCESSED_DOCS_TOTAL.clone(), - processed_bytes: PROCESSED_BYTES.clone(), - indexing_pipelines: INDEXING_PIPELINES.clone(), - backpressure_micros: BACKPRESSURE_MICROS.clone(), - available_concurrent_upload_permits: AVAILABLE_CONCURRENT_UPLOAD_PERMITS.clone(), - split_builders: SPLIT_BUILDERS.clone(), - ongoing_merge_operations: ONGOING_MERGE_OPERATIONS.clone(), - pending_merge_operations: PENDING_MERGE_OPERATIONS.clone(), - pending_merge_bytes: PENDING_MERGE_BYTES.clone(), - kafka_rebalance_total: LazyLock::new(|| KAFKA_REBALANCE_TOTAL.clone()), - } - } -} - -/// `INDEXER_METRICS` exposes indexing related metrics through a prometheus -/// endpoint. 
-pub static INDEXER_METRICS: LazyLock = LazyLock::new(IndexerMetrics::default); diff --git a/quickwit/quickwit-indexing/src/models/processed_doc.rs b/quickwit/quickwit-indexing/src/models/processed_doc.rs index 0575cbc0de4..6415da26427 100644 --- a/quickwit/quickwit-indexing/src/models/processed_doc.rs +++ b/quickwit/quickwit-indexing/src/models/processed_doc.rs @@ -14,7 +14,6 @@ use std::fmt; -use quickwit_common::metrics::MEMORY_METRICS; use quickwit_metastore::checkpoint::SourceCheckpointDelta; use quickwit_metrics::GaugeGuard; use tantivy::{DateTime, TantivyDocument}; @@ -52,7 +51,8 @@ impl ProcessedDocBatch { force_commit: bool, ) -> Self { let delta = docs.iter().map(|doc| doc.num_bytes as i64).sum::(); - let mut gauge_guard = GaugeGuard::from_gauge(&MEMORY_METRICS.in_flight.indexer_mailbox); + let mut gauge_guard = + GaugeGuard::from_gauge(&quickwit_common::metrics::IN_FLIGHT_INDEXER_MAILBOX); gauge_guard.increment(delta as f64); Self { docs, diff --git a/quickwit/quickwit-indexing/src/models/raw_doc_batch.rs b/quickwit/quickwit-indexing/src/models/raw_doc_batch.rs index 441e7b01dbf..777088086c3 100644 --- a/quickwit/quickwit-indexing/src/models/raw_doc_batch.rs +++ b/quickwit/quickwit-indexing/src/models/raw_doc_batch.rs @@ -15,7 +15,6 @@ use std::fmt; use bytes::Bytes; -use quickwit_common::metrics::MEMORY_METRICS; use quickwit_metastore::checkpoint::SourceCheckpointDelta; use quickwit_metrics::GaugeGuard; @@ -36,7 +35,7 @@ impl RawDocBatch { ) -> Self { let delta = docs.iter().map(|doc| doc.len() as i64).sum::(); let mut gauge_guard = - GaugeGuard::from_gauge(&MEMORY_METRICS.in_flight.doc_processor_mailbox); + GaugeGuard::from_gauge(&quickwit_common::metrics::IN_FLIGHT_DOC_PROCESSOR_MAILBOX); gauge_guard.increment(delta as f64); Self { @@ -68,7 +67,8 @@ impl fmt::Debug for RawDocBatch { impl Default for RawDocBatch { fn default() -> Self { - let _gauge_guard = GaugeGuard::from_gauge(&MEMORY_METRICS.in_flight.doc_processor_mailbox); + let _gauge_guard = + 
GaugeGuard::from_gauge(&quickwit_common::metrics::IN_FLIGHT_DOC_PROCESSOR_MAILBOX); Self { docs: Vec::new(), checkpoint_delta: SourceCheckpointDelta::default(), diff --git a/quickwit/quickwit-indexing/src/source/ingest/mod.rs b/quickwit/quickwit-indexing/src/source/ingest/mod.rs index 4403fa0f547..cc7c0f1772c 100644 --- a/quickwit/quickwit-indexing/src/source/ingest/mod.rs +++ b/quickwit/quickwit-indexing/src/source/ingest/mod.rs @@ -665,7 +665,6 @@ mod tests { use itertools::Itertools; use quickwit_actors::{ActorContext, Universe}; use quickwit_common::ServiceStream; - use quickwit_common::metrics::MEMORY_METRICS; use quickwit_common::stream_utils::InFlightValue; use quickwit_config::{IndexingSettings, SourceConfig, SourceParams}; use quickwit_ingest::IngesterPoolEntry; @@ -1437,7 +1436,7 @@ mod tests { let in_flight_value = InFlightValue::new( fetch_message, batch_size, - &MEMORY_METRICS.in_flight.fetch_stream, + &quickwit_common::metrics::IN_FLIGHT_FETCH_STREAM, ); fetch_message_tx.send(Ok(in_flight_value)).await.unwrap(); @@ -1454,7 +1453,7 @@ mod tests { let in_flight_value = InFlightValue::new( fetch_message, batch_size, - &MEMORY_METRICS.in_flight.fetch_stream, + &quickwit_common::metrics::IN_FLIGHT_FETCH_STREAM, ); fetch_message_tx.send(Ok(in_flight_value)).await.unwrap(); @@ -1468,7 +1467,7 @@ mod tests { let in_flight_value = InFlightValue::new( fetch_message, ByteSize(0), - &MEMORY_METRICS.in_flight.fetch_stream, + &quickwit_common::metrics::IN_FLIGHT_FETCH_STREAM, ); fetch_message_tx.send(Ok(in_flight_value)).await.unwrap(); @@ -1529,7 +1528,7 @@ mod tests { let in_flight_value = InFlightValue::new( fetch_message, batch_size, - &MEMORY_METRICS.in_flight.fetch_stream, + &quickwit_common::metrics::IN_FLIGHT_FETCH_STREAM, ); fetch_message_tx.send(Ok(in_flight_value)).await.unwrap(); diff --git a/quickwit/quickwit-indexing/src/source/kafka_source.rs b/quickwit/quickwit-indexing/src/source/kafka_source.rs index 7e9ab3056b8..9b654a6b02f 100644 --- 
a/quickwit/quickwit-indexing/src/source/kafka_source.rs +++ b/quickwit/quickwit-indexing/src/source/kafka_source.rs @@ -127,9 +127,7 @@ macro_rules! return_if_err { /// impl ConsumerContext for RdKafkaContext { fn pre_rebalance(&self, _consumer: &BaseConsumer, rebalance: &Rebalance) { - crate::metrics::INDEXER_METRICS - .kafka_rebalance_total - .increment(1); + crate::metrics::KAFKA_REBALANCE_TOTAL.increment(1); quickwit_common::rate_limited_info!(limit_per_min = 3, topic = self.topic, "rebalance"); if let Rebalance::Revoke(tpl) = rebalance { let partitions = collect_partitions(tpl, &self.topic); diff --git a/quickwit/quickwit-indexing/src/source/mod.rs b/quickwit/quickwit-indexing/src/source/mod.rs index 162ad12262e..a5f946ec7a5 100644 --- a/quickwit/quickwit-indexing/src/source/mod.rs +++ b/quickwit/quickwit-indexing/src/source/mod.rs @@ -92,7 +92,6 @@ pub use pulsar_source::{PulsarSource, PulsarSourceFactory}; #[cfg(feature = "sqs")] pub use queue_sources::sqs_queue; use quickwit_actors::{Actor, ActorContext, ActorExitStatus, Handler}; -use quickwit_common::metrics::MEMORY_METRICS; use quickwit_common::pubsub::EventBroker; use quickwit_common::runtimes::RuntimeType; use quickwit_config::{ @@ -530,13 +529,13 @@ impl BatchBuilder { pub fn with_capacity(capacity: usize, source_type: SourceType) -> Self { let gauge = match source_type { - SourceType::File => MEMORY_METRICS.in_flight.file(), - SourceType::IngestV2 => MEMORY_METRICS.in_flight.ingest(), - SourceType::Kafka => MEMORY_METRICS.in_flight.kafka(), - SourceType::Kinesis => MEMORY_METRICS.in_flight.kinesis(), - SourceType::PubSub => MEMORY_METRICS.in_flight.pubsub(), - SourceType::Pulsar => MEMORY_METRICS.in_flight.pulsar(), - _ => MEMORY_METRICS.in_flight.other(), + SourceType::File => &quickwit_common::metrics::IN_FLIGHT_FILE_SOURCE, + SourceType::IngestV2 => &quickwit_common::metrics::IN_FLIGHT_INGEST_SOURCE, + SourceType::Kafka => &quickwit_common::metrics::IN_FLIGHT_KAFKA_SOURCE, + SourceType::Kinesis => 
&quickwit_common::metrics::IN_FLIGHT_KINESIS_SOURCE, + SourceType::PubSub => &quickwit_common::metrics::IN_FLIGHT_PUBSUB_SOURCE, + SourceType::Pulsar => &quickwit_common::metrics::IN_FLIGHT_PULSAR_SOURCE, + _ => &quickwit_common::metrics::IN_FLIGHT_OTHER_SOURCE, }; let gauge_guard = GaugeGuard::from_gauge(gauge); diff --git a/quickwit/quickwit-ingest/src/ingest_api_service.rs b/quickwit/quickwit-ingest/src/ingest_api_service.rs index 1a61c28235d..8dc08487459 100644 --- a/quickwit/quickwit-ingest/src/ingest_api_service.rs +++ b/quickwit/quickwit-ingest/src/ingest_api_service.rs @@ -27,7 +27,6 @@ use quickwit_proto::ingest::RateLimitingCause; use tracing::{error, info}; use ulid::Ulid; -use crate::metrics::INGEST_METRICS; use crate::notifications::Notifications; use crate::{ CommitType, CreateQueueIfNotExistsRequest, CreateQueueIfNotExistsResponse, CreateQueueRequest, @@ -203,11 +202,11 @@ impl IngestApiService { num_docs += batch_num_docs; counter!( - parent: &INGEST_METRICS.docs_bytes_total, + parent: &crate::metrics::DOCS_BYTES_TOTAL, "validity" => "valid", ) .increment(batch_num_bytes as u64); - counter!(parent: &INGEST_METRICS.docs_total, "validity" => "valid") + counter!(parent: &crate::metrics::DOCS_TOTAL, "validity" => "valid") .increment(batch_num_docs as u64); } // TODO we could fsync here and disable autosync to have better i/o perfs. 
diff --git a/quickwit/quickwit-ingest/src/ingest_v2/broadcast/local_shards.rs b/quickwit/quickwit-ingest/src/ingest_v2/broadcast/local_shards.rs index 5fb95f5ea98..6306399121a 100644 --- a/quickwit/quickwit-ingest/src/ingest_v2/broadcast/local_shards.rs +++ b/quickwit/quickwit-ingest/src/ingest_v2/broadcast/local_shards.rs @@ -30,7 +30,6 @@ use tracing::{debug, warn}; use super::{BROADCAST_INTERVAL_PERIOD, make_key, parse_key}; use crate::RateMibPerSec; -use crate::ingest_v2::metrics::INGEST_V2_METRICS; use crate::ingest_v2::state::WeakIngesterState; const ONE_MIB: ByteSize = ByteSize::mib(1); @@ -195,11 +194,9 @@ impl ShardThroughputTimeSeriesMap { .average() .as_u64() .div_ceil(ONE_MIB.as_u64()); - INGEST_V2_METRICS - .shard_st_throughput_mib + crate::ingest_v2::metrics::SHARD_ST_THROUGHPUT_MIB .record(short_term_ingestion_rate_mib_per_sec_u64 as f64); - INGEST_V2_METRICS - .shard_lt_throughput_mib + crate::ingest_v2::metrics::SHARD_LT_THROUGHPUT_MIB .record(long_term_ingestion_rate_mib_per_sec_u64 as f64); let short_term_ingestion_rate = @@ -300,10 +297,8 @@ impl BroadcastLocalShardsTask { } } } - INGEST_V2_METRICS.open_shards.set(num_open_shards as f64); - INGEST_V2_METRICS - .closed_shards - .set(num_closed_shards as f64); + crate::ingest_v2::metrics::OPEN_SHARDS.set(num_open_shards as f64); + crate::ingest_v2::metrics::CLOSED_SHARDS.set(num_closed_shards as f64); let snapshot = LocalShardsSnapshot { per_source_shard_infos, diff --git a/quickwit/quickwit-ingest/src/ingest_v2/fetch.rs b/quickwit/quickwit-ingest/src/ingest_v2/fetch.rs index 73c1fb2858d..3d45ef7f571 100644 --- a/quickwit/quickwit-ingest/src/ingest_v2/fetch.rs +++ b/quickwit/quickwit-ingest/src/ingest_v2/fetch.rs @@ -22,7 +22,6 @@ use bytes::{BufMut, BytesMut}; use bytesize::ByteSize; use futures::StreamExt; use mrecordlog::Record; -use quickwit_common::metrics::MEMORY_METRICS; use quickwit_common::retry::RetryParams; use quickwit_common::stream_utils::{InFlightValue, TrackedSender}; use 
quickwit_common::{ServiceStream, spawn_named_task}; @@ -81,8 +80,10 @@ impl FetchStreamTask { .as_u64() .map(|offset| offset + 1) .unwrap_or_default(); - let (fetch_message_tx, fetch_stream) = - ServiceStream::new_bounded_with_gauge(3, &MEMORY_METRICS.in_flight.fetch_stream); + let (fetch_message_tx, fetch_stream) = ServiceStream::new_bounded_with_gauge( + 3, + &quickwit_common::metrics::IN_FLIGHT_FETCH_STREAM, + ); let mut fetch_task = Self { shard_id: open_fetch_stream_request.shard_id().clone(), queue_id: open_fetch_stream_request.queue_id(), @@ -559,7 +560,7 @@ async fn fault_tolerant_fetch_stream( let in_flight_value = InFlightValue::new( fetch_message, batch_size, - &MEMORY_METRICS.in_flight.multi_fetch_stream, + &quickwit_common::metrics::IN_FLIGHT_MULTI_FETCH_STREAM, ); if fetch_message_tx.send(Ok(in_flight_value)).await.is_err() { // The consumer was dropped. @@ -572,7 +573,7 @@ async fn fault_tolerant_fetch_stream( let in_flight_value = InFlightValue::new( fetch_message, ByteSize(0), - &MEMORY_METRICS.in_flight.multi_fetch_stream, + &quickwit_common::metrics::IN_FLIGHT_MULTI_FETCH_STREAM, ); // We ignore the send error if the consumer was dropped because we're going // to return anyway. 
diff --git a/quickwit/quickwit-ingest/src/ingest_v2/ingester.rs b/quickwit/quickwit-ingest/src/ingest_v2/ingester.rs index c3dc9190591..ef4a4b1a6ad 100644 --- a/quickwit/quickwit-ingest/src/ingest_v2/ingester.rs +++ b/quickwit/quickwit-ingest/src/ingest_v2/ingester.rs @@ -25,7 +25,6 @@ use futures::StreamExt; use futures::stream::FuturesUnordered; use mrecordlog::error::CreateQueueError; use quickwit_cluster::Cluster; -use quickwit_common::metrics::MEMORY_METRICS; use quickwit_common::pretty::PrettyDisplay; use quickwit_common::pubsub::{EventBroker, EventSubscriber}; use quickwit_common::rate_limiter::{RateLimiter, RateLimiterSettings}; @@ -52,7 +51,6 @@ use super::broadcast::{BroadcastIngesterCapacityScoreTask, BroadcastLocalShardsT use super::doc_mapper::validate_doc_batch; use super::fetch::FetchStreamTask; use super::idle::CloseIdleShardsTask; -use super::metrics::INGEST_V2_METRICS; use super::models::IngesterShard; use super::mrecordlog_utils::{ AppendDocBatchError, append_non_empty_doc_batch, check_enough_capacity, @@ -334,7 +332,7 @@ impl Ingester { now.elapsed().pretty_display() ); counter!( - parent: &INGEST_V2_METRICS.reset_shards_operations_total, + parent: &crate::ingest_v2::metrics::RESET_SHARDS_OPERATIONS_TOTAL, "status" => "success", ) .increment(1); @@ -346,7 +344,7 @@ impl Ingester { warn!("advise reset shards request failed: {error}"); counter!( - parent: &INGEST_V2_METRICS.reset_shards_operations_total, + parent: &crate::ingest_v2::metrics::RESET_SHARDS_OPERATIONS_TOTAL, "status" => "error", ) .increment(1); @@ -355,7 +353,7 @@ impl Ingester { warn!("advise reset shards request timed out"); counter!( - parent: &INGEST_V2_METRICS.reset_shards_operations_total, + parent: &crate::ingest_v2::metrics::RESET_SHARDS_OPERATIONS_TOTAL, "status" => "timeout", ) .increment(1); @@ -572,12 +570,12 @@ impl Ingester { if valid_doc_batch.is_empty() { counter!( - parent: &crate::metrics::INGEST_METRICS.docs_total, + parent: &crate::metrics::DOCS_TOTAL, "validity" 
=> "invalid", ) .increment(parse_failures.len() as u64); counter!( - parent: &crate::metrics::INGEST_METRICS.docs_bytes_total, + parent: &crate::metrics::DOCS_BYTES_TOTAL, "validity" => "invalid", ) .increment(original_batch_num_bytes); @@ -595,23 +593,23 @@ impl Ingester { }; counter!( - parent: &crate::metrics::INGEST_METRICS.docs_total, + parent: &crate::metrics::DOCS_TOTAL, "validity" => "valid", ) .increment(valid_doc_batch.num_docs() as u64); counter!( - parent: &crate::metrics::INGEST_METRICS.docs_bytes_total, + parent: &crate::metrics::DOCS_BYTES_TOTAL, "validity" => "valid", ) .increment(valid_doc_batch.num_bytes() as u64); if !parse_failures.is_empty() { counter!( - parent: &crate::metrics::INGEST_METRICS.docs_total, + parent: &crate::metrics::DOCS_TOTAL, "validity" => "invalid", ) .increment(parse_failures.len() as u64); counter!( - parent: &crate::metrics::INGEST_METRICS.docs_bytes_total, + parent: &crate::metrics::DOCS_BYTES_TOTAL, "validity" => "invalid", ) .increment(original_batch_num_bytes - valid_doc_batch.num_bytes() as u64); @@ -1127,7 +1125,8 @@ impl IngesterService for Ingester { _ => None, }) .sum::(); - let mut _gauge_guard = GaugeGuard::from_gauge(&MEMORY_METRICS.in_flight.ingester_persist); + let mut _gauge_guard = + GaugeGuard::from_gauge(&quickwit_common::metrics::IN_FLIGHT_INGESTER_PERSIST); _gauge_guard.increment(request_size_bytes as f64); self.persist_inner(persist_request).await diff --git a/quickwit/quickwit-ingest/src/ingest_v2/metrics.rs b/quickwit/quickwit-ingest/src/ingest_v2/metrics.rs index 2c6497a5504..84bb10ab290 100644 --- a/quickwit/quickwit-ingest/src/ingest_v2/metrics.rs +++ b/quickwit/quickwit-ingest/src/ingest_v2/metrics.rs @@ -18,28 +18,6 @@ use mrecordlog::ResourceUsage; use quickwit_common::metrics::{exponential_buckets, linear_buckets}; use quickwit_metrics::{Counter, Gauge, Histogram, counter, gauge, histogram}; -// Counter vec counting the different outcomes of ingest requests as -// measure at the end of the 
router work. -// -// The counter are counting persist subrequests. -pub(crate) struct IngestResultMetrics { - pub success: Counter, - pub circuit_breaker: Counter, - pub unspecified: Counter, - pub index_not_found: Counter, - pub source_not_found: Counter, - pub internal: Counter, - pub no_shards_available: Counter, - pub shard_rate_limited: Counter, - pub wal_full: Counter, - pub timeout: Counter, - pub router_timeout: Counter, - pub router_load_shedding: Counter, - pub load_shedding: Counter, - pub shard_not_found: Counter, - pub unavailable: Counter, -} - static INGEST_RESULT_TOTAL: LazyLock = LazyLock::new(|| { counter!( name: "ingest_result_total", @@ -48,55 +26,52 @@ static INGEST_RESULT_TOTAL: LazyLock = LazyLock::new(|| { ) }); -impl Default for IngestResultMetrics { - fn default() -> Self { - Self { - success: counter!(parent: &*INGEST_RESULT_TOTAL, "result" => "success"), - circuit_breaker: counter!(parent: &*INGEST_RESULT_TOTAL, "result" => "circuit_breaker"), - unspecified: counter!(parent: &*INGEST_RESULT_TOTAL, "result" => "unspecified"), - index_not_found: counter!(parent: &*INGEST_RESULT_TOTAL, "result" => "index_not_found"), - source_not_found: counter!( - parent: &*INGEST_RESULT_TOTAL, - "result" => "source_not_found", - ), - internal: counter!(parent: &*INGEST_RESULT_TOTAL, "result" => "internal"), - no_shards_available: counter!( - parent: &*INGEST_RESULT_TOTAL, - "result" => "no_shards_available", - ), - shard_rate_limited: counter!( - parent: &*INGEST_RESULT_TOTAL, - "result" => "shard_rate_limited", - ), - wal_full: counter!(parent: &*INGEST_RESULT_TOTAL, "result" => "wal_full"), - timeout: counter!(parent: &*INGEST_RESULT_TOTAL, "result" => "timeout"), - router_timeout: counter!(parent: &*INGEST_RESULT_TOTAL, "result" => "router_timeout"), - router_load_shedding: counter!( - parent: &*INGEST_RESULT_TOTAL, - "result" => "router_load_shedding", - ), - load_shedding: counter!(parent: &*INGEST_RESULT_TOTAL, "result" => "load_shedding"), - 
unavailable: counter!(parent: &*INGEST_RESULT_TOTAL, "result" => "unavailable"), - shard_not_found: counter!(parent: &*INGEST_RESULT_TOTAL, "result" => "shard_not_found"), - } - } -} +pub(super) static INGEST_RESULT_SUCCESS: LazyLock = + LazyLock::new(|| counter!(parent: &*INGEST_RESULT_TOTAL, "result" => "success")); -pub(super) struct IngestV2Metrics { - pub reset_shards_operations_total: Counter, - pub open_shards: Gauge, - pub closed_shards: Gauge, - pub shard_lt_throughput_mib: Histogram, - pub shard_st_throughput_mib: Histogram, - pub wal_acquire_lock_requests_in_flight: Gauge, - pub wal_acquire_lock_request_duration_secs: Histogram, - pub wal_disk_used_bytes: Gauge, - pub wal_memory_used_bytes: Gauge, - pub ingest_results: IngestResultMetrics, - pub ingest_attempts: Counter, -} +pub(super) static INGEST_RESULT_CIRCUIT_BREAKER: LazyLock = + LazyLock::new(|| counter!(parent: &*INGEST_RESULT_TOTAL, "result" => "circuit_breaker")); + +pub(super) static INGEST_RESULT_UNSPECIFIED: LazyLock = + LazyLock::new(|| counter!(parent: &*INGEST_RESULT_TOTAL, "result" => "unspecified")); + +pub(super) static INGEST_RESULT_INDEX_NOT_FOUND: LazyLock = + LazyLock::new(|| counter!(parent: &*INGEST_RESULT_TOTAL, "result" => "index_not_found")); + +pub(super) static INGEST_RESULT_SOURCE_NOT_FOUND: LazyLock = + LazyLock::new(|| counter!(parent: &*INGEST_RESULT_TOTAL, "result" => "source_not_found")); + +pub(super) static INGEST_RESULT_INTERNAL: LazyLock = + LazyLock::new(|| counter!(parent: &*INGEST_RESULT_TOTAL, "result" => "internal")); + +pub(super) static INGEST_RESULT_NO_SHARDS_AVAILABLE: LazyLock = + LazyLock::new(|| counter!(parent: &*INGEST_RESULT_TOTAL, "result" => "no_shards_available")); + +pub(super) static INGEST_RESULT_SHARD_RATE_LIMITED: LazyLock = + LazyLock::new(|| counter!(parent: &*INGEST_RESULT_TOTAL, "result" => "shard_rate_limited")); + +pub(super) static INGEST_RESULT_WAL_FULL: LazyLock = + LazyLock::new(|| counter!(parent: &*INGEST_RESULT_TOTAL, "result" => 
"wal_full")); + +pub(super) static INGEST_RESULT_TIMEOUT: LazyLock = + LazyLock::new(|| counter!(parent: &*INGEST_RESULT_TOTAL, "result" => "timeout")); + +pub(super) static INGEST_RESULT_ROUTER_TIMEOUT: LazyLock = + LazyLock::new(|| counter!(parent: &*INGEST_RESULT_TOTAL, "result" => "router_timeout")); -static INGEST_ATTEMPTS: LazyLock = LazyLock::new(|| { +pub(super) static INGEST_RESULT_ROUTER_LOAD_SHEDDING: LazyLock = + LazyLock::new(|| counter!(parent: &*INGEST_RESULT_TOTAL, "result" => "router_load_shedding")); + +pub(super) static INGEST_RESULT_LOAD_SHEDDING: LazyLock = + LazyLock::new(|| counter!(parent: &*INGEST_RESULT_TOTAL, "result" => "load_shedding")); + +pub(super) static INGEST_RESULT_SHARD_NOT_FOUND: LazyLock = + LazyLock::new(|| counter!(parent: &*INGEST_RESULT_TOTAL, "result" => "shard_not_found")); + +pub(super) static INGEST_RESULT_UNAVAILABLE: LazyLock = + LazyLock::new(|| counter!(parent: &*INGEST_RESULT_TOTAL, "result" => "unavailable")); + +pub(super) static INGEST_ATTEMPTS: LazyLock = LazyLock::new(|| { counter!( name: "ingest_attempts", description: "Number of routing attempts by AZ locality", @@ -104,7 +79,7 @@ static INGEST_ATTEMPTS: LazyLock = LazyLock::new(|| { ) }); -static RESET_SHARDS_OPERATIONS_TOTAL: LazyLock = LazyLock::new(|| { +pub(super) static RESET_SHARDS_OPERATIONS_TOTAL: LazyLock = LazyLock::new(|| { counter!( name: "reset_shards_operations_total", description: "Total number of reset shards operations performed.", @@ -120,7 +95,13 @@ static SHARDS: LazyLock = LazyLock::new(|| { ) }); -static SHARD_LT_THROUGHPUT_MIB: LazyLock = LazyLock::new(|| { +pub(super) static OPEN_SHARDS: LazyLock = + LazyLock::new(|| gauge!(parent: &*SHARDS, "state" => "open")); + +pub(super) static CLOSED_SHARDS: LazyLock = + LazyLock::new(|| gauge!(parent: &*SHARDS, "state" => "closed")); + +pub(super) static SHARD_LT_THROUGHPUT_MIB: LazyLock = LazyLock::new(|| { histogram!( name: "shard_lt_throughput_mib", description: "Shard long term throughput 
as reported through chitchat", @@ -129,7 +110,7 @@ static SHARD_LT_THROUGHPUT_MIB: LazyLock = LazyLock::new(|| { ) }); -static SHARD_ST_THROUGHPUT_MIB: LazyLock = LazyLock::new(|| { +pub(super) static SHARD_ST_THROUGHPUT_MIB: LazyLock = LazyLock::new(|| { histogram!( name: "shard_st_throughput_mib", description: "Shard short term throughput as reported through chitchat", @@ -138,7 +119,7 @@ static SHARD_ST_THROUGHPUT_MIB: LazyLock = LazyLock::new(|| { ) }); -static WAL_ACQUIRE_LOCK_REQUESTS_IN_FLIGHT: LazyLock = LazyLock::new(|| { +pub(super) static WAL_ACQUIRE_LOCK_REQUESTS_IN_FLIGHT: LazyLock = LazyLock::new(|| { gauge!( name: "wal_acquire_lock_requests_in_flight", description: "Number of acquire lock requests in-flight.", @@ -146,16 +127,17 @@ static WAL_ACQUIRE_LOCK_REQUESTS_IN_FLIGHT: LazyLock = LazyLock::new(|| { ) }); -static WAL_ACQUIRE_LOCK_REQUEST_DURATION_SECS: LazyLock = LazyLock::new(|| { - histogram!( - name: "wal_acquire_lock_request_duration_secs", - description: "Duration of acquire lock requests in seconds.", - subsystem: "ingest", - buckets: exponential_buckets(0.001, 2.0, 12).unwrap(), - ) -}); - -static WAL_DISK_USED_BYTES: LazyLock = LazyLock::new(|| { +pub(super) static WAL_ACQUIRE_LOCK_REQUEST_DURATION_SECS: LazyLock = + LazyLock::new(|| { + histogram!( + name: "wal_acquire_lock_request_duration_secs", + description: "Duration of acquire lock requests in seconds.", + subsystem: "ingest", + buckets: exponential_buckets(0.001, 2.0, 12).unwrap(), + ) + }); + +pub(super) static WAL_DISK_USED_BYTES: LazyLock = LazyLock::new(|| { gauge!( name: "wal_disk_used_bytes", description: "WAL disk space used in bytes.", @@ -163,7 +145,7 @@ static WAL_DISK_USED_BYTES: LazyLock = LazyLock::new(|| { ) }); -static WAL_MEMORY_USED_BYTES: LazyLock = LazyLock::new(|| { +pub(super) static WAL_MEMORY_USED_BYTES: LazyLock = LazyLock::new(|| { gauge!( name: "wal_memory_used_bytes", description: "WAL memory used in bytes.", @@ -171,36 +153,8 @@ static 
WAL_MEMORY_USED_BYTES: LazyLock = LazyLock::new(|| { ) }); -impl Default for IngestV2Metrics { - fn default() -> Self { - Self { - ingest_results: IngestResultMetrics::default(), - ingest_attempts: INGEST_ATTEMPTS.clone(), - reset_shards_operations_total: RESET_SHARDS_OPERATIONS_TOTAL.clone(), - open_shards: gauge!(parent: &*SHARDS, "state" => "open"), - closed_shards: gauge!(parent: &*SHARDS, "state" => "closed"), - shard_lt_throughput_mib: SHARD_LT_THROUGHPUT_MIB.clone(), - shard_st_throughput_mib: SHARD_ST_THROUGHPUT_MIB.clone(), - wal_acquire_lock_requests_in_flight: WAL_ACQUIRE_LOCK_REQUESTS_IN_FLIGHT.clone(), - wal_acquire_lock_request_duration_secs: WAL_ACQUIRE_LOCK_REQUEST_DURATION_SECS.clone(), - wal_disk_used_bytes: WAL_DISK_USED_BYTES.clone(), - wal_memory_used_bytes: WAL_MEMORY_USED_BYTES.clone(), - } - } -} - pub(super) fn report_wal_usage(wal_usage: ResourceUsage) { - INGEST_V2_METRICS - .wal_disk_used_bytes - .set(wal_usage.disk_used_bytes as f64); - quickwit_common::metrics::MEMORY_METRICS - .in_flight - .wal - .set(wal_usage.memory_allocated_bytes as f64); - INGEST_V2_METRICS - .wal_memory_used_bytes - .set(wal_usage.memory_used_bytes as f64); + WAL_DISK_USED_BYTES.set(wal_usage.disk_used_bytes as f64); + quickwit_common::metrics::IN_FLIGHT_WAL.set(wal_usage.memory_allocated_bytes as f64); + WAL_MEMORY_USED_BYTES.set(wal_usage.memory_used_bytes as f64); } - -pub(super) static INGEST_V2_METRICS: LazyLock = - LazyLock::new(IngestV2Metrics::default); diff --git a/quickwit/quickwit-ingest/src/ingest_v2/replication.rs b/quickwit/quickwit-ingest/src/ingest_v2/replication.rs index 2f22daa3220..adf562f9bc1 100644 --- a/quickwit/quickwit-ingest/src/ingest_v2/replication.rs +++ b/quickwit/quickwit-ingest/src/ingest_v2/replication.rs @@ -18,7 +18,6 @@ use std::time::{Duration, Instant}; use bytesize::ByteSize; use futures::{Future, StreamExt}; use mrecordlog::error::CreateQueueError; -use quickwit_common::metrics::MEMORY_METRICS; use 
quickwit_common::{ServiceStream, rate_limited_warn}; use quickwit_metrics::GaugeGuard; use quickwit_proto::ingest::ingester::{ @@ -39,7 +38,6 @@ use super::models::IngesterShard; use super::mrecordlog_utils::check_enough_capacity; use super::state::IngesterState; use crate::ingest_v2::mrecordlog_utils::{AppendDocBatchError, append_non_empty_doc_batch}; -use crate::metrics::INGEST_METRICS; use crate::{estimate_size, with_lock_metrics}; pub(super) const SYN_REPLICATION_STREAM_CAPACITY: usize = 5; @@ -505,7 +503,8 @@ impl ReplicationTask { ))); } let request_size_bytes = replicate_request.num_bytes(); - let mut _gauge_guard = GaugeGuard::from_gauge(&MEMORY_METRICS.in_flight.ingester_replicate); + let mut _gauge_guard = + GaugeGuard::from_gauge(&quickwit_common::metrics::IN_FLIGHT_INGESTER_REPLICATE); _gauge_guard.increment(request_size_bytes as f64); self.current_replication_seqno += 1; @@ -668,12 +667,8 @@ impl ReplicationTask { .expect("replica shard should be initialized") .set_replication_position_inclusive(current_position_inclusive.clone(), now); - INGEST_METRICS - .replicated_num_bytes_total - .increment(batch_num_bytes); - INGEST_METRICS - .replicated_num_docs_total - .increment(batch_num_docs); + crate::metrics::REPLICATED_NUM_BYTES_TOTAL.increment(batch_num_bytes); + crate::metrics::REPLICATED_NUM_DOCS_TOTAL.increment(batch_num_docs); let replicate_success = ReplicateSuccess { subrequest_id: subrequest.subrequest_id, diff --git a/quickwit/quickwit-ingest/src/ingest_v2/router.rs b/quickwit/quickwit-ingest/src/ingest_v2/router.rs index f5b2b0b1a6a..b7abd2e3c3a 100644 --- a/quickwit/quickwit-ingest/src/ingest_v2/router.rs +++ b/quickwit/quickwit-ingest/src/ingest_v2/router.rs @@ -20,7 +20,6 @@ use std::time::Duration; use async_trait::async_trait; use futures::stream::FuturesUnordered; use futures::{Future, StreamExt}; -use quickwit_common::metrics::MEMORY_METRICS; use quickwit_common::pubsub::{EventBroker, EventSubscriber}; use 
quickwit_common::{rate_limited_error, rate_limited_warn}; use quickwit_metrics::{GaugeGuard, counter}; @@ -46,12 +45,10 @@ use super::debouncing::{ DebouncedGetOrCreateOpenShardsRequest, GetOrCreateOpenShardsRequestDebouncer, }; use super::ingester::PERSIST_REQUEST_TIMEOUT; -use super::metrics::IngestResultMetrics; use super::routing_table::RoutingTable; use super::workbench::IngestWorkbench; -use super::{IngesterPool, pending_subrequests}; +use super::{IngesterPool, metrics, pending_subrequests}; use crate::get_ingest_router_buffer_size; -use crate::ingest_v2::metrics::INGEST_V2_METRICS; /// Duration after which ingest requests time out with [`IngestV2Error::Timeout`]. fn ingest_request_timeout() -> Duration { @@ -373,7 +370,7 @@ impl IngestRouter { .routing_table .classify_az_locality(&ingester_node.node_id, &self.ingester_pool); counter!( - parent: &INGEST_V2_METRICS.ingest_attempts, + parent: &crate::ingest_v2::metrics::INGEST_ATTEMPTS, "az_routing" => az_locality, ) .increment(1); @@ -494,40 +491,37 @@ impl IngestRouter { fn update_ingest_metrics(ingest_result: &IngestV2Result, num_subrequests: usize) { let num_subrequests = num_subrequests as u64; - let ingest_results_metrics: &IngestResultMetrics = &INGEST_V2_METRICS.ingest_results; match ingest_result { Ok(ingest_response) => { - ingest_results_metrics - .success - .increment(ingest_response.successes.len() as u64); + metrics::INGEST_RESULT_SUCCESS.increment(ingest_response.successes.len() as u64); for ingest_failure in &ingest_response.failures { match ingest_failure.reason() { IngestFailureReason::CircuitBreaker => { - ingest_results_metrics.circuit_breaker.increment(1); + metrics::INGEST_RESULT_CIRCUIT_BREAKER.increment(1); } IngestFailureReason::Unspecified => { - ingest_results_metrics.unspecified.increment(1) + metrics::INGEST_RESULT_UNSPECIFIED.increment(1) } IngestFailureReason::IndexNotFound => { - ingest_results_metrics.index_not_found.increment(1) + 
metrics::INGEST_RESULT_INDEX_NOT_FOUND.increment(1) } IngestFailureReason::SourceNotFound => { - ingest_results_metrics.source_not_found.increment(1) + metrics::INGEST_RESULT_SOURCE_NOT_FOUND.increment(1) } - IngestFailureReason::Internal => ingest_results_metrics.internal.increment(1), + IngestFailureReason::Internal => metrics::INGEST_RESULT_INTERNAL.increment(1), IngestFailureReason::NoShardsAvailable => { - ingest_results_metrics.no_shards_available.increment(1) + metrics::INGEST_RESULT_NO_SHARDS_AVAILABLE.increment(1) } IngestFailureReason::ShardRateLimited => { - ingest_results_metrics.shard_rate_limited.increment(1) + metrics::INGEST_RESULT_SHARD_RATE_LIMITED.increment(1) } - IngestFailureReason::WalFull => ingest_results_metrics.wal_full.increment(1), - IngestFailureReason::Timeout => ingest_results_metrics.timeout.increment(1), + IngestFailureReason::WalFull => metrics::INGEST_RESULT_WAL_FULL.increment(1), + IngestFailureReason::Timeout => metrics::INGEST_RESULT_TIMEOUT.increment(1), IngestFailureReason::RouterLoadShedding => { - ingest_results_metrics.router_load_shedding.increment(1) + metrics::INGEST_RESULT_ROUTER_LOAD_SHEDDING.increment(1) } IngestFailureReason::LoadShedding => { - ingest_results_metrics.load_shedding.increment(1) + metrics::INGEST_RESULT_LOAD_SHEDDING.increment(1) } } } @@ -535,49 +529,35 @@ fn update_ingest_metrics(ingest_result: &IngestV2Result, num_s Err(ingest_error) => match ingest_error { IngestV2Error::TooManyRequests(rate_limiting_cause) => match rate_limiting_cause { RateLimitingCause::RouterLoadShedding => { - ingest_results_metrics - .router_load_shedding - .increment(num_subrequests); + metrics::INGEST_RESULT_ROUTER_LOAD_SHEDDING.increment(num_subrequests); + } + RateLimitingCause::LoadShedding => { + metrics::INGEST_RESULT_LOAD_SHEDDING.increment(num_subrequests) } - RateLimitingCause::LoadShedding => ingest_results_metrics - .load_shedding - .increment(num_subrequests), RateLimitingCause::WalFull => { - 
ingest_results_metrics.wal_full.increment(num_subrequests); + metrics::INGEST_RESULT_WAL_FULL.increment(num_subrequests); } RateLimitingCause::CircuitBreaker => { - ingest_results_metrics - .circuit_breaker - .increment(num_subrequests); + metrics::INGEST_RESULT_CIRCUIT_BREAKER.increment(num_subrequests); } RateLimitingCause::ShardRateLimiting => { - ingest_results_metrics - .shard_rate_limited - .increment(num_subrequests); + metrics::INGEST_RESULT_SHARD_RATE_LIMITED.increment(num_subrequests); } RateLimitingCause::Unknown => { - ingest_results_metrics - .unspecified - .increment(num_subrequests); + metrics::INGEST_RESULT_UNSPECIFIED.increment(num_subrequests); } }, IngestV2Error::Timeout(_) => { - ingest_results_metrics - .router_timeout - .increment(num_subrequests); + metrics::INGEST_RESULT_ROUTER_TIMEOUT.increment(num_subrequests); } IngestV2Error::ShardNotFound { .. } => { - ingest_results_metrics - .shard_not_found - .increment(num_subrequests); + metrics::INGEST_RESULT_SHARD_NOT_FOUND.increment(num_subrequests); } IngestV2Error::Unavailable(_) => { - ingest_results_metrics - .unavailable - .increment(num_subrequests); + metrics::INGEST_RESULT_UNAVAILABLE.increment(num_subrequests); } IngestV2Error::Internal(_) => { - ingest_results_metrics.internal.increment(num_subrequests); + metrics::INGEST_RESULT_INTERNAL.increment(num_subrequests); } }, } @@ -588,7 +568,8 @@ impl IngestRouterService for IngestRouter { async fn ingest(&self, ingest_request: IngestRequestV2) -> IngestV2Result { let request_size_bytes = ingest_request.num_bytes(); - let mut _gauge_guard = GaugeGuard::from_gauge(&MEMORY_METRICS.in_flight.ingest_router); + let mut _gauge_guard = + GaugeGuard::from_gauge(&quickwit_common::metrics::IN_FLIGHT_INGEST_ROUTER); _gauge_guard.increment(request_size_bytes as f64); let num_subrequests = ingest_request.subrequests.len(); diff --git a/quickwit/quickwit-ingest/src/lib.rs b/quickwit/quickwit-ingest/src/lib.rs index e8fcaa313fb..785e549d62e 100644 --- 
a/quickwit/quickwit-ingest/src/lib.rs +++ b/quickwit/quickwit-ingest/src/lib.rs @@ -110,8 +110,7 @@ macro_rules! with_lock_metrics { ($future:expr, $operation:expr, $kind:expr) => { { quickwit_metrics::gauge!( - parent: &$crate::ingest_v2::metrics::INGEST_V2_METRICS - .wal_acquire_lock_requests_in_flight, + parent: &$crate::ingest_v2::metrics::WAL_ACQUIRE_LOCK_REQUESTS_IN_FLIGHT, "operation" => $operation, "type" => $kind, ) @@ -128,15 +127,13 @@ macro_rules! with_lock_metrics { ); } quickwit_metrics::gauge!( - parent: &$crate::ingest_v2::metrics::INGEST_V2_METRICS - .wal_acquire_lock_requests_in_flight, + parent: &$crate::ingest_v2::metrics::WAL_ACQUIRE_LOCK_REQUESTS_IN_FLIGHT, "operation" => $operation, "type" => $kind, ) .decrement(1.0); quickwit_metrics::histogram!( - parent: &$crate::ingest_v2::metrics::INGEST_V2_METRICS - .wal_acquire_lock_request_duration_secs, + parent: &$crate::ingest_v2::metrics::WAL_ACQUIRE_LOCK_REQUEST_DURATION_SECS, "operation" => $operation, "type" => $kind, ) diff --git a/quickwit/quickwit-ingest/src/metrics.rs b/quickwit/quickwit-ingest/src/metrics.rs index ed4e1b012a2..fbb63e47df2 100644 --- a/quickwit/quickwit-ingest/src/metrics.rs +++ b/quickwit/quickwit-ingest/src/metrics.rs @@ -16,17 +16,7 @@ use std::sync::LazyLock; use quickwit_metrics::{Counter, Gauge, counter, gauge}; -pub struct IngestMetrics { - pub docs_bytes_total: Counter, - pub docs_total: Counter, - - pub replicated_num_bytes_total: Counter, - pub replicated_num_docs_total: Counter, - #[allow(dead_code)] // this really shouldn't be dead, it needs to be used somewhere - pub queue_count: Gauge, -} - -static DOCS_BYTES_TOTAL: LazyLock = LazyLock::new(|| { +pub(crate) static DOCS_BYTES_TOTAL: LazyLock = LazyLock::new(|| { counter!( name: "docs_bytes_total", description: "Total size of the docs ingested, measured in ingester's leader, after validation and before persistence/replication", @@ -34,7 +24,7 @@ static DOCS_BYTES_TOTAL: LazyLock = LazyLock::new(|| { ) }); 
-static DOCS_TOTAL: LazyLock = LazyLock::new(|| { +pub(crate) static DOCS_TOTAL: LazyLock = LazyLock::new(|| { counter!( name: "docs_total", description: "Total number of the docs ingested, measured in ingester's leader, after validation and before persistence/replication", @@ -42,7 +32,7 @@ static DOCS_TOTAL: LazyLock = LazyLock::new(|| { ) }); -static REPLICATED_NUM_BYTES_TOTAL: LazyLock = LazyLock::new(|| { +pub(crate) static REPLICATED_NUM_BYTES_TOTAL: LazyLock = LazyLock::new(|| { counter!( name: "replicated_num_bytes_total", description: "Total size in bytes of the replicated docs.", @@ -50,7 +40,7 @@ static REPLICATED_NUM_BYTES_TOTAL: LazyLock = LazyLock::new(|| { ) }); -static REPLICATED_NUM_DOCS_TOTAL: LazyLock = LazyLock::new(|| { +pub(crate) static REPLICATED_NUM_DOCS_TOTAL: LazyLock = LazyLock::new(|| { counter!( name: "replicated_num_docs_total", description: "Total number of docs replicated.", @@ -58,24 +48,11 @@ static REPLICATED_NUM_DOCS_TOTAL: LazyLock = LazyLock::new(|| { ) }); -static QUEUE_COUNT: LazyLock = LazyLock::new(|| { +#[allow(dead_code)] // this really shouldn't be dead, it needs to be used somewhere +pub(crate) static QUEUE_COUNT: LazyLock = LazyLock::new(|| { gauge!( name: "queue_count", description: "Number of queues currently active", subsystem: "ingest", ) }); - -impl Default for IngestMetrics { - fn default() -> Self { - IngestMetrics { - docs_bytes_total: DOCS_BYTES_TOTAL.clone(), - docs_total: DOCS_TOTAL.clone(), - replicated_num_bytes_total: REPLICATED_NUM_BYTES_TOTAL.clone(), - replicated_num_docs_total: REPLICATED_NUM_DOCS_TOTAL.clone(), - queue_count: QUEUE_COUNT.clone(), - } - } -} - -pub static INGEST_METRICS: LazyLock = LazyLock::new(IngestMetrics::default); diff --git a/quickwit/quickwit-jaeger/src/lib.rs b/quickwit/quickwit-jaeger/src/lib.rs index e4b68943762..3e986e54af1 100644 --- a/quickwit/quickwit-jaeger/src/lib.rs +++ b/quickwit/quickwit-jaeger/src/lib.rs @@ -52,8 +52,6 @@ use tonic::Status; use 
tracing::field::Empty; use tracing::{Span as RuntimeSpan, debug, error, instrument, warn}; -pub(crate) use crate::metrics::JAEGER_SERVICE_METRICS; - mod metrics; mod v1; mod v2; @@ -417,7 +415,7 @@ impl JaegerService { current_span.record("num_bytes", num_bytes_total); counter!( - parent: &JAEGER_SERVICE_METRICS.fetched_traces_total, + parent: &crate::metrics::FETCHED_TRACES_TOTAL, "operation" => operation_name, "index" => OTEL_TRACES_INDEX_ID, ) @@ -425,7 +423,7 @@ impl JaegerService { let elapsed = request_start.elapsed().as_secs_f64(); histogram!( - parent: &JAEGER_SERVICE_METRICS.request_duration_seconds, + parent: &crate::metrics::REQUEST_DURATION_SECONDS, "operation" => operation_name, "index" => OTEL_TRACES_INDEX_ID, "error" => "false", @@ -438,7 +436,7 @@ impl JaegerService { pub(crate) fn record_error(operation_name: &'static str, request_start: Instant) { counter!( - parent: &JAEGER_SERVICE_METRICS.request_errors_total, + parent: &crate::metrics::REQUEST_ERRORS_TOTAL, "operation" => operation_name, "index" => OTEL_TRACES_INDEX_ID, ) @@ -446,7 +444,7 @@ pub(crate) fn record_error(operation_name: &'static str, request_start: Instant) let elapsed = request_start.elapsed().as_secs_f64(); histogram!( - parent: &JAEGER_SERVICE_METRICS.request_duration_seconds, + parent: &crate::metrics::REQUEST_DURATION_SECONDS, "operation" => operation_name, "index" => OTEL_TRACES_INDEX_ID, "error" => "true", @@ -456,13 +454,13 @@ pub(crate) fn record_error(operation_name: &'static str, request_start: Instant) pub(crate) fn record_send(operation_name: &'static str, num_spans: usize, num_bytes: usize) { counter!( - parent: &JAEGER_SERVICE_METRICS.fetched_spans_total, + parent: &crate::metrics::FETCHED_SPANS_TOTAL, "operation" => operation_name, "index" => OTEL_TRACES_INDEX_ID, ) .increment(num_spans as u64); counter!( - parent: &JAEGER_SERVICE_METRICS.transferred_bytes_total, + parent: &crate::metrics::TRANSFERRED_BYTES_TOTAL, "operation" => operation_name, "index" => 
OTEL_TRACES_INDEX_ID, ) diff --git a/quickwit/quickwit-jaeger/src/metrics.rs b/quickwit/quickwit-jaeger/src/metrics.rs index d986f3c98cc..e5065e60529 100644 --- a/quickwit/quickwit-jaeger/src/metrics.rs +++ b/quickwit/quickwit-jaeger/src/metrics.rs @@ -17,16 +17,7 @@ use std::sync::LazyLock; use quickwit_common::metrics::exponential_buckets; use quickwit_metrics::{Counter, Histogram, counter, histogram}; -pub struct JaegerServiceMetrics { - pub requests_total: Counter, - pub request_errors_total: Counter, - pub request_duration_seconds: Histogram, - pub fetched_traces_total: Counter, - pub fetched_spans_total: Counter, - pub transferred_bytes_total: Counter, -} - -static REQUESTS_TOTAL: LazyLock = LazyLock::new(|| { +pub(crate) static REQUESTS_TOTAL: LazyLock = LazyLock::new(|| { counter!( name: "requests_total", description: "Number of requests", @@ -34,7 +25,7 @@ static REQUESTS_TOTAL: LazyLock = LazyLock::new(|| { ) }); -static REQUEST_ERRORS_TOTAL: LazyLock = LazyLock::new(|| { +pub(crate) static REQUEST_ERRORS_TOTAL: LazyLock = LazyLock::new(|| { counter!( name: "request_errors_total", description: "Number of failed requests", @@ -42,7 +33,7 @@ static REQUEST_ERRORS_TOTAL: LazyLock = LazyLock::new(|| { ) }); -static REQUEST_DURATION_SECONDS: LazyLock = LazyLock::new(|| { +pub(crate) static REQUEST_DURATION_SECONDS: LazyLock = LazyLock::new(|| { histogram!( name: "request_duration_seconds", description: "Duration of requests", @@ -51,7 +42,7 @@ static REQUEST_DURATION_SECONDS: LazyLock = LazyLock::new(|| { ) }); -static FETCHED_TRACES_TOTAL: LazyLock = LazyLock::new(|| { +pub(crate) static FETCHED_TRACES_TOTAL: LazyLock = LazyLock::new(|| { counter!( name: "fetched_traces_total", description: "Number of traces retrieved from storage", @@ -59,7 +50,7 @@ static FETCHED_TRACES_TOTAL: LazyLock = LazyLock::new(|| { ) }); -static FETCHED_SPANS_TOTAL: LazyLock = LazyLock::new(|| { +pub(crate) static FETCHED_SPANS_TOTAL: LazyLock = LazyLock::new(|| { counter!( name: 
"fetched_spans_total", description: "Number of spans retrieved from storage", @@ -67,26 +58,10 @@ static FETCHED_SPANS_TOTAL: LazyLock = LazyLock::new(|| { ) }); -static TRANSFERRED_BYTES_TOTAL: LazyLock = LazyLock::new(|| { +pub(crate) static TRANSFERRED_BYTES_TOTAL: LazyLock = LazyLock::new(|| { counter!( name: "transferred_bytes_total", description: "Number of bytes transferred", subsystem: "jaeger", ) }); - -impl Default for JaegerServiceMetrics { - fn default() -> Self { - Self { - requests_total: REQUESTS_TOTAL.clone(), - request_errors_total: REQUEST_ERRORS_TOTAL.clone(), - request_duration_seconds: REQUEST_DURATION_SECONDS.clone(), - fetched_traces_total: FETCHED_TRACES_TOTAL.clone(), - fetched_spans_total: FETCHED_SPANS_TOTAL.clone(), - transferred_bytes_total: TRANSFERRED_BYTES_TOTAL.clone(), - } - } -} - -pub static JAEGER_SERVICE_METRICS: LazyLock = - LazyLock::new(JaegerServiceMetrics::default); diff --git a/quickwit/quickwit-jaeger/src/v1.rs b/quickwit/quickwit-jaeger/src/v1.rs index 6a96b1aa38b..8e4708688bb 100644 --- a/quickwit/quickwit-jaeger/src/v1.rs +++ b/quickwit/quickwit-jaeger/src/v1.rs @@ -28,7 +28,6 @@ use quickwit_proto::jaeger::storage::v1::{ }; use tonic::{Request, Response, Status}; -use crate::metrics::JAEGER_SERVICE_METRICS; use crate::{JaegerService, SpanStream}; macro_rules! metrics { @@ -37,7 +36,7 @@ macro_rules! metrics { let operation = stringify!($operation); let index = $index; counter!( - parent: &JAEGER_SERVICE_METRICS.requests_total, + parent: &crate::metrics::REQUESTS_TOTAL, "operation" => operation, "index" => index, ) @@ -48,7 +47,7 @@ macro_rules! metrics { }, err @ Err(_) => { counter!( - parent: &JAEGER_SERVICE_METRICS.request_errors_total, + parent: &crate::metrics::REQUEST_ERRORS_TOTAL, "operation" => operation, "index" => index, ) @@ -58,7 +57,7 @@ macro_rules! 
metrics { }; let elapsed = start.elapsed().as_secs_f64(); histogram!( - parent: &JAEGER_SERVICE_METRICS.request_duration_seconds, + parent: &crate::metrics::REQUEST_DURATION_SECONDS, "operation" => operation, "index" => index, "error" => is_error, diff --git a/quickwit/quickwit-jaeger/src/v2.rs b/quickwit/quickwit-jaeger/src/v2.rs index c7905e35689..f62a58ea29f 100644 --- a/quickwit/quickwit-jaeger/src/v2.rs +++ b/quickwit/quickwit-jaeger/src/v2.rs @@ -51,7 +51,6 @@ use tonic::{Request, Response, Status}; use tracing::field::Empty; use tracing::{Span as RuntimeSpan, debug, error, instrument}; -use crate::metrics::JAEGER_SERVICE_METRICS; use crate::{ JaegerService, TimeIntervalSecs, TracesDataStream, get_operations_impl, get_services_impl, json_deserialize, record_error, record_send, to_duration_millis, @@ -63,7 +62,7 @@ macro_rules! metrics { let operation = stringify!($operation); let index = $index; counter!( - parent: &JAEGER_SERVICE_METRICS.requests_total, + parent: &crate::metrics::REQUESTS_TOTAL, "operation" => operation, "index" => index, ) @@ -74,7 +73,7 @@ macro_rules! metrics { }, err @ Err(_) => { counter!( - parent: &JAEGER_SERVICE_METRICS.request_errors_total, + parent: &crate::metrics::REQUEST_ERRORS_TOTAL, "operation" => operation, "index" => index, ) @@ -84,7 +83,7 @@ macro_rules! 
metrics { }; let elapsed = start.elapsed().as_secs_f64(); histogram!( - parent: &JAEGER_SERVICE_METRICS.request_duration_seconds, + parent: &crate::metrics::REQUEST_DURATION_SECONDS, "operation" => operation, "index" => index, "error" => is_error, @@ -444,7 +443,7 @@ async fn stream_otel_spans_impl( record_send(operation_name, num_spans, num_bytes); counter!( - parent: &JAEGER_SERVICE_METRICS.fetched_traces_total, + parent: &crate::metrics::FETCHED_TRACES_TOTAL, "operation" => operation_name, "index" => OTEL_TRACES_INDEX_ID, ) @@ -452,7 +451,7 @@ async fn stream_otel_spans_impl( let elapsed = request_start.elapsed().as_secs_f64(); histogram!( - parent: &JAEGER_SERVICE_METRICS.request_duration_seconds, + parent: &crate::metrics::REQUEST_DURATION_SECONDS, "operation" => operation_name, "index" => OTEL_TRACES_INDEX_ID, "error" => "false", diff --git a/quickwit/quickwit-janitor/src/actors/delete_task_planner.rs b/quickwit/quickwit-janitor/src/actors/delete_task_planner.rs index ea7ab7c631e..b25b06b4a68 100644 --- a/quickwit/quickwit-janitor/src/actors/delete_task_planner.rs +++ b/quickwit/quickwit-janitor/src/actors/delete_task_planner.rs @@ -38,8 +38,6 @@ use serde::Serialize; use tantivy::Inventory; use tracing::{debug, info}; -use crate::metrics::JANITOR_METRICS; - const PLANNER_REFRESH_INTERVAL: Duration = Duration::from_secs(60); const NUM_STALE_SPLITS_TO_FETCH: usize = 1000; @@ -209,7 +207,7 @@ impl DeleteTaskPlanner { quickwit_common::metrics::index_label(self.index_uid.index_id.as_str()) .to_string(); gauge!( - parent: &JANITOR_METRICS.ongoing_num_delete_operations_total, + parent: &crate::metrics::ONGOING_NUM_DELETE_OPERATIONS_TOTAL, "index" => index_label, ) .set(self.ongoing_delete_operations_inventory.list().len() as f64); diff --git a/quickwit/quickwit-janitor/src/actors/garbage_collector.rs b/quickwit/quickwit-janitor/src/actors/garbage_collector.rs index 5aa86999ca9..9c3a9914815 100644 --- a/quickwit/quickwit-janitor/src/actors/garbage_collector.rs +++ 
b/quickwit/quickwit-janitor/src/actors/garbage_collector.rs @@ -32,8 +32,6 @@ use quickwit_storage::{Storage, StorageResolver}; use serde::Serialize; use tracing::{debug, error, info}; -use crate::metrics::JANITOR_METRICS; - const RUN_INTERVAL: Duration = Duration::from_secs(10 * 60); // 10 minutes /// Result of a GC run (tantivy or parquet). @@ -59,16 +57,16 @@ fn gc_metrics(split_type: &str) -> GcMetrics { let split_type = split_type.to_string(); GcMetrics { deleted_splits: counter!( - parent: &JANITOR_METRICS.gc_deleted_splits, + parent: &crate::metrics::GC_DELETED_SPLITS, "result" => "success", "split_type" => split_type.clone(), ), deleted_bytes: counter!( - parent: &JANITOR_METRICS.gc_deleted_bytes, + parent: &crate::metrics::GC_DELETED_BYTES, "split_type" => split_type.clone(), ), failed_splits: counter!( - parent: &JANITOR_METRICS.gc_deleted_splits, + parent: &crate::metrics::GC_DELETED_SPLITS, "result" => "error", "split_type" => split_type, ), @@ -207,7 +205,7 @@ impl GarbageCollector { let tantivy_run_duration = tantivy_start.elapsed().as_secs(); counter!( - parent: &JANITOR_METRICS.gc_seconds_total, + parent: &crate::metrics::GC_SECONDS_TOTAL, "split_type" => "tantivy", ) .increment(tantivy_run_duration); @@ -216,7 +214,7 @@ impl GarbageCollector { Ok(removal_info) => { self.counters.num_successful_gc_run += 1; counter!( - parent: &JANITOR_METRICS.gc_runs, + parent: &crate::metrics::GC_RUNS, "result" => "success", "split_type" => "tantivy", ) @@ -240,7 +238,7 @@ impl GarbageCollector { Err(error) => { self.counters.num_failed_gc_run += 1; counter!( - parent: &JANITOR_METRICS.gc_runs, + parent: &crate::metrics::GC_RUNS, "result" => "error", "split_type" => "tantivy", ) @@ -268,7 +266,7 @@ impl GarbageCollector { let parquet_run_duration = parquet_start.elapsed().as_secs(); counter!( - parent: &JANITOR_METRICS.gc_seconds_total, + parent: &crate::metrics::GC_SECONDS_TOTAL, "split_type" => "parquet", ) .increment(parquet_run_duration); @@ -277,7 +275,7 @@ 
impl GarbageCollector { Ok(removal_info) => { self.counters.num_successful_gc_run += 1; counter!( - parent: &JANITOR_METRICS.gc_runs, + parent: &crate::metrics::GC_RUNS, "result" => "success", "split_type" => "parquet", ) @@ -297,7 +295,7 @@ impl GarbageCollector { Err(error) => { self.counters.num_failed_gc_run += 1; counter!( - parent: &JANITOR_METRICS.gc_runs, + parent: &crate::metrics::GC_RUNS, "result" => "error", "split_type" => "parquet", ) diff --git a/quickwit/quickwit-janitor/src/metrics.rs b/quickwit/quickwit-janitor/src/metrics.rs index 713da1a937b..1e3adc48c83 100644 --- a/quickwit/quickwit-janitor/src/metrics.rs +++ b/quickwit/quickwit-janitor/src/metrics.rs @@ -16,15 +16,7 @@ use std::sync::LazyLock; use quickwit_metrics::{Counter, Gauge, counter, gauge}; -pub struct JanitorMetrics { - pub ongoing_num_delete_operations_total: Gauge, - pub gc_deleted_splits: Counter, - pub gc_deleted_bytes: Counter, - pub gc_runs: Counter, - pub gc_seconds_total: Counter, -} - -static ONGOING_NUM_DELETE_OPERATIONS_TOTAL: LazyLock = LazyLock::new(|| { +pub(crate) static ONGOING_NUM_DELETE_OPERATIONS_TOTAL: LazyLock = LazyLock::new(|| { gauge!( name: "ongoing_num_delete_operations_total", description: "Num of ongoing delete operations (per index).", @@ -32,7 +24,7 @@ static ONGOING_NUM_DELETE_OPERATIONS_TOTAL: LazyLock = LazyLock::new(|| { ) }); -static GC_DELETED_SPLITS: LazyLock = LazyLock::new(|| { +pub(crate) static GC_DELETED_SPLITS: LazyLock = LazyLock::new(|| { counter!( name: "gc_deleted_splits_total", description: "Total number of splits deleted by the garbage collector.", @@ -40,7 +32,7 @@ static GC_DELETED_SPLITS: LazyLock = LazyLock::new(|| { ) }); -static GC_DELETED_BYTES: LazyLock = LazyLock::new(|| { +pub(crate) static GC_DELETED_BYTES: LazyLock = LazyLock::new(|| { counter!( name: "gc_deleted_bytes_total", description: "Total number of bytes deleted by the garbage collector.", @@ -48,7 +40,7 @@ static GC_DELETED_BYTES: LazyLock = LazyLock::new(|| { ) }); 
-static GC_RUNS: LazyLock = LazyLock::new(|| { +pub(crate) static GC_RUNS: LazyLock = LazyLock::new(|| { counter!( name: "gc_runs_total", description: "Total number of garbage collector execition.", @@ -56,26 +48,10 @@ static GC_RUNS: LazyLock = LazyLock::new(|| { ) }); -static GC_SECONDS_TOTAL: LazyLock = LazyLock::new(|| { +pub(crate) static GC_SECONDS_TOTAL: LazyLock = LazyLock::new(|| { counter!( name: "gc_seconds_total", description: "Total time spent running the garbage collector", subsystem: "quickwit_janitor", ) }); - -impl Default for JanitorMetrics { - fn default() -> Self { - JanitorMetrics { - ongoing_num_delete_operations_total: ONGOING_NUM_DELETE_OPERATIONS_TOTAL.clone(), - gc_deleted_splits: GC_DELETED_SPLITS.clone(), - gc_deleted_bytes: GC_DELETED_BYTES.clone(), - gc_runs: GC_RUNS.clone(), - gc_seconds_total: GC_SECONDS_TOTAL.clone(), - } - } -} - -/// `JANITOR_METRICS` exposes a bunch of related metrics through a prometheus -/// endpoint. -pub static JANITOR_METRICS: LazyLock = LazyLock::new(JanitorMetrics::default); diff --git a/quickwit/quickwit-lambda-client/src/invoker.rs b/quickwit/quickwit-lambda-client/src/invoker.rs index 8fb32b642d5..6d5d5720a69 100644 --- a/quickwit/quickwit-lambda-client/src/invoker.rs +++ b/quickwit/quickwit-lambda-client/src/invoker.rs @@ -30,8 +30,6 @@ use quickwit_proto::search::{LambdaSearchResponses, LambdaSingleSplitResult, Lea use quickwit_search::{LambdaLeafSearchInvoker, SearchError}; use tracing::{debug, info, instrument, warn}; -use crate::metrics::LAMBDA_METRICS; - /// Upper bound on the retry-after hint we will honor from Lambda rate-limit responses. 
const MAX_RETRY_AFTER: Duration = Duration::from_secs(10); @@ -173,12 +171,12 @@ impl LambdaLeafSearchInvoker for AwsLambdaInvoker { let elapsed = start.elapsed().as_secs_f64(); let status = if result.is_ok() { "success" } else { "error" }; counter!( - parent: &LAMBDA_METRICS.leaf_search_requests_total, + parent: &crate::metrics::LEAF_SEARCH_REQUESTS_TOTAL, "status" => status, ) .increment(1); histogram!( - parent: &LAMBDA_METRICS.leaf_search_duration_seconds, + parent: &crate::metrics::LEAF_SEARCH_DURATION_SECONDS, "status" => status, ) .record(elapsed); @@ -235,9 +233,7 @@ impl AwsLambdaInvoker { let payload_json = serde_json::to_vec(&payload) .map_err(|e| SearchError::Internal(format!("JSON serialization error: {}", e)))?; - LAMBDA_METRICS - .leaf_search_request_payload_size_bytes - .record(payload_json.len() as f64); + crate::metrics::LEAF_SEARCH_REQUEST_PAYLOAD_SIZE_BYTES.record(payload_json.len() as f64); debug!( payload_size = payload_json.len(), @@ -277,8 +273,7 @@ impl AwsLambdaInvoker { .payload() .ok_or_else(|| SearchError::Internal("no response payload from Lambda".into()))?; - LAMBDA_METRICS - .leaf_search_response_payload_size_bytes + crate::metrics::LEAF_SEARCH_RESPONSE_PAYLOAD_SIZE_BYTES .record(response_payload.as_ref().len() as f64); let lambda_response: LambdaSearchResponsePayload = diff --git a/quickwit/quickwit-lambda-client/src/lib.rs b/quickwit/quickwit-lambda-client/src/lib.rs index aebf264df8c..70163f06e84 100644 --- a/quickwit/quickwit-lambda-client/src/lib.rs +++ b/quickwit/quickwit-lambda-client/src/lib.rs @@ -32,6 +32,5 @@ mod invoker; mod metrics; pub use deploy::try_get_or_deploy_invoker; -pub use metrics::LAMBDA_METRICS; // Re-export payload types from server crate for convenience pub use quickwit_lambda_server::{LambdaSearchRequestPayload, LambdaSearchResponsePayload}; diff --git a/quickwit/quickwit-lambda-client/src/metrics.rs b/quickwit/quickwit-lambda-client/src/metrics.rs index 3bab535d813..75e56577816 100644 --- 
a/quickwit/quickwit-lambda-client/src/metrics.rs +++ b/quickwit/quickwit-lambda-client/src/metrics.rs @@ -29,14 +29,7 @@ fn payload_size_buckets() -> Vec { exponential_buckets(1024.0, 4.0, 8).unwrap() } -pub struct LambdaMetrics { - pub leaf_search_requests_total: Counter, - pub leaf_search_duration_seconds: Histogram, - pub leaf_search_request_payload_size_bytes: Histogram, - pub leaf_search_response_payload_size_bytes: Histogram, -} - -static LEAF_SEARCH_REQUESTS_TOTAL: LazyLock = LazyLock::new(|| { +pub(crate) static LEAF_SEARCH_REQUESTS_TOTAL: LazyLock = LazyLock::new(|| { counter!( name: "leaf_search_requests_total", description: "Total number of Lambda leaf search invocations.", @@ -44,7 +37,7 @@ static LEAF_SEARCH_REQUESTS_TOTAL: LazyLock = LazyLock::new(|| { ) }); -static LEAF_SEARCH_DURATION_SECONDS: LazyLock = LazyLock::new(|| { +pub(crate) static LEAF_SEARCH_DURATION_SECONDS: LazyLock = LazyLock::new(|| { histogram!( name: "leaf_search_duration_seconds", description: "Duration of Lambda leaf search invocations in seconds.", @@ -53,34 +46,22 @@ static LEAF_SEARCH_DURATION_SECONDS: LazyLock = LazyLock::new(|| { ) }); -static LEAF_SEARCH_REQUEST_PAYLOAD_SIZE_BYTES: LazyLock = LazyLock::new(|| { - histogram!( - name: "leaf_search_request_payload_size_bytes", - description: "Size of the request payload sent to Lambda in bytes.", - subsystem: "lambda", - buckets: payload_size_buckets(), - ) -}); - -static LEAF_SEARCH_RESPONSE_PAYLOAD_SIZE_BYTES: LazyLock = LazyLock::new(|| { - histogram!( - name: "leaf_search_response_payload_size_bytes", - description: "Size of the response payload received from Lambda in bytes.", - subsystem: "lambda", - buckets: payload_size_buckets(), - ) -}); - -impl Default for LambdaMetrics { - fn default() -> Self { - LambdaMetrics { - leaf_search_requests_total: LEAF_SEARCH_REQUESTS_TOTAL.clone(), - leaf_search_duration_seconds: LEAF_SEARCH_DURATION_SECONDS.clone(), - leaf_search_request_payload_size_bytes: 
LEAF_SEARCH_REQUEST_PAYLOAD_SIZE_BYTES.clone(), - leaf_search_response_payload_size_bytes: LEAF_SEARCH_RESPONSE_PAYLOAD_SIZE_BYTES - .clone(), - } - } -} +pub(crate) static LEAF_SEARCH_REQUEST_PAYLOAD_SIZE_BYTES: LazyLock = + LazyLock::new(|| { + histogram!( + name: "leaf_search_request_payload_size_bytes", + description: "Size of the request payload sent to Lambda in bytes.", + subsystem: "lambda", + buckets: payload_size_buckets(), + ) + }); -pub static LAMBDA_METRICS: LazyLock = LazyLock::new(LambdaMetrics::default); +pub(crate) static LEAF_SEARCH_RESPONSE_PAYLOAD_SIZE_BYTES: LazyLock = + LazyLock::new(|| { + histogram!( + name: "leaf_search_response_payload_size_bytes", + description: "Size of the response payload received from Lambda in bytes.", + subsystem: "lambda", + buckets: payload_size_buckets(), + ) + }); diff --git a/quickwit/quickwit-metastore/src/metastore/postgres/metrics.rs b/quickwit/quickwit-metastore/src/metastore/postgres/metrics.rs index 7f0a6feec04..b45d005366a 100644 --- a/quickwit/quickwit-metastore/src/metastore/postgres/metrics.rs +++ b/quickwit/quickwit-metastore/src/metastore/postgres/metrics.rs @@ -16,14 +16,7 @@ use std::sync::LazyLock; use quickwit_metrics::{Gauge, gauge}; -#[derive(Clone)] -pub(super) struct PostgresMetrics { - pub acquire_connections: Gauge, - pub active_connections: Gauge, - pub idle_connections: Gauge, -} - -static ACQUIRE_CONNECTIONS: LazyLock = LazyLock::new(|| { +pub(super) static ACQUIRE_CONNECTIONS: LazyLock = LazyLock::new(|| { gauge!( name: "acquire_connections", description: "Number of connections being acquired.", @@ -31,7 +24,7 @@ static ACQUIRE_CONNECTIONS: LazyLock = LazyLock::new(|| { ) }); -static ACTIVE_CONNECTIONS: LazyLock = LazyLock::new(|| { +pub(super) static ACTIVE_CONNECTIONS: LazyLock = LazyLock::new(|| { gauge!( name: "active_connections", description: "Number of active (used + idle) connections.", @@ -39,23 +32,10 @@ static ACTIVE_CONNECTIONS: LazyLock = LazyLock::new(|| { ) }); -static 
IDLE_CONNECTIONS: LazyLock = LazyLock::new(|| { +pub(super) static IDLE_CONNECTIONS: LazyLock = LazyLock::new(|| { gauge!( name: "idle_connections", description: "Number of idle connections.", subsystem: "metastore", ) }); - -impl Default for PostgresMetrics { - fn default() -> Self { - Self { - acquire_connections: ACQUIRE_CONNECTIONS.clone(), - active_connections: ACTIVE_CONNECTIONS.clone(), - idle_connections: IDLE_CONNECTIONS.clone(), - } - } -} - -pub(super) static POSTGRES_METRICS: LazyLock = - LazyLock::new(PostgresMetrics::default); diff --git a/quickwit/quickwit-metastore/src/metastore/postgres/pool.rs b/quickwit/quickwit-metastore/src/metastore/postgres/pool.rs index 455f7e0e848..909154b89a6 100644 --- a/quickwit/quickwit-metastore/src/metastore/postgres/pool.rs +++ b/quickwit/quickwit-metastore/src/metastore/postgres/pool.rs @@ -21,8 +21,6 @@ use sqlx::{ Acquire, Database, Describe, Either, Error, Execute, Executor, Pool, Postgres, Transaction, }; -use super::metrics::POSTGRES_METRICS; - #[derive(Debug)] pub(super) struct TrackedPool { inner_pool: Pool, @@ -50,15 +48,11 @@ impl<'a, DB: Database> Acquire<'a> for &TrackedPool { fn acquire(self) -> BoxFuture<'static, Result> { let acquire_conn_fut = self.inner_pool.acquire(); - POSTGRES_METRICS - .active_connections - .set(self.inner_pool.size() as f64); - POSTGRES_METRICS - .idle_connections - .set(self.inner_pool.num_idle() as f64); + super::metrics::ACTIVE_CONNECTIONS.set(self.inner_pool.size() as f64); + super::metrics::IDLE_CONNECTIONS.set(self.inner_pool.num_idle() as f64); Box::pin(async move { - let mut _gauge_guard = GaugeGuard::from_gauge(&POSTGRES_METRICS.acquire_connections); + let mut _gauge_guard = GaugeGuard::from_gauge(&super::metrics::ACQUIRE_CONNECTIONS); _gauge_guard.increment(1.0); let conn = acquire_conn_fut.await?; diff --git a/quickwit/quickwit-opentelemetry/src/otlp/logs.rs b/quickwit/quickwit-opentelemetry/src/otlp/logs.rs index 1e26f8eb650..9facb3ff0ff 100644 --- 
a/quickwit/quickwit-opentelemetry/src/otlp/logs.rs +++ b/quickwit/quickwit-opentelemetry/src/otlp/logs.rs @@ -40,7 +40,6 @@ use super::{ extract_otel_index_id_from_metadata, ingest_doc_batch_v2, is_zero, parse_log_record_body, }; use crate::otlp::extract_attributes; -use crate::otlp::metrics::OTLP_SERVICE_METRICS; pub const OTEL_LOGS_INDEX_ID: &str = "otel-logs-v0_9"; @@ -241,7 +240,7 @@ impl OtlpGrpcLogsService { self.store_logs(index_id.clone(), doc_batch).await?; counter!( - parent: &OTLP_SERVICE_METRICS.ingested_log_records_total, + parent: &crate::otlp::metrics::INGESTED_LOG_RECORDS_TOTAL, "service" => "logs", "index" => index_id.clone(), "transport" => "grpc", @@ -249,7 +248,7 @@ impl OtlpGrpcLogsService { ) .increment(num_log_records); counter!( - parent: &OTLP_SERVICE_METRICS.ingested_bytes_total, + parent: &crate::otlp::metrics::INGESTED_BYTES_TOTAL, "service" => "logs", "index" => index_id, "transport" => "grpc", @@ -327,7 +326,7 @@ impl OtlpGrpcLogsService { let start = std::time::Instant::now(); counter!( - parent: &OTLP_SERVICE_METRICS.requests_total, + parent: &crate::otlp::metrics::REQUESTS_TOTAL, "service" => "logs", "index" => index_id.clone(), "transport" => "grpc", @@ -338,7 +337,7 @@ impl OtlpGrpcLogsService { ok @ Ok(_) => (ok, "false"), err @ Err(_) => { counter!( - parent: &OTLP_SERVICE_METRICS.request_errors_total, + parent: &crate::otlp::metrics::REQUEST_ERRORS_TOTAL, "service" => "logs", "index" => index_id.clone(), "transport" => "grpc", @@ -350,7 +349,7 @@ impl OtlpGrpcLogsService { }; let elapsed = start.elapsed().as_secs_f64(); histogram!( - parent: &OTLP_SERVICE_METRICS.request_duration_seconds, + parent: &crate::otlp::metrics::REQUEST_DURATION_SECONDS, "service" => "logs", "index" => index_id, "transport" => "grpc", diff --git a/quickwit/quickwit-opentelemetry/src/otlp/metrics.rs b/quickwit/quickwit-opentelemetry/src/otlp/metrics.rs index 9cff954dedc..92c34755bea 100644 --- a/quickwit/quickwit-opentelemetry/src/otlp/metrics.rs +++ 
b/quickwit/quickwit-opentelemetry/src/otlp/metrics.rs @@ -17,17 +17,7 @@ use std::sync::LazyLock; use quickwit_common::metrics::exponential_buckets; use quickwit_metrics::{Counter, Histogram, counter, histogram}; -pub struct OtlpServiceMetrics { - pub requests_total: Counter, - pub request_errors_total: Counter, - pub request_duration_seconds: Histogram, - pub ingested_log_records_total: Counter, - pub ingested_spans_total: Counter, - pub ingested_data_points_total: Counter, - pub ingested_bytes_total: Counter, -} - -static REQUESTS_TOTAL: LazyLock = LazyLock::new(|| { +pub(crate) static REQUESTS_TOTAL: LazyLock = LazyLock::new(|| { counter!( name: "requests_total", description: "Number of requests", @@ -35,7 +25,7 @@ static REQUESTS_TOTAL: LazyLock = LazyLock::new(|| { ) }); -static REQUEST_ERRORS_TOTAL: LazyLock = LazyLock::new(|| { +pub(crate) static REQUEST_ERRORS_TOTAL: LazyLock = LazyLock::new(|| { counter!( name: "request_errors_total", description: "Number of failed requests", @@ -43,7 +33,7 @@ static REQUEST_ERRORS_TOTAL: LazyLock = LazyLock::new(|| { ) }); -static REQUEST_DURATION_SECONDS: LazyLock = LazyLock::new(|| { +pub(crate) static REQUEST_DURATION_SECONDS: LazyLock = LazyLock::new(|| { histogram!( name: "request_duration_seconds", description: "Duration of requests", @@ -52,7 +42,7 @@ static REQUEST_DURATION_SECONDS: LazyLock = LazyLock::new(|| { ) }); -static INGESTED_LOG_RECORDS_TOTAL: LazyLock = LazyLock::new(|| { +pub(crate) static INGESTED_LOG_RECORDS_TOTAL: LazyLock = LazyLock::new(|| { counter!( name: "ingested_log_records_total", description: "Number of log records ingested", @@ -60,7 +50,7 @@ static INGESTED_LOG_RECORDS_TOTAL: LazyLock = LazyLock::new(|| { ) }); -static INGESTED_SPANS_TOTAL: LazyLock = LazyLock::new(|| { +pub(crate) static INGESTED_SPANS_TOTAL: LazyLock = LazyLock::new(|| { counter!( name: "ingested_spans_total", description: "Number of spans ingested", @@ -68,7 +58,7 @@ static INGESTED_SPANS_TOTAL: LazyLock = 
LazyLock::new(|| { ) }); -static INGESTED_DATA_POINTS_TOTAL: LazyLock = LazyLock::new(|| { +pub(crate) static INGESTED_DATA_POINTS_TOTAL: LazyLock = LazyLock::new(|| { counter!( name: "ingested_data_points_total", description: "Number of metric data points ingested", @@ -76,28 +66,10 @@ static INGESTED_DATA_POINTS_TOTAL: LazyLock = LazyLock::new(|| { ) }); -static INGESTED_BYTES_TOTAL: LazyLock = LazyLock::new(|| { +pub(crate) static INGESTED_BYTES_TOTAL: LazyLock = LazyLock::new(|| { counter!( name: "ingested_bytes_total", description: "Number of bytes ingested", subsystem: "otlp", ) }); - -impl Default for OtlpServiceMetrics { - fn default() -> Self { - Self { - requests_total: REQUESTS_TOTAL.clone(), - request_errors_total: REQUEST_ERRORS_TOTAL.clone(), - request_duration_seconds: REQUEST_DURATION_SECONDS.clone(), - ingested_log_records_total: INGESTED_LOG_RECORDS_TOTAL.clone(), - ingested_spans_total: INGESTED_SPANS_TOTAL.clone(), - ingested_data_points_total: INGESTED_DATA_POINTS_TOTAL.clone(), - ingested_bytes_total: INGESTED_BYTES_TOTAL.clone(), - } - } -} - -/// `OTLP_SERVICE_METRICS` exposes metrics for each OTLP service. 
-pub static OTLP_SERVICE_METRICS: LazyLock = - LazyLock::new(OtlpServiceMetrics::default); diff --git a/quickwit/quickwit-opentelemetry/src/otlp/otel_metrics.rs b/quickwit/quickwit-opentelemetry/src/otlp/otel_metrics.rs index b88d08aff94..3e6fc83c173 100644 --- a/quickwit/quickwit-opentelemetry/src/otlp/otel_metrics.rs +++ b/quickwit/quickwit-opentelemetry/src/otlp/otel_metrics.rs @@ -39,7 +39,6 @@ use tracing::{Span as RuntimeSpan, error, instrument, warn}; use super::arrow_metrics::{ArrowDocBatchV2Builder, ArrowMetricsBatchBuilder}; use super::{OtelSignal, extract_otel_index_id_from_metadata, ingest_doc_batch_v2}; use crate::otlp::extract_attributes; -use crate::otlp::metrics::OTLP_SERVICE_METRICS; pub const OTEL_METRICS_INDEX_ID: &str = "otel-metrics-v0_9"; @@ -237,7 +236,7 @@ impl OtlpGrpcMetricsService { self.store_metrics(index_id.clone(), doc_batch).await?; counter!( - parent: &OTLP_SERVICE_METRICS.ingested_data_points_total, + parent: &crate::otlp::metrics::INGESTED_DATA_POINTS_TOTAL, "service" => "metrics", "index" => index_id.clone(), "transport" => "grpc", @@ -245,7 +244,7 @@ impl OtlpGrpcMetricsService { ) .increment(num_data_points - num_parse_errors); counter!( - parent: &OTLP_SERVICE_METRICS.ingested_bytes_total, + parent: &crate::otlp::metrics::INGESTED_BYTES_TOTAL, "service" => "metrics", "index" => index_id, "transport" => "grpc", @@ -341,7 +340,7 @@ impl OtlpGrpcMetricsService { let start = std::time::Instant::now(); counter!( - parent: &OTLP_SERVICE_METRICS.requests_total, + parent: &crate::otlp::metrics::REQUESTS_TOTAL, "service" => "metrics", "index" => index_id.clone(), "transport" => "grpc", @@ -353,7 +352,7 @@ impl OtlpGrpcMetricsService { ok @ Ok(_) => (ok, "false"), err @ Err(_) => { counter!( - parent: &OTLP_SERVICE_METRICS.request_errors_total, + parent: &crate::otlp::metrics::REQUEST_ERRORS_TOTAL, "service" => "metrics", "index" => index_id.clone(), "transport" => "grpc", @@ -366,7 +365,7 @@ impl OtlpGrpcMetricsService { let elapsed = 
start.elapsed().as_secs_f64(); histogram!( - parent: &OTLP_SERVICE_METRICS.request_duration_seconds, + parent: &crate::otlp::metrics::REQUEST_DURATION_SECONDS, "service" => "metrics", "index" => index_id, "transport" => "grpc", diff --git a/quickwit/quickwit-opentelemetry/src/otlp/traces.rs b/quickwit/quickwit-opentelemetry/src/otlp/traces.rs index 964e69777d7..d1f9be51a4e 100644 --- a/quickwit/quickwit-opentelemetry/src/otlp/traces.rs +++ b/quickwit/quickwit-opentelemetry/src/otlp/traces.rs @@ -45,7 +45,6 @@ use super::{ OtelSignal, TryFromSpanIdError, TryFromTraceIdError, extract_otel_index_id_from_metadata, ingest_doc_batch_v2, is_zero, }; -use crate::otlp::metrics::OTLP_SERVICE_METRICS; use crate::otlp::{SpanId, TraceId, extract_attributes}; pub const OTEL_TRACES_INDEX_ID: &str = "otel-traces-v0_9"; @@ -703,7 +702,7 @@ impl OtlpGrpcTracesService { self.store_spans(index_id.clone(), doc_batch).await?; counter!( - parent: &OTLP_SERVICE_METRICS.ingested_spans_total, + parent: &crate::otlp::metrics::INGESTED_SPANS_TOTAL, "service" => "trace", "index" => index_id.clone(), "transport" => "grpc", @@ -711,7 +710,7 @@ impl OtlpGrpcTracesService { ) .increment(num_spans); counter!( - parent: &OTLP_SERVICE_METRICS.ingested_bytes_total, + parent: &crate::otlp::metrics::INGESTED_BYTES_TOTAL, "service" => "trace", "index" => index_id, "transport" => "grpc", @@ -789,7 +788,7 @@ impl OtlpGrpcTracesService { let start = std::time::Instant::now(); counter!( - parent: &OTLP_SERVICE_METRICS.requests_total, + parent: &crate::otlp::metrics::REQUESTS_TOTAL, "service" => "trace", "index" => index_id.clone(), "transport" => "grpc", @@ -800,7 +799,7 @@ impl OtlpGrpcTracesService { ok @ Ok(_) => (ok, "false"), err @ Err(_) => { counter!( - parent: &OTLP_SERVICE_METRICS.request_errors_total, + parent: &crate::otlp::metrics::REQUEST_ERRORS_TOTAL, "service" => "trace", "index" => index_id.clone(), "transport" => "grpc", @@ -812,7 +811,7 @@ impl OtlpGrpcTracesService { }; let elapsed = 
start.elapsed().as_secs_f64(); histogram!( - parent: &OTLP_SERVICE_METRICS.request_duration_seconds, + parent: &crate::otlp::metrics::REQUEST_DURATION_SECONDS, "service" => "trace", "index" => index_id, "transport" => "grpc", diff --git a/quickwit/quickwit-parquet-engine/src/index/accumulator.rs b/quickwit/quickwit-parquet-engine/src/index/accumulator.rs index fb02f5b1c4d..1c97bf894a2 100644 --- a/quickwit/quickwit-parquet-engine/src/index/accumulator.rs +++ b/quickwit/quickwit-parquet-engine/src/index/accumulator.rs @@ -25,7 +25,6 @@ use arrow::record_batch::RecordBatch; use tracing::{debug, info}; use super::config::ParquetIndexingConfig; -use crate::metrics::PARQUET_ENGINE_METRICS; /// Error type for index operations. #[derive(Debug, thiserror::Error)] @@ -89,10 +88,8 @@ impl ParquetBatchAccumulator { let batch_bytes = estimate_batch_bytes(&batch); // Record index metrics - PARQUET_ENGINE_METRICS.index_batches_total.increment(1); - PARQUET_ENGINE_METRICS - .index_rows_total - .increment(batch_rows as u64); + crate::metrics::INDEX_BATCHES_TOTAL.increment(1); + crate::metrics::INDEX_ROWS_TOTAL.increment(batch_rows as u64); // Merge fields into union schema before pushing (we need the schema reference) for field in batch.schema().fields() { @@ -127,9 +124,7 @@ impl ParquetBatchAccumulator { }; // Record batch processing duration - PARQUET_ENGINE_METRICS - .index_batch_duration_seconds - .record(start.elapsed().as_secs_f64()); + crate::metrics::INDEX_BATCH_DURATION_SECONDS.record(start.elapsed().as_secs_f64()); Ok(flushed) } diff --git a/quickwit/quickwit-parquet-engine/src/ingest/processor.rs b/quickwit/quickwit-parquet-engine/src/ingest/processor.rs index e23c894ed60..f028e6b2e06 100644 --- a/quickwit/quickwit-parquet-engine/src/ingest/processor.rs +++ b/quickwit/quickwit-parquet-engine/src/ingest/processor.rs @@ -21,7 +21,6 @@ use arrow::record_batch::RecordBatch; use quickwit_metrics::counter; use tracing::{debug, instrument, warn}; -use 
crate::metrics::PARQUET_ENGINE_METRICS; use crate::schema::validate_required_fields; /// Error type for ingest operations. @@ -65,7 +64,7 @@ impl ParquetIngestProcessor { pub fn process_ipc(&self, ipc_bytes: &[u8]) -> Result { // Record bytes ingested counter!( - parent: &PARQUET_ENGINE_METRICS.ingest_bytes_total, + parent: &crate::metrics::INGEST_BYTES_TOTAL, "kind" => "points", ) .increment(ipc_bytes.len() as u64); @@ -74,7 +73,7 @@ impl ParquetIngestProcessor { Ok(batch) => batch, Err(e) => { counter!( - parent: &PARQUET_ENGINE_METRICS.errors_total, + parent: &crate::metrics::ERRORS_TOTAL, "operation" => "ingest", "kind" => "points", ) @@ -85,7 +84,7 @@ impl ParquetIngestProcessor { if let Err(e) = self.validate_schema(&batch) { counter!( - parent: &PARQUET_ENGINE_METRICS.errors_total, + parent: &crate::metrics::ERRORS_TOTAL, "operation" => "ingest", "kind" => "points", ) diff --git a/quickwit/quickwit-parquet-engine/src/ingest/sketch_processor.rs b/quickwit/quickwit-parquet-engine/src/ingest/sketch_processor.rs index ec599001051..8913200d474 100644 --- a/quickwit/quickwit-parquet-engine/src/ingest/sketch_processor.rs +++ b/quickwit/quickwit-parquet-engine/src/ingest/sketch_processor.rs @@ -20,7 +20,6 @@ use quickwit_metrics::counter; use tracing::{debug, instrument, warn}; use super::processor::IngestError; -use crate::metrics::PARQUET_ENGINE_METRICS; use crate::schema::validate_required_sketch_fields; /// Processor that converts Arrow IPC bytes to RecordBatch for DDSketch data. 
@@ -43,7 +42,7 @@ impl SketchParquetIngestProcessor { #[instrument(skip(self, ipc_bytes), fields(bytes_len = ipc_bytes.len()))] pub fn process_ipc(&self, ipc_bytes: &[u8]) -> Result { counter!( - parent: &PARQUET_ENGINE_METRICS.ingest_bytes_total, + parent: &crate::metrics::INGEST_BYTES_TOTAL, "kind" => "sketches", ) .increment(ipc_bytes.len() as u64); @@ -52,7 +51,7 @@ impl SketchParquetIngestProcessor { Ok(batch) => batch, Err(err) => { counter!( - parent: &PARQUET_ENGINE_METRICS.errors_total, + parent: &crate::metrics::ERRORS_TOTAL, "operation" => "ingest", "kind" => "sketches", ) @@ -63,7 +62,7 @@ impl SketchParquetIngestProcessor { if let Err(err) = self.validate_schema(&batch) { counter!( - parent: &PARQUET_ENGINE_METRICS.errors_total, + parent: &crate::metrics::ERRORS_TOTAL, "operation" => "ingest", "kind" => "sketches", ) @@ -73,7 +72,7 @@ impl SketchParquetIngestProcessor { if let Err(err) = self.validate_sketch_arrays(&batch) { counter!( - parent: &PARQUET_ENGINE_METRICS.errors_total, + parent: &crate::metrics::ERRORS_TOTAL, "operation" => "ingest", "kind" => "sketches", ) diff --git a/quickwit/quickwit-parquet-engine/src/metrics.rs b/quickwit/quickwit-parquet-engine/src/metrics.rs index d2ce2f37d45..dc9897658ba 100644 --- a/quickwit/quickwit-parquet-engine/src/metrics.rs +++ b/quickwit/quickwit-parquet-engine/src/metrics.rs @@ -29,31 +29,7 @@ fn duration_buckets() -> Vec { ] } -/// Metrics for the Pomsky Parquet Engine. -#[derive(Clone)] -pub struct ParquetEngineMetrics { - /// Total number of batches accumulated during indexing. - pub index_batches_total: Counter, - /// Total number of rows accumulated during indexing. - pub index_rows_total: Counter, - /// Total number of bytes received from IPC payloads during ingestion, by kind - /// (points/sketches). - pub ingest_bytes_total: Counter, - /// Histogram of add_batch durations (seconds), including any triggered flush. 
- pub index_batch_duration_seconds: Histogram, - /// Total number of splits written to storage. - pub splits_written_total: Counter, - /// Total bytes written to split files. - pub splits_bytes_written: Counter, - /// Histogram of query execution durations (seconds). - pub query_duration_seconds: Histogram, - /// Total number of rows returned from queries. - pub query_rows_returned: Counter, - /// Errors by operation type and kind (points/sketches). - pub errors_total: Counter, -} - -static INDEX_BATCHES_TOTAL: LazyLock = LazyLock::new(|| { +pub(crate) static INDEX_BATCHES_TOTAL: LazyLock = LazyLock::new(|| { counter!( name: "index_batches_total", description: "Total number of batches accumulated during indexing.", @@ -61,7 +37,7 @@ static INDEX_BATCHES_TOTAL: LazyLock = LazyLock::new(|| { ) }); -static INDEX_ROWS_TOTAL: LazyLock = LazyLock::new(|| { +pub(crate) static INDEX_ROWS_TOTAL: LazyLock = LazyLock::new(|| { counter!( name: "index_rows_total", description: "Total number of rows accumulated during indexing.", @@ -69,7 +45,7 @@ static INDEX_ROWS_TOTAL: LazyLock = LazyLock::new(|| { ) }); -static INGEST_BYTES_TOTAL: LazyLock = LazyLock::new(|| { +pub(crate) static INGEST_BYTES_TOTAL: LazyLock = LazyLock::new(|| { counter!( name: "ingest_bytes_total", description: "Total number of bytes received from IPC payloads during ingestion.", @@ -77,7 +53,7 @@ static INGEST_BYTES_TOTAL: LazyLock = LazyLock::new(|| { ) }); -static INDEX_BATCH_DURATION_SECONDS: LazyLock = LazyLock::new(|| { +pub(crate) static INDEX_BATCH_DURATION_SECONDS: LazyLock = LazyLock::new(|| { histogram!( name: "index_batch_duration_seconds", description: "Histogram of add_batch durations in seconds, including any triggered flush.", @@ -86,7 +62,8 @@ static INDEX_BATCH_DURATION_SECONDS: LazyLock = LazyLock::new(|| { ) }); -static SPLITS_WRITTEN_TOTAL: LazyLock = LazyLock::new(|| { +#[allow(dead_code)] +pub(crate) static SPLITS_WRITTEN_TOTAL: LazyLock = LazyLock::new(|| { counter!( name: 
"splits_written_total", description: "Total number of splits written to storage.", @@ -94,7 +71,8 @@ static SPLITS_WRITTEN_TOTAL: LazyLock = LazyLock::new(|| { ) }); -static SPLITS_BYTES_WRITTEN: LazyLock = LazyLock::new(|| { +#[allow(dead_code)] +pub(crate) static SPLITS_BYTES_WRITTEN: LazyLock = LazyLock::new(|| { counter!( name: "splits_bytes_written", description: "Total bytes written to split files.", @@ -102,7 +80,8 @@ static SPLITS_BYTES_WRITTEN: LazyLock = LazyLock::new(|| { ) }); -static QUERY_DURATION_SECONDS: LazyLock = LazyLock::new(|| { +#[allow(dead_code)] +pub(crate) static QUERY_DURATION_SECONDS: LazyLock = LazyLock::new(|| { histogram!( name: "query_duration_seconds", description: "Histogram of query execution durations in seconds.", @@ -111,7 +90,8 @@ static QUERY_DURATION_SECONDS: LazyLock = LazyLock::new(|| { ) }); -static QUERY_ROWS_RETURNED: LazyLock = LazyLock::new(|| { +#[allow(dead_code)] +pub(crate) static QUERY_ROWS_RETURNED: LazyLock = LazyLock::new(|| { counter!( name: "query_rows_returned", description: "Total number of rows returned from queries.", @@ -119,30 +99,10 @@ static QUERY_ROWS_RETURNED: LazyLock = LazyLock::new(|| { ) }); -static ERRORS_TOTAL: LazyLock = LazyLock::new(|| { +pub(crate) static ERRORS_TOTAL: LazyLock = LazyLock::new(|| { counter!( name: "errors_total", description: "Total errors by operation type and kind.", subsystem: "metrics_engine", ) }); - -impl Default for ParquetEngineMetrics { - fn default() -> Self { - Self { - index_batches_total: INDEX_BATCHES_TOTAL.clone(), - index_rows_total: INDEX_ROWS_TOTAL.clone(), - ingest_bytes_total: INGEST_BYTES_TOTAL.clone(), - index_batch_duration_seconds: INDEX_BATCH_DURATION_SECONDS.clone(), - splits_written_total: SPLITS_WRITTEN_TOTAL.clone(), - splits_bytes_written: SPLITS_BYTES_WRITTEN.clone(), - query_duration_seconds: QUERY_DURATION_SECONDS.clone(), - query_rows_returned: QUERY_ROWS_RETURNED.clone(), - errors_total: ERRORS_TOTAL.clone(), - } - } -} - -/// Global 
metrics instance for the metrics engine. -pub static PARQUET_ENGINE_METRICS: LazyLock = - LazyLock::new(ParquetEngineMetrics::default); diff --git a/quickwit/quickwit-search/src/leaf.rs b/quickwit/quickwit-search/src/leaf.rs index 5d07c45c951..38fb94793e0 100644 --- a/quickwit/quickwit-search/src/leaf.rs +++ b/quickwit/quickwit-search/src/leaf.rs @@ -529,7 +529,7 @@ async fn leaf_search_single_split( let split_id = split.split_id.to_string(); let byte_range_cache = - ByteRangeCache::with_infinite_capacity(&quickwit_storage::STORAGE_METRICS.shortlived_cache); + ByteRangeCache::with_infinite_capacity(&quickwit_storage::SHORTLIVED_CACHE); let (index, hot_directory) = open_index_with_caches( &ctx.searcher_context, storage, @@ -591,9 +591,7 @@ async fn leaf_search_single_split( "current leaf search is consuming more memory than the initial allocation" ); } - crate::SEARCH_METRICS - .leaf_search_single_split_warmup_num_bytes - .record(warmup_size.as_u64() as f64); + crate::metrics::LEAF_SEARCH_SINGLE_SPLIT_WARMUP_NUM_BYTES.record(warmup_size.as_u64() as f64); search_permit.update_memory_usage(warmup_size); search_permit.free_warmup_slot(); @@ -1822,7 +1820,7 @@ impl SplitSearchState { impl Drop for SplitSearchStateGuard { fn drop(&mut self) { self.state - .increment(&crate::metrics::SEARCH_METRICS.split_search_outcome_total); + .increment(&crate::metrics::SPLIT_SEARCH_OUTCOME_TOTAL); self.state .increment(&self.local_split_search_outcome_counters); } @@ -1863,9 +1861,7 @@ async fn leaf_search_single_split_wrapper( split: SplitIdAndFooterOffsets, mut search_permit: SearchPermit, ) { - let timer = crate::SEARCH_METRICS - .leaf_search_split_duration_secs - .start_timer(); + let timer = crate::metrics::LEAF_SEARCH_SPLIT_DURATION_SECS.start_timer(); let leaf_search_single_split_opt_res: crate::Result> = leaf_search_single_split( request, diff --git a/quickwit/quickwit-search/src/leaf_cache.rs b/quickwit/quickwit-search/src/leaf_cache.rs index cc4d27e464e..3b492c1342a 100644 
--- a/quickwit/quickwit-search/src/leaf_cache.rs +++ b/quickwit/quickwit-search/src/leaf_cache.rs @@ -48,7 +48,7 @@ impl LeafSearchCache { LeafSearchCache { content: MemorySizedCache::from_config( config, - &quickwit_storage::STORAGE_METRICS.partial_request_cache, + &quickwit_storage::PARTIAL_REQUEST_CACHE, ), } } @@ -195,10 +195,7 @@ pub struct PredicateCacheImpl { impl PredicateCacheImpl { pub fn new(config: &CacheConfig) -> Self { PredicateCacheImpl { - content: MemorySizedCache::from_config( - config, - &quickwit_storage::STORAGE_METRICS.predicate_cache, - ), + content: MemorySizedCache::from_config(config, &quickwit_storage::PREDICATE_CACHE), } } } diff --git a/quickwit/quickwit-search/src/lib.rs b/quickwit/quickwit-search/src/lib.rs index f7989efc3ab..7efa21e22fa 100644 --- a/quickwit/quickwit-search/src/lib.rs +++ b/quickwit/quickwit-search/src/lib.rs @@ -47,7 +47,6 @@ mod search_permit_provider; mod tests; pub use collector::QuickwitAggregations; -use metrics::SEARCH_METRICS; use quickwit_common::thread_pool::ThreadPool; use quickwit_common::tower::Pool; use quickwit_doc_mapper::DocMapper; diff --git a/quickwit/quickwit-search/src/list_fields_cache.rs b/quickwit/quickwit-search/src/list_fields_cache.rs index 531e1015e4e..6066d6cb9f6 100644 --- a/quickwit/quickwit-search/src/list_fields_cache.rs +++ b/quickwit/quickwit-search/src/list_fields_cache.rs @@ -31,7 +31,7 @@ impl ListFieldsCache { ListFieldsCache { content: MemorySizedCache::from_config( config, - &quickwit_storage::STORAGE_METRICS.partial_request_cache, + &quickwit_storage::PARTIAL_REQUEST_CACHE, ), } } diff --git a/quickwit/quickwit-search/src/list_terms.rs b/quickwit/quickwit-search/src/list_terms.rs index 0b6bb51c88f..b3d6ef8bb6a 100644 --- a/quickwit/quickwit-search/src/list_terms.rs +++ b/quickwit/quickwit-search/src/list_terms.rs @@ -214,8 +214,7 @@ async fn leaf_list_terms_single_split( storage: Arc, split: SplitIdAndFooterOffsets, ) -> crate::Result { - let cache = - 
ByteRangeCache::with_infinite_capacity(&quickwit_storage::STORAGE_METRICS.shortlived_cache); + let cache = ByteRangeCache::with_infinite_capacity(&quickwit_storage::SHORTLIVED_CACHE); let (index, _) = open_index_with_caches(searcher_context, storage, &split, None, Some(cache)).await?; let split_schema = index.schema(); @@ -354,12 +353,8 @@ pub async fn leaf_list_terms( async move { let leaf_split_search_permit = search_permit_recv.await; // TODO dedicated counter and timer? - crate::SEARCH_METRICS - .leaf_list_terms_splits_total - .increment(1); - let timer = crate::SEARCH_METRICS - .leaf_search_split_duration_secs - .start_timer(); + crate::metrics::LEAF_LIST_TERMS_SPLITS_TOTAL.increment(1); + let timer = crate::metrics::LEAF_SEARCH_SPLIT_DURATION_SECS.start_timer(); let leaf_search_single_split_res = leaf_list_terms_single_split( &searcher_context_clone, request, diff --git a/quickwit/quickwit-search/src/metrics.rs b/quickwit/quickwit-search/src/metrics.rs index f7da31bd6c6..c811a8b0c1e 100644 --- a/quickwit/quickwit-search/src/metrics.rs +++ b/quickwit/quickwit-search/src/metrics.rs @@ -158,23 +158,6 @@ impl SplitSearchOutcomeCounters { } } -pub struct SearchMetrics { - pub root_search_requests_total: Counter, - pub root_search_request_duration_seconds: Histogram, - pub root_search_targeted_splits: Histogram, - pub leaf_search_requests_total: Counter, - pub leaf_search_request_duration_seconds: Histogram, - pub leaf_search_targeted_splits: Histogram, - pub leaf_list_terms_splits_total: Counter, - pub split_search_outcome_total: SplitSearchOutcomeCounters, - pub leaf_search_split_duration_secs: Histogram, - pub job_assigned_total: Counter, - pub leaf_search_single_split_tasks_pending: Gauge, - pub leaf_search_single_split_tasks_ongoing: Gauge, - pub leaf_search_single_split_warmup_num_bytes: Histogram, - pub searcher_local_kv_store_size_bytes: Gauge, -} - /// From 0.008s to 131.072s fn duration_buckets() -> Vec { exponential_buckets(0.008, 2.0, 15).unwrap() @@ 
-216,7 +199,10 @@ static SPLIT_SEARCH_OUTCOME: LazyLock = LazyLock::new(|| { ) }); -static LEAF_SEARCH_SINGLE_SPLIT_TASKS: LazyLock = LazyLock::new(|| { +pub(crate) static SPLIT_SEARCH_OUTCOME_TOTAL: LazyLock = + LazyLock::new(SplitSearchOutcomeCounters::new_registered); + +static LEAF_SEARCH_SINGLE_SPLIT_TASKS_BASE: LazyLock = LazyLock::new(|| { gauge!( name: "leaf_search_single_split_tasks", description: "Number of single split search tasks pending or ongoing", @@ -224,7 +210,13 @@ static LEAF_SEARCH_SINGLE_SPLIT_TASKS: LazyLock = LazyLock::new(|| { ) }); -static ROOT_SEARCH_REQUESTS_TOTAL: LazyLock = LazyLock::new(|| { +pub(crate) static LEAF_SEARCH_SINGLE_SPLIT_TASKS_ONGOING: LazyLock = + LazyLock::new(|| gauge!(parent: &*LEAF_SEARCH_SINGLE_SPLIT_TASKS_BASE, "status" => "ongoing")); + +pub(crate) static LEAF_SEARCH_SINGLE_SPLIT_TASKS_PENDING: LazyLock = + LazyLock::new(|| gauge!(parent: &*LEAF_SEARCH_SINGLE_SPLIT_TASKS_BASE, "status" => "pending")); + +static ROOT_SEARCH_REQUESTS_TOTAL_BASE: LazyLock = LazyLock::new(|| { counter!( name: "root_search_requests_total", description: "Total number of root search gRPC requests processed.", @@ -232,7 +224,10 @@ static ROOT_SEARCH_REQUESTS_TOTAL: LazyLock = LazyLock::new(|| { ) }); -static ROOT_SEARCH_REQUEST_DURATION_SECONDS: LazyLock = LazyLock::new(|| { +pub(crate) static ROOT_SEARCH_REQUESTS_TOTAL: LazyLock = + LazyLock::new(|| counter!(parent: &*ROOT_SEARCH_REQUESTS_TOTAL_BASE, "kind" => "server")); + +static ROOT_SEARCH_REQUEST_DURATION_SECONDS_BASE: LazyLock = LazyLock::new(|| { histogram!( name: "root_search_request_duration_seconds", description: "Duration of root search gRPC requests in seconds.", @@ -241,7 +236,14 @@ static ROOT_SEARCH_REQUEST_DURATION_SECONDS: LazyLock = LazyLock::new ) }); -static ROOT_SEARCH_TARGETED_SPLITS: LazyLock = LazyLock::new(|| { +pub(crate) static ROOT_SEARCH_REQUEST_DURATION_SECONDS: LazyLock = LazyLock::new(|| { + histogram!( + parent: 
&*ROOT_SEARCH_REQUEST_DURATION_SECONDS_BASE, + "kind" => "server", + ) +}); + +pub(crate) static ROOT_SEARCH_TARGETED_SPLITS: LazyLock = LazyLock::new(|| { histogram!( name: "root_search_targeted_splits", description: "Number of splits targeted per root search GRPC request.", @@ -250,7 +252,7 @@ static ROOT_SEARCH_TARGETED_SPLITS: LazyLock = LazyLock::new(|| { ) }); -static LEAF_SEARCH_REQUESTS_TOTAL: LazyLock = LazyLock::new(|| { +static LEAF_SEARCH_REQUESTS_TOTAL_BASE: LazyLock = LazyLock::new(|| { counter!( name: "leaf_search_requests_total", description: "Total number of leaf search gRPC requests processed.", @@ -258,7 +260,10 @@ static LEAF_SEARCH_REQUESTS_TOTAL: LazyLock = LazyLock::new(|| { ) }); -static LEAF_SEARCH_REQUEST_DURATION_SECONDS: LazyLock = LazyLock::new(|| { +pub(crate) static LEAF_SEARCH_REQUESTS_TOTAL: LazyLock = + LazyLock::new(|| counter!(parent: &*LEAF_SEARCH_REQUESTS_TOTAL_BASE, "kind" => "server")); + +static LEAF_SEARCH_REQUEST_DURATION_SECONDS_BASE: LazyLock = LazyLock::new(|| { histogram!( name: "leaf_search_request_duration_seconds", description: "Duration of leaf search gRPC requests in seconds.", @@ -267,7 +272,14 @@ static LEAF_SEARCH_REQUEST_DURATION_SECONDS: LazyLock = LazyLock::new ) }); -static LEAF_SEARCH_TARGETED_SPLITS: LazyLock = LazyLock::new(|| { +pub(crate) static LEAF_SEARCH_REQUEST_DURATION_SECONDS: LazyLock = LazyLock::new(|| { + histogram!( + parent: &*LEAF_SEARCH_REQUEST_DURATION_SECONDS_BASE, + "kind" => "server", + ) +}); + +pub(crate) static LEAF_SEARCH_TARGETED_SPLITS: LazyLock = LazyLock::new(|| { histogram!( name: "leaf_search_targeted_splits", description: "Number of splits targeted per leaf search GRPC request.", @@ -276,7 +288,7 @@ static LEAF_SEARCH_TARGETED_SPLITS: LazyLock = LazyLock::new(|| { ) }); -static LEAF_LIST_TERMS_SPLITS_TOTAL: LazyLock = LazyLock::new(|| { +pub(crate) static LEAF_LIST_TERMS_SPLITS_TOTAL: LazyLock = LazyLock::new(|| { counter!( name: "leaf_list_terms_splits_total", description: 
"Number of list terms splits total", @@ -284,7 +296,7 @@ static LEAF_LIST_TERMS_SPLITS_TOTAL: LazyLock = LazyLock::new(|| { ) }); -static LEAF_SEARCH_SPLIT_DURATION_SECS: LazyLock = LazyLock::new(|| { +pub(crate) static LEAF_SEARCH_SPLIT_DURATION_SECS: LazyLock = LazyLock::new(|| { histogram!( name: "leaf_search_split_duration_secs", description: "Number of seconds required to run a leaf search over a single split. The timer starts after the semaphore is obtained.", @@ -293,16 +305,18 @@ static LEAF_SEARCH_SPLIT_DURATION_SECS: LazyLock = LazyLock::new(|| { ) }); -static LEAF_SEARCH_SINGLE_SPLIT_WARMUP_NUM_BYTES: LazyLock = LazyLock::new(|| { - histogram!( - name: "leaf_search_single_split_warmup_num_bytes", - description: "Size of the short lived cache for a single split once the warmup is done.", - subsystem: "search", - buckets: pseudo_exponential_bytes_buckets(), - ) -}); - -static JOB_ASSIGNED_TOTAL: LazyLock = LazyLock::new(|| { +pub(crate) static LEAF_SEARCH_SINGLE_SPLIT_WARMUP_NUM_BYTES: LazyLock = LazyLock::new( + || { + histogram!( + name: "leaf_search_single_split_warmup_num_bytes", + description: "Size of the short lived cache for a single split once the warmup is done.", + subsystem: "search", + buckets: pseudo_exponential_bytes_buckets(), + ) + }, +); + +pub(crate) static JOB_ASSIGNED_TOTAL: LazyLock = LazyLock::new(|| { counter!( name: "job_assigned_total", description: "Number of job assigned to searchers, per affinity rank.", @@ -310,56 +324,10 @@ static JOB_ASSIGNED_TOTAL: LazyLock = LazyLock::new(|| { ) }); -static SEARCHER_LOCAL_KV_STORE_SIZE_BYTES: LazyLock = LazyLock::new(|| { +pub(crate) static SEARCHER_LOCAL_KV_STORE_SIZE_BYTES: LazyLock = LazyLock::new(|| { gauge!( name: "searcher_local_kv_store_size_bytes", description: "Size of the searcher kv store in bytes. 
This store is used to cache scroll contexts.", subsystem: "search", ) }); - -impl Default for SearchMetrics { - fn default() -> Self { - SearchMetrics { - root_search_requests_total: counter!( - parent: &*ROOT_SEARCH_REQUESTS_TOTAL, - "kind" => "server", - ), - root_search_request_duration_seconds: histogram!( - parent: &*ROOT_SEARCH_REQUEST_DURATION_SECONDS, - "kind" => "server", - ), - root_search_targeted_splits: ROOT_SEARCH_TARGETED_SPLITS.clone(), - leaf_search_requests_total: counter!( - parent: &*LEAF_SEARCH_REQUESTS_TOTAL, - "kind" => "server", - ), - leaf_search_request_duration_seconds: histogram!( - parent: &*LEAF_SEARCH_REQUEST_DURATION_SECONDS, - "kind" => "server", - ), - leaf_search_targeted_splits: LEAF_SEARCH_TARGETED_SPLITS.clone(), - - leaf_list_terms_splits_total: LEAF_LIST_TERMS_SPLITS_TOTAL.clone(), - split_search_outcome_total: SplitSearchOutcomeCounters::new_registered(), - - leaf_search_split_duration_secs: LEAF_SEARCH_SPLIT_DURATION_SECS.clone(), - leaf_search_single_split_tasks_ongoing: gauge!( - parent: &*LEAF_SEARCH_SINGLE_SPLIT_TASKS, - "status" => "ongoing", - ), - leaf_search_single_split_tasks_pending: gauge!( - parent: &*LEAF_SEARCH_SINGLE_SPLIT_TASKS, - "status" => "pending", - ), - leaf_search_single_split_warmup_num_bytes: LEAF_SEARCH_SINGLE_SPLIT_WARMUP_NUM_BYTES - .clone(), - job_assigned_total: JOB_ASSIGNED_TOTAL.clone(), - searcher_local_kv_store_size_bytes: SEARCHER_LOCAL_KV_STORE_SIZE_BYTES.clone(), - } - } -} - -/// `SEARCH_METRICS` exposes a bunch a set of storage/cache related metrics through a prometheus -/// endpoint. 
-pub static SEARCH_METRICS: LazyLock = LazyLock::new(SearchMetrics::default); diff --git a/quickwit/quickwit-search/src/metrics_trackers.rs b/quickwit/quickwit-search/src/metrics_trackers.rs index 32f22b78086..fad66fe1c3e 100644 --- a/quickwit/quickwit-search/src/metrics_trackers.rs +++ b/quickwit/quickwit-search/src/metrics_trackers.rs @@ -23,7 +23,6 @@ use quickwit_metrics::{counter, histogram}; use quickwit_proto::search::LeafSearchResponse; use crate::SearchError; -use crate::metrics::SEARCH_METRICS; // root @@ -71,17 +70,17 @@ impl PinnedDrop for RootSearchMetricsFuture { }; counter!( - parent: &SEARCH_METRICS.root_search_requests_total, + parent: &crate::metrics::ROOT_SEARCH_REQUESTS_TOTAL, "status" => status, ) .increment(1); histogram!( - parent: &SEARCH_METRICS.root_search_request_duration_seconds, + parent: &crate::metrics::ROOT_SEARCH_REQUEST_DURATION_SECONDS, "status" => status, ) .record(self.start.elapsed().as_secs_f64()); histogram!( - parent: &SEARCH_METRICS.root_search_targeted_splits, + parent: &crate::metrics::ROOT_SEARCH_TARGETED_SPLITS, "status" => status, ) .record(num_targeted_splits as f64); @@ -122,17 +121,17 @@ where F: Future> fn drop(self: Pin<&mut Self>) { let status = self.status.unwrap_or("cancelled"); counter!( - parent: &SEARCH_METRICS.leaf_search_requests_total, + parent: &crate::metrics::LEAF_SEARCH_REQUESTS_TOTAL, "status" => status, ) .increment(1); histogram!( - parent: &SEARCH_METRICS.leaf_search_request_duration_seconds, + parent: &crate::metrics::LEAF_SEARCH_REQUEST_DURATION_SECONDS, "status" => status, ) .record(self.start.elapsed().as_secs_f64()); histogram!( - parent: &SEARCH_METRICS.leaf_search_targeted_splits, + parent: &crate::metrics::LEAF_SEARCH_TARGETED_SPLITS, "status" => status, ) .record(self.targeted_splits as f64); diff --git a/quickwit/quickwit-search/src/scroll_context.rs b/quickwit/quickwit-search/src/scroll_context.rs index 786e3e4c7eb..7f200ca3167 100644 --- a/quickwit/quickwit-search/src/scroll_context.rs 
+++ b/quickwit/quickwit-search/src/scroll_context.rs @@ -149,7 +149,7 @@ impl Default for MiniKV { impl MiniKV { pub async fn put(&self, key: Vec, payload: Vec, ttl: Duration) { let mut metric_guard = - GaugeGuard::from_gauge(&crate::SEARCH_METRICS.searcher_local_kv_store_size_bytes); + GaugeGuard::from_gauge(&crate::metrics::SEARCHER_LOCAL_KV_STORE_SIZE_BYTES); metric_guard.increment(payload.len() as f64); let mut cache_lock = self.ttl_with_cache.write().await; cache_lock.insert( diff --git a/quickwit/quickwit-search/src/search_job_placer.rs b/quickwit/quickwit-search/src/search_job_placer.rs index d5d6961f7f4..f5993308d9f 100644 --- a/quickwit/quickwit-search/src/search_job_placer.rs +++ b/quickwit/quickwit-search/src/search_job_placer.rs @@ -27,7 +27,7 @@ use quickwit_metrics::counter; use quickwit_proto::search::{ReportSplit, ReportSplitsRequest}; use tracing::{info, warn}; -use crate::{SEARCH_METRICS, SearchJob, SearchServiceClient, SearcherPool}; +use crate::{SearchJob, SearchServiceClient, SearcherPool}; /// Job. /// The unit in which distributed search is performed. 
@@ -219,7 +219,7 @@ impl SearchJobPlacer { _ => "> 1", }; counter!( - parent: &SEARCH_METRICS.job_assigned_total, + parent: &crate::metrics::JOB_ASSIGNED_TOTAL, "affinity" => metric_node_idx, ) .increment(1); diff --git a/quickwit/quickwit-search/src/search_permit_provider.rs b/quickwit/quickwit-search/src/search_permit_provider.rs index 33729cfb7c9..c30e5365f8e 100644 --- a/quickwit/quickwit-search/src/search_permit_provider.rs +++ b/quickwit/quickwit-search/src/search_permit_provider.rs @@ -332,9 +332,8 @@ impl SearchPermitActor { fn assign_available_permits(&mut self) { while let Some(permit_request) = self.pop_next_request_if_serviceable() { - let mut ongoing_gauge_guard = GaugeGuard::from_gauge( - &crate::SEARCH_METRICS.leaf_search_single_split_tasks_ongoing, - ); + let mut ongoing_gauge_guard = + GaugeGuard::from_gauge(&crate::metrics::LEAF_SEARCH_SINGLE_SPLIT_TASKS_ONGOING); ongoing_gauge_guard.increment(1.0); self.total_memory_allocated += permit_request.permit_size; self.num_warmup_slots_available -= 1; @@ -350,8 +349,7 @@ impl SearchPermitActor { // created SearchPermit which releases the resources .ok(); } - crate::SEARCH_METRICS - .leaf_search_single_split_tasks_pending + crate::metrics::LEAF_SEARCH_SINGLE_SPLIT_TASKS_PENDING .set(self.permits_requests.len() as f64); } } diff --git a/quickwit/quickwit-search/src/service.rs b/quickwit/quickwit-search/src/service.rs index 5e04e6a4dcf..37cc3ecd483 100644 --- a/quickwit/quickwit-search/src/service.rs +++ b/quickwit/quickwit-search/src/service.rs @@ -459,7 +459,7 @@ impl SearcherContext { ) -> Self { let global_split_footer_cache = MemorySizedCache::from_config( &searcher_config.split_footer_cache, - &quickwit_storage::STORAGE_METRICS.split_footer_cache, + &quickwit_storage::SPLIT_FOOTER_CACHE, ); let leaf_search_split_semaphore = SearchPermitProvider::new( searcher_config.max_num_concurrent_split_searches, diff --git a/quickwit/quickwit-serve/src/decompression.rs 
b/quickwit/quickwit-serve/src/decompression.rs index f63ea806922..7492d8cece2 100644 --- a/quickwit/quickwit-serve/src/decompression.rs +++ b/quickwit/quickwit-serve/src/decompression.rs @@ -17,7 +17,6 @@ use std::sync::LazyLock; use bytes::Bytes; use flate2::read::{MultiGzDecoder, ZlibDecoder}; -use quickwit_common::metrics::MEMORY_METRICS; use quickwit_common::thread_pool::run_cpu_intensive; use quickwit_metrics::GaugeGuard; use thiserror::Error; @@ -115,7 +114,8 @@ pub(crate) struct Body { impl Body { pub fn new(content: Bytes, load_shield_permit: LoadShieldPermit) -> Body { - let mut gauge_guard = GaugeGuard::from_gauge(&MEMORY_METRICS.in_flight.rest_server); + let mut gauge_guard = + GaugeGuard::from_gauge(&quickwit_common::metrics::IN_FLIGHT_REST_SERVER); gauge_guard.increment(content.len() as f64); Body { content, diff --git a/quickwit/quickwit-serve/src/lib.rs b/quickwit/quickwit-serve/src/lib.rs index 17cb4501a19..cae1f3c37cc 100644 --- a/quickwit/quickwit-serve/src/lib.rs +++ b/quickwit/quickwit-serve/src/lib.rs @@ -127,7 +127,6 @@ use warp::{Filter, Rejection}; pub use crate::build_info::{BuildInfo, RuntimeInfo}; pub use crate::index_api::{ListSplitsQueryParams, ListSplitsResponse}; pub use crate::ingest_api::{RestIngestResponse, RestParseFailure}; -pub use crate::metrics::SERVE_METRICS; use crate::rate_modulator::RateModulator; #[cfg(test)] use crate::rest::recover_fn; @@ -926,7 +925,7 @@ fn ingester_service_layer_stack( PersistCircuitBreakerEvaluator.make_layer( 3, Duration::from_millis(500), - crate::metrics::SERVE_METRICS.circuit_break_total.clone(), + crate::metrics::CIRCUIT_BREAK_TOTAL.clone(), ), ) .stack_open_replication_stream_layer(quickwit_common::tower::OneTaskPerCallLayer) diff --git a/quickwit/quickwit-serve/src/load_shield.rs b/quickwit/quickwit-serve/src/load_shield.rs index 03e0154e26d..52b14cd4c7f 100644 --- a/quickwit/quickwit-serve/src/load_shield.rs +++ b/quickwit/quickwit-serve/src/load_shield.rs @@ -44,11 +44,11 @@ impl LoadShield 
{ let in_flight_semaphore_opt = max_in_flight_opt.map(Semaphore::new); let concurrency_semaphore_opt = max_concurrency_opt.map(Semaphore::new); let pending_gauge = gauge!( - parent: &crate::metrics::SERVE_METRICS.pending_requests, + parent: &crate::metrics::PENDING_REQUESTS, "endpoint_group" => endpoint_group, ); let ongoing_gauge = gauge!( - parent: &crate::metrics::SERVE_METRICS.ongoing_requests, + parent: &crate::metrics::ONGOING_REQUESTS, "endpoint_group" => endpoint_group, ); LoadShield { diff --git a/quickwit/quickwit-serve/src/metrics.rs b/quickwit/quickwit-serve/src/metrics.rs index 9757439289c..79e8aac6861 100644 --- a/quickwit/quickwit-serve/src/metrics.rs +++ b/quickwit/quickwit-serve/src/metrics.rs @@ -17,15 +17,7 @@ use std::sync::LazyLock; use quickwit_common::metrics::exponential_buckets; use quickwit_metrics::{Counter, Gauge, Histogram, counter, gauge, histogram}; -pub struct ServeMetrics { - pub http_requests_total: Counter, - pub request_duration_secs: Histogram, - pub ongoing_requests: Gauge, - pub pending_requests: Gauge, - pub circuit_break_total: Counter, -} - -static HTTP_REQUESTS_TOTAL: LazyLock = LazyLock::new(|| { +pub(crate) static HTTP_REQUESTS_TOTAL: LazyLock = LazyLock::new(|| { counter!( name: "http_requests_total", description: "Total number of HTTP requests processed.", @@ -33,7 +25,7 @@ static HTTP_REQUESTS_TOTAL: LazyLock = LazyLock::new(|| { ) }); -static REQUEST_DURATION_SECS: LazyLock = LazyLock::new(|| { +pub(crate) static REQUEST_DURATION_SECS: LazyLock = LazyLock::new(|| { histogram!( name: "request_duration_secs", description: "Response time in seconds", @@ -43,7 +35,7 @@ static REQUEST_DURATION_SECS: LazyLock = LazyLock::new(|| { ) }); -static ONGOING_REQUESTS: LazyLock = LazyLock::new(|| { +pub(crate) static ONGOING_REQUESTS: LazyLock = LazyLock::new(|| { gauge!( name: "ongoing_requests", description: "Number of ongoing requests.", @@ -51,7 +43,7 @@ static ONGOING_REQUESTS: LazyLock = LazyLock::new(|| { ) }); -static 
PENDING_REQUESTS: LazyLock = LazyLock::new(|| { +pub(crate) static PENDING_REQUESTS: LazyLock = LazyLock::new(|| { gauge!( name: "pending_requests", description: "Number of pending requests.", @@ -59,25 +51,10 @@ static PENDING_REQUESTS: LazyLock = LazyLock::new(|| { ) }); -static CIRCUIT_BREAK_TOTAL: LazyLock = LazyLock::new(|| { +pub(crate) static CIRCUIT_BREAK_TOTAL: LazyLock = LazyLock::new(|| { counter!( name: "circuit_break_total", description: "Circuit breaker counter", subsystem: "grpc", ) }); - -impl Default for ServeMetrics { - fn default() -> Self { - ServeMetrics { - http_requests_total: HTTP_REQUESTS_TOTAL.clone(), - request_duration_secs: REQUEST_DURATION_SECS.clone(), - ongoing_requests: ONGOING_REQUESTS.clone(), - pending_requests: PENDING_REQUESTS.clone(), - circuit_break_total: CIRCUIT_BREAK_TOTAL.clone(), - } - } -} - -/// Serve counters exposes a bunch a set of metrics about the request received to quickwit. -pub static SERVE_METRICS: LazyLock = LazyLock::new(ServeMetrics::default); diff --git a/quickwit/quickwit-serve/src/rest.rs b/quickwit/quickwit-serve/src/rest.rs index 21c5edde9f7..ed33cde48c6 100644 --- a/quickwit/quickwit-serve/src/rest.rs +++ b/quickwit/quickwit-serve/src/rest.rs @@ -141,13 +141,13 @@ pub(crate) async fn start_rest_server( let method = info.method().as_str().to_string(); let status_code = status.as_str().to_string(); histogram!( - parent: &crate::SERVE_METRICS.request_duration_secs, + parent: &crate::metrics::REQUEST_DURATION_SECS, "method" => method.clone(), "status_code" => status_code.clone(), ) .record(elapsed.as_secs_f64()); counter!( - parent: &crate::SERVE_METRICS.http_requests_total, + parent: &crate::metrics::HTTP_REQUESTS_TOTAL, "method" => method, "status_code" => status_code, ) diff --git a/quickwit/quickwit-storage/src/cache/quickwit_cache.rs b/quickwit/quickwit-storage/src/cache/quickwit_cache.rs index 20441bd5fd1..f5bc0a5d5ae 100644 --- a/quickwit/quickwit-storage/src/cache/quickwit_cache.rs +++ 
b/quickwit/quickwit-storage/src/cache/quickwit_cache.rs @@ -41,8 +41,7 @@ impl QuickwitCache { /// Creates a [`QuickwitCache`] with a cache on fast fields. pub fn new(cache_config: &CacheConfig) -> Self { let mut quickwit_cache = QuickwitCache::empty(); - let fast_field_cache_counters: &'static CacheMetrics = - &crate::STORAGE_METRICS.fast_field_cache; + let fast_field_cache_counters: &'static CacheMetrics = &crate::FAST_FIELD_CACHE; quickwit_cache.add_route( ".fast", Arc::new(SimpleCache::from_config( diff --git a/quickwit/quickwit-storage/src/file_descriptor_cache.rs b/quickwit/quickwit-storage/src/file_descriptor_cache.rs index 95aa30d4ce7..28d37f19ae8 100644 --- a/quickwit/quickwit-storage/src/file_descriptor_cache.rs +++ b/quickwit/quickwit-storage/src/file_descriptor_cache.rs @@ -88,10 +88,7 @@ impl FileDescriptorCache { Self::new( NonZeroU32::new(max_fd_limit).unwrap(), fd_cache_capacity, - crate::STORAGE_METRICS - .fd_cache_metrics - .cache_metrics - .clone(), + crate::FD_CACHE_METRICS.cache_metrics.clone(), ) } diff --git a/quickwit/quickwit-storage/src/lib.rs b/quickwit/quickwit-storage/src/lib.rs index c21ed2a0bf3..261217395b9 100644 --- a/quickwit/quickwit-storage/src/lib.rs +++ b/quickwit/quickwit-storage/src/lib.rs @@ -34,7 +34,18 @@ mod timeout_and_retry_storage; pub use debouncer::AsyncDebouncer; pub(crate) use debouncer::DebouncedStorage; -pub use self::metrics::STORAGE_METRICS; +pub use self::metrics::{ + FAST_FIELD_CACHE, FD_CACHE_METRICS, GET_SLICE_TIMEOUT_ALL_TIMEOUTS, + GET_SLICE_TIMEOUT_SUCCESS_AFTER_0_TIMEOUT, GET_SLICE_TIMEOUT_SUCCESS_AFTER_1_TIMEOUT, + GET_SLICE_TIMEOUT_SUCCESS_AFTER_2_PLUS_TIMEOUT, OBJECT_STORAGE_BULK_DELETE_REQUEST_DURATION, + OBJECT_STORAGE_BULK_DELETE_REQUESTS_TOTAL, OBJECT_STORAGE_DELETE_REQUEST_DURATION, + OBJECT_STORAGE_DELETE_REQUESTS_TOTAL, OBJECT_STORAGE_DOWNLOAD_NUM_BYTES, + OBJECT_STORAGE_GET_ERRORS_TOTAL, OBJECT_STORAGE_GET_SLICE_IN_FLIGHT_COUNT, + OBJECT_STORAGE_GET_SLICE_IN_FLIGHT_NUM_BYTES, 
OBJECT_STORAGE_GET_TOTAL, + OBJECT_STORAGE_PUT_PARTS, OBJECT_STORAGE_PUT_TOTAL, OBJECT_STORAGE_UPLOAD_NUM_BYTES, + PARTIAL_REQUEST_CACHE, PREDICATE_CACHE, SEARCHER_SPLIT_CACHE, SHORTLIVED_CACHE, + SPLIT_FOOTER_CACHE, +}; pub use self::payload::PutPayload; pub use self::storage::Storage; diff --git a/quickwit/quickwit-storage/src/metrics.rs b/quickwit/quickwit-storage/src/metrics.rs index 4e3f5986b11..dff2a6f1a4e 100644 --- a/quickwit/quickwit-storage/src/metrics.rs +++ b/quickwit/quickwit-storage/src/metrics.rs @@ -13,6 +13,7 @@ // limitations under the License. // See https://prometheus.io/docs/practices/naming/ +#![allow(missing_docs)] use std::collections::HashMap; use std::sync::{LazyLock, RwLock}; @@ -20,32 +21,6 @@ use std::sync::{LazyLock, RwLock}; use quickwit_config::CacheConfig; use quickwit_metrics::{Counter, Gauge, GaugeGuard, Histogram, counter, gauge, histogram}; -/// Counters associated to storage operations. -pub struct StorageMetrics { - pub shortlived_cache: CacheMetrics, - pub partial_request_cache: CacheMetrics, - pub predicate_cache: CacheMetrics, - pub fd_cache_metrics: CacheMetrics, - pub fast_field_cache: CacheMetrics, - pub split_footer_cache: CacheMetrics, - pub searcher_split_cache: CacheMetrics, - pub get_slice_timeout_successes: [Counter; 3], - pub get_slice_timeout_all_timeouts: Counter, - pub object_storage_get_total: Counter, - pub object_storage_get_errors_total: Counter, - pub object_storage_get_slice_in_flight_count: Gauge, - pub object_storage_get_slice_in_flight_num_bytes: Gauge, - pub object_storage_put_total: Counter, - pub object_storage_put_parts: Counter, - pub object_storage_download_num_bytes: Counter, - pub object_storage_upload_num_bytes: Counter, - - pub object_storage_delete_requests_total: Counter, - pub object_storage_bulk_delete_requests_total: Counter, - pub object_storage_delete_request_duration: Histogram, - pub object_storage_bulk_delete_request_duration: Histogram, -} - static GET_SLICE_TIMEOUT_OUTCOME_TOTAL: 
LazyLock = LazyLock::new(|| { counter!( name: "get_slice_timeout_outcome", @@ -54,6 +29,35 @@ static GET_SLICE_TIMEOUT_OUTCOME_TOTAL: LazyLock = LazyLock::new(|| { ) }); +pub static GET_SLICE_TIMEOUT_SUCCESS_AFTER_0_TIMEOUT: LazyLock = LazyLock::new(|| { + counter!( + parent: &*GET_SLICE_TIMEOUT_OUTCOME_TOTAL, + "outcome" => "success_after_0_timeout", + ) +}); + +pub static GET_SLICE_TIMEOUT_SUCCESS_AFTER_1_TIMEOUT: LazyLock = LazyLock::new(|| { + counter!( + parent: &*GET_SLICE_TIMEOUT_OUTCOME_TOTAL, + "outcome" => "success_after_1_timeout", + ) +}); + +pub static GET_SLICE_TIMEOUT_SUCCESS_AFTER_2_PLUS_TIMEOUT: LazyLock = + LazyLock::new(|| { + counter!( + parent: &*GET_SLICE_TIMEOUT_OUTCOME_TOTAL, + "outcome" => "success_after_2+_timeout", + ) + }); + +pub static GET_SLICE_TIMEOUT_ALL_TIMEOUTS: LazyLock = LazyLock::new(|| { + counter!( + parent: &*GET_SLICE_TIMEOUT_OUTCOME_TOTAL, + "outcome" => "all_timeouts", + ) +}); + static OBJECT_STORAGE_REQUESTS_TOTAL: LazyLock = LazyLock::new(|| { counter!( name: "object_storage_requests_total", @@ -71,7 +75,35 @@ static OBJECT_STORAGE_REQUEST_DURATION: LazyLock = LazyLock::new(|| { ) }); -static OBJECT_STORAGE_GET_TOTAL: LazyLock = LazyLock::new(|| { +pub static OBJECT_STORAGE_DELETE_REQUESTS_TOTAL: LazyLock = LazyLock::new(|| { + counter!( + parent: &*OBJECT_STORAGE_REQUESTS_TOTAL, + "action" => "delete_object", + ) +}); + +pub static OBJECT_STORAGE_BULK_DELETE_REQUESTS_TOTAL: LazyLock = LazyLock::new(|| { + counter!( + parent: &*OBJECT_STORAGE_REQUESTS_TOTAL, + "action" => "delete_objects", + ) +}); + +pub static OBJECT_STORAGE_DELETE_REQUEST_DURATION: LazyLock = LazyLock::new(|| { + histogram!( + parent: &*OBJECT_STORAGE_REQUEST_DURATION, + "action" => "delete_object", + ) +}); + +pub static OBJECT_STORAGE_BULK_DELETE_REQUEST_DURATION: LazyLock = LazyLock::new(|| { + histogram!( + parent: &*OBJECT_STORAGE_REQUEST_DURATION, + "action" => "delete_objects", + ) +}); + +pub static OBJECT_STORAGE_GET_TOTAL: LazyLock = 
LazyLock::new(|| { counter!( name: "object_storage_gets_total", description: "Number of objects fetched. Might be lower than get_slice_timeout_outcome if queries are debounced.", @@ -79,7 +111,7 @@ static OBJECT_STORAGE_GET_TOTAL: LazyLock = LazyLock::new(|| { ) }); -static OBJECT_STORAGE_GET_ERRORS_TOTAL: LazyLock = LazyLock::new(|| { +pub static OBJECT_STORAGE_GET_ERRORS_TOTAL: LazyLock = LazyLock::new(|| { counter!( name: "object_storage_get_errors_total", description: "Number of GetObject errors.", @@ -87,7 +119,7 @@ static OBJECT_STORAGE_GET_ERRORS_TOTAL: LazyLock = LazyLock::new(|| { ) }); -static OBJECT_STORAGE_GET_SLICE_IN_FLIGHT_COUNT: LazyLock = LazyLock::new(|| { +pub static OBJECT_STORAGE_GET_SLICE_IN_FLIGHT_COUNT: LazyLock = LazyLock::new(|| { gauge!( name: "object_storage_get_slice_in_flight_count", description: "Number of GetObject for which the memory was allocated but the download is still in progress.", @@ -95,7 +127,7 @@ static OBJECT_STORAGE_GET_SLICE_IN_FLIGHT_COUNT: LazyLock = LazyLock::new ) }); -static OBJECT_STORAGE_GET_SLICE_IN_FLIGHT_NUM_BYTES: LazyLock = LazyLock::new(|| { +pub static OBJECT_STORAGE_GET_SLICE_IN_FLIGHT_NUM_BYTES: LazyLock = LazyLock::new(|| { gauge!( name: "object_storage_get_slice_in_flight_num_bytes", description: "Memory allocated for GetObject requests that are still in progress.", @@ -103,7 +135,7 @@ static OBJECT_STORAGE_GET_SLICE_IN_FLIGHT_NUM_BYTES: LazyLock = LazyLock: ) }); -static OBJECT_STORAGE_PUT_TOTAL: LazyLock = LazyLock::new(|| { +pub static OBJECT_STORAGE_PUT_TOTAL: LazyLock = LazyLock::new(|| { counter!( name: "object_storage_puts_total", description: "Number of objects uploaded. 
May differ from object_storage_requests_parts due to multipart upload.", @@ -111,7 +143,7 @@ static OBJECT_STORAGE_PUT_TOTAL: LazyLock = LazyLock::new(|| { ) }); -static OBJECT_STORAGE_PUT_PARTS: LazyLock = LazyLock::new(|| { +pub static OBJECT_STORAGE_PUT_PARTS: LazyLock = LazyLock::new(|| { counter!( name: "object_storage_puts_parts", description: "Number of object parts uploaded.", @@ -119,7 +151,7 @@ static OBJECT_STORAGE_PUT_PARTS: LazyLock = LazyLock::new(|| { ) }); -static OBJECT_STORAGE_DOWNLOAD_NUM_BYTES: LazyLock = LazyLock::new(|| { +pub static OBJECT_STORAGE_DOWNLOAD_NUM_BYTES: LazyLock = LazyLock::new(|| { counter!( name: "object_storage_download_num_bytes", description: "Amount of data downloaded from an object storage.", @@ -127,7 +159,7 @@ static OBJECT_STORAGE_DOWNLOAD_NUM_BYTES: LazyLock = LazyLock::new(|| { ) }); -static OBJECT_STORAGE_UPLOAD_NUM_BYTES: LazyLock = LazyLock::new(|| { +pub static OBJECT_STORAGE_UPLOAD_NUM_BYTES: LazyLock = LazyLock::new(|| { counter!( name: "object_storage_upload_num_bytes", description: "Amount of data uploaded to an object storage.", @@ -135,73 +167,6 @@ static OBJECT_STORAGE_UPLOAD_NUM_BYTES: LazyLock = LazyLock::new(|| { ) }); -impl Default for StorageMetrics { - fn default() -> Self { - let get_slice_timeout_successes = [ - counter!( - parent: &*GET_SLICE_TIMEOUT_OUTCOME_TOTAL, - "outcome" => "success_after_0_timeout", - ), - counter!( - parent: &*GET_SLICE_TIMEOUT_OUTCOME_TOTAL, - "outcome" => "success_after_1_timeout", - ), - counter!( - parent: &*GET_SLICE_TIMEOUT_OUTCOME_TOTAL, - "outcome" => "success_after_2+_timeout", - ), - ]; - let get_slice_timeout_all_timeouts = counter!( - parent: &*GET_SLICE_TIMEOUT_OUTCOME_TOTAL, - "outcome" => "all_timeouts", - ); - - let object_storage_delete_requests_total = counter!( - parent: &*OBJECT_STORAGE_REQUESTS_TOTAL, - "action" => "delete_object", - ); - let object_storage_bulk_delete_requests_total = counter!( - parent: &*OBJECT_STORAGE_REQUESTS_TOTAL, - "action" => 
"delete_objects", - ); - - let object_storage_delete_request_duration = histogram!( - parent: &*OBJECT_STORAGE_REQUEST_DURATION, - "action" => "delete_object", - ); - let object_storage_bulk_delete_request_duration = histogram!( - parent: &*OBJECT_STORAGE_REQUEST_DURATION, - "action" => "delete_objects", - ); - - StorageMetrics { - fast_field_cache: CacheMetrics::for_component("fastfields"), - fd_cache_metrics: CacheMetrics::for_component("fd"), - partial_request_cache: CacheMetrics::for_component("partial_request"), - predicate_cache: CacheMetrics::for_component("predicate"), - searcher_split_cache: CacheMetrics::for_component("searcher_split"), - shortlived_cache: CacheMetrics::for_component("shortlived"), - split_footer_cache: CacheMetrics::for_component("splitfooter"), - get_slice_timeout_successes, - get_slice_timeout_all_timeouts, - object_storage_get_total: OBJECT_STORAGE_GET_TOTAL.clone(), - object_storage_get_errors_total: OBJECT_STORAGE_GET_ERRORS_TOTAL.clone(), - object_storage_get_slice_in_flight_count: OBJECT_STORAGE_GET_SLICE_IN_FLIGHT_COUNT - .clone(), - object_storage_get_slice_in_flight_num_bytes: - OBJECT_STORAGE_GET_SLICE_IN_FLIGHT_NUM_BYTES.clone(), - object_storage_put_total: OBJECT_STORAGE_PUT_TOTAL.clone(), - object_storage_put_parts: OBJECT_STORAGE_PUT_PARTS.clone(), - object_storage_download_num_bytes: OBJECT_STORAGE_DOWNLOAD_NUM_BYTES.clone(), - object_storage_upload_num_bytes: OBJECT_STORAGE_UPLOAD_NUM_BYTES.clone(), - object_storage_delete_requests_total, - object_storage_bulk_delete_requests_total, - object_storage_delete_request_duration, - object_storage_bulk_delete_request_duration, - } - } -} - /// Counters associated to a cache. pub struct CacheMetrics { pub component_name: String, @@ -446,9 +411,26 @@ static VIRTUAL_CACHE_EVICT_BYTES: LazyLock = LazyLock::new(|| { ) }); -/// Storage counters exposes a bunch a set of storage/cache related metrics through a prometheus -/// endpoint. 
-pub static STORAGE_METRICS: LazyLock = LazyLock::new(StorageMetrics::default); +pub static FAST_FIELD_CACHE: LazyLock = + LazyLock::new(|| CacheMetrics::for_component("fastfields")); + +pub static FD_CACHE_METRICS: LazyLock = + LazyLock::new(|| CacheMetrics::for_component("fd")); + +pub static PARTIAL_REQUEST_CACHE: LazyLock = + LazyLock::new(|| CacheMetrics::for_component("partial_request")); + +pub static PREDICATE_CACHE: LazyLock = + LazyLock::new(|| CacheMetrics::for_component("predicate")); + +pub static SEARCHER_SPLIT_CACHE: LazyLock = + LazyLock::new(|| CacheMetrics::for_component("searcher_split")); + +pub static SHORTLIVED_CACHE: LazyLock = + LazyLock::new(|| CacheMetrics::for_component("shortlived")); + +pub static SPLIT_FOOTER_CACHE: LazyLock = + LazyLock::new(|| CacheMetrics::for_component("splitfooter")); #[cfg(test)] pub static CACHE_METRICS_FOR_TESTS: LazyLock = @@ -457,12 +439,9 @@ pub static CACHE_METRICS_FOR_TESTS: LazyLock = pub fn object_storage_get_slice_in_flight_guards( get_request_size: usize, ) -> (GaugeGuard, GaugeGuard) { - let mut bytes_guard = GaugeGuard::from_gauge( - &crate::STORAGE_METRICS.object_storage_get_slice_in_flight_num_bytes, - ); + let mut bytes_guard = GaugeGuard::from_gauge(&OBJECT_STORAGE_GET_SLICE_IN_FLIGHT_NUM_BYTES); bytes_guard.increment(get_request_size as f64); - let mut count_guard = - GaugeGuard::from_gauge(&crate::STORAGE_METRICS.object_storage_get_slice_in_flight_count); + let mut count_guard = GaugeGuard::from_gauge(&OBJECT_STORAGE_GET_SLICE_IN_FLIGHT_COUNT); count_guard.increment(1.0); (bytes_guard, count_guard) } diff --git a/quickwit/quickwit-storage/src/object_storage/azure_blob_storage.rs b/quickwit/quickwit-storage/src/object_storage/azure_blob_storage.rs index 6540e20ee91..1ca91d9bdcf 100644 --- a/quickwit/quickwit-storage/src/object_storage/azure_blob_storage.rs +++ b/quickwit/quickwit-storage/src/object_storage/azure_blob_storage.rs @@ -47,8 +47,8 @@ use 
crate::metrics::object_storage_get_slice_in_flight_guards; use crate::stable_deref_bytes::into_owned_bytes; use crate::storage::SendableAsync; use crate::{ - BulkDeleteError, DeleteFailure, MultiPartPolicy, PutPayload, STORAGE_METRICS, Storage, - StorageError, StorageErrorKind, StorageFactory, StorageResolverError, StorageResult, + BulkDeleteError, DeleteFailure, MultiPartPolicy, PutPayload, Storage, StorageError, + StorageErrorKind, StorageFactory, StorageResolverError, StorageResult, }; /// Azure object storage resolver. @@ -240,10 +240,8 @@ impl AzureBlobStorage { name: &'a str, payload: Box, ) -> StorageResult<()> { - crate::STORAGE_METRICS.object_storage_put_parts.increment(1); - crate::STORAGE_METRICS - .object_storage_upload_num_bytes - .increment(payload.len()); + crate::OBJECT_STORAGE_PUT_PARTS.increment(1); + crate::OBJECT_STORAGE_UPLOAD_NUM_BYTES.increment(payload.len()); retry(&self.retry_params, || async { let data = Bytes::from(payload.read_all().await?.to_vec()); let hash = azure_storage_blobs::prelude::Hash::from(md5::compute(&data[..]).0); @@ -276,10 +274,8 @@ impl AzureBlobStorage { .map(|(num, range)| { let moved_blob_client = blob_client.clone(); let moved_payload = payload.clone(); - crate::STORAGE_METRICS.object_storage_put_parts.increment(1); - crate::STORAGE_METRICS - .object_storage_upload_num_bytes - .increment(range.end - range.start); + crate::OBJECT_STORAGE_PUT_PARTS.increment(1); + crate::OBJECT_STORAGE_UPLOAD_NUM_BYTES.increment(range.end - range.start); async move { retry(&self.retry_params, || async { // zero pad block ids to make them sortable as strings @@ -349,7 +345,7 @@ impl Storage for AzureBlobStorage { path: &Path, payload: Box, ) -> crate::StorageResult<()> { - crate::STORAGE_METRICS.object_storage_put_total.increment(1); + crate::OBJECT_STORAGE_PUT_TOTAL.increment(1); let name = self.blob_name(path); let total_len = payload.len(); let part_num_bytes = self.multipart_policy.part_num_bytes(total_len); @@ -376,9 +372,7 @@ 
impl Storage for AzureBlobStorage { .compat(); let mut body_stream_reader = BufReader::new(chunk_response_body_stream); let num_bytes_copied = tokio::io::copy_buf(&mut body_stream_reader, output).await?; - STORAGE_METRICS - .object_storage_download_num_bytes - .increment(num_bytes_copied); + crate::OBJECT_STORAGE_DOWNLOAD_NUM_BYTES.increment(num_bytes_copied); } output.flush().await?; Ok(()) @@ -577,9 +571,7 @@ async fn download_all( segments.push(bytes); } } - crate::STORAGE_METRICS - .object_storage_download_num_bytes - .increment(total_num_bytes as u64); + crate::OBJECT_STORAGE_DOWNLOAD_NUM_BYTES.increment(total_num_bytes as u64); Ok(coalesce_segments(segments, total_num_bytes)) } diff --git a/quickwit/quickwit-storage/src/object_storage/error.rs b/quickwit/quickwit-storage/src/object_storage/error.rs index ca6ef0396b8..eed6301d96b 100644 --- a/quickwit/quickwit-storage/src/object_storage/error.rs +++ b/quickwit/quickwit-storage/src/object_storage/error.rs @@ -65,7 +65,7 @@ impl ToStorageErrorKind for GetObjectError { fn to_storage_error_kind(&self) -> StorageErrorKind { let error_code = self.code().unwrap_or("unknown").to_string(); counter!( - parent: &crate::STORAGE_METRICS.object_storage_get_errors_total, + parent: &crate::OBJECT_STORAGE_GET_ERRORS_TOTAL, "code" => error_code, ) .increment(1); diff --git a/quickwit/quickwit-storage/src/object_storage/s3_compatible_storage.rs b/quickwit/quickwit-storage/src/object_storage/s3_compatible_storage.rs index 42700456775..f9e6d2e7e59 100644 --- a/quickwit/quickwit-storage/src/object_storage/s3_compatible_storage.rs +++ b/quickwit/quickwit-storage/src/object_storage/s3_compatible_storage.rs @@ -53,8 +53,8 @@ use crate::object_storage::MultiPartPolicy; use crate::stable_deref_bytes::into_owned_bytes; use crate::storage::SendableAsync; use crate::{ - BulkDeleteError, DeleteFailure, OwnedBytes, STORAGE_METRICS, Storage, StorageError, - StorageErrorKind, StorageResolverError, StorageResult, + BulkDeleteError, 
DeleteFailure, OwnedBytes, Storage, StorageError, StorageErrorKind, + StorageResolverError, StorageResult, }; /// Semaphore to limit the number of concurrent requests to the object store. Some object stores @@ -300,10 +300,8 @@ impl S3CompatibleObjectStorage { .await .map_err(|io_error| Retry::Permanent(StorageError::from(io_error)))?; - crate::STORAGE_METRICS.object_storage_put_parts.increment(1); - crate::STORAGE_METRICS - .object_storage_upload_num_bytes - .increment(len); + crate::OBJECT_STORAGE_PUT_PARTS.increment(1); + crate::OBJECT_STORAGE_UPLOAD_NUM_BYTES.increment(len); self.s3_client .put_object() @@ -435,10 +433,8 @@ impl S3CompatibleObjectStorage { .map_err(Retry::Permanent)?; let md5 = BASE64_STANDARD.encode(part.md5.0); - crate::STORAGE_METRICS.object_storage_put_parts.increment(1); - crate::STORAGE_METRICS - .object_storage_upload_num_bytes - .increment(part.len()); + crate::OBJECT_STORAGE_PUT_PARTS.increment(1); + crate::OBJECT_STORAGE_UPLOAD_NUM_BYTES.increment(part.len()); let upload_part_output = self .s3_client @@ -558,7 +554,7 @@ impl S3CompatibleObjectStorage { let key = self.key(path); let range_str = range_opt.map(|range| format!("bytes={}-{}", range.start, range.end - 1)); - crate::STORAGE_METRICS.object_storage_get_total.increment(1); + crate::OBJECT_STORAGE_GET_TOTAL.increment(1); let get_object_output = self .s3_client @@ -651,12 +647,8 @@ impl S3CompatibleObjectStorage { for (path_chunk, delete) in &mut delete_requests_it { let delete_objects_res: StorageResult = aws_retry(&self.retry_params, || async { - crate::STORAGE_METRICS - .object_storage_bulk_delete_requests_total - .increment(1); - let _timer = crate::STORAGE_METRICS - .object_storage_bulk_delete_request_duration - .start_timer(); + crate::OBJECT_STORAGE_BULK_DELETE_REQUESTS_TOTAL.increment(1); + let _timer = crate::OBJECT_STORAGE_BULK_DELETE_REQUEST_DURATION.start_timer(); self.s3_client .delete_objects() .bucket(self.bucket.clone()) @@ -732,9 +724,7 @@ async fn 
download_all(byte_stream: ByteStream) -> StorageResult { // `AggregatedBytes::into_bytes` returns the underlying `Bytes` without copying when the body // was received as a single segment, and concatenates into a fresh `Bytes` otherwise. let bytes = aggregated.into_bytes(); - STORAGE_METRICS - .object_storage_download_num_bytes - .increment(bytes.len() as u64); + crate::OBJECT_STORAGE_DOWNLOAD_NUM_BYTES.increment(bytes.len() as u64); Ok(bytes) } @@ -774,7 +764,7 @@ impl Storage for S3CompatibleObjectStorage { path: &Path, payload: Box, ) -> crate::StorageResult<()> { - crate::STORAGE_METRICS.object_storage_put_total.increment(1); + crate::OBJECT_STORAGE_PUT_TOTAL.increment(1); let _permit = REQUEST_SEMAPHORE.acquire().await; let key = self.key(path); let total_len = payload.len(); @@ -794,9 +784,7 @@ impl Storage for S3CompatibleObjectStorage { aws_retry(&self.retry_params, || self.get_object(path, None)).await?; let mut body_read = BufReader::new(get_object_output.body.into_async_read()); let num_bytes_copied = tokio::io::copy_buf(&mut body_read, output).await?; - STORAGE_METRICS - .object_storage_download_num_bytes - .increment(num_bytes_copied); + crate::OBJECT_STORAGE_DOWNLOAD_NUM_BYTES.increment(num_bytes_copied); output.flush().await?; Ok(()) } @@ -806,12 +794,8 @@ impl Storage for S3CompatibleObjectStorage { let bucket = self.bucket.clone(); let key = self.key(path); let delete_res = aws_retry(&self.retry_params, || async { - crate::STORAGE_METRICS - .object_storage_delete_requests_total - .increment(1); - let _timer = crate::STORAGE_METRICS - .object_storage_delete_request_duration - .start_timer(); + crate::OBJECT_STORAGE_DELETE_REQUESTS_TOTAL.increment(1); + let _timer = crate::OBJECT_STORAGE_DELETE_REQUEST_DURATION.start_timer(); self.s3_client .delete_object() .bucket(&bucket) diff --git a/quickwit/quickwit-storage/src/opendal_storage/base.rs b/quickwit/quickwit-storage/src/opendal_storage/base.rs index e1e9f2eeba8..a8a5be3ac82 100644 --- 
a/quickwit/quickwit-storage/src/opendal_storage/base.rs +++ b/quickwit/quickwit-storage/src/opendal_storage/base.rs @@ -35,7 +35,7 @@ use crate::{ /// # TODO /// /// - Implement REQUEST_SEMAPHORE to control the concurrency. -/// - Implement STORAGE_METRICS for metrics. +/// - Implement object storage metrics. pub struct OpendalStorage { uri: Uri, op: Operator, @@ -80,7 +80,7 @@ impl Storage for OpendalStorage { } async fn put(&self, path: &Path, payload: Box) -> StorageResult<()> { - crate::STORAGE_METRICS.object_storage_put_total.increment(1); + crate::OBJECT_STORAGE_PUT_TOTAL.increment(1); let path = path.as_os_str().to_string_lossy(); let mut payload_reader = payload.byte_stream().await?.into_async_read(); @@ -93,9 +93,7 @@ impl Storage for OpendalStorage { .compat_write(); tokio::io::copy(&mut payload_reader, &mut storage_writer).await?; storage_writer.get_mut().close().await?; - crate::STORAGE_METRICS - .object_storage_upload_num_bytes - .increment(payload.len()); + crate::OBJECT_STORAGE_UPLOAD_NUM_BYTES.increment(payload.len()); Ok(()) } @@ -109,9 +107,7 @@ impl Storage for OpendalStorage { .await? .compat(); let num_bytes_copied = tokio::io::copy(&mut storage_reader, output).await?; - crate::STORAGE_METRICS - .object_storage_download_num_bytes - .increment(num_bytes_copied); + crate::OBJECT_STORAGE_DOWNLOAD_NUM_BYTES.increment(num_bytes_copied); output.flush().await?; Ok(()) } @@ -123,7 +119,7 @@ impl Storage for OpendalStorage { // Unlike other object store implementations, in flight requests are // recorded before issuing the query to the object store. let _inflight_guards = object_storage_get_slice_in_flight_guards(size); - crate::STORAGE_METRICS.object_storage_get_total.increment(1); + crate::OBJECT_STORAGE_GET_TOTAL.increment(1); // `Buffer::to_bytes` is zero-copy when the underlying buffer is contiguous, and coalesces // into a single `Bytes` otherwise — avoiding the extra `Vec` round-trip `to_vec` would // perform. 
@@ -156,12 +152,8 @@ impl Storage for OpendalStorage { async fn delete(&self, path: &Path) -> StorageResult<()> { let path = path.as_os_str().to_string_lossy(); - crate::STORAGE_METRICS - .object_storage_delete_requests_total - .increment(1); - let _timer = crate::STORAGE_METRICS - .object_storage_delete_request_duration - .start_timer(); + crate::OBJECT_STORAGE_DELETE_REQUESTS_TOTAL.increment(1); + let _timer = crate::OBJECT_STORAGE_DELETE_REQUEST_DURATION.start_timer(); self.op.delete(&path).await?; Ok(()) } @@ -175,12 +167,8 @@ impl Storage for OpendalStorage { if storage_info.name().starts_with("sample-bucket") && storage_info.scheme() == "gcs" { let mut bulk_error = BulkDeleteError::default(); for (index, path) in paths.iter().enumerate() { - crate::STORAGE_METRICS - .object_storage_bulk_delete_requests_total - .increment(1); - let _timer = crate::STORAGE_METRICS - .object_storage_bulk_delete_request_duration - .start_timer(); + crate::OBJECT_STORAGE_BULK_DELETE_REQUESTS_TOTAL.increment(1); + let _timer = crate::OBJECT_STORAGE_BULK_DELETE_REQUEST_DURATION.start_timer(); let result = self.op.delete(&path.as_os_str().to_string_lossy()).await; if let Err(err) = result { let storage_error_kind = err.kind(); diff --git a/quickwit/quickwit-storage/src/split_cache/mod.rs b/quickwit/quickwit-storage/src/split_cache/mod.rs index 5979ef258d2..2eca3c1c2a7 100644 --- a/quickwit/quickwit-storage/src/split_cache/mod.rs +++ b/quickwit/quickwit-storage/src/split_cache/mod.rs @@ -217,7 +217,7 @@ impl SplitCacheBackingStorage { } fn record_hit_metrics(&self, result_opt: Option<&OwnedBytes>) { - let split_metrics = &crate::STORAGE_METRICS.searcher_split_cache.cache_metrics; + let split_metrics = &crate::SEARCHER_SPLIT_CACHE.cache_metrics; if let Some(result) = result_opt { split_metrics.hits_num_items.increment(1); split_metrics.hits_num_bytes.increment(result.len() as u64); diff --git a/quickwit/quickwit-storage/src/split_cache/split_table.rs 
b/quickwit/quickwit-storage/src/split_cache/split_table.rs index 943a5101acf..50e841025d4 100644 --- a/quickwit/quickwit-storage/src/split_cache/split_table.rs +++ b/quickwit/quickwit-storage/src/split_cache/split_table.rs @@ -152,23 +152,19 @@ impl SplitTable { Status::Downloading { .. } => &mut self.downloading_splits, Status::OnDisk { num_bytes } => { self.on_disk_bytes -= num_bytes; - crate::metrics::STORAGE_METRICS - .searcher_split_cache + crate::metrics::SEARCHER_SPLIT_CACHE .cache_metrics .in_cache_count .decrement(1.0); - crate::metrics::STORAGE_METRICS - .searcher_split_cache + crate::metrics::SEARCHER_SPLIT_CACHE .cache_metrics .in_cache_num_bytes .decrement(num_bytes as f64); - crate::metrics::STORAGE_METRICS - .searcher_split_cache + crate::metrics::SEARCHER_SPLIT_CACHE .cache_metrics .evict_num_items .increment(1); - crate::metrics::STORAGE_METRICS - .searcher_split_cache + crate::metrics::SEARCHER_SPLIT_CACHE .cache_metrics .evict_num_bytes .increment(num_bytes); @@ -220,13 +216,11 @@ impl SplitTable { Status::Downloading { .. } => self.downloading_splits.insert(split_info.split_key), Status::OnDisk { num_bytes } => { self.on_disk_bytes += num_bytes; - crate::metrics::STORAGE_METRICS - .searcher_split_cache + crate::metrics::SEARCHER_SPLIT_CACHE .cache_metrics .in_cache_count .increment(1.0); - crate::metrics::STORAGE_METRICS - .searcher_split_cache + crate::metrics::SEARCHER_SPLIT_CACHE .cache_metrics .in_cache_num_bytes .increment(num_bytes as f64); diff --git a/quickwit/quickwit-storage/src/timeout_and_retry_storage.rs b/quickwit/quickwit-storage/src/timeout_and_retry_storage.rs index a51a74a04e9..1841c0b0e0e 100644 --- a/quickwit/quickwit-storage/src/timeout_and_retry_storage.rs +++ b/quickwit/quickwit-storage/src/timeout_and_retry_storage.rs @@ -97,12 +97,11 @@ impl Storage for TimeoutAndRetryStorage { // TODO test avoid aborting timed out requests. 
#5468 match tokio::time::timeout(timeout_duration, get_slice_fut).await { Ok(result) => { - crate::STORAGE_METRICS - .get_slice_timeout_successes - .get(attempt_id) - .or(crate::STORAGE_METRICS.get_slice_timeout_successes.last()) - .unwrap() - .increment(1); + match attempt_id { + 0 => crate::GET_SLICE_TIMEOUT_SUCCESS_AFTER_0_TIMEOUT.increment(1), + 1 => crate::GET_SLICE_TIMEOUT_SUCCESS_AFTER_1_TIMEOUT.increment(1), + _ => crate::GET_SLICE_TIMEOUT_SUCCESS_AFTER_2_PLUS_TIMEOUT.increment(1), + } return result; } Err(_elapsed) => { @@ -112,9 +111,7 @@ impl Storage for TimeoutAndRetryStorage { } } rate_limited_warn!(limit_per_min=60, num_bytes=num_bytes, path=%path.display(), "all get_slice attempts timeouted"); - crate::STORAGE_METRICS - .get_slice_timeout_all_timeouts - .increment(1); + crate::GET_SLICE_TIMEOUT_ALL_TIMEOUTS.increment(1); return Err( StorageErrorKind::Timeout.with_error(anyhow::anyhow!("internal timeout on get_slice")) ); From 81dabc866bf505550989fccee7052ddef576b2d2 Mon Sep 17 00:00:00 2001 From: Shuhei Kitagawa Date: Sat, 2 May 2026 14:09:02 +0200 Subject: [PATCH 13/54] Use reusable metric label templates --- quickwit/quickwit-common/src/io.rs | 10 +-- quickwit/quickwit-common/src/metrics.rs | 7 +- quickwit/quickwit-common/src/runtimes.rs | 13 ++-- quickwit/quickwit-common/src/thread_pool.rs | 9 ++- quickwit/quickwit-common/src/tower/metrics.rs | 39 ++++------ .../quickwit-control-plane/src/metrics.rs | 4 +- .../src/model/shard_table.rs | 10 ++- .../src/actors/doc_processor.rs | 9 +-- .../src/actors/indexing_pipeline.rs | 7 +- .../src/actors/metrics_pipeline/pipeline.rs | 7 +- quickwit/quickwit-indexing/src/metrics.rs | 7 +- .../quickwit-ingest/src/ingest_v2/metrics.rs | 6 +- .../quickwit-ingest/src/ingest_v2/router.rs | 3 +- quickwit/quickwit-ingest/src/lib.rs | 11 ++- quickwit/quickwit-jaeger/src/lib.rs | 55 ++++++-------- quickwit/quickwit-jaeger/src/metrics.rs | 6 +- quickwit/quickwit-jaeger/src/v1.rs | 13 ++-- quickwit/quickwit-jaeger/src/v2.rs 
| 32 ++++---- .../src/actors/delete_task_planner.rs | 3 +- .../src/actors/garbage_collector.rs | 14 ++-- quickwit/quickwit-janitor/src/metrics.rs | 8 +- .../quickwit-lambda-client/src/invoker.rs | 5 +- .../quickwit-lambda-client/src/metrics.rs | 4 +- .../quickwit-opentelemetry/src/otlp/logs.rs | 51 +++++++------ .../src/otlp/metrics.rs | 7 +- .../src/otlp/otel_metrics.rs | 51 +++++++------ .../quickwit-opentelemetry/src/otlp/traces.rs | 51 +++++++------ quickwit/quickwit-search/src/metrics.rs | 5 +- .../quickwit-search/src/metrics_trackers.rs | 14 ++-- .../quickwit-search/src/search_job_placer.rs | 7 +- quickwit/quickwit-serve/src/load_shield.rs | 11 +-- quickwit/quickwit-serve/src/metrics.rs | 5 +- quickwit/quickwit-serve/src/rest.rs | 11 +-- quickwit/quickwit-storage/src/metrics.rs | 76 ++++++------------- .../src/object_storage/error.rs | 7 +- 35 files changed, 294 insertions(+), 284 deletions(-) diff --git a/quickwit/quickwit-common/src/io.rs b/quickwit/quickwit-common/src/io.rs index 195fd205a52..240f77c4203 100644 --- a/quickwit/quickwit-common/src/io.rs +++ b/quickwit/quickwit-common/src/io.rs @@ -34,7 +34,7 @@ use async_speed_limit::clock::StandardClock; use async_speed_limit::limiter::Consume; use bytesize::ByteSize; use pin_project::pin_project; -use quickwit_metrics::{Counter, counter}; +use quickwit_metrics::{Counter, Labels, counter}; use tokio::io::AsyncWrite; use crate::{KillSwitch, Progress, ProtectedZoneGuard}; @@ -56,6 +56,8 @@ static WRITE_BYTES: LazyLock = LazyLock::new(|| { ) }); +const COMPONENT_LABELS: Labels<1> = Labels::new(["component"]); + /// Parameter used in `async_speed_limit`. /// /// The default value is good and does not need to be tweaked. 
@@ -118,10 +120,8 @@ impl IoControls { } pub fn set_component(mut self, component: &str) -> Self { - self.bytes_counter = counter!( - parent: &*WRITE_BYTES, - "component" => component.to_string(), - ); + let labels = COMPONENT_LABELS.with_values([component.to_string()]); + self.bytes_counter = counter!(parent: &*WRITE_BYTES, labels: &labels); self } diff --git a/quickwit/quickwit-common/src/metrics.rs b/quickwit/quickwit-common/src/metrics.rs index eb90d64a5d2..53a06a07dd1 100644 --- a/quickwit/quickwit-common/src/metrics.rs +++ b/quickwit/quickwit-common/src/metrics.rs @@ -19,7 +19,7 @@ use std::time::Duration; use metrics_exporter_prometheus::PrometheusHandle; pub use prometheus::{exponential_buckets, linear_buckets}; -use quickwit_metrics::{Gauge, gauge}; +use quickwit_metrics::{Gauge, Labels, gauge}; const SYSTEM: &str = "quickwit"; @@ -117,6 +117,8 @@ static IN_FLIGHT_DATA_BYTES: LazyLock = LazyLock::new(|| { ) }); +const COMPONENT_LABELS: Labels<1> = Labels::new(["component"]); + pub static IN_FLIGHT_REST_SERVER: LazyLock = LazyLock::new(|| in_flight_data_gauge("rest_server")); @@ -168,7 +170,8 @@ pub static IN_FLIGHT_OTHER_SOURCE: LazyLock = LazyLock::new(|| in_flight_data_gauge("pulsar_source")); fn in_flight_data_gauge(component: &'static str) -> Gauge { - gauge!(parent: &*IN_FLIGHT_DATA_BYTES, "component" => component) + let labels = COMPONENT_LABELS.with_values([component]); + gauge!(parent: &*IN_FLIGHT_DATA_BYTES, labels: &labels) } fn metric_key_name(subsystem: &str, name: &str) -> String { diff --git a/quickwit/quickwit-common/src/runtimes.rs b/quickwit/quickwit-common/src/runtimes.rs index 98fb33975de..21f0ae9015c 100644 --- a/quickwit/quickwit-common/src/runtimes.rs +++ b/quickwit/quickwit-common/src/runtimes.rs @@ -17,7 +17,7 @@ use std::sync::OnceLock; use std::sync::atomic::{AtomicUsize, Ordering}; use std::time::Duration; -use quickwit_metrics::{Counter, Gauge, counter, gauge}; +use quickwit_metrics::{Counter, Gauge, Labels, counter, gauge}; use 
tokio::runtime::Runtime; use tokio_metrics::{RuntimeMetrics, RuntimeMonitor}; @@ -56,6 +56,8 @@ static TOKIO_WORKER_THREADS: std::sync::LazyLock = std::sync::LazyLock::n ) }); +const RUNTIME_TYPE_LABELS: Labels<1> = Labels::new(["runtime_type"]); + /// Describes which runtime an actor should run on. #[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)] pub enum RuntimeType { @@ -214,14 +216,15 @@ struct RuntimeMetricsRecorder { impl RuntimeMetricsRecorder { pub fn new(label: &'static str) -> Self { + let labels = RUNTIME_TYPE_LABELS.with_values([label]); Self { - scheduled_tasks: gauge!(parent: &*TOKIO_SCHEDULED_TASKS, "runtime_type" => label), + scheduled_tasks: gauge!(parent: &*TOKIO_SCHEDULED_TASKS, labels: &labels), worker_busy_duration_milliseconds_total: counter!( parent: &*TOKIO_WORKER_BUSY_DURATION_MILLISECONDS_TOTAL, - "runtime_type" => label, + labels: &labels, ), - worker_busy_ratio: gauge!(parent: &*TOKIO_WORKER_BUSY_RATIO, "runtime_type" => label), - worker_threads: gauge!(parent: &*TOKIO_WORKER_THREADS, "runtime_type" => label), + worker_busy_ratio: gauge!(parent: &*TOKIO_WORKER_BUSY_RATIO, labels: &labels), + worker_threads: gauge!(parent: &*TOKIO_WORKER_THREADS, labels: &labels), } } diff --git a/quickwit/quickwit-common/src/thread_pool.rs b/quickwit/quickwit-common/src/thread_pool.rs index fef231b91b7..46e610acfc7 100644 --- a/quickwit/quickwit-common/src/thread_pool.rs +++ b/quickwit/quickwit-common/src/thread_pool.rs @@ -16,7 +16,7 @@ use std::fmt; use std::sync::Arc; use futures::{Future, TryFutureExt}; -use quickwit_metrics::{Gauge, GaugeGuard, gauge}; +use quickwit_metrics::{Gauge, GaugeGuard, Labels, gauge}; use tokio::sync::oneshot; use tracing::error; @@ -36,6 +36,8 @@ static THREAD_POOL_PENDING_TASKS: std::sync::LazyLock = std::sync::LazyLo ) }); +const THREAD_POOL_LABELS: Labels<1> = Labels::new(["pool"]); + /// An executor backed by a thread pool to run CPU-intensive tasks. 
/// /// tokio::spawn_blocking should only used for IO-bound tasks, as it has not limit on its @@ -60,8 +62,9 @@ impl ThreadPool { let thread_pool = rayon_pool_builder .build() .expect("failed to spawn thread pool"); - let ongoing_tasks = gauge!(parent: &*THREAD_POOL_ONGOING_TASKS, "pool" => name); - let pending_tasks = gauge!(parent: &*THREAD_POOL_PENDING_TASKS, "pool" => name); + let labels = THREAD_POOL_LABELS.with_values([name]); + let ongoing_tasks = gauge!(parent: &*THREAD_POOL_ONGOING_TASKS, labels: &labels); + let pending_tasks = gauge!(parent: &*THREAD_POOL_PENDING_TASKS, labels: &labels); ThreadPool { thread_pool: Arc::new(thread_pool), ongoing_tasks, diff --git a/quickwit/quickwit-common/src/tower/metrics.rs b/quickwit/quickwit-common/src/tower/metrics.rs index 77607d2c5dc..a66676e23a0 100644 --- a/quickwit/quickwit-common/src/tower/metrics.rs +++ b/quickwit/quickwit-common/src/tower/metrics.rs @@ -19,7 +19,7 @@ use std::time::Instant; use futures::{Future, ready}; use pin_project::{pin_project, pinned_drop}; -use quickwit_metrics::{Counter, Gauge, Histogram, counter, gauge, histogram}; +use quickwit_metrics::{Counter, Gauge, Histogram, Labels, counter, gauge, histogram}; use tower::{Layer, Service}; use crate::metrics::exponential_buckets; @@ -53,6 +53,10 @@ static GRPC_REQUEST_DURATION_SECONDS: LazyLock = LazyLock::new(|| { ) }); +const GRPC_SERVICE_LABELS: Labels<2> = Labels::new(["service", "kind"]); +const GRPC_RPC_LABELS: Labels<1> = Labels::new(["rpc"]); +const GRPC_RPC_STATUS_LABELS: Labels<2> = Labels::new(["rpc", "status"]); + #[derive(Clone)] pub struct GrpcMetrics { inner: S, @@ -79,7 +83,8 @@ where let rpc_name = R::rpc_name(); let inner = self.inner.call(request); - gauge!(parent: &self.requests_in_flight, "rpc" => rpc_name).increment(1.0); + let labels = GRPC_RPC_LABELS.with_values([rpc_name]); + gauge!(parent: &self.requests_in_flight, labels: &labels).increment(1.0); ResponseFuture { inner, @@ -102,21 +107,13 @@ pub struct 
GrpcMetricsLayer { impl GrpcMetricsLayer { pub fn new(subsystem: &'static str, kind: &'static str) -> Self { + let labels = GRPC_SERVICE_LABELS.with_values([subsystem, kind]); Self { - requests_total: counter!( - parent: &*GRPC_REQUESTS_TOTAL, - "service" => subsystem, - "kind" => kind, - ), - requests_in_flight: gauge!( - parent: &*GRPC_REQUESTS_IN_FLIGHT, - "service" => subsystem, - "kind" => kind, - ), + requests_total: counter!(parent: &*GRPC_REQUESTS_TOTAL, labels: &labels), + requests_in_flight: gauge!(parent: &*GRPC_REQUESTS_IN_FLIGHT, labels: &labels), request_duration_seconds: histogram!( parent: &*GRPC_REQUEST_DURATION_SECONDS, - "service" => subsystem, - "kind" => kind, + labels: &labels, ), } } @@ -152,19 +149,15 @@ pub struct ResponseFuture { impl PinnedDrop for ResponseFuture { fn drop(self: Pin<&mut Self>) { let elapsed = self.start.elapsed().as_secs_f64(); - counter!( - parent: &self.requests_total, - "rpc" => self.rpc_name, - "status" => self.status, - ) - .increment(1); + let rpc_status_labels = GRPC_RPC_STATUS_LABELS.with_values([self.rpc_name, self.status]); + counter!(parent: &self.requests_total, labels: &rpc_status_labels).increment(1); histogram!( parent: &self.request_duration_seconds, - "rpc" => self.rpc_name, - "status" => self.status, + labels: &rpc_status_labels, ) .record(elapsed); - gauge!(parent: &self.requests_in_flight, "rpc" => self.rpc_name).decrement(1.0); + let rpc_labels = GRPC_RPC_LABELS.with_values([self.rpc_name]); + gauge!(parent: &self.requests_in_flight, labels: &rpc_labels).decrement(1.0); } } diff --git a/quickwit/quickwit-control-plane/src/metrics.rs b/quickwit/quickwit-control-plane/src/metrics.rs index c22d12f83b1..c0b35b3426a 100644 --- a/quickwit/quickwit-control-plane/src/metrics.rs +++ b/quickwit/quickwit-control-plane/src/metrics.rs @@ -14,7 +14,7 @@ use std::sync::LazyLock; -use quickwit_metrics::{Counter, Gauge, counter, gauge}; +use quickwit_metrics::{Counter, Gauge, Labels, counter, gauge}; #[derive(Debug, 
Clone, Copy)] pub struct ShardLocalityMetrics { @@ -49,6 +49,8 @@ pub(crate) static OPEN_SHARDS: LazyLock = pub(crate) static CLOSED_SHARDS: LazyLock = LazyLock::new(|| gauge!(parent: &*SHARDS, "state" => "closed")); +pub(crate) const INDEX_ID_LABELS: Labels<1> = Labels::new(["index_id"]); + static INDEXED_SHARDS: LazyLock = LazyLock::new(|| { gauge!( name: "indexed_shards", diff --git a/quickwit/quickwit-control-plane/src/model/shard_table.rs b/quickwit/quickwit-control-plane/src/model/shard_table.rs index 960e6d258d7..aafa344c17d 100644 --- a/quickwit/quickwit-control-plane/src/model/shard_table.rs +++ b/quickwit/quickwit-control-plane/src/model/shard_table.rs @@ -461,14 +461,15 @@ impl ShardTable { // can update the metrics for this specific index. if index_label == index_id { let shard_stats = table_entry.shards_stats(); + let labels = crate::metrics::INDEX_ID_LABELS.with_values([index_label.to_string()]); quickwit_metrics::gauge!( parent: &crate::metrics::OPEN_SHARDS, - "index_id" => index_label.to_string(), + labels: &labels, ) .set(shard_stats.num_open_shards as f64); quickwit_metrics::gauge!( parent: &crate::metrics::CLOSED_SHARDS, - "index_id" => index_label.to_string(), + labels: &labels, ) .set(shard_stats.num_closed_shards as f64); return; @@ -484,14 +485,15 @@ impl ShardTable { num_closed_shards += 1; } } + let labels = crate::metrics::INDEX_ID_LABELS.with_values([index_label.to_string()]); quickwit_metrics::gauge!( parent: &crate::metrics::OPEN_SHARDS, - "index_id" => index_label.to_string(), + labels: &labels, ) .set(num_open_shards as f64); quickwit_metrics::gauge!( parent: &crate::metrics::CLOSED_SHARDS, - "index_id" => index_label.to_string(), + labels: &labels, ) .set(num_closed_shards as f64); } diff --git a/quickwit/quickwit-indexing/src/actors/doc_processor.rs b/quickwit/quickwit-indexing/src/actors/doc_processor.rs index 5b7f3573d12..d0320023544 100644 --- a/quickwit/quickwit-indexing/src/actors/doc_processor.rs +++ 
b/quickwit/quickwit-indexing/src/actors/doc_processor.rs @@ -45,7 +45,6 @@ use crate::models::{ }; const PLAIN_TEXT: &str = "plain_text"; - pub(super) struct JsonDoc { json_obj: JsonObject, num_bytes: usize, @@ -284,17 +283,17 @@ impl Serialize for DocProcessorCounter { impl DocProcessorCounter { fn for_index_and_doc_processor_outcome(index: &str, outcome: &str) -> DocProcessorCounter { let index_label = quickwit_common::metrics::index_label(index); + let labels = crate::metrics::INDEX_DOCS_PROCESSED_STATUS_LABELS + .with_values([index_label.to_string(), outcome.to_string()]); DocProcessorCounter { num_docs: Default::default(), num_docs_metric: counter!( parent: &crate::metrics::PROCESSED_DOCS_TOTAL, - "index" => index_label.to_string(), - "docs_processed_status" => outcome.to_string(), + labels: &labels, ), num_bytes_metric: counter!( parent: &crate::metrics::PROCESSED_BYTES, - "index" => index_label.to_string(), - "docs_processed_status" => outcome.to_string(), + labels: &labels, ), } } diff --git a/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs b/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs index c89b0d89488..c12a8fa8a76 100644 --- a/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs +++ b/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs @@ -123,9 +123,14 @@ impl Actor for IndexingPipeline { impl IndexingPipeline { pub fn new(params: IndexingPipelineParams) -> Self { + let labels = crate::metrics::INDEX_LABELS.with_values([params + .pipeline_id + .index_uid + .index_id + .clone()]); let indexing_pipelines_gauge = gauge!( parent: &crate::metrics::INDEXING_PIPELINES, - "index" => params.pipeline_id.index_uid.index_id.clone(), + labels: &labels, ); let mut indexing_pipelines_gauge_guard = GaugeGuard::from_gauge(&indexing_pipelines_gauge); indexing_pipelines_gauge_guard.increment(1.0); diff --git a/quickwit/quickwit-indexing/src/actors/metrics_pipeline/pipeline.rs 
b/quickwit/quickwit-indexing/src/actors/metrics_pipeline/pipeline.rs index 2a8ef6b213c..f6a3dafba7f 100644 --- a/quickwit/quickwit-indexing/src/actors/metrics_pipeline/pipeline.rs +++ b/quickwit/quickwit-indexing/src/actors/metrics_pipeline/pipeline.rs @@ -144,9 +144,14 @@ impl Actor for MetricsPipeline { impl MetricsPipeline { pub fn new(params: MetricsPipelineParams) -> Self { + let labels = crate::metrics::INDEX_LABELS.with_values([params + .pipeline_id + .index_uid + .index_id + .clone()]); let indexing_pipelines_gauge = gauge!( parent: &crate::metrics::INDEXING_PIPELINES, - "index" => params.pipeline_id.index_uid.index_id.clone(), + labels: &labels, ); let mut indexing_pipelines_gauge_guard = GaugeGuard::from_gauge(&indexing_pipelines_gauge); indexing_pipelines_gauge_guard.increment(1.0); diff --git a/quickwit/quickwit-indexing/src/metrics.rs b/quickwit/quickwit-indexing/src/metrics.rs index 94c109c8fe5..699c2568e19 100644 --- a/quickwit/quickwit-indexing/src/metrics.rs +++ b/quickwit/quickwit-indexing/src/metrics.rs @@ -14,7 +14,10 @@ use std::sync::LazyLock; -use quickwit_metrics::{Counter, Gauge, counter, gauge}; +use quickwit_metrics::{Counter, Gauge, Labels, counter, gauge}; + +pub(crate) const INDEX_DOCS_PROCESSED_STATUS_LABELS: Labels<2> = + Labels::new(["index", "docs_processed_status"]); pub(crate) static PROCESSED_DOCS_TOTAL: LazyLock = LazyLock::new(|| { counter!( @@ -40,6 +43,8 @@ pub(crate) static INDEXING_PIPELINES: LazyLock = LazyLock::new(|| { ) }); +pub(crate) const INDEX_LABELS: Labels<1> = Labels::new(["index"]); + pub(crate) static BACKPRESSURE_MICROS: LazyLock = LazyLock::new(|| { counter!( name: "backpressure_micros", diff --git a/quickwit/quickwit-ingest/src/ingest_v2/metrics.rs b/quickwit/quickwit-ingest/src/ingest_v2/metrics.rs index 84bb10ab290..4841f835e0a 100644 --- a/quickwit/quickwit-ingest/src/ingest_v2/metrics.rs +++ b/quickwit/quickwit-ingest/src/ingest_v2/metrics.rs @@ -16,7 +16,7 @@ use std::sync::LazyLock; use 
mrecordlog::ResourceUsage; use quickwit_common::metrics::{exponential_buckets, linear_buckets}; -use quickwit_metrics::{Counter, Gauge, Histogram, counter, gauge, histogram}; +use quickwit_metrics::{Counter, Gauge, Histogram, Labels, counter, gauge, histogram}; static INGEST_RESULT_TOTAL: LazyLock = LazyLock::new(|| { counter!( @@ -79,6 +79,8 @@ pub(super) static INGEST_ATTEMPTS: LazyLock = LazyLock::new(|| { ) }); +pub(super) const AZ_ROUTING_LABELS: Labels<1> = Labels::new(["az_routing"]); + pub(super) static RESET_SHARDS_OPERATIONS_TOTAL: LazyLock = LazyLock::new(|| { counter!( name: "reset_shards_operations_total", @@ -137,6 +139,8 @@ pub(super) static WAL_ACQUIRE_LOCK_REQUEST_DURATION_SECS: LazyLock = ) }); +pub(super) const WAL_LOCK_METRIC_LABELS: Labels<2> = Labels::new(["operation", "type"]); + pub(super) static WAL_DISK_USED_BYTES: LazyLock = LazyLock::new(|| { gauge!( name: "wal_disk_used_bytes", diff --git a/quickwit/quickwit-ingest/src/ingest_v2/router.rs b/quickwit/quickwit-ingest/src/ingest_v2/router.rs index b7abd2e3c3a..fb03c76609d 100644 --- a/quickwit/quickwit-ingest/src/ingest_v2/router.rs +++ b/quickwit/quickwit-ingest/src/ingest_v2/router.rs @@ -369,9 +369,10 @@ impl IngestRouter { let az_locality = state_guard .routing_table .classify_az_locality(&ingester_node.node_id, &self.ingester_pool); + let labels = crate::ingest_v2::metrics::AZ_ROUTING_LABELS.with_values([az_locality]); counter!( parent: &crate::ingest_v2::metrics::INGEST_ATTEMPTS, - "az_routing" => az_locality, + labels: &labels, ) .increment(1); let persist_subrequest = PersistSubrequest { diff --git a/quickwit/quickwit-ingest/src/lib.rs b/quickwit/quickwit-ingest/src/lib.rs index 785e549d62e..07e96f75731 100644 --- a/quickwit/quickwit-ingest/src/lib.rs +++ b/quickwit/quickwit-ingest/src/lib.rs @@ -109,10 +109,11 @@ pub async fn start_ingest_api_service( macro_rules! 
with_lock_metrics { ($future:expr, $operation:expr, $kind:expr) => { { + let labels = + $crate::ingest_v2::metrics::WAL_LOCK_METRIC_LABELS.with_values([$operation, $kind]); quickwit_metrics::gauge!( parent: &$crate::ingest_v2::metrics::WAL_ACQUIRE_LOCK_REQUESTS_IN_FLIGHT, - "operation" => $operation, - "type" => $kind, + labels: &labels, ) .increment(1.0); @@ -128,14 +129,12 @@ macro_rules! with_lock_metrics { } quickwit_metrics::gauge!( parent: &$crate::ingest_v2::metrics::WAL_ACQUIRE_LOCK_REQUESTS_IN_FLIGHT, - "operation" => $operation, - "type" => $kind, + labels: &labels, ) .decrement(1.0); quickwit_metrics::histogram!( parent: &$crate::ingest_v2::metrics::WAL_ACQUIRE_LOCK_REQUEST_DURATION_SECS, - "operation" => $operation, - "type" => $kind, + labels: &labels, ) .record(elapsed.as_secs_f64()); diff --git a/quickwit/quickwit-jaeger/src/lib.rs b/quickwit/quickwit-jaeger/src/lib.rs index 3e986e54af1..caac5c4c2e9 100644 --- a/quickwit/quickwit-jaeger/src/lib.rs +++ b/quickwit/quickwit-jaeger/src/lib.rs @@ -414,19 +414,20 @@ impl JaegerService { current_span.record("num_spans", num_spans_total); current_span.record("num_bytes", num_bytes_total); - counter!( - parent: &crate::metrics::FETCHED_TRACES_TOTAL, - "operation" => operation_name, - "index" => OTEL_TRACES_INDEX_ID, - ) - .increment(num_traces); + let labels = crate::metrics::OPERATION_INDEX_LABELS + .with_values([operation_name, OTEL_TRACES_INDEX_ID]); + counter!(parent: &crate::metrics::FETCHED_TRACES_TOTAL, labels: &labels) + .increment(num_traces); let elapsed = request_start.elapsed().as_secs_f64(); + let duration_labels = crate::metrics::OPERATION_INDEX_ERROR_LABELS.with_values([ + operation_name, + OTEL_TRACES_INDEX_ID, + "false", + ]); histogram!( parent: &crate::metrics::REQUEST_DURATION_SECONDS, - "operation" => operation_name, - "index" => OTEL_TRACES_INDEX_ID, - "error" => "false", + labels: &duration_labels, ) .record(elapsed); }); @@ -435,36 +436,30 @@ impl JaegerService { } pub(crate) fn 
record_error(operation_name: &'static str, request_start: Instant) { - counter!( - parent: &crate::metrics::REQUEST_ERRORS_TOTAL, - "operation" => operation_name, - "index" => OTEL_TRACES_INDEX_ID, - ) - .increment(1); + let labels = + crate::metrics::OPERATION_INDEX_LABELS.with_values([operation_name, OTEL_TRACES_INDEX_ID]); + counter!(parent: &crate::metrics::REQUEST_ERRORS_TOTAL, labels: &labels).increment(1); let elapsed = request_start.elapsed().as_secs_f64(); + let duration_labels = crate::metrics::OPERATION_INDEX_ERROR_LABELS.with_values([ + operation_name, + OTEL_TRACES_INDEX_ID, + "true", + ]); histogram!( parent: &crate::metrics::REQUEST_DURATION_SECONDS, - "operation" => operation_name, - "index" => OTEL_TRACES_INDEX_ID, - "error" => "true", + labels: &duration_labels, ) .record(elapsed); } pub(crate) fn record_send(operation_name: &'static str, num_spans: usize, num_bytes: usize) { - counter!( - parent: &crate::metrics::FETCHED_SPANS_TOTAL, - "operation" => operation_name, - "index" => OTEL_TRACES_INDEX_ID, - ) - .increment(num_spans as u64); - counter!( - parent: &crate::metrics::TRANSFERRED_BYTES_TOTAL, - "operation" => operation_name, - "index" => OTEL_TRACES_INDEX_ID, - ) - .increment(num_bytes as u64); + let labels = + crate::metrics::OPERATION_INDEX_LABELS.with_values([operation_name, OTEL_TRACES_INDEX_ID]); + counter!(parent: &crate::metrics::FETCHED_SPANS_TOTAL, labels: &labels) + .increment(num_spans as u64); + counter!(parent: &crate::metrics::TRANSFERRED_BYTES_TOTAL, labels: &labels) + .increment(num_bytes as u64); } #[allow(deprecated)] diff --git a/quickwit/quickwit-jaeger/src/metrics.rs b/quickwit/quickwit-jaeger/src/metrics.rs index e5065e60529..4a53c75f41e 100644 --- a/quickwit/quickwit-jaeger/src/metrics.rs +++ b/quickwit/quickwit-jaeger/src/metrics.rs @@ -15,7 +15,11 @@ use std::sync::LazyLock; use quickwit_common::metrics::exponential_buckets; -use quickwit_metrics::{Counter, Histogram, counter, histogram}; +use 
quickwit_metrics::{Counter, Histogram, Labels, counter, histogram}; + +pub(crate) const OPERATION_INDEX_LABELS: Labels<2> = Labels::new(["operation", "index"]); +pub(crate) const OPERATION_INDEX_ERROR_LABELS: Labels<3> = + Labels::new(["operation", "index", "error"]); pub(crate) static REQUESTS_TOTAL: LazyLock = LazyLock::new(|| { counter!( diff --git a/quickwit/quickwit-jaeger/src/v1.rs b/quickwit/quickwit-jaeger/src/v1.rs index 8e4708688bb..dff0b08e26b 100644 --- a/quickwit/quickwit-jaeger/src/v1.rs +++ b/quickwit/quickwit-jaeger/src/v1.rs @@ -35,10 +35,10 @@ macro_rules! metrics { let start = std::time::Instant::now(); let operation = stringify!($operation); let index = $index; + let labels = crate::metrics::OPERATION_INDEX_LABELS.with_values([operation, index]); counter!( parent: &crate::metrics::REQUESTS_TOTAL, - "operation" => operation, - "index" => index, + labels: &labels, ) .increment(1); let (res, is_error) = match $expr { @@ -48,19 +48,18 @@ macro_rules! metrics { err @ Err(_) => { counter!( parent: &crate::metrics::REQUEST_ERRORS_TOTAL, - "operation" => operation, - "index" => index, + labels: &labels, ) .increment(1); (err, "true") }, }; let elapsed = start.elapsed().as_secs_f64(); + let duration_labels = + crate::metrics::OPERATION_INDEX_ERROR_LABELS.with_values([operation, index, is_error]); histogram!( parent: &crate::metrics::REQUEST_DURATION_SECONDS, - "operation" => operation, - "index" => index, - "error" => is_error, + labels: &duration_labels, ) .record(elapsed); diff --git a/quickwit/quickwit-jaeger/src/v2.rs b/quickwit/quickwit-jaeger/src/v2.rs index f62a58ea29f..4e8f4cf8532 100644 --- a/quickwit/quickwit-jaeger/src/v2.rs +++ b/quickwit/quickwit-jaeger/src/v2.rs @@ -61,10 +61,10 @@ macro_rules! 
metrics { let start = std::time::Instant::now(); let operation = stringify!($operation); let index = $index; + let labels = crate::metrics::OPERATION_INDEX_LABELS.with_values([operation, index]); counter!( parent: &crate::metrics::REQUESTS_TOTAL, - "operation" => operation, - "index" => index, + labels: &labels, ) .increment(1); let (res, is_error) = match $expr { @@ -74,19 +74,18 @@ macro_rules! metrics { err @ Err(_) => { counter!( parent: &crate::metrics::REQUEST_ERRORS_TOTAL, - "operation" => operation, - "index" => index, + labels: &labels, ) .increment(1); (err, "true") }, }; let elapsed = start.elapsed().as_secs_f64(); + let duration_labels = + crate::metrics::OPERATION_INDEX_ERROR_LABELS.with_values([operation, index, is_error]); histogram!( parent: &crate::metrics::REQUEST_DURATION_SECONDS, - "operation" => operation, - "index" => index, - "error" => is_error, + labels: &duration_labels, ) .record(elapsed); @@ -442,19 +441,20 @@ async fn stream_otel_spans_impl( record_send(operation_name, num_spans, num_bytes); - counter!( - parent: &crate::metrics::FETCHED_TRACES_TOTAL, - "operation" => operation_name, - "index" => OTEL_TRACES_INDEX_ID, - ) - .increment(trace_ids.len() as u64); + let labels = + crate::metrics::OPERATION_INDEX_LABELS.with_values([operation_name, OTEL_TRACES_INDEX_ID]); + counter!(parent: &crate::metrics::FETCHED_TRACES_TOTAL, labels: &labels) + .increment(trace_ids.len() as u64); let elapsed = request_start.elapsed().as_secs_f64(); + let duration_labels = crate::metrics::OPERATION_INDEX_ERROR_LABELS.with_values([ + operation_name, + OTEL_TRACES_INDEX_ID, + "false", + ]); histogram!( parent: &crate::metrics::REQUEST_DURATION_SECONDS, - "operation" => operation_name, - "index" => OTEL_TRACES_INDEX_ID, - "error" => "false", + labels: &duration_labels, ) .record(elapsed); diff --git a/quickwit/quickwit-janitor/src/actors/delete_task_planner.rs b/quickwit/quickwit-janitor/src/actors/delete_task_planner.rs index b25b06b4a68..b7b17845c57 100644 
--- a/quickwit/quickwit-janitor/src/actors/delete_task_planner.rs +++ b/quickwit/quickwit-janitor/src/actors/delete_task_planner.rs @@ -206,9 +206,10 @@ impl DeleteTaskPlanner { let index_label = quickwit_common::metrics::index_label(self.index_uid.index_id.as_str()) .to_string(); + let labels = crate::metrics::INDEX_LABELS.with_values([index_label]); gauge!( parent: &crate::metrics::ONGOING_NUM_DELETE_OPERATIONS_TOTAL, - "index" => index_label, + labels: &labels, ) .set(self.ongoing_delete_operations_inventory.list().len() as f64); } diff --git a/quickwit/quickwit-janitor/src/actors/garbage_collector.rs b/quickwit/quickwit-janitor/src/actors/garbage_collector.rs index 9c3a9914815..7f0df9d89f6 100644 --- a/quickwit/quickwit-janitor/src/actors/garbage_collector.rs +++ b/quickwit/quickwit-janitor/src/actors/garbage_collector.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::borrow::Cow; use std::collections::HashMap; use std::sync::Arc; use std::time::{Duration, Instant}; @@ -55,20 +56,23 @@ impl GcRunResult { fn gc_metrics(split_type: &str) -> GcMetrics { let split_type = split_type.to_string(); + let success_labels = crate::metrics::GC_RESULT_SPLIT_TYPE_LABELS + .with_values([Cow::Borrowed("success"), Cow::Owned(split_type.clone())]); + let split_type_labels = crate::metrics::GC_SPLIT_TYPE_LABELS.with_values([split_type.clone()]); + let error_labels = crate::metrics::GC_RESULT_SPLIT_TYPE_LABELS + .with_values([Cow::Borrowed("error"), Cow::Owned(split_type)]); GcMetrics { deleted_splits: counter!( parent: &crate::metrics::GC_DELETED_SPLITS, - "result" => "success", - "split_type" => split_type.clone(), + labels: &success_labels, ), deleted_bytes: counter!( parent: &crate::metrics::GC_DELETED_BYTES, - "split_type" => split_type.clone(), + labels: &split_type_labels, ), failed_splits: counter!( parent: &crate::metrics::GC_DELETED_SPLITS, - "result" => "error", - "split_type" => 
split_type, + labels: &error_labels, ), } } diff --git a/quickwit/quickwit-janitor/src/metrics.rs b/quickwit/quickwit-janitor/src/metrics.rs index 1e3adc48c83..a6e33562dcb 100644 --- a/quickwit/quickwit-janitor/src/metrics.rs +++ b/quickwit/quickwit-janitor/src/metrics.rs @@ -14,7 +14,7 @@ use std::sync::LazyLock; -use quickwit_metrics::{Counter, Gauge, counter, gauge}; +use quickwit_metrics::{Counter, Gauge, Labels, counter, gauge}; pub(crate) static ONGOING_NUM_DELETE_OPERATIONS_TOTAL: LazyLock = LazyLock::new(|| { gauge!( @@ -24,6 +24,8 @@ pub(crate) static ONGOING_NUM_DELETE_OPERATIONS_TOTAL: LazyLock = LazyLoc ) }); +pub(crate) const INDEX_LABELS: Labels<1> = Labels::new(["index"]); + pub(crate) static GC_DELETED_SPLITS: LazyLock = LazyLock::new(|| { counter!( name: "gc_deleted_splits_total", @@ -32,6 +34,8 @@ pub(crate) static GC_DELETED_SPLITS: LazyLock = LazyLock::new(|| { ) }); +pub(crate) const GC_RESULT_SPLIT_TYPE_LABELS: Labels<2> = Labels::new(["result", "split_type"]); + pub(crate) static GC_DELETED_BYTES: LazyLock = LazyLock::new(|| { counter!( name: "gc_deleted_bytes_total", @@ -40,6 +44,8 @@ pub(crate) static GC_DELETED_BYTES: LazyLock = LazyLock::new(|| { ) }); +pub(crate) const GC_SPLIT_TYPE_LABELS: Labels<1> = Labels::new(["split_type"]); + pub(crate) static GC_RUNS: LazyLock = LazyLock::new(|| { counter!( name: "gc_runs_total", diff --git a/quickwit/quickwit-lambda-client/src/invoker.rs b/quickwit/quickwit-lambda-client/src/invoker.rs index 6d5d5720a69..6b67431fdc5 100644 --- a/quickwit/quickwit-lambda-client/src/invoker.rs +++ b/quickwit/quickwit-lambda-client/src/invoker.rs @@ -170,14 +170,15 @@ impl LambdaLeafSearchInvoker for AwsLambdaInvoker { let result = self.invoke_leaf_search_with_retry(request).await; let elapsed = start.elapsed().as_secs_f64(); let status = if result.is_ok() { "success" } else { "error" }; + let labels = crate::metrics::STATUS_LABELS.with_values([status]); counter!( parent: 
&crate::metrics::LEAF_SEARCH_REQUESTS_TOTAL, - "status" => status, + labels: &labels, ) .increment(1); histogram!( parent: &crate::metrics::LEAF_SEARCH_DURATION_SECONDS, - "status" => status, + labels: &labels, ) .record(elapsed); result diff --git a/quickwit/quickwit-lambda-client/src/metrics.rs b/quickwit/quickwit-lambda-client/src/metrics.rs index 75e56577816..dd2ff87f189 100644 --- a/quickwit/quickwit-lambda-client/src/metrics.rs +++ b/quickwit/quickwit-lambda-client/src/metrics.rs @@ -17,7 +17,9 @@ use std::sync::LazyLock; use quickwit_common::metrics::exponential_buckets; -use quickwit_metrics::{Counter, Histogram, counter, histogram}; +use quickwit_metrics::{Counter, Histogram, Labels, counter, histogram}; + +pub(crate) const STATUS_LABELS: Labels<1> = Labels::new(["status"]); /// From 100ms to 73s seconds fn duration_buckets() -> Vec { diff --git a/quickwit/quickwit-opentelemetry/src/otlp/logs.rs b/quickwit/quickwit-opentelemetry/src/otlp/logs.rs index 9facb3ff0ff..b452cc04267 100644 --- a/quickwit/quickwit-opentelemetry/src/otlp/logs.rs +++ b/quickwit/quickwit-opentelemetry/src/otlp/logs.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+use std::borrow::Cow; use std::collections::HashMap; use async_trait::async_trait; @@ -239,22 +240,19 @@ impl OtlpGrpcLogsService { let num_bytes = doc_batch.num_bytes() as u64; self.store_logs(index_id.clone(), doc_batch).await?; + let labels = crate::otlp::metrics::OTLP_GRPC_LABELS.with_values([ + Cow::Borrowed("logs"), + Cow::Owned(index_id), + Cow::Borrowed("grpc"), + Cow::Borrowed("protobuf"), + ]); counter!( parent: &crate::otlp::metrics::INGESTED_LOG_RECORDS_TOTAL, - "service" => "logs", - "index" => index_id.clone(), - "transport" => "grpc", - "format" => "protobuf", + labels: &labels, ) .increment(num_log_records); - counter!( - parent: &crate::otlp::metrics::INGESTED_BYTES_TOTAL, - "service" => "logs", - "index" => index_id, - "transport" => "grpc", - "format" => "protobuf", - ) - .increment(num_bytes); + counter!(parent: &crate::otlp::metrics::INGESTED_BYTES_TOTAL, labels: &labels) + .increment(num_bytes); let response = ExportLogsServiceResponse { // `rejected_log_records=0` and `error_message=""` is consided a "full" success. 
@@ -325,12 +323,15 @@ impl OtlpGrpcLogsService { ) -> Result { let start = std::time::Instant::now(); + let labels = crate::otlp::metrics::OTLP_GRPC_LABELS.with_values([ + Cow::Borrowed("logs"), + Cow::Owned(index_id.clone()), + Cow::Borrowed("grpc"), + Cow::Borrowed("protobuf"), + ]); counter!( parent: &crate::otlp::metrics::REQUESTS_TOTAL, - "service" => "logs", - "index" => index_id.clone(), - "transport" => "grpc", - "format" => "protobuf", + labels: &labels, ) .increment(1); let (export_res, is_error) = match self.export_inner(request, index_id.clone()).await { @@ -338,23 +339,23 @@ impl OtlpGrpcLogsService { err @ Err(_) => { counter!( parent: &crate::otlp::metrics::REQUEST_ERRORS_TOTAL, - "service" => "logs", - "index" => index_id.clone(), - "transport" => "grpc", - "format" => "protobuf", + labels: &labels, ) .increment(1); (err, "true") } }; let elapsed = start.elapsed().as_secs_f64(); + let duration_labels = crate::otlp::metrics::OTLP_GRPC_ERROR_LABELS.with_values([ + Cow::Borrowed("logs"), + Cow::Owned(index_id), + Cow::Borrowed("grpc"), + Cow::Borrowed("protobuf"), + Cow::Borrowed(is_error), + ]); histogram!( parent: &crate::otlp::metrics::REQUEST_DURATION_SECONDS, - "service" => "logs", - "index" => index_id, - "transport" => "grpc", - "format" => "protobuf", - "error" => is_error, + labels: &duration_labels, ) .record(elapsed); diff --git a/quickwit/quickwit-opentelemetry/src/otlp/metrics.rs b/quickwit/quickwit-opentelemetry/src/otlp/metrics.rs index 92c34755bea..0f7c6c4d939 100644 --- a/quickwit/quickwit-opentelemetry/src/otlp/metrics.rs +++ b/quickwit/quickwit-opentelemetry/src/otlp/metrics.rs @@ -15,7 +15,12 @@ use std::sync::LazyLock; use quickwit_common::metrics::exponential_buckets; -use quickwit_metrics::{Counter, Histogram, counter, histogram}; +use quickwit_metrics::{Counter, Histogram, Labels, counter, histogram}; + +pub(crate) const OTLP_GRPC_LABELS: Labels<4> = + Labels::new(["service", "index", "transport", "format"]); +pub(crate) const 
OTLP_GRPC_ERROR_LABELS: Labels<5> = + Labels::new(["service", "index", "transport", "format", "error"]); pub(crate) static REQUESTS_TOTAL: LazyLock = LazyLock::new(|| { counter!( diff --git a/quickwit/quickwit-opentelemetry/src/otlp/otel_metrics.rs b/quickwit/quickwit-opentelemetry/src/otlp/otel_metrics.rs index 3e6fc83c173..60f59e889e0 100644 --- a/quickwit/quickwit-opentelemetry/src/otlp/otel_metrics.rs +++ b/quickwit/quickwit-opentelemetry/src/otlp/otel_metrics.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::borrow::Cow; use std::collections::HashMap; use async_trait::async_trait; @@ -235,22 +236,19 @@ impl OtlpGrpcMetricsService { let num_bytes = doc_batch.num_bytes() as u64; self.store_metrics(index_id.clone(), doc_batch).await?; + let labels = crate::otlp::metrics::OTLP_GRPC_LABELS.with_values([ + Cow::Borrowed("metrics"), + Cow::Owned(index_id), + Cow::Borrowed("grpc"), + Cow::Borrowed("protobuf"), + ]); counter!( parent: &crate::otlp::metrics::INGESTED_DATA_POINTS_TOTAL, - "service" => "metrics", - "index" => index_id.clone(), - "transport" => "grpc", - "format" => "protobuf", + labels: &labels, ) .increment(num_data_points - num_parse_errors); - counter!( - parent: &crate::otlp::metrics::INGESTED_BYTES_TOTAL, - "service" => "metrics", - "index" => index_id, - "transport" => "grpc", - "format" => "protobuf", - ) - .increment(num_bytes); + counter!(parent: &crate::otlp::metrics::INGESTED_BYTES_TOTAL, labels: &labels) + .increment(num_bytes); let response = ExportMetricsServiceResponse { partial_success: Some(ExportMetricsPartialSuccess { @@ -339,12 +337,15 @@ impl OtlpGrpcMetricsService { ) -> Result { let start = std::time::Instant::now(); + let labels = crate::otlp::metrics::OTLP_GRPC_LABELS.with_values([ + Cow::Borrowed("metrics"), + Cow::Owned(index_id.clone()), + Cow::Borrowed("grpc"), + Cow::Borrowed("protobuf"), + ]); counter!( parent: 
&crate::otlp::metrics::REQUESTS_TOTAL, - "service" => "metrics", - "index" => index_id.clone(), - "transport" => "grpc", - "format" => "protobuf", + labels: &labels, ) .increment(1); @@ -353,10 +354,7 @@ impl OtlpGrpcMetricsService { err @ Err(_) => { counter!( parent: &crate::otlp::metrics::REQUEST_ERRORS_TOTAL, - "service" => "metrics", - "index" => index_id.clone(), - "transport" => "grpc", - "format" => "protobuf", + labels: &labels, ) .increment(1); (err, "true") @@ -364,13 +362,16 @@ impl OtlpGrpcMetricsService { }; let elapsed = start.elapsed().as_secs_f64(); + let duration_labels = crate::otlp::metrics::OTLP_GRPC_ERROR_LABELS.with_values([ + Cow::Borrowed("metrics"), + Cow::Owned(index_id), + Cow::Borrowed("grpc"), + Cow::Borrowed("protobuf"), + Cow::Borrowed(is_error), + ]); histogram!( parent: &crate::otlp::metrics::REQUEST_DURATION_SECONDS, - "service" => "metrics", - "index" => index_id, - "transport" => "grpc", - "format" => "protobuf", - "error" => is_error, + labels: &duration_labels, ) .record(elapsed); diff --git a/quickwit/quickwit-opentelemetry/src/otlp/traces.rs b/quickwit/quickwit-opentelemetry/src/otlp/traces.rs index d1f9be51a4e..097869df351 100644 --- a/quickwit/quickwit-opentelemetry/src/otlp/traces.rs +++ b/quickwit/quickwit-opentelemetry/src/otlp/traces.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+use std::borrow::Cow; use std::cmp::PartialEq; use std::collections::HashMap; use std::str::FromStr; @@ -701,22 +702,19 @@ impl OtlpGrpcTracesService { let num_bytes = doc_batch.num_bytes() as u64; self.store_spans(index_id.clone(), doc_batch).await?; + let labels = crate::otlp::metrics::OTLP_GRPC_LABELS.with_values([ + Cow::Borrowed("trace"), + Cow::Owned(index_id), + Cow::Borrowed("grpc"), + Cow::Borrowed("protobuf"), + ]); counter!( parent: &crate::otlp::metrics::INGESTED_SPANS_TOTAL, - "service" => "trace", - "index" => index_id.clone(), - "transport" => "grpc", - "format" => "protobuf", + labels: &labels, ) .increment(num_spans); - counter!( - parent: &crate::otlp::metrics::INGESTED_BYTES_TOTAL, - "service" => "trace", - "index" => index_id, - "transport" => "grpc", - "format" => "protobuf", - ) - .increment(num_bytes); + counter!(parent: &crate::otlp::metrics::INGESTED_BYTES_TOTAL, labels: &labels) + .increment(num_bytes); let response = ExportTraceServiceResponse { // `rejected_spans=0` and `error_message=""` is considered a "full" success. 
@@ -787,12 +785,15 @@ impl OtlpGrpcTracesService { ) -> Result { let start = std::time::Instant::now(); + let labels = crate::otlp::metrics::OTLP_GRPC_LABELS.with_values([ + Cow::Borrowed("trace"), + Cow::Owned(index_id.clone()), + Cow::Borrowed("grpc"), + Cow::Borrowed("protobuf"), + ]); counter!( parent: &crate::otlp::metrics::REQUESTS_TOTAL, - "service" => "trace", - "index" => index_id.clone(), - "transport" => "grpc", - "format" => "protobuf", + labels: &labels, ) .increment(1); let (export_res, is_error) = match self.export_inner(request, index_id.clone()).await { @@ -800,23 +801,23 @@ impl OtlpGrpcTracesService { err @ Err(_) => { counter!( parent: &crate::otlp::metrics::REQUEST_ERRORS_TOTAL, - "service" => "trace", - "index" => index_id.clone(), - "transport" => "grpc", - "format" => "protobuf", + labels: &labels, ) .increment(1); (err, "true") } }; let elapsed = start.elapsed().as_secs_f64(); + let duration_labels = crate::otlp::metrics::OTLP_GRPC_ERROR_LABELS.with_values([ + Cow::Borrowed("trace"), + Cow::Owned(index_id), + Cow::Borrowed("grpc"), + Cow::Borrowed("protobuf"), + Cow::Borrowed(is_error), + ]); histogram!( parent: &crate::otlp::metrics::REQUEST_DURATION_SECONDS, - "service" => "trace", - "index" => index_id, - "transport" => "grpc", - "format" => "protobuf", - "error" => is_error, + labels: &duration_labels, ) .record(elapsed); diff --git a/quickwit/quickwit-search/src/metrics.rs b/quickwit/quickwit-search/src/metrics.rs index c811a8b0c1e..f35a323d313 100644 --- a/quickwit/quickwit-search/src/metrics.rs +++ b/quickwit/quickwit-search/src/metrics.rs @@ -20,7 +20,10 @@ use std::sync::{Arc, LazyLock}; use bytesize::ByteSize; use quickwit_common::metrics::{exponential_buckets, linear_buckets}; -use quickwit_metrics::{Counter, Gauge, Histogram, counter, gauge, histogram}; +use quickwit_metrics::{Counter, Gauge, Histogram, Labels, counter, gauge, histogram}; + +pub(crate) const STATUS_LABELS: Labels<1> = Labels::new(["status"]); +pub(crate) const 
AFFINITY_LABELS: Labels<1> = Labels::new(["affinity"]); fn print_if_not_null( field_name: &'static str, diff --git a/quickwit/quickwit-search/src/metrics_trackers.rs b/quickwit/quickwit-search/src/metrics_trackers.rs index fad66fe1c3e..032bcde5a22 100644 --- a/quickwit/quickwit-search/src/metrics_trackers.rs +++ b/quickwit/quickwit-search/src/metrics_trackers.rs @@ -69,19 +69,20 @@ impl PinnedDrop for RootSearchMetricsFuture { ) => (*num_targeted_splits, "cancelled"), }; + let labels = crate::metrics::STATUS_LABELS.with_values([status]); counter!( parent: &crate::metrics::ROOT_SEARCH_REQUESTS_TOTAL, - "status" => status, + labels: &labels, ) .increment(1); histogram!( parent: &crate::metrics::ROOT_SEARCH_REQUEST_DURATION_SECONDS, - "status" => status, + labels: &labels, ) .record(self.start.elapsed().as_secs_f64()); histogram!( parent: &crate::metrics::ROOT_SEARCH_TARGETED_SPLITS, - "status" => status, + labels: &labels, ) .record(num_targeted_splits as f64); } @@ -120,19 +121,20 @@ where F: Future> { fn drop(self: Pin<&mut Self>) { let status = self.status.unwrap_or("cancelled"); + let labels = crate::metrics::STATUS_LABELS.with_values([status]); counter!( parent: &crate::metrics::LEAF_SEARCH_REQUESTS_TOTAL, - "status" => status, + labels: &labels, ) .increment(1); histogram!( parent: &crate::metrics::LEAF_SEARCH_REQUEST_DURATION_SECONDS, - "status" => status, + labels: &labels, ) .record(self.start.elapsed().as_secs_f64()); histogram!( parent: &crate::metrics::LEAF_SEARCH_TARGETED_SPLITS, - "status" => status, + labels: &labels, ) .record(self.targeted_splits as f64); } diff --git a/quickwit/quickwit-search/src/search_job_placer.rs b/quickwit/quickwit-search/src/search_job_placer.rs index f5993308d9f..8e60de3a6f8 100644 --- a/quickwit/quickwit-search/src/search_job_placer.rs +++ b/quickwit/quickwit-search/src/search_job_placer.rs @@ -218,11 +218,8 @@ impl SearchJobPlacer { 1 => "1", _ => "> 1", }; - counter!( - parent: &crate::metrics::JOB_ASSIGNED_TOTAL, - 
"affinity" => metric_node_idx, - ) - .increment(1); + let labels = crate::metrics::AFFINITY_LABELS.with_values([metric_node_idx]); + counter!(parent: &crate::metrics::JOB_ASSIGNED_TOTAL, labels: &labels).increment(1); chosen_node.load += job.cost(); job_assignments diff --git a/quickwit/quickwit-serve/src/load_shield.rs b/quickwit/quickwit-serve/src/load_shield.rs index 52b14cd4c7f..09008216849 100644 --- a/quickwit/quickwit-serve/src/load_shield.rs +++ b/quickwit/quickwit-serve/src/load_shield.rs @@ -43,14 +43,9 @@ impl LoadShield { quickwit_common::get_from_env_opt(&max_concurrency_env_key, false); let in_flight_semaphore_opt = max_in_flight_opt.map(Semaphore::new); let concurrency_semaphore_opt = max_concurrency_opt.map(Semaphore::new); - let pending_gauge = gauge!( - parent: &crate::metrics::PENDING_REQUESTS, - "endpoint_group" => endpoint_group, - ); - let ongoing_gauge = gauge!( - parent: &crate::metrics::ONGOING_REQUESTS, - "endpoint_group" => endpoint_group, - ); + let labels = crate::metrics::ENDPOINT_GROUP_LABELS.with_values([endpoint_group]); + let pending_gauge = gauge!(parent: &crate::metrics::PENDING_REQUESTS, labels: &labels); + let ongoing_gauge = gauge!(parent: &crate::metrics::ONGOING_REQUESTS, labels: &labels); LoadShield { in_flight_semaphore_opt, concurrency_semaphore_opt, diff --git a/quickwit/quickwit-serve/src/metrics.rs b/quickwit/quickwit-serve/src/metrics.rs index 79e8aac6861..798fbe56235 100644 --- a/quickwit/quickwit-serve/src/metrics.rs +++ b/quickwit/quickwit-serve/src/metrics.rs @@ -15,7 +15,10 @@ use std::sync::LazyLock; use quickwit_common::metrics::exponential_buckets; -use quickwit_metrics::{Counter, Gauge, Histogram, counter, gauge, histogram}; +use quickwit_metrics::{Counter, Gauge, Histogram, Labels, counter, gauge, histogram}; + +pub(crate) const HTTP_REQUEST_LABELS: Labels<2> = Labels::new(["method", "status_code"]); +pub(crate) const ENDPOINT_GROUP_LABELS: Labels<1> = Labels::new(["endpoint_group"]); pub(crate) static 
HTTP_REQUESTS_TOTAL: LazyLock = LazyLock::new(|| { counter!( diff --git a/quickwit/quickwit-serve/src/rest.rs b/quickwit/quickwit-serve/src/rest.rs index ed33cde48c6..6911671a2cd 100644 --- a/quickwit/quickwit-serve/src/rest.rs +++ b/quickwit/quickwit-serve/src/rest.rs @@ -140,18 +140,13 @@ pub(crate) async fn start_rest_server( let status = info.status(); let method = info.method().as_str().to_string(); let status_code = status.as_str().to_string(); + let labels = crate::metrics::HTTP_REQUEST_LABELS.with_values([method, status_code]); histogram!( parent: &crate::metrics::REQUEST_DURATION_SECS, - "method" => method.clone(), - "status_code" => status_code.clone(), + labels: &labels, ) .record(elapsed.as_secs_f64()); - counter!( - parent: &crate::metrics::HTTP_REQUESTS_TOTAL, - "method" => method, - "status_code" => status_code, - ) - .increment(1); + counter!(parent: &crate::metrics::HTTP_REQUESTS_TOTAL, labels: &labels).increment(1); }); // Docs routes let api_doc = warp::path("openapi.json") diff --git a/quickwit/quickwit-storage/src/metrics.rs b/quickwit/quickwit-storage/src/metrics.rs index dff2a6f1a4e..25dacefe486 100644 --- a/quickwit/quickwit-storage/src/metrics.rs +++ b/quickwit/quickwit-storage/src/metrics.rs @@ -19,7 +19,7 @@ use std::collections::HashMap; use std::sync::{LazyLock, RwLock}; use quickwit_config::CacheConfig; -use quickwit_metrics::{Counter, Gauge, GaugeGuard, Histogram, counter, gauge, histogram}; +use quickwit_metrics::{Counter, Gauge, GaugeGuard, Histogram, Labels, counter, gauge, histogram}; static GET_SLICE_TIMEOUT_OUTCOME_TOTAL: LazyLock = LazyLock::new(|| { counter!( @@ -119,6 +119,8 @@ pub static OBJECT_STORAGE_GET_ERRORS_TOTAL: LazyLock = LazyLock::new(|| ) }); +pub(crate) const OBJECT_STORAGE_GET_ERROR_LABELS: Labels<1> = Labels::new(["code"]); + pub static OBJECT_STORAGE_GET_SLICE_IN_FLIGHT_COUNT: LazyLock = LazyLock::new(|| { gauge!( name: "object_storage_get_slice_in_flight_count", @@ -187,37 +189,18 @@ pub struct 
SingleCacheMetrics { impl CacheMetrics { pub fn for_component(component_name: &str) -> Self { + let component_name = component_name.to_string(); + let labels = CACHE_LABELS.with_values([component_name.clone()]); CacheMetrics { - component_name: component_name.to_string(), + component_name, cache_metrics: SingleCacheMetrics { - in_cache_count: gauge!( - parent: &*CACHE_IN_CACHE_COUNT, - "component_name" => component_name.to_string(), - ), - in_cache_num_bytes: gauge!( - parent: &*CACHE_IN_CACHE_NUM_BYTES, - "component_name" => component_name.to_string(), - ), - hits_num_items: counter!( - parent: &*CACHE_HITS_TOTAL, - "component_name" => component_name.to_string(), - ), - hits_num_bytes: counter!( - parent: &*CACHE_HITS_BYTES, - "component_name" => component_name.to_string(), - ), - misses_num_items: counter!( - parent: &*CACHE_MISSES_TOTAL, - "component_name" => component_name.to_string(), - ), - evict_num_items: counter!( - parent: &*CACHE_EVICT_TOTAL, - "component_name" => component_name.to_string(), - ), - evict_num_bytes: counter!( - parent: &*CACHE_EVICT_BYTES, - "component_name" => component_name.to_string(), - ), + in_cache_count: gauge!(parent: &*CACHE_IN_CACHE_COUNT, labels: &labels), + in_cache_num_bytes: gauge!(parent: &*CACHE_IN_CACHE_NUM_BYTES, labels: &labels), + hits_num_items: counter!(parent: &*CACHE_HITS_TOTAL, labels: &labels), + hits_num_bytes: counter!(parent: &*CACHE_HITS_BYTES, labels: &labels), + misses_num_items: counter!(parent: &*CACHE_MISSES_TOTAL, labels: &labels), + evict_num_items: counter!(parent: &*CACHE_EVICT_TOTAL, labels: &labels), + evict_num_bytes: counter!(parent: &*CACHE_EVICT_BYTES, labels: &labels), }, virtual_caches_metrics: RwLock::default(), } @@ -231,48 +214,36 @@ impl CacheMetrics { let capacity = config.capacity().as_u64().to_string(); let policy = config.policy().to_string(); + let labels = + VIRTUAL_CACHE_LABELS.with_values([self.component_name.clone(), capacity, policy]); let new_virtual_cache_metrics = 
SingleCacheMetrics { in_cache_count: gauge!( parent: &*VIRTUAL_CACHE_IN_CACHE_COUNT, - "component_name" => self.component_name.clone(), - "capacity" => capacity.clone(), - "policy" => policy.clone(), + labels: &labels, ), in_cache_num_bytes: gauge!( parent: &*VIRTUAL_CACHE_IN_CACHE_NUM_BYTES, - "component_name" => self.component_name.clone(), - "capacity" => capacity.clone(), - "policy" => policy.clone(), + labels: &labels, ), hits_num_items: counter!( parent: &*VIRTUAL_CACHE_HITS_TOTAL, - "component_name" => self.component_name.clone(), - "capacity" => capacity.clone(), - "policy" => policy.clone(), + labels: &labels, ), hits_num_bytes: counter!( parent: &*VIRTUAL_CACHE_HITS_BYTES, - "component_name" => self.component_name.clone(), - "capacity" => capacity.clone(), - "policy" => policy.clone(), + labels: &labels, ), misses_num_items: counter!( parent: &*VIRTUAL_CACHE_MISSES_TOTAL, - "component_name" => self.component_name.clone(), - "capacity" => capacity.clone(), - "policy" => policy.clone(), + labels: &labels, ), evict_num_items: counter!( parent: &*VIRTUAL_CACHE_EVICT_TOTAL, - "component_name" => self.component_name.clone(), - "capacity" => capacity.clone(), - "policy" => policy.clone(), + labels: &labels, ), evict_num_bytes: counter!( parent: &*VIRTUAL_CACHE_EVICT_BYTES, - "component_name" => self.component_name.clone(), - "capacity" => capacity, - "policy" => policy, + labels: &labels, ), }; @@ -285,6 +256,9 @@ impl CacheMetrics { } } +const CACHE_LABELS: Labels<1> = Labels::new(["component_name"]); +const VIRTUAL_CACHE_LABELS: Labels<3> = Labels::new(["component_name", "capacity", "policy"]); + static CACHE_IN_CACHE_COUNT: LazyLock = LazyLock::new(|| { gauge!( name: "in_cache_count", diff --git a/quickwit/quickwit-storage/src/object_storage/error.rs b/quickwit/quickwit-storage/src/object_storage/error.rs index eed6301d96b..24b1e3b118e 100644 --- a/quickwit/quickwit-storage/src/object_storage/error.rs +++ 
b/quickwit/quickwit-storage/src/object_storage/error.rs @@ -64,11 +64,8 @@ pub trait ToStorageErrorKind { impl ToStorageErrorKind for GetObjectError { fn to_storage_error_kind(&self) -> StorageErrorKind { let error_code = self.code().unwrap_or("unknown").to_string(); - counter!( - parent: &crate::OBJECT_STORAGE_GET_ERRORS_TOTAL, - "code" => error_code, - ) - .increment(1); + let labels = crate::metrics::OBJECT_STORAGE_GET_ERROR_LABELS.with_values([error_code]); + counter!(parent: &crate::OBJECT_STORAGE_GET_ERRORS_TOTAL, labels: &labels).increment(1); match self { GetObjectError::InvalidObjectState(_) => StorageErrorKind::Service, GetObjectError::NoSuchKey(_) => StorageErrorKind::NotFound, From 7a01598635b1fdb7436a652e4e621a9a54e05678 Mon Sep 17 00:00:00 2001 From: Shuhei Kitagawa Date: Sat, 2 May 2026 16:59:17 +0200 Subject: [PATCH 14/54] Fix CI after metrics migration rebase --- LICENSE-3rdparty.csv | 15 +++++++++++ quickwit/Cargo.lock | 1 - .../src/actors/merge_scheduler_service.rs | 25 ++++++------------- .../parquet_merge_pipeline.rs | 6 ++--- quickwit/quickwit-metrics/src/counter.rs | 2 +- quickwit/quickwit-metrics/src/gauge.rs | 2 +- quickwit/quickwit-metrics/src/histogram.rs | 2 +- quickwit/quickwit-parquet-engine/Cargo.toml | 1 - quickwit/quickwit-search/src/metrics.rs | 2 +- 9 files changed, 28 insertions(+), 28 deletions(-) diff --git a/LICENSE-3rdparty.csv b/LICENSE-3rdparty.csv index 5c8b54949cd..cc149f03ae6 100644 --- a/LICENSE-3rdparty.csv +++ b/LICENSE-3rdparty.csv @@ -59,6 +59,7 @@ async-task,https://github.com/smol-rs/async-task,Apache-2.0 OR MIT,Stjepan Glavi async-trait,https://github.com/dtolnay/async-trait,MIT OR Apache-2.0,David Tolnay atoi,https://github.com/pacman82/atoi-rs,MIT,Markus Klein atomic-waker,https://github.com/smol-rs/atomic-waker,Apache-2.0 OR MIT,"Stjepan Glavina , Contributors to futures-rs" +atomic_float,https://github.com/thomcc/atomic_float,Apache-2.0 OR MIT OR Unlicense,Thom Chiovoloni 
aws-config,https://github.com/smithy-lang/smithy-rs,Apache-2.0,"AWS Rust SDK Team , Russell Cohen " aws-credential-types,https://github.com/smithy-lang/smithy-rs,Apache-2.0,AWS Rust SDK Team aws-lc-rs,https://github.com/aws/aws-lc-rs,ISC AND (Apache-2.0 OR ISC),AWS-LibCrypto @@ -179,6 +180,8 @@ const-oid,https://github.com/RustCrypto/formats/tree/master/const-oid,Apache-2.0 const-random,https://github.com/tkaitchuck/constrandom,MIT OR Apache-2.0,Tom Kaitchuck const-random-macro,https://github.com/tkaitchuck/constrandom,MIT OR Apache-2.0,Tom Kaitchuck const_fn,https://github.com/taiki-e/const_fn,Apache-2.0 OR MIT,The const_fn Authors +const_format,https://github.com/rodrimati1992/const_format_crates,Zlib,rodrimati1992 +const_format_proc_macros,https://github.com/rodrimati1992/const_format_crates,Zlib,rodrimati1992 constant_time_eq,https://github.com/cesarb/constant_time_eq,CC0-1.0 OR MIT-0 OR Apache-2.0,Cesar Eduardo Barros convert_case,https://github.com/rutrum/convert-case,MIT,rutrum core-foundation,https://github.com/servo/core-foundation-rs,MIT OR Apache-2.0,The Servo Project Developers @@ -190,6 +193,7 @@ crc-catalog,https://github.com/akhilles/crc-catalog,MIT OR Apache-2.0,Akhil Vela crc-fast,https://github.com/awesomized/crc-fast-rust,MIT OR Apache-2.0,Don MacAskill crc32fast,https://github.com/srijs/rust-crc32fast,MIT OR Apache-2.0,"Sam Rijs , Alex Crichton " criterion-plot,https://github.com/criterion-rs/criterion.rs,Apache-2.0 OR MIT,"Jorge Aparicio , Brook Heisler " +critical-section,https://github.com/rust-embedded/critical-section,MIT OR Apache-2.0,The critical-section Authors cron,https://github.com/zslayton/cron,MIT OR Apache-2.0,Zack Slayton crossbeam-channel,https://github.com/crossbeam-rs/crossbeam,MIT OR Apache-2.0,The crossbeam-channel Authors crossbeam-deque,https://github.com/crossbeam-rs/crossbeam,MIT OR Apache-2.0,The crossbeam-deque Authors @@ -295,6 +299,7 @@ etcetera,https://github.com/lunacookies/etcetera,MIT OR Apache-2.0,The etcetera 
event-listener,https://github.com/smol-rs/event-listener,Apache-2.0 OR MIT,Stjepan Glavina event-listener,https://github.com/smol-rs/event-listener,Apache-2.0 OR MIT,"Stjepan Glavina , John Nunley " event-listener-strategy,https://github.com/smol-rs/event-listener-strategy,Apache-2.0 OR MIT,John Nunley +evmap,https://github.com/jonhoo/evmap,MIT OR Apache-2.0,Jon Gjengset fail,https://github.com/tikv/fail-rs,Apache-2.0,The TiKV Project Developers fancy-regex,https://github.com/fancy-regex/fancy-regex,MIT,"Raph Levien , Robin Stocker , Keith Hall " fastdivide,https://github.com/fulmicoton/fastdivide,zlib-acknowledgement OR MIT,Paul Masurel @@ -330,6 +335,7 @@ futures-sink,https://github.com/rust-lang/futures-rs,MIT OR Apache-2.0,The futur futures-task,https://github.com/rust-lang/futures-rs,MIT OR Apache-2.0,The futures-task Authors futures-timer,https://github.com/async-rs/futures-timer,MIT OR Apache-2.0,Alex Crichton futures-util,https://github.com/rust-lang/futures-rs,MIT OR Apache-2.0,The futures-util Authors +generator,https://github.com/Xudong-Huang/generator-rs,MIT OR Apache-2.0,Xudong Huang generic-array,https://github.com/fizyk20/generic-array,MIT,"Bartłomiej Kamiński , Aaron Trent " getrandom,https://github.com/rust-random/getrandom,MIT OR Apache-2.0,The Rand Project Developers gimli,https://github.com/gimli-rs/gimli,MIT OR Apache-2.0,The gimli Authors @@ -346,6 +352,7 @@ grok,https://github.com/mmastrac/grok,Apache-2.0,"Matt Mastracci , Jack Grigg " h2,https://github.com/hyperium/h2,MIT,"Carl Lerche , Sean McArthur " half,https://github.com/VoidStarKat/half-rs,MIT OR Apache-2.0,Kathryn Long +hashbag,https://github.com/jonhoo/hashbag,MIT OR Apache-2.0,Jon Gjengset hashbrown,https://github.com/rust-lang/hashbrown,MIT OR Apache-2.0,Amanieu d'Antras hashlink,https://github.com/kyren/hashlink,MIT OR Apache-2.0,kyren hdrhistogram,https://github.com/HdrHistogram/HdrHistogram_rust,MIT OR Apache-2.0,"Jon Gjengset , Marshall Pierce " @@ -418,12 +425,15 @@ 
json_comments,https://github.com/tmccombs/json-comments-rs,Apache-2.0,Thayne McC jsonschema,https://github.com/Stranger6667/jsonschema,MIT,Dmitry Dygalo jsonwebtoken,https://github.com/Keats/jsonwebtoken,MIT,Vincent Prouillet keccak,https://github.com/RustCrypto/sponges/tree/master/keccak,Apache-2.0 OR MIT,RustCrypto Developers +konst,https://github.com/rodrimati1992/konst,Zlib,rodrimati1992 +konst_macro_rules,https://github.com/rodrimati1992/konst,Zlib,rodrimati1992 krb5-src,https://github.com/MaterializeInc/rust-krb5-src,Apache-2.0,"Materialize, Inc." lalrpop-util,https://github.com/lalrpop/lalrpop,Apache-2.0 OR MIT,Niko Matsakis lambda_runtime,https://github.com/awslabs/aws-lambda-rust-runtime,Apache-2.0,"David Calavera , Harold Sun " lambda_runtime_api_client,https://github.com/awslabs/aws-lambda-rust-runtime,Apache-2.0,"David Calavera , Harold Sun " lazy_static,https://github.com/rust-lang-nursery/lazy-static.rs,MIT OR Apache-2.0,Marvin Löbel leb128fmt,https://github.com/bluk/leb128fmt,MIT OR Apache-2.0,Bryant Luk +left-right,https://github.com/jonhoo/left-right,MIT OR Apache-2.0,Jon Gjengset levenshtein_automata,https://github.com/tantivy-search/levenshtein-automata,MIT,Paul Masurel lexical-core,https://github.com/Alexhuszagh/rust-lexical,MIT OR Apache-2.0,Alex Huszagh lexical-parse-float,https://github.com/Alexhuszagh/rust-lexical,MIT OR Apache-2.0,Alex Huszagh @@ -445,6 +455,7 @@ linux-raw-sys,https://github.com/sunfishcode/linux-raw-sys,Apache-2.0 WITH LLVM- litemap,https://github.com/unicode-org/icu4x,Unicode-3.0,The ICU4X Project Developers lock_api,https://github.com/Amanieu/parking_lot,MIT OR Apache-2.0,Amanieu d'Antras log,https://github.com/rust-lang/log,MIT OR Apache-2.0,The Rust Project Developers +loom,https://github.com/tokio-rs/loom,MIT,Carl Lerche lru,https://github.com/jeromefroe/lru-rs,MIT,Jerome Froelich lru-slab,https://github.com/Ralith/lru-slab,MIT OR Apache-2.0 OR Zlib,Benjamin Saunders lz4,https://github.com/10xGenomics/lz4-rs,MIT,"Jens 
Heyens , Artem V. Navrotskiy , Patrick Marks " @@ -459,6 +470,8 @@ memchr,https://github.com/BurntSushi/memchr,Unlicense OR MIT,"Andrew Gallant , Yevhenii Reizner , The Contributors" metrics,https://github.com/metrics-rs/metrics,MIT,Toby Lawrence metrics-exporter-dogstatsd,https://github.com/metrics-rs/metrics,MIT,Toby Lawrence +metrics-exporter-otel,https://github.com/palindrom615/metrics,MIT,Whoemoon Jang +metrics-exporter-prometheus,https://github.com/metrics-rs/metrics,MIT AND Apache-2.0,Toby Lawrence metrics-util,https://github.com/metrics-rs/metrics,MIT,Toby Lawrence mime,https://github.com/hyperium/mime,MIT OR Apache-2.0,Sean McArthur mime_guess,https://github.com/abonander/mime_guess,MIT,Austin Bonander @@ -648,6 +661,7 @@ rand_core,https://github.com/rust-random/rand_core,MIT OR Apache-2.0,The Rand Pr rand_hc,https://github.com/rust-random/rand,MIT OR Apache-2.0,The Rand Project Developers rand_xorshift,https://github.com/rust-random/rngs,MIT OR Apache-2.0,"The Rand Project Developers, The Rust Project Developers" rand_xoshiro,https://github.com/rust-random/rngs,MIT OR Apache-2.0,The Rand Project Developers +rapidhash,https://github.com/hoxxep/rapidhash,MIT OR Apache-2.0,Liam Gray raw-cpuid,https://github.com/gz/rust-cpuid,MIT,Gerd Zellweger rayon,https://github.com/rayon-rs/rayon,MIT OR Apache-2.0,The rayon Authors rayon-core,https://github.com/rayon-rs/rayon,MIT OR Apache-2.0,The rayon-core Authors @@ -712,6 +726,7 @@ seahash,https://gitlab.redox-os.org/redox-os/seahash,MIT,"ticki , Kornel " security-framework-sys,https://github.com/kornelski/rust-security-framework,MIT OR Apache-2.0,"Steven Fackler , Kornel " +seize,https://github.com/ibraheemdev/seize,MIT,Ibraheem Ahmed semver,https://github.com/dtolnay/semver,MIT OR Apache-2.0,David Tolnay separator,https://github.com/saghm/rust-separator,MIT,Saghm Rossi seq-macro,https://github.com/dtolnay/seq-macro,MIT OR Apache-2.0,David Tolnay diff --git a/quickwit/Cargo.lock b/quickwit/Cargo.lock index 
388066efa22..8b12b0809c3 100644 --- a/quickwit/Cargo.lock +++ b/quickwit/Cargo.lock @@ -9109,7 +9109,6 @@ dependencies = [ "parquet", "proptest", "prost 0.14.3", - "quickwit-common", "quickwit-dst", "quickwit-metrics", "quickwit-proto", diff --git a/quickwit/quickwit-indexing/src/actors/merge_scheduler_service.rs b/quickwit/quickwit-indexing/src/actors/merge_scheduler_service.rs index 477d4f44027..fc8bfff5567 100644 --- a/quickwit/quickwit-indexing/src/actors/merge_scheduler_service.rs +++ b/quickwit/quickwit-indexing/src/actors/merge_scheduler_service.rs @@ -269,15 +269,10 @@ impl MergeSchedulerService { merge_permit, }; self.pending_merge_bytes -= parquet_merge_task.merge_operation.total_size_bytes(); - crate::metrics::INDEXER_METRICS - .pending_merge_operations - .set( - self.pending_merge_queue.len() as i64 - + self.pending_parquet_merge_queue.len() as i64, - ); - crate::metrics::INDEXER_METRICS - .pending_merge_bytes - .set(self.pending_merge_bytes as i64); + crate::metrics::PENDING_MERGE_OPERATIONS.set( + (self.pending_merge_queue.len() + self.pending_parquet_merge_queue.len()) as f64, + ); + crate::metrics::PENDING_MERGE_BYTES.set(self.pending_merge_bytes as f64); match split_downloader_mailbox.try_send_message(parquet_merge_task) { Ok(_) => {} Err(quickwit_actors::TrySendError::Full(_)) => { @@ -457,15 +452,9 @@ impl Handler for MergeSchedulerService { }; self.pending_merge_bytes += scheduled.merge_operation.total_size_bytes(); self.pending_parquet_merge_queue.push(scheduled); - crate::metrics::INDEXER_METRICS - .pending_merge_operations - .set( - self.pending_merge_queue.len() as i64 - + self.pending_parquet_merge_queue.len() as i64, - ); - crate::metrics::INDEXER_METRICS - .pending_merge_bytes - .set(self.pending_merge_bytes as i64); + crate::metrics::PENDING_MERGE_OPERATIONS + .set((self.pending_merge_queue.len() + self.pending_parquet_merge_queue.len()) as f64); + crate::metrics::PENDING_MERGE_BYTES.set(self.pending_merge_bytes as f64); 
self.schedule_pending_merges(ctx); Ok(()) } diff --git a/quickwit/quickwit-indexing/src/actors/metrics_pipeline/parquet_merge_pipeline.rs b/quickwit/quickwit-indexing/src/actors/metrics_pipeline/parquet_merge_pipeline.rs index d1347f2d4bf..7ce7b312d17 100644 --- a/quickwit/quickwit-indexing/src/actors/metrics_pipeline/parquet_merge_pipeline.rs +++ b/quickwit/quickwit-indexing/src/actors/metrics_pipeline/parquet_merge_pipeline.rs @@ -367,9 +367,7 @@ impl ParquetMergePipeline { handles.merge_planner.refresh_observe(); handles.merge_uploader.refresh_observe(); handles.merge_publisher.refresh_observe(); - let num_ongoing_merges = crate::metrics::INDEXER_METRICS - .ongoing_merge_operations - .get(); + let num_ongoing_merges = crate::metrics::ONGOING_MERGE_OPERATIONS.get(); self.statistics = self .previous_generations_statistics .clone() @@ -379,7 +377,7 @@ impl ParquetMergePipeline { ) .set_generation(self.statistics.generation) .set_num_spawn_attempts(self.statistics.num_spawn_attempts) - .set_ongoing_merges(usize::try_from(num_ongoing_merges).unwrap_or(0)); + .set_ongoing_merges(num_ongoing_merges.max(0.0) as usize); } async fn perform_health_check( diff --git a/quickwit/quickwit-metrics/src/counter.rs b/quickwit/quickwit-metrics/src/counter.rs index e56e978c061..a35130ec953 100644 --- a/quickwit/quickwit-metrics/src/counter.rs +++ b/quickwit/quickwit-metrics/src/counter.rs @@ -101,7 +101,7 @@ impl CounterInner { /// A registered counter metric backed by [`metrics::Counter`]. /// -/// Created via the [`counter!`] macro, either as a base declaration with +/// Created via the `counter!` macro, either as a base declaration with /// static labels or as a child that extends a parent's labels at runtime. 
/// /// Counters are **monotonically increasing** — use [`increment`](Self::increment) diff --git a/quickwit/quickwit-metrics/src/gauge.rs b/quickwit/quickwit-metrics/src/gauge.rs index c9eb69f3430..3c55c06b4e4 100644 --- a/quickwit/quickwit-metrics/src/gauge.rs +++ b/quickwit/quickwit-metrics/src/gauge.rs @@ -96,7 +96,7 @@ impl GaugeInner { /// A registered gauge metric backed by [`metrics::Gauge`]. /// -/// Created via the [`gauge!`] macro, either as a base declaration with +/// Created via the `gauge!` macro, either as a base declaration with /// static labels or as a child that extends a parent's labels at runtime. /// /// Unlike counters, gauges can go **up and down** — they represent a diff --git a/quickwit/quickwit-metrics/src/histogram.rs b/quickwit/quickwit-metrics/src/histogram.rs index b904d7522f0..4f7fd13ec2b 100644 --- a/quickwit/quickwit-metrics/src/histogram.rs +++ b/quickwit/quickwit-metrics/src/histogram.rs @@ -107,7 +107,7 @@ impl HistogramInner { /// A registered histogram metric backed by [`metrics::Histogram`]. /// -/// Created via the [`histogram!`] macro, either as a base declaration with +/// Created via the `histogram!` macro, either as a base declaration with /// static labels or as a child that extends a parent's labels at runtime. 
/// /// Histograms do not support the `observable` flag — they have no shadow diff --git a/quickwit/quickwit-parquet-engine/Cargo.toml b/quickwit/quickwit-parquet-engine/Cargo.toml index b692d479825..83282a2073e 100644 --- a/quickwit/quickwit-parquet-engine/Cargo.toml +++ b/quickwit/quickwit-parquet-engine/Cargo.toml @@ -17,7 +17,6 @@ base64 = { workspace = true } chrono = { workspace = true } parquet = { workspace = true } prost = { workspace = true } -quickwit-common = { workspace = true } quickwit-metrics = { workspace = true } quickwit-dst = { workspace = true } quickwit-proto = { workspace = true } diff --git a/quickwit/quickwit-search/src/metrics.rs b/quickwit/quickwit-search/src/metrics.rs index f35a323d313..dc889cc342c 100644 --- a/quickwit/quickwit-search/src/metrics.rs +++ b/quickwit/quickwit-search/src/metrics.rs @@ -106,7 +106,7 @@ impl fmt::Display for SplitSearchOutcomeCounters { impl SplitSearchOutcomeCounters { /// Create a new SplitSearchOutcomeCounters instance, registered in prometheus. pub fn new_registered() -> Self { - Self::new_registered_from_counter(&*SPLIT_SEARCH_OUTCOME) + Self::new_registered_from_counter(&SPLIT_SEARCH_OUTCOME) } /// Create a new SplitSearchOutcomeCounters instance that is not reported. From effdc65185e81bbe98fbaf980b0f348c5c7a9b01 Mon Sep 17 00:00:00 2001 From: Luca Cominardi Date: Mon, 4 May 2026 08:46:36 +0200 Subject: [PATCH 15/54] Replace XOR with wrapping add in cache-key hash MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit XOR is self-inverse so duplicate labels cancel each other out (a ^ a == 0). Wrapping addition (mod 2^64) is still commutative and associative — preserving order-independence and incremental composability — but distinct label sets now always produce distinct hashes. 
Co-authored-by: Cursor --- quickwit/quickwit-metrics/src/inner.rs | 43 +++++++++++++++++++++----- quickwit/quickwit-metrics/src/lib.rs | 10 +++--- 2 files changed, 40 insertions(+), 13 deletions(-) diff --git a/quickwit/quickwit-metrics/src/inner.rs b/quickwit/quickwit-metrics/src/inner.rs index 2c41ab87248..a269e8c3ec7 100644 --- a/quickwit/quickwit-metrics/src/inner.rs +++ b/quickwit/quickwit-metrics/src/inner.rs @@ -100,23 +100,37 @@ macro_rules! __key_info_metadata { // ─── Cache-key hashing ─── // -// The hash is **order-independent** and **composable**, so -// `hash(seed, [A,B,C])` equals `hash(hash(seed, [A,B]), [C])` — this is -// what makes the `parent:` extension pattern work correctly. +// We need a hash that can be computed **incrementally**: the final hash +// must equal the hash of the metric name combined with the hashes of all +// labels, regardless of the order they are declared or added. This is +// critical for the `parent:` extension pattern, where a child metric +// inherits its parent's hash and folds in only the new labels — the +// result must be identical to hashing everything from scratch. +// +// This requires the combining operator to be **commutative** +// (order-independent) and **associative** (composable): +// `combine(combine(seed, A), B) == combine(seed, combine(A, B))` +// +// We use wrapping addition (mod 2^64) rather than XOR because XOR is +// self-inverse — duplicate labels would cancel each other out +// (`a ^ a == 0`). -/// XOR-folds per-label hashes into `seed`, yielding an order-independent, -/// composable cache key: `hash(seed, [A,B]) == hash(hash(seed, [A]), [B])`. +/// Folds per-label hashes into `seed` via wrapping addition (mod 2^64). +/// +/// The combining operator (wrapping add) is both **commutative** and +/// **associative**, which guarantees order-independence and composability: +/// `hash(seed, [A,B]) == hash(hash(seed, [A]), [B])`. 
#[doc(hidden)] #[inline] pub fn __key_hash<'a>(seed: u64, labels: impl IntoIterator) -> u64 { - let mut xor = seed; + let mut acc = seed; for (name, value) in labels { let mut h = FxHasher::default(); name.hash(&mut h); value.hash(&mut h); - xor ^= h.finish(); + acc = acc.wrapping_add(h.finish()); } - xor + acc } /// Convenience macro that coerces label name/value expressions into `&str` @@ -310,5 +324,18 @@ mod tests { fn empty_labels_returns_seed(seed: u64) { prop_assert_eq!(__key_hash(seed, std::iter::empty()), seed); } + + #[test] + fn duplicate_labels_do_not_cancel( + seed: u64, + name in "[a-z]{1,8}", + value in "[a-z0-9]{1,16}", + ) { + let one = __key_hash(seed, [(&*name, &*value)]); + let two = __key_hash(seed, [(&*name, &*value), (&*name, &*value)]); + prop_assert_ne!(one, seed, "single label must change the seed"); + prop_assert_ne!(two, seed, "two identical labels must not cancel back to seed"); + prop_assert_ne!(one, two, "one vs two identical labels must produce different hashes"); + } } } diff --git a/quickwit/quickwit-metrics/src/lib.rs b/quickwit/quickwit-metrics/src/lib.rs index 651aa9ff71b..98c3177d9e2 100644 --- a/quickwit/quickwit-metrics/src/lib.rs +++ b/quickwit/quickwit-metrics/src/lib.rs @@ -260,11 +260,11 @@ //! exists, it `Arc::clone`s and populates L1. Only on a full L2 miss does //! it construct the `metrics::Key`, register with the recorder, and insert. //! -//! The hash is **order-independent** and **composable** via XOR-folding of -//! per-label `FxHasher` outputs. This means -//! `hash(parent, [A,B]) == hash(hash(parent, [A]), [B])`, which is what -//! makes the `parent:` extension pattern work correctly without rehashing -//! all labels. +//! Per-label hashes (via `FxHasher`) are combined with wrapping addition +//! (mod 2^64), which is both **commutative** (order-independent) and +//! **associative** (composable): `hash(parent, [A,B]) == +//! hash(hash(parent, [A]), [B])`. This is what makes the `parent:` +//! 
extension pattern work without rehashing all labels. #![deny(clippy::disallowed_methods)] From c4df03a2fb0bdaac408b04adaaedea01456efe7a Mon Sep 17 00:00:00 2001 From: Shuhei Kitagawa Date: Mon, 4 May 2026 10:39:16 +0200 Subject: [PATCH 16/54] Factor maybe registered counters --- quickwit/quickwit-common/src/io.rs | 52 +++----------- quickwit/quickwit-common/src/metrics.rs | 81 ++++++++++++++++++++- quickwit/quickwit-search/src/metrics.rs | 96 +++++++------------------ 3 files changed, 116 insertions(+), 113 deletions(-) diff --git a/quickwit/quickwit-common/src/io.rs b/quickwit/quickwit-common/src/io.rs index 240f77c4203..c16d9d4d1a1 100644 --- a/quickwit/quickwit-common/src/io.rs +++ b/quickwit/quickwit-common/src/io.rs @@ -37,6 +37,7 @@ use pin_project::pin_project; use quickwit_metrics::{Counter, Labels, counter}; use tokio::io::AsyncWrite; +use crate::metrics::MaybeRegisteredCounter; use crate::{KillSwitch, Progress, ProtectedZoneGuard}; // Max 1MB at a time. @@ -81,7 +82,7 @@ pub fn limiter(throughput: ByteSize) -> Limiter { #[derive(Clone)] pub struct IoControls { throughput_limiter_opt: Option, - bytes_counter: Counter, + bytes_counter: MaybeRegisteredCounter, progress: Progress, kill_switch: KillSwitch, } @@ -92,7 +93,7 @@ impl Default for IoControls { throughput_limiter_opt: None, progress: Progress::default(), kill_switch: KillSwitch::default(), - bytes_counter: DEFAULT_WRITE_BYTES.clone(), + bytes_counter: MaybeRegisteredCounter::default(), } } } @@ -121,7 +122,8 @@ impl IoControls { pub fn set_component(mut self, component: &str) -> Self { let labels = COMPONENT_LABELS.with_values([component.to_string()]); - self.bytes_counter = counter!(parent: &*WRITE_BYTES, labels: &labels); + self.bytes_counter = + MaybeRegisteredCounter::registered(counter!(parent: &*WRITE_BYTES, labels: &labels)); self } @@ -138,7 +140,7 @@ impl IoControls { } pub fn set_bytes_counter(mut self, bytes_counter: Counter) -> Self { - self.bytes_counter = bytes_counter; + 
self.bytes_counter = MaybeRegisteredCounter::registered(bytes_counter); self } @@ -161,15 +163,6 @@ impl IoControls { } } -static DEFAULT_WRITE_BYTES: LazyLock = LazyLock::new(|| { - counter!( - name: "default_write_num_bytes", - description: "Default write counter.", - subsystem: "", - observable: true, - ) -}); - #[pin_project] pub struct ControlledWrite { #[pin] @@ -341,7 +334,6 @@ mod tests { use std::time::Duration; use bytesize::ByteSize; - use quickwit_metrics::counter; use tokio::io::{AsyncWriteExt, sink}; use tokio::time::Instant; @@ -349,14 +341,7 @@ mod tests { #[tokio::test] async fn test_controlled_writer_limited_async() { - let io_controls = IoControls::default() - .set_bytes_counter(counter!( - name: "test_controlled_writer_limited_async_num_bytes", - description: "Test bytes counter.", - subsystem: "", - observable: true, - )) - .set_throughput_limit(ByteSize::mb(2)); + let io_controls = IoControls::default().set_throughput_limit(ByteSize::mb(2)); let mut controlled_write = io_controls.clone().wrap_write(sink()); let buf = vec![44u8; 1_000]; let start = Instant::now(); @@ -373,12 +358,7 @@ mod tests { #[tokio::test] async fn test_controlled_writer_no_limit_async() { - let io_controls = IoControls::default().set_bytes_counter(counter!( - name: "test_controlled_writer_no_limit_async_num_bytes", - description: "Test bytes counter.", - subsystem: "", - observable: true, - )); + let io_controls = IoControls::default(); let mut controlled_write = io_controls.clone().wrap_write(sink()); let buf = vec![44u8; 1_000]; let start = Instant::now(); @@ -394,14 +374,7 @@ mod tests { #[test] fn test_controlled_writer_limited_sync() { - let io_controls = IoControls::default() - .set_bytes_counter(counter!( - name: "test_controlled_writer_limited_sync_num_bytes", - description: "Test bytes counter.", - subsystem: "", - observable: true, - )) - .set_throughput_limit(ByteSize::mb(2)); + let io_controls = IoControls::default().set_throughput_limit(ByteSize::mb(2)); let 
mut controlled_write = io_controls.clone().wrap_write(std::io::sink()); let buf = vec![44u8; 1_000]; let start = Instant::now(); @@ -418,12 +391,7 @@ mod tests { #[test] fn test_controlled_writer_no_limit_sync() { - let io_controls = IoControls::default().set_bytes_counter(counter!( - name: "test_controlled_writer_no_limit_sync_num_bytes", - description: "Test bytes counter.", - subsystem: "", - observable: true, - )); + let io_controls = IoControls::default(); let mut controlled_write = io_controls.clone().wrap_write(std::io::sink()); let buf = vec![44u8; 1_000]; let start = Instant::now(); diff --git a/quickwit/quickwit-common/src/metrics.rs b/quickwit/quickwit-common/src/metrics.rs index 53a06a07dd1..0f44c49d40f 100644 --- a/quickwit/quickwit-common/src/metrics.rs +++ b/quickwit/quickwit-common/src/metrics.rs @@ -13,18 +13,66 @@ // limitations under the License. use std::collections::BTreeMap; -use std::sync::{LazyLock, OnceLock}; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::sync::{Arc, LazyLock, OnceLock}; #[cfg(not(test))] use std::time::Duration; use metrics_exporter_prometheus::PrometheusHandle; pub use prometheus::{exponential_buckets, linear_buckets}; -use quickwit_metrics::{Gauge, Labels, gauge}; +use quickwit_metrics::{Counter, Gauge, Labels, gauge}; const SYSTEM: &str = "quickwit"; static PROMETHEUS_HANDLE: OnceLock = OnceLock::new(); +#[derive(Clone)] +pub struct MaybeRegisteredCounter { + inner: MaybeRegisteredCounterInner, +} + +#[derive(Clone)] +enum MaybeRegisteredCounterInner { + Local(Arc), + Registered(Counter), +} + +impl Default for MaybeRegisteredCounter { + fn default() -> Self { + Self::local() + } +} + +impl MaybeRegisteredCounter { + pub fn local() -> Self { + Self { + inner: MaybeRegisteredCounterInner::Local(Arc::new(AtomicU64::new(0))), + } + } + + pub fn registered(counter: Counter) -> Self { + Self { + inner: MaybeRegisteredCounterInner::Registered(counter), + } + } + + pub fn increment(&self, value: u64) { + match 
&self.inner { + MaybeRegisteredCounterInner::Local(counter) => { + counter.fetch_add(value, Ordering::Relaxed); + } + MaybeRegisteredCounterInner::Registered(counter) => counter.increment(value), + } + } + + pub fn get(&self) -> u64 { + match &self.inner { + MaybeRegisteredCounterInner::Local(counter) => counter.load(Ordering::Relaxed), + MaybeRegisteredCounterInner::Registered(counter) => counter.get(), + } + } +} + pub fn set_prometheus_handle(handle: PrometheusHandle) -> Result<(), String> { #[cfg(not(test))] let upkeep_handle = handle.clone(); @@ -187,9 +235,38 @@ mod tests { use metrics::with_local_recorder; use metrics_exporter_prometheus::PrometheusBuilder; use metrics_util::debugging::{DebugValue, DebuggingRecorder}; + use quickwit_metrics::counter; use super::*; + #[test] + fn maybe_registered_counter_counts_locally() { + let counter = MaybeRegisteredCounter::local(); + let counter_clone = counter.clone(); + + counter.increment(3); + counter_clone.increment(4); + + assert_eq!(counter.get(), 7); + assert_eq!(counter_clone.get(), 7); + } + + #[test] + fn maybe_registered_counter_wraps_registered_counter() { + let registered_counter = counter!( + name: "maybe_registered_counter_test", + description: "Maybe registered counter test.", + subsystem: "", + observable: true, + ); + let counter = MaybeRegisteredCounter::registered(registered_counter.clone()); + + counter.increment(5); + + assert_eq!(counter.get(), 5); + assert_eq!(registered_counter.get(), 5); + } + #[test] fn metrics_text_payload_renders_prometheus_handle() { let recorder = PrometheusBuilder::new().build_recorder(); diff --git a/quickwit/quickwit-search/src/metrics.rs b/quickwit/quickwit-search/src/metrics.rs index dc889cc342c..2dc1bb5b988 100644 --- a/quickwit/quickwit-search/src/metrics.rs +++ b/quickwit/quickwit-search/src/metrics.rs @@ -15,11 +15,10 @@ // See https://prometheus.io/docs/practices/naming/ use std::fmt; -use std::sync::atomic::{AtomicU64, Ordering}; -use std::sync::{Arc, 
LazyLock}; +use std::sync::LazyLock; use bytesize::ByteSize; -use quickwit_common::metrics::{exponential_buckets, linear_buckets}; +use quickwit_common::metrics::{MaybeRegisteredCounter, exponential_buckets, linear_buckets}; use quickwit_metrics::{Counter, Gauge, Histogram, Labels, counter, gauge, histogram}; pub(crate) const STATUS_LABELS: Labels<1> = Labels::new(["status"]); @@ -27,7 +26,7 @@ pub(crate) const AFFINITY_LABELS: Labels<1> = Labels::new(["affinity"]); fn print_if_not_null( field_name: &'static str, - counter: &SplitSearchOutcomeCounter, + counter: &MaybeRegisteredCounter, f: &mut fmt::Formatter, ) -> fmt::Result { let val = counter.get(); @@ -37,56 +36,15 @@ fn print_if_not_null( Ok(()) } -#[derive(Clone)] -pub struct SplitSearchOutcomeCounter { - inner: SplitSearchOutcomeCounterInner, -} - -#[derive(Clone)] -enum SplitSearchOutcomeCounterInner { - Registered(Counter), - Local(Arc), -} - -impl SplitSearchOutcomeCounter { - fn registered(counter: Counter) -> Self { - Self { - inner: SplitSearchOutcomeCounterInner::Registered(counter), - } - } - - fn local() -> Self { - Self { - inner: SplitSearchOutcomeCounterInner::Local(Arc::new(AtomicU64::new(0))), - } - } - - pub fn increment(&self, value: u64) { - match &self.inner { - SplitSearchOutcomeCounterInner::Registered(counter) => counter.increment(value), - SplitSearchOutcomeCounterInner::Local(value_ref) => { - value_ref.fetch_add(value, Ordering::Relaxed); - } - } - } - - pub fn get(&self) -> u64 { - match &self.inner { - SplitSearchOutcomeCounterInner::Registered(counter) => counter.get(), - SplitSearchOutcomeCounterInner::Local(value_ref) => value_ref.load(Ordering::Relaxed), - } - } -} - pub struct SplitSearchOutcomeCounters { - pub cancel_before_warmup: SplitSearchOutcomeCounter, - pub cache_hit: SplitSearchOutcomeCounter, - pub pruned_before_warmup: SplitSearchOutcomeCounter, - pub cancel_warmup: SplitSearchOutcomeCounter, - pub pruned_after_warmup: SplitSearchOutcomeCounter, - pub 
cancel_cpu_queue: SplitSearchOutcomeCounter, - pub cancel_cpu: SplitSearchOutcomeCounter, - pub success: SplitSearchOutcomeCounter, + pub cancel_before_warmup: MaybeRegisteredCounter, + pub cache_hit: MaybeRegisteredCounter, + pub pruned_before_warmup: MaybeRegisteredCounter, + pub cancel_warmup: MaybeRegisteredCounter, + pub pruned_after_warmup: MaybeRegisteredCounter, + pub cancel_cpu_queue: MaybeRegisteredCounter, + pub cancel_cpu: MaybeRegisteredCounter, + pub success: MaybeRegisteredCounter, } impl fmt::Display for SplitSearchOutcomeCounters { @@ -112,48 +70,48 @@ impl SplitSearchOutcomeCounters { /// Create a new SplitSearchOutcomeCounters instance that is not reported. pub fn new_unregistered() -> Self { SplitSearchOutcomeCounters { - cancel_before_warmup: SplitSearchOutcomeCounter::local(), - cache_hit: SplitSearchOutcomeCounter::local(), - pruned_before_warmup: SplitSearchOutcomeCounter::local(), - cancel_warmup: SplitSearchOutcomeCounter::local(), - pruned_after_warmup: SplitSearchOutcomeCounter::local(), - cancel_cpu_queue: SplitSearchOutcomeCounter::local(), - cancel_cpu: SplitSearchOutcomeCounter::local(), - success: SplitSearchOutcomeCounter::local(), + cancel_before_warmup: MaybeRegisteredCounter::local(), + cache_hit: MaybeRegisteredCounter::local(), + pruned_before_warmup: MaybeRegisteredCounter::local(), + cancel_warmup: MaybeRegisteredCounter::local(), + pruned_after_warmup: MaybeRegisteredCounter::local(), + cancel_cpu_queue: MaybeRegisteredCounter::local(), + cancel_cpu: MaybeRegisteredCounter::local(), + success: MaybeRegisteredCounter::local(), } } fn new_registered_from_counter(search_split_outcome: &Counter) -> Self { SplitSearchOutcomeCounters { - cancel_before_warmup: SplitSearchOutcomeCounter::registered(counter!( + cancel_before_warmup: MaybeRegisteredCounter::registered(counter!( parent: search_split_outcome, "category" => "cancel_before_warmup", )), - cache_hit: SplitSearchOutcomeCounter::registered(counter!( + cache_hit: 
MaybeRegisteredCounter::registered(counter!( parent: search_split_outcome, "category" => "cache_hit", )), - pruned_before_warmup: SplitSearchOutcomeCounter::registered(counter!( + pruned_before_warmup: MaybeRegisteredCounter::registered(counter!( parent: search_split_outcome, "category" => "pruned_before_warmup", )), - cancel_warmup: SplitSearchOutcomeCounter::registered(counter!( + cancel_warmup: MaybeRegisteredCounter::registered(counter!( parent: search_split_outcome, "category" => "cancel_warmup", )), - pruned_after_warmup: SplitSearchOutcomeCounter::registered(counter!( + pruned_after_warmup: MaybeRegisteredCounter::registered(counter!( parent: search_split_outcome, "category" => "pruned_after_warmup", )), - cancel_cpu_queue: SplitSearchOutcomeCounter::registered(counter!( + cancel_cpu_queue: MaybeRegisteredCounter::registered(counter!( parent: search_split_outcome, "category" => "cancel_cpu_queue", )), - cancel_cpu: SplitSearchOutcomeCounter::registered(counter!( + cancel_cpu: MaybeRegisteredCounter::registered(counter!( parent: search_split_outcome, "category" => "cancel_cpu", )), - success: SplitSearchOutcomeCounter::registered(counter!( + success: MaybeRegisteredCounter::registered(counter!( parent: search_split_outcome, "category" => "success", )), From 068d85d6246f681e6d0106fe3ccb3e9a7ed64d5c Mon Sep 17 00:00:00 2001 From: Shuhei Kitagawa Date: Mon, 4 May 2026 10:47:45 +0200 Subject: [PATCH 17/54] Simplify info metric key formatting --- quickwit/quickwit-common/src/metrics.rs | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/quickwit/quickwit-common/src/metrics.rs b/quickwit/quickwit-common/src/metrics.rs index 0f44c49d40f..54f1b14bfac 100644 --- a/quickwit/quickwit-common/src/metrics.rs +++ b/quickwit/quickwit-common/src/metrics.rs @@ -22,8 +22,6 @@ use metrics_exporter_prometheus::PrometheusHandle; pub use prometheus::{exponential_buckets, linear_buckets}; use quickwit_metrics::{Counter, Gauge, Labels, gauge}; -const 
SYSTEM: &str = "quickwit"; - static PROMETHEUS_HANDLE: OnceLock = OnceLock::new(); #[derive(Clone)] @@ -109,7 +107,7 @@ fn spawn_prometheus_upkeep(handle: PrometheusHandle) -> Result<(), String> { } pub fn register_info(name: &'static str, help: &'static str, kvs: BTreeMap<&'static str, String>) { - let key_name = metric_key_name("", name); + let key_name = format!("quickwit_{name}"); let labels = kvs .into_iter() .map(|(label, value)| metrics::Label::new(label, value)) @@ -222,14 +220,6 @@ fn in_flight_data_gauge(component: &'static str) -> Gauge { gauge!(parent: &*IN_FLIGHT_DATA_BYTES, labels: &labels) } -fn metric_key_name(subsystem: &str, name: &str) -> String { - if subsystem.is_empty() { - format!("{SYSTEM}_{name}") - } else { - format!("{SYSTEM}_{subsystem}_{name}") - } -} - #[cfg(test)] mod tests { use metrics::with_local_recorder; From 7566a5a535a8a60647ae1ae36782656d774399e7 Mon Sep 17 00:00:00 2001 From: Shuhei Kitagawa Date: Mon, 4 May 2026 10:57:06 +0200 Subject: [PATCH 18/54] Make GaugeGuard delta atomic --- quickwit/quickwit-actors/src/mailbox.rs | 2 +- quickwit/quickwit-common/src/stream_utils.rs | 2 +- quickwit/quickwit-common/src/thread_pool.rs | 4 ++-- .../quickwit-indexing/src/actors/indexer.rs | 4 ++-- .../src/actors/indexing_pipeline.rs | 2 +- .../src/actors/metrics_pipeline/pipeline.rs | 2 +- .../processed_parquet_batch.rs | 2 +- .../src/models/processed_doc.rs | 2 +- .../src/models/raw_doc_batch.rs | 2 +- .../src/metastore/postgres/pool.rs | 2 +- .../quickwit-metrics/examples/http_service.rs | 4 ++-- quickwit/quickwit-metrics/src/gauge.rs | 18 +++++++------- quickwit/quickwit-metrics/src/lib.rs | 6 ++--- quickwit/quickwit-metrics/tests/gauge.rs | 24 +++++++++---------- quickwit/quickwit-serve/src/load_shield.rs | 4 ++-- quickwit/quickwit-storage/src/metrics.rs | 4 ++-- 16 files changed, 42 insertions(+), 42 deletions(-) diff --git a/quickwit/quickwit-actors/src/mailbox.rs b/quickwit/quickwit-actors/src/mailbox.rs index 
33e542d391f..cbbf6048f5d 100644 --- a/quickwit/quickwit-actors/src/mailbox.rs +++ b/quickwit/quickwit-actors/src/mailbox.rs @@ -394,7 +394,7 @@ impl Inbox { } fn get_actor_inboxes_count_gauge_guard() -> GaugeGuard { - let mut gauge_guard = GaugeGuard::from_gauge(&INBOX_GAUGE); + let gauge_guard = GaugeGuard::from_gauge(&INBOX_GAUGE); gauge_guard.increment(1.0); gauge_guard } diff --git a/quickwit/quickwit-common/src/stream_utils.rs b/quickwit/quickwit-common/src/stream_utils.rs index 9c955388d1c..c1fe28ccec7 100644 --- a/quickwit/quickwit-common/src/stream_utils.rs +++ b/quickwit/quickwit-common/src/stream_utils.rs @@ -239,7 +239,7 @@ where T: fmt::Debug impl InFlightValue { pub fn new(value: T, value_size: ByteSize, gauge: &'static Gauge) -> Self { - let mut gauge_guard = GaugeGuard::from_gauge(gauge); + let gauge_guard = GaugeGuard::from_gauge(gauge); gauge_guard.increment(value_size.as_u64() as f64); Self(value, gauge_guard) } diff --git a/quickwit/quickwit-common/src/thread_pool.rs b/quickwit/quickwit-common/src/thread_pool.rs index 46e610acfc7..4d81007c71a 100644 --- a/quickwit/quickwit-common/src/thread_pool.rs +++ b/quickwit/quickwit-common/src/thread_pool.rs @@ -101,7 +101,7 @@ impl ThreadPool { { let span = tracing::Span::current(); let ongoing_tasks = self.ongoing_tasks.clone(); - let mut pending_tasks_guard = GaugeGuard::from_gauge(&self.pending_tasks); + let pending_tasks_guard = GaugeGuard::from_gauge(&self.pending_tasks); pending_tasks_guard.increment(1.0); let (tx, rx) = oneshot::channel(); self.thread_pool.spawn(move || { @@ -110,7 +110,7 @@ impl ThreadPool { return; } let _guard = span.enter(); - let mut _ongoing_task_guard = GaugeGuard::from_gauge(&ongoing_tasks); + let _ongoing_task_guard = GaugeGuard::from_gauge(&ongoing_tasks); _ongoing_task_guard.increment(1.0); let result = cpu_intensive_fn(); let _ = tx.send(result); diff --git a/quickwit/quickwit-indexing/src/actors/indexer.rs b/quickwit/quickwit-indexing/src/actors/indexer.rs index 
932dfaf1640..43673f86c12 100644 --- a/quickwit/quickwit-indexing/src/actors/indexer.rs +++ b/quickwit/quickwit-indexing/src/actors/indexer.rs @@ -219,7 +219,7 @@ impl IndexerState { let publish_lock = self.publish_lock.clone(); let publish_token_opt = self.publish_token_opt.clone(); - let mut split_builders_guard = GaugeGuard::from_gauge(&crate::metrics::SPLIT_BUILDERS); + let split_builders_guard = GaugeGuard::from_gauge(&crate::metrics::SPLIT_BUILDERS); split_builders_guard.increment(1.0); let workbench = IndexingWorkbench { @@ -578,7 +578,7 @@ impl Indexer { fn memory_usage(&self) -> ByteSize { if let Some(workbench) = &self.indexing_workbench_opt { - ByteSize(workbench.memory_usage.get() as u64) + ByteSize(workbench.memory_usage.delta() as u64) } else { ByteSize(0u64) } diff --git a/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs b/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs index c12a8fa8a76..107869932e8 100644 --- a/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs +++ b/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs @@ -132,7 +132,7 @@ impl IndexingPipeline { parent: &crate::metrics::INDEXING_PIPELINES, labels: &labels, ); - let mut indexing_pipelines_gauge_guard = GaugeGuard::from_gauge(&indexing_pipelines_gauge); + let indexing_pipelines_gauge_guard = GaugeGuard::from_gauge(&indexing_pipelines_gauge); indexing_pipelines_gauge_guard.increment(1.0); let params_fingerprint = params.params_fingerprint; IndexingPipeline { diff --git a/quickwit/quickwit-indexing/src/actors/metrics_pipeline/pipeline.rs b/quickwit/quickwit-indexing/src/actors/metrics_pipeline/pipeline.rs index f6a3dafba7f..98b366861f6 100644 --- a/quickwit/quickwit-indexing/src/actors/metrics_pipeline/pipeline.rs +++ b/quickwit/quickwit-indexing/src/actors/metrics_pipeline/pipeline.rs @@ -153,7 +153,7 @@ impl MetricsPipeline { parent: &crate::metrics::INDEXING_PIPELINES, labels: &labels, ); - let mut indexing_pipelines_gauge_guard = 
GaugeGuard::from_gauge(&indexing_pipelines_gauge); + let indexing_pipelines_gauge_guard = GaugeGuard::from_gauge(&indexing_pipelines_gauge); indexing_pipelines_gauge_guard.increment(1.0); let params_fingerprint = params.params_fingerprint; MetricsPipeline { diff --git a/quickwit/quickwit-indexing/src/actors/metrics_pipeline/processed_parquet_batch.rs b/quickwit/quickwit-indexing/src/actors/metrics_pipeline/processed_parquet_batch.rs index ebf6a0bd296..7b6831a5b6f 100644 --- a/quickwit/quickwit-indexing/src/actors/metrics_pipeline/processed_parquet_batch.rs +++ b/quickwit/quickwit-indexing/src/actors/metrics_pipeline/processed_parquet_batch.rs @@ -65,7 +65,7 @@ impl ProcessedParquetBatch { .map(|col| col.get_array_memory_size() as i64) .sum(); - let mut gauge_guard = + let gauge_guard = GaugeGuard::from_gauge(&quickwit_common::metrics::IN_FLIGHT_INDEXER_MAILBOX); gauge_guard.increment(memory_size as f64); diff --git a/quickwit/quickwit-indexing/src/models/processed_doc.rs b/quickwit/quickwit-indexing/src/models/processed_doc.rs index 6415da26427..424956e9d17 100644 --- a/quickwit/quickwit-indexing/src/models/processed_doc.rs +++ b/quickwit/quickwit-indexing/src/models/processed_doc.rs @@ -51,7 +51,7 @@ impl ProcessedDocBatch { force_commit: bool, ) -> Self { let delta = docs.iter().map(|doc| doc.num_bytes as i64).sum::(); - let mut gauge_guard = + let gauge_guard = GaugeGuard::from_gauge(&quickwit_common::metrics::IN_FLIGHT_INDEXER_MAILBOX); gauge_guard.increment(delta as f64); Self { diff --git a/quickwit/quickwit-indexing/src/models/raw_doc_batch.rs b/quickwit/quickwit-indexing/src/models/raw_doc_batch.rs index 777088086c3..d5178afd123 100644 --- a/quickwit/quickwit-indexing/src/models/raw_doc_batch.rs +++ b/quickwit/quickwit-indexing/src/models/raw_doc_batch.rs @@ -34,7 +34,7 @@ impl RawDocBatch { force_commit: bool, ) -> Self { let delta = docs.iter().map(|doc| doc.len() as i64).sum::(); - let mut gauge_guard = + let gauge_guard = 
GaugeGuard::from_gauge(&quickwit_common::metrics::IN_FLIGHT_DOC_PROCESSOR_MAILBOX); gauge_guard.increment(delta as f64); diff --git a/quickwit/quickwit-metastore/src/metastore/postgres/pool.rs b/quickwit/quickwit-metastore/src/metastore/postgres/pool.rs index 909154b89a6..963df072afa 100644 --- a/quickwit/quickwit-metastore/src/metastore/postgres/pool.rs +++ b/quickwit/quickwit-metastore/src/metastore/postgres/pool.rs @@ -52,7 +52,7 @@ impl<'a, DB: Database> Acquire<'a> for &TrackedPool { super::metrics::IDLE_CONNECTIONS.set(self.inner_pool.num_idle() as f64); Box::pin(async move { - let mut _gauge_guard = GaugeGuard::from_gauge(&super::metrics::ACQUIRE_CONNECTIONS); + let _gauge_guard = GaugeGuard::from_gauge(&super::metrics::ACQUIRE_CONNECTIONS); _gauge_guard.increment(1.0); let conn = acquire_conn_fut.await?; diff --git a/quickwit/quickwit-metrics/examples/http_service.rs b/quickwit/quickwit-metrics/examples/http_service.rs index 1fac4097f30..70e9012b112 100644 --- a/quickwit/quickwit-metrics/examples/http_service.rs +++ b/quickwit/quickwit-metrics/examples/http_service.rs @@ -102,7 +102,7 @@ const REGION_LABEL: Labels<1> = Labels::new(["region"]); fn track_connection(region: &'static str) -> GaugeGuard { let lv = REGION_LABEL.with_values([region]); let g = gauge!(parent: HTTP_ACTIVE_CONNECTIONS, labels: &lv); - let mut guard = GaugeGuard::from_gauge(&g); + let guard = GaugeGuard::from_gauge(&g); guard.increment(1.0); guard } @@ -160,7 +160,7 @@ fn handle_request(method: &'static str, path: &'static str, region: &'static str "method" => method, ); { - let mut _guard = GaugeGuard::from_gauge(&conn_gauge); + let _guard = GaugeGuard::from_gauge(&conn_gauge); _guard.increment(1.0); } diff --git a/quickwit/quickwit-metrics/src/gauge.rs b/quickwit/quickwit-metrics/src/gauge.rs index 3c55c06b4e4..edb4f5399bb 100644 --- a/quickwit/quickwit-metrics/src/gauge.rs +++ b/quickwit/quickwit-metrics/src/gauge.rs @@ -232,7 +232,7 @@ impl GaugeFn for Gauge { /// a panic. 
/// /// ```ignore -/// let mut guard = GaugeGuard::from_gauge(&gauge); +/// let guard = GaugeGuard::from_gauge(&gauge); /// guard.increment(1.0); /// // gauge is incremented by 1.0 /// // ... do work ... @@ -241,7 +241,7 @@ impl GaugeFn for Gauge { #[derive(Debug)] pub struct GaugeGuard { gauge: Gauge, - delta: f64, + delta: AtomicF64, } impl GaugeGuard { @@ -249,25 +249,25 @@ impl GaugeGuard { pub fn from_gauge(gauge: &Gauge) -> Self { Self { gauge: gauge.clone(), - delta: 0.0, + delta: AtomicF64::new(0.0), } } /// Adds `delta` to the gauge and to the value this guard tracks. - pub fn increment(&mut self, delta: f64) { - self.delta += delta; + pub fn increment(&self, delta: f64) { + self.delta.fetch_add(delta, Ordering::Relaxed); self.gauge.increment(delta); } /// Returns the value this guard is tracking. - pub fn get(&self) -> f64 { - self.delta + pub fn delta(&self) -> f64 { + self.delta.load(Ordering::Relaxed) } } impl Drop for GaugeGuard { fn drop(&mut self) { - self.gauge.decrement(self.delta); + self.gauge.decrement(self.delta.load(Ordering::Relaxed)); } } @@ -296,7 +296,7 @@ impl Drop for GaugeGuard { /// /// ```ignore /// let child = gauge!(parent: base, "method" => method); -/// let mut guard = GaugeGuard::from_gauge(&child); +/// let guard = GaugeGuard::from_gauge(&child); /// guard.increment(1.0); /// ``` #[macro_export] diff --git a/quickwit/quickwit-metrics/src/lib.rs b/quickwit/quickwit-metrics/src/lib.rs index 98c3177d9e2..87844890988 100644 --- a/quickwit/quickwit-metrics/src/lib.rs +++ b/quickwit/quickwit-metrics/src/lib.rs @@ -131,12 +131,12 @@ //! //! ### 5. Use `GaugeGuard` for RAII-based gauge balancing //! -//! [`GaugeGuard`] increments a gauge on creation and decrements it on drop, -//! which is useful for tracking in-flight resources like active connections. +//! [`GaugeGuard`] tracks gauge increments and decrements them on drop, which is +//! useful for tracking in-flight resources like active connections. //! //! ```rust,ignore //! { -//! 
let mut _guard = GaugeGuard::from_gauge(&ACTIVE_CONNS); +//! let _guard = GaugeGuard::from_gauge(&ACTIVE_CONNS); //! _guard.increment(1.0); //! // ... connection is alive here ... //! } diff --git a/quickwit/quickwit-metrics/tests/gauge.rs b/quickwit/quickwit-metrics/tests/gauge.rs index 5be23bce11b..f5645f53e69 100644 --- a/quickwit/quickwit-metrics/tests/gauge.rs +++ b/quickwit/quickwit-metrics/tests/gauge.rs @@ -91,7 +91,7 @@ fn guard_decrements_on_drop() { ); g.set(0.0); { - let mut _guard = GaugeGuard::from_gauge(&g); + let _guard = GaugeGuard::from_gauge(&g); _guard.increment(5.0); } }); @@ -110,9 +110,9 @@ fn guard_after_set() { ); g.set(10.0); { - let mut guard = GaugeGuard::from_gauge(&g); + let guard = GaugeGuard::from_gauge(&g); guard.increment(3.0); - assert_eq!(guard.get(), 3.0); + assert_eq!(guard.delta(), 3.0); } }); @@ -121,22 +121,22 @@ fn guard_after_set() { } #[test] -fn mutable_guard_tracks_delta() { +fn guard_tracks_delta() { let entries = with_recorder(|| { let g = gauge!( - name: "g_mutable_guard", - description: "mutable guard", + name: "g_guard_delta", + description: "guard delta", subsystem: "test", ); g.set(0.0); { - let mut guard = GaugeGuard::from_gauge(&g); - assert_eq!(guard.get(), 0.0); + let guard = GaugeGuard::from_gauge(&g); + assert_eq!(guard.delta(), 0.0); guard.increment(5.0); guard.increment(-2.0); guard.increment(0.5); guard.increment(-1.5); - assert_eq!(guard.get(), 2.0); + assert_eq!(guard.delta(), 2.0); } }); @@ -153,9 +153,9 @@ fn multiple_guards() { subsystem: "test", ); g.set(0.0); - let mut guard_a = GaugeGuard::from_gauge(&g); + let guard_a = GaugeGuard::from_gauge(&g); guard_a.increment(2.0); - let mut guard_b = GaugeGuard::from_gauge(&g); + let guard_b = GaugeGuard::from_gauge(&g); guard_b.increment(5.0); drop(guard_b); drop(guard_a); @@ -222,7 +222,7 @@ fn observable_guard_matches_recorder() { ); g.set(0.0); { - let mut _guard = GaugeGuard::from_gauge(&g); + let _guard = GaugeGuard::from_gauge(&g); 
_guard.increment(5.0); assert_eq!(g.get(), 5.0); } diff --git a/quickwit/quickwit-serve/src/load_shield.rs b/quickwit/quickwit-serve/src/load_shield.rs index 09008216849..37b95cedee0 100644 --- a/quickwit/quickwit-serve/src/load_shield.rs +++ b/quickwit/quickwit-serve/src/load_shield.rs @@ -75,12 +75,12 @@ impl LoadShield { } pub async fn acquire_permit(&'static self) -> Result { - let mut pending_gauge_guard = GaugeGuard::from_gauge(&self.pending_gauge); + let pending_gauge_guard = GaugeGuard::from_gauge(&self.pending_gauge); pending_gauge_guard.increment(1.0); let in_flight_permit_opt = self.acquire_in_flight_permit().await?; let concurrency_permit_opt = self.acquire_concurrency_permit().await; drop(pending_gauge_guard); - let mut ongoing_gauge_guard = GaugeGuard::from_gauge(&self.ongoing_gauge); + let ongoing_gauge_guard = GaugeGuard::from_gauge(&self.ongoing_gauge); ongoing_gauge_guard.increment(1.0); Ok(LoadShieldPermit { _in_flight_permit_opt: in_flight_permit_opt, diff --git a/quickwit/quickwit-storage/src/metrics.rs b/quickwit/quickwit-storage/src/metrics.rs index 25dacefe486..4ef6e370ee9 100644 --- a/quickwit/quickwit-storage/src/metrics.rs +++ b/quickwit/quickwit-storage/src/metrics.rs @@ -413,9 +413,9 @@ pub static CACHE_METRICS_FOR_TESTS: LazyLock = pub fn object_storage_get_slice_in_flight_guards( get_request_size: usize, ) -> (GaugeGuard, GaugeGuard) { - let mut bytes_guard = GaugeGuard::from_gauge(&OBJECT_STORAGE_GET_SLICE_IN_FLIGHT_NUM_BYTES); + let bytes_guard = GaugeGuard::from_gauge(&OBJECT_STORAGE_GET_SLICE_IN_FLIGHT_NUM_BYTES); bytes_guard.increment(get_request_size as f64); - let mut count_guard = GaugeGuard::from_gauge(&OBJECT_STORAGE_GET_SLICE_IN_FLIGHT_COUNT); + let count_guard = GaugeGuard::from_gauge(&OBJECT_STORAGE_GET_SLICE_IN_FLIGHT_COUNT); count_guard.increment(1.0); (bytes_guard, count_guard) } From 211c8137f98645d30185df629b82ce9812ccc385 Mon Sep 17 00:00:00 2001 From: Shuhei Kitagawa Date: Mon, 4 May 2026 11:03:40 +0200 Subject: 
[PATCH 19/54] Move shard locality metrics publishing to type --- .../quickwit-control-plane/src/indexing_scheduler/mod.rs | 2 +- quickwit/quickwit-control-plane/src/metrics.rs | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/quickwit/quickwit-control-plane/src/indexing_scheduler/mod.rs b/quickwit/quickwit-control-plane/src/indexing_scheduler/mod.rs index f8621f0fff8..9a395269a2a 100644 --- a/quickwit/quickwit-control-plane/src/indexing_scheduler/mod.rs +++ b/quickwit/quickwit-control-plane/src/indexing_scheduler/mod.rs @@ -330,7 +330,7 @@ impl IndexingScheduler { ); let shard_locality_metrics = get_shard_locality_metrics(&new_physical_plan, &shard_locations); - crate::metrics::set_shard_locality_metrics(shard_locality_metrics); + shard_locality_metrics.publish(); if let Some(last_applied_plan) = &self.state.last_applied_physical_plan { let plans_diff = get_indexing_plans_diff( last_applied_plan.indexing_tasks_per_indexer(), diff --git a/quickwit/quickwit-control-plane/src/metrics.rs b/quickwit/quickwit-control-plane/src/metrics.rs index c0b35b3426a..0d3b4df5b49 100644 --- a/quickwit/quickwit-control-plane/src/metrics.rs +++ b/quickwit/quickwit-control-plane/src/metrics.rs @@ -22,9 +22,11 @@ pub struct ShardLocalityMetrics { pub num_local_shards: usize, } -pub fn set_shard_locality_metrics(shard_locality_metrics: ShardLocalityMetrics) { - LOCAL_SHARDS.set(shard_locality_metrics.num_local_shards as f64); - REMOTE_SHARDS.set(shard_locality_metrics.num_remote_shards as f64); +impl ShardLocalityMetrics { + pub fn publish(self) { + LOCAL_SHARDS.set(self.num_local_shards as f64); + REMOTE_SHARDS.set(self.num_remote_shards as f64); + } } pub(crate) static INDEXES_TOTAL: LazyLock = LazyLock::new(|| { From ac5680b7f88317a6de9661090d9db561daa7bffb Mon Sep 17 00:00:00 2001 From: Shuhei Kitagawa Date: Mon, 4 May 2026 11:08:12 +0200 Subject: [PATCH 20/54] Remove obsolete planning docs --- PLAN.md | 57 -------- TASKS.md | 395 
------------------------------------------------------- 2 files changed, 452 deletions(-) delete mode 100644 PLAN.md delete mode 100644 TASKS.md diff --git a/PLAN.md b/PLAN.md deleted file mode 100644 index 13a662df65f..00000000000 --- a/PLAN.md +++ /dev/null @@ -1,57 +0,0 @@ -# Metrics-RS Macro Migration Plan - -## Summary -Replace Quickwit’s current `prometheus` crate-based metrics with the metricspp API directly. The new implementation lives inside `quickwit-common` as `quickwit_common::metrics`, not as a new Cargo crate, and call sites are migrated to `counter!`, `gauge!`, and `histogram!`. - -Metric names stay compatible as `quickwit_{subsystem}_{name}` so existing dashboards and `/metrics` users keep working. - -## Design Authority -The proposed design in `~/go/src/github.com/DataDog/experimental/users/luca.cominardi/metricspp` is the reference architecture for this migration. Quickwit code should be updated to follow that design, including its API shape, macro behavior, metadata model, observable metric behavior, histogram bucket handling, and exporter setup. - -When existing Quickwit metrics code conflicts with the metricspp design, prefer adapting the existing Quickwit code to the metricspp design. Do not reshape the metricspp approach around Quickwit’s current `prometheus` crate patterns, constructors, or historical implementation details unless there is a concrete Quickwit constraint that makes the proposed design impossible to apply. - -## Key Changes -- Port the metricspp design from `~/go/src/github.com/DataDog/experimental/users/luca.cominardi/metricspp` into `quickwit-common/src/metrics/`: typed `Counter`, `Gauge`, `Histogram`, `GaugeGuard`, observable shadows, inventory metadata, histogram bucket registry, and `describe_metrics()`. -- Add dependencies needed by metricspp: `metrics-util`, `metrics-exporter-prometheus`, `metrics-exporter-otel`, `inventory`, `const_format`, and `atomic_float`. 
-- Replace existing `new_counter`, `new_gauge`, `new_histogram`, and `*Vec` declarations across Quickwit with static `LazyLock` metrics declared through metricspp macros. -- Replace metric operations with the new API: - - counters: `.increment(n)` or `.absolute(n)` - - gauges: `.increment(n)`, `.decrement(n)`, `.set(n)` - - histograms: `.record(value)` - - labeled metrics: `counter!(parent: BASE, "label" => value)`, etc. -- Use `observable: true` only where production code or tests currently read values through `.get()`. - -## Exporters -- Install one global metrics-rs recorder during CLI startup. -- Always include a Prometheus recorder and retain Quickwit’s existing `/metrics` route by rendering the stored `PrometheusHandle`. -- Configure Prometheus and OTLP histogram buckets from `metricspp::histogram_buckets()` before metrics are first used. -- Add OTLP metrics export behind `QW_ENABLE_OPENTELEMETRY_OTLP_EXPORTER=true`, using the existing OTLP protocol env behavior: - - `OTEL_EXPORTER_OTLP_METRICS_PROTOCOL` - - fallback to `OTEL_EXPORTER_OTLP_PROTOCOL` - - support `grpc`, `http/protobuf`, and `http/json` -- Fan out to Prometheus, OTLP, and the existing DogStatsD/invariant recorder path where applicable. - -## Migration Work -- For every migrated module, start from the metricspp design and ask how the existing Quickwit code should change to match it, not how metricspp should be adjusted to preserve the old Quickwit implementation. -- Convert metric modules in `quickwit-serve`, `quickwit-search`, `quickwit-indexing`, `quickwit-ingest`, `quickwit-storage`, `quickwit-opentelemetry`, `quickwit-jaeger`, `quickwit-cluster`, `quickwit-actors`, and `quickwit-common`. -- Remove direct `prometheus` usage from Quickwit-owned code, including tower/circuit-breaker helpers. -- Replace `register_info("build_info", ...)` with a metricspp-declared counter/info metric set to `1` with build labels. 
-- Update docs/comments mentioning Prometheus crate semantics to describe metrics-rs plus Prometheus rendering. - -## Test Plan -- Port metricspp tests for counters, gauges, histograms, parent labels, observable metrics, guards, and histogram bucket inventory. -- Add Quickwit integration tests for `/metrics` output: metric names, labels, descriptions, and histogram buckets. -- Add OTLP metrics tests with an in-memory OpenTelemetry exporter. -- Update tests that assert metric values to use `observable: true` declarations or `metrics_util::debugging::DebuggingRecorder`. -- Run: - - `cargo test -p quickwit-common metrics` - - `cargo test -p quickwit-serve metrics_api` - - `cargo test -p quickwit-cli logger` - - `cargo clippy --workspace --tests --all-features` - - `make fmt` - -## Assumptions -- `quickeit-metrics` means the Quickwit metrics module under `quickwit_common::metrics`. -- A broad call-site migration is acceptable; preserving old constructor/type APIs is explicitly out of scope. -- Existing Prometheus metric names must remain stable. -- The existing Quickwit `metrics` Cargo feature for metrics ingestion is unrelated and unchanged. diff --git a/TASKS.md b/TASKS.md deleted file mode 100644 index 037bc57f444..00000000000 --- a/TASKS.md +++ /dev/null @@ -1,395 +0,0 @@ -# Metrics-RS Macro Migration Tasks - -This task list breaks `PLAN.md` into reviewable sections. Each section should be implemented and reviewed independently unless its prerequisites say otherwise. - -## Section 0: Ground Rules And Compatibility Targets - -Status: Done. - -Goal: make the migration safe to review by preserving externally visible behavior. - -Tasks: -- [x] Preserve metric names as `quickwit_{subsystem}_{name}`. -- [x] Preserve empty-subsystem names as `quickwit_{name}`, without a double underscore. -- [x] Preserve existing label names and label values. -- [x] Preserve `/metrics` as the Prometheus text endpoint. 
-- [x] Do not preserve the old `new_counter`, `new_gauge`, `new_histogram`, or `*Vec` APIs as compatibility shims unless a later section proves a temporary shim is needed for incremental compilation. -- [x] Treat Quickwit metric ingestion features and metrics index logic as unrelated to this migration. - -Review checklist: -- [x] No compatibility target is contradicted by later implementation sections. -- [x] Any intentional metric name, label, or bucket change is explicitly called out in the PR. - -## Section 1: Add Workspace Dependencies - -Status: Done. - -Goal: add the metrics-rs exporter and metricspp support dependencies without changing behavior. - -Tasks: -- [x] Add workspace dependencies in `quickwit/Cargo.toml`: - - [x] `metrics-util` - - [x] `metrics-exporter-prometheus` - - [x] `metrics-exporter-otel` - - [x] `inventory` - - [x] `const_format` - - [x] `atomic_float` -- [x] Add the needed dependencies to `quickwit/quickwit-common/Cargo.toml`. -- [x] Add exporter dependencies to the crate that installs recorders, expected to be `quickwit-cli`. -- [x] Keep `prometheus` temporarily until all direct usages are migrated. -- [x] Run a dependency update/build check to refresh `quickwit/Cargo.lock`. - -Validation: -- [x] `cargo check -p quickwit-common` -- [x] `cargo check -p quickwit-cli` - -Review checklist: -- [x] Dependency versions match the existing workspace style. -- [x] New dependencies are only added to crates that use them. - -## Section 2: Port Metricspp Core Into quickwit-common - -Goal: implement the new `quickwit_common::metrics` API while keeping the scope local to `quickwit-common`. - -Tasks: -- Move the current `quickwit/quickwit-common/src/metrics.rs` implementation into a module layout such as `quickwit/quickwit-common/src/metrics/`. 
-- Port metricspp core types: - - `Counter` - - `Gauge` - - `Histogram` - - `GaugeGuard` - - observable `CounterShadow` and `GaugeShadow` - - `MetricInfo` - - `HistogramConfig` -- Port macros: - - `counter!` - - `gauge!` - - `histogram!` - - hidden helper macros for key names, metadata, and label counts. -- Keep the public module path as `quickwit_common::metrics`. -- Implement `SYSTEM = "quickwit"`. -- Implement `describe_metrics()`. -- Implement `metrics_info()` and `histogram_buckets()` for inventory introspection. -- Ensure the macros work for: - - base metrics with no labels - - base metrics with static labels - - parent metrics with dynamic label values - - nested parent extension - - observable counters and gauges -- Add or keep bucket helper functions if call sites still rely on `linear_buckets` and `exponential_buckets`. - -Quickwit-specific adjustments: -- Extend `GaugeGuard` or add an equivalent guard so existing in-flight byte/count use cases can add and subtract variable deltas over the guard lifetime. -- Decide how to handle `OwnedGaugeGuard` use sites. Prefer adapting the new guard to cover the same behavior instead of keeping a Prometheus-specific type. -- Provide a histogram timer helper if needed by existing `start_timer()` call sites, or plan those call sites for manual `Instant` plus `record()`. - -Validation: -- `cargo test -p quickwit-common metrics` - -Review checklist: -- The port is inside `quickwit-common`; no new Cargo crate is introduced. -- The API does not expose Prometheus crate types. -- `observable: true` is opt-in and only affects counters/gauges. - -## Section 3: Port Metricspp Unit Tests - -Goal: validate the new metrics primitives before migrating call sites. - -Tasks: -- Port metricspp tests into `quickwit-common`. 
-- Cover: - - counter increment and absolute values - - gauge set/increment/decrement - - histogram record - - static labels - - parent labels - - dynamic parent labels - - nested parent extension - - observable counter and gauge `get()` - - non-observable sentinel values - - `GaugeGuard` - - histogram bucket inventory - - `describe_metrics()` -- Use `metrics_util::debugging::DebuggingRecorder` for value assertions where possible. - -Validation: -- `cargo test -p quickwit-common metrics` - -Review checklist: -- Tests assert key names include the `quickwit` prefix. -- Tests cover empty subsystem behavior. -- Tests do not depend on the Prometheus crate registry. - -## Section 4: Install Global Metrics Recorder And Prometheus Handle - -Goal: install one global metrics-rs recorder during CLI startup and keep `/metrics` working. - -Tasks: -- Replace Prometheus crate registration/gathering with a metrics-rs recorder setup. -- Install a Prometheus recorder in `quickwit-cli` startup. -- Store a `metrics_exporter_prometheus::PrometheusHandle` somewhere accessible to `quickwit_common::metrics::metrics_text_payload()` or directly to `quickwit-serve`'s metrics handler. -- Configure Prometheus histogram buckets from `quickwit_common::metrics::histogram_buckets()` before metrics are first used. -- Call `quickwit_common::metrics::describe_metrics()` after installing the recorder. -- Keep the existing `/metrics` route in `quickwit-serve`. -- Replace `quickwit_common::metrics::metrics_text_payload()` internals with Prometheus handle rendering. - -Ordering risk: -- Metrics declared through `LazyLock` register on first access. The global recorder must be installed before production metrics are first accessed. Check current startup order around runtime metrics, build info metrics, jemalloc metrics, and CLI setup. - -Validation: -- `cargo test -p quickwit-serve metrics_api` -- A manual or integration scrape returns Prometheus text. 
- -Review checklist: -- There is exactly one global recorder installation path in production startup. -- `/metrics` does not call `prometheus::gather()`. -- Histogram buckets are configured before metrics registration. - -## Section 5: Preserve DogStatsD And Invariant Metrics - -Goal: keep existing DogStatsD and invariant behavior while using the new recorder path. - -Tasks: -- Replace direct `metrics_exporter_dogstatsd::DogStatsDBuilder::install()` if it conflicts with the single global recorder. -- Fan out metrics to: - - Prometheus - - DogStatsD - - optional OTLP metrics - - the existing invariant recorder path -- Keep existing DogStatsD global labels and prefix behavior. -- Update invariant metrics from raw `metrics::counter!` calls only if needed to fit the fanout recorder. - -Validation: -- `cargo test -p quickwit-cli logger` -- Existing invariant tests, if any, still pass. - -Review checklist: -- DogStatsD is not silently disabled. -- Existing global labels are preserved. -- Recorder fanout does not double-record any metric. - -## Section 6: Add Optional OTLP Metrics Export - -Goal: add OTLP metrics export behind the existing telemetry flag. - -Tasks: -- Enable OTLP metrics only when `QW_ENABLE_OPENTELEMETRY_OTLP_EXPORTER=true`. -- Read `OTEL_EXPORTER_OTLP_METRICS_PROTOCOL`. -- Fall back to `OTEL_EXPORTER_OTLP_PROTOCOL`. -- Support: - - `grpc` - - `http/protobuf` - - `http/json` -- Reuse the existing protocol parsing style from tracing/log export where possible. -- Configure OTLP histogram buckets from `quickwit_common::metrics::histogram_buckets()`. -- Add in-memory exporter tests if the dependency stack supports it cleanly. - -Validation: -- `cargo test -p quickwit-cli logger` -- OTLP metrics test with in-memory or local test exporter. - -Review checklist: -- OTLP traces/logs behavior remains unchanged. -- Metrics protocol env vars do not affect trace/log protocol selection. -- Unsupported protocol errors are clear. 
- -## Section 7: Replace Build Info Metric - -Goal: replace `register_info("build_info", ...)` with the new metrics API. - -Tasks: -- Remove `register_info` usage from `quickwit-cli`. -- Declare a build info counter metric with build labels. -- Set the metric to `1` using the new counter API. -- Preserve the Prometheus output shape as closely as possible: - - name: `quickwit_build_info` - - labels: build date, commit hash, version, optional tags, target - - value: `1` -- Account for dynamic label values from `BuildInfo`. - -Validation: -- `/metrics` output contains `quickwit_build_info`. -- `cargo test -p quickwit-cli logger` - -Review checklist: -- Build labels match the previous labels. -- The metric is registered after the global recorder is installed. - -## Section 8: Migrate quickwit-common Internal Metrics - -Goal: convert `quickwit-common` call sites and helpers to the new API. - -Tasks: -- Convert `quickwit-common/src/metrics.rs` consumers to the new module layout. -- Convert `MEMORY_METRICS` and in-flight gauges. -- Convert `quickwit-common/src/tower/metrics.rs`. -- Convert `quickwit-common/src/tower/circuit_breaker.rs`. -- Convert `quickwit-common/src/thread_pool.rs`. -- Convert `quickwit-common/src/stream_utils.rs`. -- Remove direct Prometheus crate imports from `quickwit-common`. - -Special cases: -- Tests in tower metrics currently read `.get()` from counters. Mark those test metrics `observable: true` or use `DebuggingRecorder`. -- In-flight data guards rely on variable add/sub behavior. - -Validation: -- `cargo test -p quickwit-common metrics` -- `cargo test -p quickwit-common tower` -- `cargo check -p quickwit-common --all-features` - -Review checklist: -- `quickwit-common` no longer exposes Prometheus types. -- Guard behavior is preserved for byte accounting. - -## Section 9: Migrate Server And Search Metrics - -Goal: migrate the REST/gRPC/search-facing metric modules and their call sites. - -Tasks: -- Convert `quickwit-serve/src/metrics.rs`. 
-- Convert HTTP request metrics in `quickwit-serve/src/rest.rs`. -- Convert `quickwit-serve` circuit breaker metric call sites. -- Convert `quickwit-search/src/metrics.rs`. -- Convert `quickwit-search/src/metrics_trackers.rs`. -- Convert search permit and scroll context gauge guards. -- Convert histogram timers in search code. - -Special cases: -- `SplitSearchOutcomeCounters` has local unregistered counters. Decide whether to replace with observable local counters, `DebuggingRecorder` in tests, or a small non-exported local helper type. - -Validation: -- `cargo test -p quickwit-serve metrics_api` -- `cargo test -p quickwit-search metrics` -- `cargo check -p quickwit-serve --all-features` -- `cargo check -p quickwit-search --all-features` - -Review checklist: -- HTTP metric names and labels match previous output. -- Search display/debug code that reads counters remains correct. - -## Section 10: Migrate Indexing, Ingest, And Storage Metrics - -Goal: migrate the high-volume ingestion, indexing, and storage metric modules. - -Tasks: -- Convert `quickwit-indexing/src/metrics.rs`. -- Convert indexing actors that use counters, gauges, and gauge guards. -- Convert `quickwit-ingest/src/metrics.rs`. -- Convert `quickwit-ingest/src/ingest_v2/metrics.rs`. -- Convert ingest router, ingester, and replication call sites. -- Convert `quickwit-storage/src/metrics.rs`. -- Convert object storage request counters and histograms. -- Convert cache metrics. -- Convert histogram timers in object storage code. - -Validation: -- `cargo check -p quickwit-indexing --all-features` -- `cargo check -p quickwit-ingest --all-features` -- `cargo check -p quickwit-storage --all-features` -- Run focused unit tests in changed modules. - -Review checklist: -- High-cardinality dynamic labels use parent extension consistently. -- Per-index metric behavior through `index_label()` is preserved. -- In-flight byte gauges balance on drop as before. 
- -## Section 11: Migrate Remaining Service Crates - -Goal: remove Prometheus-specific metric types from the remaining Quickwit-owned crates. - -Tasks: -- Convert `quickwit-jaeger/src/metrics.rs` and Jaeger call sites. -- Convert `quickwit-cluster/src/metrics.rs` and cluster call sites. -- Convert `quickwit-actors` mailbox/backpressure metrics. -- Convert `quickwit-opentelemetry` OTLP ingest metrics. -- Convert `quickwit-lambda-client` metrics. -- Convert `quickwit-parquet-engine` metrics. -- Convert `quickwit-metastore` Postgres metrics. -- Convert `quickwit-control-plane` metrics. -- Convert `quickwit-janitor` metrics if present. - -Validation: -- `cargo check -p quickwit-jaeger --all-features` -- `cargo check -p quickwit-cluster --all-features` -- `cargo check -p quickwit-actors --all-features` -- `cargo check -p quickwit-opentelemetry --all-features` -- `cargo check -p quickwit-lambda-client --all-features` -- `cargo check -p quickwit-parquet-engine --all-features` -- `cargo check -p quickwit-metastore --all-features` -- `cargo check -p quickwit-control-plane --all-features` - -Review checklist: -- No remaining Quickwit-owned metric modules import Prometheus crate metric types. -- Counter/gauge/histogram operation names are migrated consistently. - -## Section 12: Remove Direct Prometheus Usage From Quickwit-Owned Code - -Goal: finish the backend migration and remove obsolete dependencies. - -Tasks: -- Search for direct Prometheus imports: - - `prometheus::` - - `IntCounter` - - `IntGauge` - - `HistogramVec` - - `IntCounterVec` - - `IntGaugeVec` - - `new_counter` - - `new_gauge` - - `new_histogram` - - `register_info` -- Remove old constructor functions and vector wrapper types. -- Remove `prometheus` from `quickwit-common` dependencies if no longer needed. -- Keep any third-party or generated Prometheus references only if they are not Quickwit-owned migration targets. -- Update docs and comments that describe Prometheus crate semantics. 
- -Validation: -- `rg "prometheus::|IntCounter|IntGauge|HistogramVec|IntCounterVec|IntGaugeVec|new_counter|new_gauge|new_histogram|register_info" quickwit -g '*.rs'` -- `cargo check --workspace --all-features` - -Review checklist: -- No compatibility shim remains by accident. -- Dependency cleanup does not remove unrelated Prometheus functionality. - -## Section 13: Add /metrics Integration Coverage - -Goal: prove the Prometheus rendering contract survived the migration. - -Tasks: -- Add integration tests for `/metrics` output. -- Assert: - - metric names - - labels - - descriptions/help text - - histogram bucket boundaries - - build info metric -- Include at least one counter, gauge, and histogram. -- Include at least one labeled child metric. - -Validation: -- `cargo test -p quickwit-serve metrics_api` -- Relevant integration test target if tests live outside `quickwit-serve`. - -Review checklist: -- Tests validate Prometheus text output, not internal implementation details. -- Tests avoid depending on global metric state in a flaky way. - -## Section 14: Final Workspace Verification - -Goal: verify the whole migration after all sections land. - -Tasks: -- Run focused tests: - - `cargo test -p quickwit-common metrics` - - `cargo test -p quickwit-serve metrics_api` - - `cargo test -p quickwit-cli logger` -- Run broader checks: - - `cargo clippy --workspace --tests --all-features` - - `make fmt` -- If runtime confidence is needed, start a local Quickwit node and scrape `/metrics`. -- Document any tests that are skipped due to environment requirements. - -Review checklist: -- Formatting uses the repository entrypoint `make fmt`. -- Clippy and tests are reported with exact failures if they cannot fully pass. -- The final PR description lists any expected metric-output differences. 
From 7baa12d7a61aadc136538c820787338594e45db5 Mon Sep 17 00:00:00 2001 From: Shuhei Kitagawa Date: Mon, 4 May 2026 11:29:04 +0200 Subject: [PATCH 21/54] Fix CI warning errors --- quickwit/quickwit-common/src/io.rs | 13 +------------ quickwit/quickwit-search/src/scroll_context.rs | 2 +- .../quickwit-search/src/search_permit_provider.rs | 2 +- quickwit/quickwit-serve/src/decompression.rs | 3 +-- 4 files changed, 4 insertions(+), 16 deletions(-) diff --git a/quickwit/quickwit-common/src/io.rs b/quickwit/quickwit-common/src/io.rs index c16d9d4d1a1..c37e4921db1 100644 --- a/quickwit/quickwit-common/src/io.rs +++ b/quickwit/quickwit-common/src/io.rs @@ -79,7 +79,7 @@ pub fn limiter(throughput: ByteSize) -> Limiter { .build() } -#[derive(Clone)] +#[derive(Clone, Default)] pub struct IoControls { throughput_limiter_opt: Option, bytes_counter: MaybeRegisteredCounter, @@ -87,17 +87,6 @@ pub struct IoControls { kill_switch: KillSwitch, } -impl Default for IoControls { - fn default() -> Self { - IoControls { - throughput_limiter_opt: None, - progress: Progress::default(), - kill_switch: KillSwitch::default(), - bytes_counter: MaybeRegisteredCounter::default(), - } - } -} - impl IoControls { #[must_use] pub fn progress(&self) -> &Progress { diff --git a/quickwit/quickwit-search/src/scroll_context.rs b/quickwit/quickwit-search/src/scroll_context.rs index 7f200ca3167..3165534b1e9 100644 --- a/quickwit/quickwit-search/src/scroll_context.rs +++ b/quickwit/quickwit-search/src/scroll_context.rs @@ -148,7 +148,7 @@ impl Default for MiniKV { impl MiniKV { pub async fn put(&self, key: Vec, payload: Vec, ttl: Duration) { - let mut metric_guard = + let metric_guard = GaugeGuard::from_gauge(&crate::metrics::SEARCHER_LOCAL_KV_STORE_SIZE_BYTES); metric_guard.increment(payload.len() as f64); let mut cache_lock = self.ttl_with_cache.write().await; diff --git a/quickwit/quickwit-search/src/search_permit_provider.rs b/quickwit/quickwit-search/src/search_permit_provider.rs index 
c30e5365f8e..e8cb09643f1 100644 --- a/quickwit/quickwit-search/src/search_permit_provider.rs +++ b/quickwit/quickwit-search/src/search_permit_provider.rs @@ -332,7 +332,7 @@ impl SearchPermitActor { fn assign_available_permits(&mut self) { while let Some(permit_request) = self.pop_next_request_if_serviceable() { - let mut ongoing_gauge_guard = + let ongoing_gauge_guard = GaugeGuard::from_gauge(&crate::metrics::LEAF_SEARCH_SINGLE_SPLIT_TASKS_ONGOING); ongoing_gauge_guard.increment(1.0); self.total_memory_allocated += permit_request.permit_size; diff --git a/quickwit/quickwit-serve/src/decompression.rs b/quickwit/quickwit-serve/src/decompression.rs index 7492d8cece2..c44bd8de7ea 100644 --- a/quickwit/quickwit-serve/src/decompression.rs +++ b/quickwit/quickwit-serve/src/decompression.rs @@ -114,8 +114,7 @@ pub(crate) struct Body { impl Body { pub fn new(content: Bytes, load_shield_permit: LoadShieldPermit) -> Body { - let mut gauge_guard = - GaugeGuard::from_gauge(&quickwit_common::metrics::IN_FLIGHT_REST_SERVER); + let gauge_guard = GaugeGuard::from_gauge(&quickwit_common::metrics::IN_FLIGHT_REST_SERVER); gauge_guard.increment(content.len() as f64); Body { content, From ee6a28c97dd5a7459757c4e7a5d98ed013341485 Mon Sep 17 00:00:00 2001 From: Luca Cominardi Date: Mon, 4 May 2026 14:45:57 +0200 Subject: [PATCH 22/54] Restore Cargo.lock in metrics-inventory run script The EXIT trap now also restores Cargo.lock, which gets modified when cargo resolves the patched Cargo.toml dependencies. 
Co-authored-by: Cursor --- quickwit/quickwit-metrics-inventory/scripts/run.sh | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/quickwit/quickwit-metrics-inventory/scripts/run.sh b/quickwit/quickwit-metrics-inventory/scripts/run.sh index 253d7d51605..a4ddf8b9e62 100755 --- a/quickwit/quickwit-metrics-inventory/scripts/run.sh +++ b/quickwit/quickwit-metrics-inventory/scripts/run.sh @@ -1,8 +1,9 @@ #!/usr/bin/env bash # # Discovers quickwit-metrics reverse dependencies, patches Cargo.toml and -# src/main.rs, builds and runs the inventory binary, then restores both -# files via git. Files are always restored — even on Ctrl-C or failure. +# src/main.rs, builds and runs the inventory binary, then restores +# Cargo.toml, Cargo.lock, and src/main.rs via git. +# Files are always restored — even on Ctrl-C or failure. # # Usage: # ./scripts/run_inventory.sh @@ -13,9 +14,10 @@ SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" CRATE_DIR="$(dirname "$SCRIPT_DIR")" WORKSPACE_DIR="$(dirname "$CRATE_DIR")" CARGO_TOML="$CRATE_DIR/Cargo.toml" +CARGO_LOCK="$WORKSPACE_DIR/Cargo.lock" MAIN_RS="$CRATE_DIR/src/main.rs" -trap 'git restore "$CARGO_TOML" "$MAIN_RS"' EXIT +trap 'git restore "$CARGO_TOML" "$CARGO_LOCK" "$MAIN_RS"' EXIT # --format '{lib}' outputs the Rust crate name (underscores, no version/path). # --prefix none removes tree decorators. tail skips the root (quickwit-metrics itself). 
From ceac5eb44d03fb936df42bf09f9da412e2778da5 Mon Sep 17 00:00:00 2001 From: Luca Cominardi Date: Mon, 4 May 2026 14:46:30 +0200 Subject: [PATCH 23/54] Fix usage comment in metrics-inventory run script Co-authored-by: Cursor --- quickwit/quickwit-metrics-inventory/scripts/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/quickwit/quickwit-metrics-inventory/scripts/run.sh b/quickwit/quickwit-metrics-inventory/scripts/run.sh index a4ddf8b9e62..08f23e49cee 100755 --- a/quickwit/quickwit-metrics-inventory/scripts/run.sh +++ b/quickwit/quickwit-metrics-inventory/scripts/run.sh @@ -6,7 +6,7 @@ # Files are always restored — even on Ctrl-C or failure. # # Usage: -# ./scripts/run_inventory.sh +# ./scripts/run.sh set -euo pipefail From fe87f7ee99866b2208c152002c8a86b6a8cd43b5 Mon Sep 17 00:00:00 2001 From: Luca Cominardi Date: Mon, 4 May 2026 15:09:37 +0200 Subject: [PATCH 24/54] Remove unused ahash workspace dependency Co-authored-by: Cursor --- quickwit/Cargo.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/quickwit/Cargo.toml b/quickwit/Cargo.toml index c1d16ebff24..73d7367d521 100644 --- a/quickwit/Cargo.toml +++ b/quickwit/Cargo.toml @@ -94,7 +94,6 @@ authors = ["Quickwit, Inc. "] license = "Apache-2.0" [workspace.dependencies] -ahash = "0.8" anyhow = "1" arc-swap = "1.8" arrow = { version = "58", default-features = false, features = ["ipc"] } From 41f3e231941dd6c0e05cddbb26b94fc69fb30dc3 Mon Sep 17 00:00:00 2001 From: Luca Cominardi Date: Mon, 4 May 2026 17:09:24 +0200 Subject: [PATCH 25/54] Make all counters and gauges always observable Remove the `observable` flag from `MetricInfo`, macros, and all call sites. Every counter now unconditionally maintains an `AtomicU64` shadow and every gauge an `AtomicF64` shadow, so `get()` always returns the real value. This eliminates a hash-collision bug where metrics with the same key but different `observable` flags could silently conflict. 
Co-authored-by: Cursor --- quickwit/quickwit-actors/src/mailbox.rs | 3 - quickwit/quickwit-common/src/io.rs | 1 - quickwit/quickwit-common/src/metrics.rs | 1 - quickwit/quickwit-common/src/stream_utils.rs | 2 - quickwit/quickwit-indexing/src/metrics.rs | 1 - .../quickwit-metrics-inventory/src/main.rs | 3 +- quickwit/quickwit-metrics/benches/cache.rs | 1 - .../benches/quickwit_metrics.rs | 4 - quickwit/quickwit-metrics/src/counter.rs | 86 ++++--------------- quickwit/quickwit-metrics/src/gauge.rs | 86 ++++--------------- quickwit/quickwit-metrics/src/histogram.rs | 1 - quickwit/quickwit-metrics/src/inner.rs | 2 - quickwit/quickwit-metrics/src/lib.rs | 14 +-- quickwit/quickwit-metrics/tests/counter.rs | 17 ---- quickwit/quickwit-metrics/tests/gauge.rs | 17 ---- quickwit/quickwit-search/src/metrics.rs | 1 - quickwit/quickwit-storage/src/metrics.rs | 14 --- 17 files changed, 40 insertions(+), 214 deletions(-) diff --git a/quickwit/quickwit-actors/src/mailbox.rs b/quickwit/quickwit-actors/src/mailbox.rs index cbbf6048f5d..a72a8cf8dc9 100644 --- a/quickwit/quickwit-actors/src/mailbox.rs +++ b/quickwit/quickwit-actors/src/mailbox.rs @@ -525,7 +525,6 @@ mod tests { name: "test_counter_low_backpressure", description: "help for test_counter", subsystem: "actor", - observable: true, ); let wait_duration = Duration::from_millis(1); let processed = mailbox @@ -556,7 +555,6 @@ mod tests { name: "test_counter_backpressure", description: "help for test_counter", subsystem: "actor", - observable: true, ); let wait_duration = Duration::from_millis(1); mailbox @@ -592,7 +590,6 @@ mod tests { name: "test_counter_no_waiting_backpressure", description: "help for test_counter", subsystem: "actor", - observable: true, ); let start = Instant::now(); mailbox diff --git a/quickwit/quickwit-common/src/io.rs b/quickwit/quickwit-common/src/io.rs index c37e4921db1..5c8b0e07a5e 100644 --- a/quickwit/quickwit-common/src/io.rs +++ b/quickwit/quickwit-common/src/io.rs @@ -53,7 +53,6 @@ static 
WRITE_BYTES: LazyLock = LazyLock::new(|| { name: "write_bytes", description: "Number of bytes written by a given component in [indexer, merger, deleter, split_downloader_{merge,delete}]", subsystem: "", - observable: true, ) }); diff --git a/quickwit/quickwit-common/src/metrics.rs b/quickwit/quickwit-common/src/metrics.rs index 54f1b14bfac..4c6685e84c8 100644 --- a/quickwit/quickwit-common/src/metrics.rs +++ b/quickwit/quickwit-common/src/metrics.rs @@ -247,7 +247,6 @@ mod tests { name: "maybe_registered_counter_test", description: "Maybe registered counter test.", subsystem: "", - observable: true, ); let counter = MaybeRegisteredCounter::registered(registered_counter.clone()); diff --git a/quickwit/quickwit-common/src/stream_utils.rs b/quickwit/quickwit-common/src/stream_utils.rs index c1fe28ccec7..40c99f38f7c 100644 --- a/quickwit/quickwit-common/src/stream_utils.rs +++ b/quickwit/quickwit-common/src/stream_utils.rs @@ -304,7 +304,6 @@ mod tests { name: "common", description: "help", subsystem: "test_tracked_service_stream_bounded", - observable: true, ) }); @@ -329,7 +328,6 @@ mod tests { name: "common", description: "help", subsystem: "test_tracked_service_stream_unbounded", - observable: true, ) }); diff --git a/quickwit/quickwit-indexing/src/metrics.rs b/quickwit/quickwit-indexing/src/metrics.rs index 699c2568e19..09eba98fec4 100644 --- a/quickwit/quickwit-indexing/src/metrics.rs +++ b/quickwit/quickwit-indexing/src/metrics.rs @@ -74,7 +74,6 @@ pub(crate) static ONGOING_MERGE_OPERATIONS: LazyLock = LazyLock::new(|| { name: "ongoing_merge_operations", description: "Number of ongoing merge operations", subsystem: "indexing", - observable: true, ) }); diff --git a/quickwit/quickwit-metrics-inventory/src/main.rs b/quickwit/quickwit-metrics-inventory/src/main.rs index 75c8edfd5f5..7322044afa7 100644 --- a/quickwit/quickwit-metrics-inventory/src/main.rs +++ b/quickwit/quickwit-metrics-inventory/src/main.rs @@ -60,9 +60,8 @@ fn main() { println!("{module}"); for 
(key, info) in metrics { println!( - " {key: = LazyLock::new(|| { name: "cache_dyn_parent_counter", description: "bench dynamic parent counter for cache benches", subsystem: "bench", - observable: true, "service" => "api" ) }); diff --git a/quickwit/quickwit-metrics/benches/quickwit_metrics.rs b/quickwit/quickwit-metrics/benches/quickwit_metrics.rs index f651cc9d391..61e2b75bd50 100644 --- a/quickwit/quickwit-metrics/benches/quickwit_metrics.rs +++ b/quickwit/quickwit-metrics/benches/quickwit_metrics.rs @@ -593,7 +593,6 @@ static DYN_PARENT_COUNTER: LazyLock = LazyLock::new(|| { name: "dyn_parent_counter", description: "bench dynamic parent counter", subsystem: "bench", - observable: true, "service" => "api" ) }); @@ -603,7 +602,6 @@ static DYN_PARENT_GAUGE: LazyLock = LazyLock::new(|| { name: "dyn_parent_gauge", description: "bench dynamic parent gauge", subsystem: "bench", - observable: true, "service" => "api" ) }); @@ -673,7 +671,6 @@ static OBS_COUNTER: LazyLock = LazyLock::new(|| { name: "obs_counter", description: "bench observable counter", subsystem: "bench", - observable: true ) }); @@ -682,7 +679,6 @@ static OBS_GAUGE: LazyLock = LazyLock::new(|| { name: "obs_gauge", description: "bench observable gauge", subsystem: "bench", - observable: true ) }); diff --git a/quickwit/quickwit-metrics/src/counter.rs b/quickwit/quickwit-metrics/src/counter.rs index a35130ec953..edb04d06b79 100644 --- a/quickwit/quickwit-metrics/src/counter.rs +++ b/quickwit/quickwit-metrics/src/counter.rs @@ -63,15 +63,15 @@ pub fn __counter_get_or_register( /// Held behind an `Arc` so that all handles (`Counter` clones, thread-local /// caches, parent extensions with matching labels) point to the same data. struct CounterInner { - /// Static metadata (name, subsystem, description, observable flag). + /// Static metadata (name, subsystem, description). info: &'static MetricInfo, /// Full metric key: qualified name + all labels. 
key: metrics::Key, /// Recorder-provided counter handle for the actual recording backend. inner: metrics::Counter, - /// Shadow atomic for observable counters (`Some`), or `None` for - /// fire-and-forget counters where `get()` returns `u64::MAX`. - shadow: Option, + /// Shadow atomic that mirrors every mutation so `get()` can read + /// the current value without querying the recorder. + shadow: AtomicU64, /// Pre-computed cache key used for DashMap lookups, thread-local /// comparisons, and the `Hash` / `Eq` impls on `Counter`. hash: u64, @@ -84,16 +84,11 @@ impl CounterInner { key: metrics::Key, inner: metrics::Counter, ) -> Self { - let shadow = if info.observable { - Some(AtomicU64::new(0)) - } else { - None - }; Self { info, key, inner, - shadow, + shadow: AtomicU64::new(0), hash, } } @@ -107,10 +102,9 @@ impl CounterInner { /// Counters are **monotonically increasing** — use [`increment`](Self::increment) /// for deltas or [`absolute`](Self::absolute) to set a known total. /// -/// When declared with `observable: true`, the counter holds an -/// `AtomicU64` shadow inside its `Arc`. All clones share -/// the same atomic, so `get()` is always consistent. Non-observable -/// counters store `None` and `get()` returns `u64::MAX`. +/// Every counter maintains an `AtomicU64` shadow so that [`get()`](Self::get) +/// can read the current value without querying the recorder. All clones +/// share the same shadow via `Arc`. #[derive(Clone)] #[repr(transparent)] pub struct Counter(Arc); @@ -168,37 +162,22 @@ impl Counter { &self.0.key } - /// Adds `value` to the counter. - /// - /// If observable, also bumps the shadow `AtomicU64` so that - /// [`get()`](Self::get) reflects the update. + /// Adds `value` to the counter and its shadow atomic. 
pub fn increment(&self, value: u64) { - if let Some(s) = self.0.shadow.as_ref() { - s.fetch_add(value, Ordering::Relaxed); - } + self.0.shadow.fetch_add(value, Ordering::Relaxed); self.0.inner.increment(value); } /// Sets the counter to an absolute `value`, useful for process-level /// totals that are already tracked externally. - /// - /// If observable, also stores into the shadow `AtomicU64`. pub fn absolute(&self, value: u64) { - if let Some(s) = self.0.shadow.as_ref() { - s.store(value, Ordering::Relaxed); - } + self.0.shadow.store(value, Ordering::Relaxed); self.0.inner.absolute(value); } - /// Returns the current shadow counter value. - /// - /// Observable counters return the accumulated value. - /// Non-observable counters always return `u64::MAX`. + /// Returns the current counter value from the shadow atomic. pub fn get(&self) -> u64 { - match self.0.shadow.as_ref() { - Some(s) => s.load(Ordering::Relaxed), - None => u64::MAX, - } + self.0.shadow.load(Ordering::Relaxed) } } @@ -219,8 +198,8 @@ impl CounterFn for Counter { /// # Base declaration /// /// Creates a new counter with a static name, description, subsystem, and -/// optional static labels. By default counters are non-observable; add -/// `observable: true` to enable `get()` readback via a shadow `AtomicU64`. +/// optional static labels. Every counter maintains a shadow `AtomicU64` +/// so [`get()`](Counter::get) always returns the current value. /// /// ```ignore /// let c = counter!( @@ -230,18 +209,6 @@ impl CounterFn for Counter { /// "env" => "prod", /// ); /// c.increment(1); -/// ``` -/// -/// With `observable: true`, `get()` returns the current value: -/// -/// ```ignore -/// let c = counter!( -/// name: "requests_total", -/// description: "Total number of HTTP requests", -/// subsystem: "http", -/// observable: true, -/// ); -/// c.increment(1); /// assert_eq!(c.get(), 1); /// ``` /// @@ -256,41 +223,20 @@ impl CounterFn for Counter { /// ``` #[macro_export] macro_rules! 
counter { - // Base declaration without observable (defaults to false). - // Convenience shorthand that delegates to the full base arm below. - ( - name: $name:literal, - description: $description:literal, - subsystem: $subsystem:tt - $(, $label:literal => $value:literal)* $(,)? - ) => {{ - $crate::counter!( - name: $name, - description: $description, - subsystem: $subsystem, - observable: false - $(, $label => $value)* - ) - }}; - // Base declaration: all-static name, labels, and key — zero allocations. ( name: $name:literal, description: $description:literal, - subsystem: $subsystem:tt, - observable: $observable:expr + subsystem: $subsystem:tt $(, $label:literal => $value:literal)* $(,)? ) => {{ - // Expand compile-time statics: KEY_NAME, INFO, KEY, LABELS, METADATA. $crate::__key_info_metadata!( kind: $crate::MetricKind::Counter, - observable: $observable, name: $name, description: $description, subsystem: $subsystem $(, $label => $value)* ); - // Thread-local cache + global DashMap registration. $crate::__metric_declaration!( metric_type: $crate::Counter, register_fn: $crate::__counter_get_or_register, diff --git a/quickwit/quickwit-metrics/src/gauge.rs b/quickwit/quickwit-metrics/src/gauge.rs index edb4f5399bb..1d66ee36f63 100644 --- a/quickwit/quickwit-metrics/src/gauge.rs +++ b/quickwit/quickwit-metrics/src/gauge.rs @@ -63,15 +63,15 @@ pub fn __gauge_get_or_register( /// Held behind an `Arc` so that all handles (`Gauge` clones, thread-local /// caches, parent extensions with matching labels) point to the same data. struct GaugeInner { - /// Static metadata (name, subsystem, description, observable flag). + /// Static metadata (name, subsystem, description). info: &'static MetricInfo, /// Full metric key: qualified name + all labels. key: metrics::Key, /// Recorder-provided gauge handle for the actual recording backend. 
inner: metrics::Gauge, - /// Shadow atomic for observable gauges (`Some`), or `None` for - /// fire-and-forget gauges where `get()` returns `f64::NAN`. - shadow: Option, + /// Shadow atomic that mirrors every mutation so `get()` can read + /// the current value without querying the recorder. + shadow: AtomicF64, /// Pre-computed cache key used for DashMap lookups, thread-local /// comparisons, and the `Hash` / `Eq` impls on `Gauge`. hash: u64, @@ -79,16 +79,11 @@ struct GaugeInner { impl GaugeInner { fn new(hash: u64, info: &'static MetricInfo, key: metrics::Key, inner: metrics::Gauge) -> Self { - let shadow = if info.observable { - Some(AtomicF64::new(0.0)) - } else { - None - }; Self { info, key, inner, - shadow, + shadow: AtomicF64::new(0.0), hash, } } @@ -102,10 +97,9 @@ impl GaugeInner { /// Unlike counters, gauges can go **up and down** — they represent a /// point-in-time value such as active connections or queue depth. /// -/// When declared with `observable: true`, the gauge holds an -/// `AtomicF64` shadow inside its `Arc`. All clones share -/// the same atomic, so `get()` is always consistent. Non-observable -/// gauges store `None` and `get()` returns `f64::NAN`. +/// Every gauge maintains an `AtomicF64` shadow so that [`get()`](Self::get) +/// can read the current value without querying the recorder. All clones +/// share the same shadow via `Arc`. #[derive(Clone)] #[repr(transparent)] pub struct Gauge(Arc); @@ -163,48 +157,27 @@ impl Gauge { &self.0.key } - /// Adds `value` to the current gauge reading. - /// - /// If observable, also bumps the shadow `AtomicF64` so that - /// [`get()`](Self::get) reflects the update. + /// Adds `value` to the current gauge reading and its shadow atomic. pub fn increment(&self, value: f64) { - if let Some(s) = self.0.shadow.as_ref() { - s.fetch_add(value, Ordering::Relaxed); - } + self.0.shadow.fetch_add(value, Ordering::Relaxed); self.0.inner.increment(value); } - /// Subtracts `value` from the current gauge reading. 
- /// - /// If observable, also decrements the shadow `AtomicF64` so that - /// [`get()`](Self::get) reflects the update. + /// Subtracts `value` from the current gauge reading and its shadow atomic. pub fn decrement(&self, value: f64) { - if let Some(s) = self.0.shadow.as_ref() { - s.fetch_sub(value, Ordering::Relaxed); - } + self.0.shadow.fetch_sub(value, Ordering::Relaxed); self.0.inner.decrement(value); } - /// Replaces the current gauge reading with `value`. - /// - /// If observable, also stores into the shadow `AtomicF64` so that - /// [`get()`](Self::get) reflects the update. + /// Replaces the current gauge reading and its shadow atomic with `value`. pub fn set(&self, value: f64) { - if let Some(s) = self.0.shadow.as_ref() { - s.store(value, Ordering::Relaxed); - } + self.0.shadow.store(value, Ordering::Relaxed); self.0.inner.set(value); } - /// Returns the current shadow gauge value. - /// - /// Observable gauges return the tracked value. - /// Non-observable gauges always return `f64::NAN`. + /// Returns the current gauge value from the shadow atomic. pub fn get(&self) -> f64 { - match self.0.shadow.as_ref() { - Some(s) => s.load(Ordering::Relaxed), - None => f64::NAN, - } + self.0.shadow.load(Ordering::Relaxed) } } @@ -276,8 +249,8 @@ impl Drop for GaugeGuard { /// # Base declaration /// /// Creates a new gauge with a static name, description, subsystem, and -/// optional static labels. By default gauges are non-observable; add -/// `observable: true` to enable `get()` readback via a shadow `AtomicF64`. +/// optional static labels. Every gauge maintains a shadow `AtomicF64` +/// so [`get()`](Gauge::get) always returns the current value. /// /// ```ignore /// let g = gauge!( @@ -301,41 +274,20 @@ impl Drop for GaugeGuard { /// ``` #[macro_export] macro_rules! gauge { - // Base declaration without observable (defaults to false). - // Convenience shorthand that delegates to the full base arm below. 
- ( - name: $name:literal, - description: $description:literal, - subsystem: $subsystem:tt - $(, $label:literal => $value:literal)* $(,)? - ) => {{ - $crate::gauge!( - name: $name, - description: $description, - subsystem: $subsystem, - observable: false - $(, $label => $value)* - ) - }}; - // Base declaration: all-static name, labels, and key — zero allocations. ( name: $name:literal, description: $description:literal, - subsystem: $subsystem:tt, - observable: $observable:expr + subsystem: $subsystem:tt $(, $label:literal => $value:literal)* $(,)? ) => {{ - // Expand compile-time statics: KEY_NAME, INFO, KEY, LABELS, METADATA. $crate::__key_info_metadata!( kind: $crate::MetricKind::Gauge, - observable: $observable, name: $name, description: $description, subsystem: $subsystem $(, $label => $value)* ); - // Thread-local cache + global DashMap registration. $crate::__metric_declaration!( metric_type: $crate::Gauge, register_fn: $crate::__gauge_get_or_register, diff --git a/quickwit/quickwit-metrics/src/histogram.rs b/quickwit/quickwit-metrics/src/histogram.rs index 4f7fd13ec2b..2a99f3bc9d6 100644 --- a/quickwit/quickwit-metrics/src/histogram.rs +++ b/quickwit/quickwit-metrics/src/histogram.rs @@ -266,7 +266,6 @@ macro_rules! histogram { // Expand compile-time statics: KEY_NAME, INFO, KEY, LABELS, METADATA. $crate::__key_info_metadata!( kind: $crate::MetricKind::Histogram, - observable: false, name: $name, description: $description, subsystem: $subsystem diff --git a/quickwit/quickwit-metrics/src/inner.rs b/quickwit/quickwit-metrics/src/inner.rs index a269e8c3ec7..5db979403a9 100644 --- a/quickwit/quickwit-metrics/src/inner.rs +++ b/quickwit/quickwit-metrics/src/inner.rs @@ -73,7 +73,6 @@ macro_rules! __metadata { macro_rules! __key_info_metadata { ( kind: $kind:expr, - observable: $observable:expr, name: $name:literal, description: $description:literal, subsystem: $subsystem:tt @@ -85,7 +84,6 @@ macro_rules! 
__key_info_metadata { key_name: KEY_NAME, description: $description, kind: $kind, - observable: $observable, metadata: &METADATA, static_labels: &[$(($label, $value)),*], }; diff --git a/quickwit/quickwit-metrics/src/lib.rs b/quickwit/quickwit-metrics/src/lib.rs index 87844890988..6dcc22b1517 100644 --- a/quickwit/quickwit-metrics/src/lib.rs +++ b/quickwit/quickwit-metrics/src/lib.rs @@ -145,11 +145,9 @@ //! //! ### 6. Observable metrics //! -//! All counters and gauges expose a `get()` method. By default -//! (non-observable), `get()` returns a sentinel (`u64::MAX` for counters, -//! `f64::NAN` for gauges). When you need to read the actual metric value -//! in production — for back-pressure, health checks, or conditional -//! logging — add `observable: true`: +//! All counters and gauges expose a `get()` method that returns the +//! current metric value — useful for back-pressure, health checks, or +//! conditional logging: //! //! ```rust,ignore //! static PENDING: LazyLock = LazyLock::new(|| { @@ -157,7 +155,6 @@ //! name: "pending_bytes", //! description: "Bytes waiting to be flushed", //! subsystem: "indexer", -//! observable: true, //! ) //! }); //! @@ -166,8 +163,7 @@ //! ``` //! //! `get()` always returns a value directly (`u64` for counters, `f64` for -//! gauges). Non-observable metrics return `u64::MAX` / `f64::NAN` and -//! skip the shadow entirely. +//! gauges). //! //! Under the hood, observable state lives inside the shared //! `Arc<…Inner>` (e.g. `Arc`, `Arc`). All @@ -331,8 +327,6 @@ pub struct MetricInfo { pub description: &'static str, /// Which `describe_*` method to call for this metric. pub kind: MetricKind, - /// Whether this metric was declared with `observable: true`. - pub observable: bool, /// Recorder metadata capturing the subsystem (target), verbosity level, /// and module path where the metric was declared. 
pub metadata: &'static metrics::Metadata<'static>, diff --git a/quickwit/quickwit-metrics/tests/counter.rs b/quickwit/quickwit-metrics/tests/counter.rs index ea576812f53..2a772f4a841 100644 --- a/quickwit/quickwit-metrics/tests/counter.rs +++ b/quickwit/quickwit-metrics/tests/counter.rs @@ -156,7 +156,6 @@ fn observable_get_matches_recorder() { name: "oc_get", description: "observable counter", subsystem: "test", - observable: true ); c.increment(3); c.increment(7); @@ -177,7 +176,6 @@ fn observable_absolute_matches_recorder() { name: "oc_abs", description: "observable absolute counter", subsystem: "test", - observable: true ); c.absolute(42); c @@ -188,19 +186,6 @@ fn observable_absolute_matches_recorder() { assert_eq!(snap[0].3, DebugValue::Counter(42)); } -#[test] -fn non_observable_returns_max() { - with_recorder(|| { - let c = counter!( - name: "noc_get", - description: "non-observable counter", - subsystem: "test", - ); - c.increment(5); - assert_eq!(c.get(), u64::MAX); - }); -} - #[test] fn observable_parent_children_share_shadow() { with_recorder(|| { @@ -208,7 +193,6 @@ fn observable_parent_children_share_shadow() { name: "oc_shared", description: "shared shadow counter", subsystem: "test", - observable: true ); let child_a = counter!(parent: parent, "region" => "us-east"); let child_b = counter!(parent: parent, "region" => "us-east"); @@ -228,7 +212,6 @@ fn observable_parent_distinct_labels_separate_shadow() { name: "oc_distinct", description: "distinct shadow counter", subsystem: "test", - observable: true ); let child_a = counter!(parent: parent, "region" => "us-east"); let child_b = counter!(parent: parent, "region" => "eu-west"); diff --git a/quickwit/quickwit-metrics/tests/gauge.rs b/quickwit/quickwit-metrics/tests/gauge.rs index f5645f53e69..c365368a3b8 100644 --- a/quickwit/quickwit-metrics/tests/gauge.rs +++ b/quickwit/quickwit-metrics/tests/gauge.rs @@ -176,7 +176,6 @@ fn observable_set_matches_recorder() { name: "og_set", description: 
"observable gauge", subsystem: "test", - observable: true ); g.set(42.0); g @@ -196,7 +195,6 @@ fn observable_inc_dec_matches_recorder() { name: "og_inc_dec", description: "observable inc/dec gauge", subsystem: "test", - observable: true ); g.set(10.0); g.increment(5.0); @@ -218,7 +216,6 @@ fn observable_guard_matches_recorder() { name: "og_guard", description: "observable gauge with guard", subsystem: "test", - observable: true ); g.set(0.0); { @@ -234,19 +231,6 @@ fn observable_guard_matches_recorder() { assert_eq!(snap[0].3, DebugValue::Gauge(0.0.into())); } -#[test] -fn non_observable_returns_nan() { - with_recorder(|| { - let g = gauge!( - name: "nog_get", - description: "non-observable gauge", - subsystem: "test", - ); - g.set(99.0); - assert!(g.get().is_nan()); - }); -} - #[test] fn observable_parent_children_share_shadow() { with_recorder(|| { @@ -254,7 +238,6 @@ fn observable_parent_children_share_shadow() { name: "og_shared", description: "shared shadow gauge", subsystem: "test", - observable: true ); let child_a = gauge!(parent: parent, "region" => "us-east"); let child_b = gauge!(parent: parent, "region" => "us-east"); diff --git a/quickwit/quickwit-search/src/metrics.rs b/quickwit/quickwit-search/src/metrics.rs index 2dc1bb5b988..761da4e27d3 100644 --- a/quickwit/quickwit-search/src/metrics.rs +++ b/quickwit/quickwit-search/src/metrics.rs @@ -156,7 +156,6 @@ static SPLIT_SEARCH_OUTCOME: LazyLock = LazyLock::new(|| { name: "split_search_outcome", description: "Count the state in which each leaf search split ended", subsystem: "search", - observable: true, ) }); diff --git a/quickwit/quickwit-storage/src/metrics.rs b/quickwit/quickwit-storage/src/metrics.rs index 4ef6e370ee9..ac0bf6b32d5 100644 --- a/quickwit/quickwit-storage/src/metrics.rs +++ b/quickwit/quickwit-storage/src/metrics.rs @@ -264,7 +264,6 @@ static CACHE_IN_CACHE_COUNT: LazyLock = LazyLock::new(|| { name: "in_cache_count", description: "Count of in cache by component", subsystem: "cache", 
- observable: true, ) }); @@ -273,7 +272,6 @@ static CACHE_IN_CACHE_NUM_BYTES: LazyLock = LazyLock::new(|| { name: "in_cache_num_bytes", description: "Number of bytes in cache by component", subsystem: "cache", - observable: true, ) }); @@ -282,7 +280,6 @@ static CACHE_HITS_TOTAL: LazyLock = LazyLock::new(|| { name: "cache_hits_total", description: "Number of cache hits by component", subsystem: "cache", - observable: true, ) }); @@ -291,7 +288,6 @@ static CACHE_HITS_BYTES: LazyLock = LazyLock::new(|| { name: "cache_hits_bytes", description: "Number of cache hits in bytes by component", subsystem: "cache", - observable: true, ) }); @@ -300,7 +296,6 @@ static CACHE_MISSES_TOTAL: LazyLock = LazyLock::new(|| { name: "cache_misses_total", description: "Number of cache misses by component", subsystem: "cache", - observable: true, ) }); @@ -309,7 +304,6 @@ static CACHE_EVICT_TOTAL: LazyLock = LazyLock::new(|| { name: "cache_evict_total", description: "Number of cache entry evicted by component", subsystem: "cache", - observable: true, ) }); @@ -318,7 +312,6 @@ static CACHE_EVICT_BYTES: LazyLock = LazyLock::new(|| { name: "cache_evict_bytes", description: "Number of cache entry evicted in bytes by component", subsystem: "cache", - observable: true, ) }); @@ -327,7 +320,6 @@ static VIRTUAL_CACHE_IN_CACHE_COUNT: LazyLock = LazyLock::new(|| { name: "virtual_in_cache_count", description: "Count of in cache by component", subsystem: "cache", - observable: true, ) }); @@ -336,7 +328,6 @@ static VIRTUAL_CACHE_IN_CACHE_NUM_BYTES: LazyLock = LazyLock::new(|| { name: "virtual_in_cache_num_bytes", description: "Number of bytes in cache by component", subsystem: "cache", - observable: true, ) }); @@ -345,7 +336,6 @@ static VIRTUAL_CACHE_HITS_TOTAL: LazyLock = LazyLock::new(|| { name: "virtual_cache_hits_total", description: "Number of cache hits by component", subsystem: "cache", - observable: true, ) }); @@ -354,7 +344,6 @@ static VIRTUAL_CACHE_HITS_BYTES: LazyLock = 
LazyLock::new(|| { name: "virtual_cache_hits_bytes", description: "Number of cache hits in bytes by component", subsystem: "cache", - observable: true, ) }); @@ -363,7 +352,6 @@ static VIRTUAL_CACHE_MISSES_TOTAL: LazyLock = LazyLock::new(|| { name: "virtual_cache_misses_total", description: "Number of cache misses by component", subsystem: "cache", - observable: true, ) }); @@ -372,7 +360,6 @@ static VIRTUAL_CACHE_EVICT_TOTAL: LazyLock = LazyLock::new(|| { name: "virtual_cache_evict_total", description: "Number of cache entry evicted by component", subsystem: "cache", - observable: true, ) }); @@ -381,7 +368,6 @@ static VIRTUAL_CACHE_EVICT_BYTES: LazyLock = LazyLock::new(|| { name: "virtual_cache_evict_bytes", description: "Number of cache entry evicted in bytes by component", subsystem: "cache", - observable: true, ) }); From f4298b5990540fb3979f2f2e0aa3cb65dc8892a9 Mon Sep 17 00:00:00 2001 From: Shuhei Kitagawa Date: Mon, 4 May 2026 15:10:45 +0200 Subject: [PATCH 26/54] Remove redundant metric parent reborrows --- quickwit/quickwit-common/src/io.rs | 2 +- quickwit/quickwit-common/src/metrics.rs | 2 +- quickwit/quickwit-common/src/runtimes.rs | 8 ++-- quickwit/quickwit-common/src/thread_pool.rs | 4 +- quickwit/quickwit-common/src/tower/metrics.rs | 6 +-- .../quickwit-control-plane/src/metrics.rs | 8 ++-- .../quickwit-ingest/src/ingest_v2/metrics.rs | 34 +++++++------- quickwit/quickwit-search/src/metrics.rs | 12 ++--- quickwit/quickwit-storage/src/metrics.rs | 44 +++++++++---------- 9 files changed, 60 insertions(+), 60 deletions(-) diff --git a/quickwit/quickwit-common/src/io.rs b/quickwit/quickwit-common/src/io.rs index 5c8b0e07a5e..8509ee96b5b 100644 --- a/quickwit/quickwit-common/src/io.rs +++ b/quickwit/quickwit-common/src/io.rs @@ -111,7 +111,7 @@ impl IoControls { pub fn set_component(mut self, component: &str) -> Self { let labels = COMPONENT_LABELS.with_values([component.to_string()]); self.bytes_counter = - 
MaybeRegisteredCounter::registered(counter!(parent: &*WRITE_BYTES, labels: &labels)); + MaybeRegisteredCounter::registered(counter!(parent: WRITE_BYTES, labels: &labels)); self } diff --git a/quickwit/quickwit-common/src/metrics.rs b/quickwit/quickwit-common/src/metrics.rs index 4c6685e84c8..7c932843c02 100644 --- a/quickwit/quickwit-common/src/metrics.rs +++ b/quickwit/quickwit-common/src/metrics.rs @@ -217,7 +217,7 @@ pub static IN_FLIGHT_OTHER_SOURCE: LazyLock = fn in_flight_data_gauge(component: &'static str) -> Gauge { let labels = COMPONENT_LABELS.with_values([component]); - gauge!(parent: &*IN_FLIGHT_DATA_BYTES, labels: &labels) + gauge!(parent: IN_FLIGHT_DATA_BYTES, labels: &labels) } #[cfg(test)] diff --git a/quickwit/quickwit-common/src/runtimes.rs b/quickwit/quickwit-common/src/runtimes.rs index 21f0ae9015c..600f7e2f8a5 100644 --- a/quickwit/quickwit-common/src/runtimes.rs +++ b/quickwit/quickwit-common/src/runtimes.rs @@ -218,13 +218,13 @@ impl RuntimeMetricsRecorder { pub fn new(label: &'static str) -> Self { let labels = RUNTIME_TYPE_LABELS.with_values([label]); Self { - scheduled_tasks: gauge!(parent: &*TOKIO_SCHEDULED_TASKS, labels: &labels), + scheduled_tasks: gauge!(parent: TOKIO_SCHEDULED_TASKS, labels: &labels), worker_busy_duration_milliseconds_total: counter!( - parent: &*TOKIO_WORKER_BUSY_DURATION_MILLISECONDS_TOTAL, + parent: TOKIO_WORKER_BUSY_DURATION_MILLISECONDS_TOTAL, labels: &labels, ), - worker_busy_ratio: gauge!(parent: &*TOKIO_WORKER_BUSY_RATIO, labels: &labels), - worker_threads: gauge!(parent: &*TOKIO_WORKER_THREADS, labels: &labels), + worker_busy_ratio: gauge!(parent: TOKIO_WORKER_BUSY_RATIO, labels: &labels), + worker_threads: gauge!(parent: TOKIO_WORKER_THREADS, labels: &labels), } } diff --git a/quickwit/quickwit-common/src/thread_pool.rs b/quickwit/quickwit-common/src/thread_pool.rs index 4d81007c71a..00ef5dea3ad 100644 --- a/quickwit/quickwit-common/src/thread_pool.rs +++ b/quickwit/quickwit-common/src/thread_pool.rs @@ 
-63,8 +63,8 @@ impl ThreadPool { .build() .expect("failed to spawn thread pool"); let labels = THREAD_POOL_LABELS.with_values([name]); - let ongoing_tasks = gauge!(parent: &*THREAD_POOL_ONGOING_TASKS, labels: &labels); - let pending_tasks = gauge!(parent: &*THREAD_POOL_PENDING_TASKS, labels: &labels); + let ongoing_tasks = gauge!(parent: THREAD_POOL_ONGOING_TASKS, labels: &labels); + let pending_tasks = gauge!(parent: THREAD_POOL_PENDING_TASKS, labels: &labels); ThreadPool { thread_pool: Arc::new(thread_pool), ongoing_tasks, diff --git a/quickwit/quickwit-common/src/tower/metrics.rs b/quickwit/quickwit-common/src/tower/metrics.rs index a66676e23a0..caf8353d7f0 100644 --- a/quickwit/quickwit-common/src/tower/metrics.rs +++ b/quickwit/quickwit-common/src/tower/metrics.rs @@ -109,10 +109,10 @@ impl GrpcMetricsLayer { pub fn new(subsystem: &'static str, kind: &'static str) -> Self { let labels = GRPC_SERVICE_LABELS.with_values([subsystem, kind]); Self { - requests_total: counter!(parent: &*GRPC_REQUESTS_TOTAL, labels: &labels), - requests_in_flight: gauge!(parent: &*GRPC_REQUESTS_IN_FLIGHT, labels: &labels), + requests_total: counter!(parent: GRPC_REQUESTS_TOTAL, labels: &labels), + requests_in_flight: gauge!(parent: GRPC_REQUESTS_IN_FLIGHT, labels: &labels), request_duration_seconds: histogram!( - parent: &*GRPC_REQUEST_DURATION_SECONDS, + parent: GRPC_REQUEST_DURATION_SECONDS, labels: &labels, ), } diff --git a/quickwit/quickwit-control-plane/src/metrics.rs b/quickwit/quickwit-control-plane/src/metrics.rs index 0d3b4df5b49..4875f2091a7 100644 --- a/quickwit/quickwit-control-plane/src/metrics.rs +++ b/quickwit/quickwit-control-plane/src/metrics.rs @@ -46,10 +46,10 @@ static SHARDS: LazyLock = LazyLock::new(|| { }); pub(crate) static OPEN_SHARDS: LazyLock = - LazyLock::new(|| gauge!(parent: &*SHARDS, "state" => "open")); + LazyLock::new(|| gauge!(parent: SHARDS, "state" => "open")); pub(crate) static CLOSED_SHARDS: LazyLock = - LazyLock::new(|| gauge!(parent: &*SHARDS, 
"state" => "closed")); + LazyLock::new(|| gauge!(parent: SHARDS, "state" => "closed")); pub(crate) const INDEX_ID_LABELS: Labels<1> = Labels::new(["index_id"]); @@ -62,10 +62,10 @@ static INDEXED_SHARDS: LazyLock = LazyLock::new(|| { }); pub(crate) static LOCAL_SHARDS: LazyLock = - LazyLock::new(|| gauge!(parent: &*INDEXED_SHARDS, "locality" => "local")); + LazyLock::new(|| gauge!(parent: INDEXED_SHARDS, "locality" => "local")); pub(crate) static REMOTE_SHARDS: LazyLock = - LazyLock::new(|| gauge!(parent: &*INDEXED_SHARDS, "locality" => "remote")); + LazyLock::new(|| gauge!(parent: INDEXED_SHARDS, "locality" => "remote")); pub(crate) static APPLY_PLAN_TOTAL: LazyLock = LazyLock::new(|| { counter!( diff --git a/quickwit/quickwit-ingest/src/ingest_v2/metrics.rs b/quickwit/quickwit-ingest/src/ingest_v2/metrics.rs index 4841f835e0a..50b8504eb81 100644 --- a/quickwit/quickwit-ingest/src/ingest_v2/metrics.rs +++ b/quickwit/quickwit-ingest/src/ingest_v2/metrics.rs @@ -27,49 +27,49 @@ static INGEST_RESULT_TOTAL: LazyLock = LazyLock::new(|| { }); pub(super) static INGEST_RESULT_SUCCESS: LazyLock = - LazyLock::new(|| counter!(parent: &*INGEST_RESULT_TOTAL, "result" => "success")); + LazyLock::new(|| counter!(parent: INGEST_RESULT_TOTAL, "result" => "success")); pub(super) static INGEST_RESULT_CIRCUIT_BREAKER: LazyLock = - LazyLock::new(|| counter!(parent: &*INGEST_RESULT_TOTAL, "result" => "circuit_breaker")); + LazyLock::new(|| counter!(parent: INGEST_RESULT_TOTAL, "result" => "circuit_breaker")); pub(super) static INGEST_RESULT_UNSPECIFIED: LazyLock = - LazyLock::new(|| counter!(parent: &*INGEST_RESULT_TOTAL, "result" => "unspecified")); + LazyLock::new(|| counter!(parent: INGEST_RESULT_TOTAL, "result" => "unspecified")); pub(super) static INGEST_RESULT_INDEX_NOT_FOUND: LazyLock = - LazyLock::new(|| counter!(parent: &*INGEST_RESULT_TOTAL, "result" => "index_not_found")); + LazyLock::new(|| counter!(parent: INGEST_RESULT_TOTAL, "result" => "index_not_found")); pub(super) 
static INGEST_RESULT_SOURCE_NOT_FOUND: LazyLock = - LazyLock::new(|| counter!(parent: &*INGEST_RESULT_TOTAL, "result" => "source_not_found")); + LazyLock::new(|| counter!(parent: INGEST_RESULT_TOTAL, "result" => "source_not_found")); pub(super) static INGEST_RESULT_INTERNAL: LazyLock = - LazyLock::new(|| counter!(parent: &*INGEST_RESULT_TOTAL, "result" => "internal")); + LazyLock::new(|| counter!(parent: INGEST_RESULT_TOTAL, "result" => "internal")); pub(super) static INGEST_RESULT_NO_SHARDS_AVAILABLE: LazyLock = - LazyLock::new(|| counter!(parent: &*INGEST_RESULT_TOTAL, "result" => "no_shards_available")); + LazyLock::new(|| counter!(parent: INGEST_RESULT_TOTAL, "result" => "no_shards_available")); pub(super) static INGEST_RESULT_SHARD_RATE_LIMITED: LazyLock = - LazyLock::new(|| counter!(parent: &*INGEST_RESULT_TOTAL, "result" => "shard_rate_limited")); + LazyLock::new(|| counter!(parent: INGEST_RESULT_TOTAL, "result" => "shard_rate_limited")); pub(super) static INGEST_RESULT_WAL_FULL: LazyLock = - LazyLock::new(|| counter!(parent: &*INGEST_RESULT_TOTAL, "result" => "wal_full")); + LazyLock::new(|| counter!(parent: INGEST_RESULT_TOTAL, "result" => "wal_full")); pub(super) static INGEST_RESULT_TIMEOUT: LazyLock = - LazyLock::new(|| counter!(parent: &*INGEST_RESULT_TOTAL, "result" => "timeout")); + LazyLock::new(|| counter!(parent: INGEST_RESULT_TOTAL, "result" => "timeout")); pub(super) static INGEST_RESULT_ROUTER_TIMEOUT: LazyLock = - LazyLock::new(|| counter!(parent: &*INGEST_RESULT_TOTAL, "result" => "router_timeout")); + LazyLock::new(|| counter!(parent: INGEST_RESULT_TOTAL, "result" => "router_timeout")); pub(super) static INGEST_RESULT_ROUTER_LOAD_SHEDDING: LazyLock = - LazyLock::new(|| counter!(parent: &*INGEST_RESULT_TOTAL, "result" => "router_load_shedding")); + LazyLock::new(|| counter!(parent: INGEST_RESULT_TOTAL, "result" => "router_load_shedding")); pub(super) static INGEST_RESULT_LOAD_SHEDDING: LazyLock = - LazyLock::new(|| counter!(parent: 
&*INGEST_RESULT_TOTAL, "result" => "load_shedding")); + LazyLock::new(|| counter!(parent: INGEST_RESULT_TOTAL, "result" => "load_shedding")); pub(super) static INGEST_RESULT_SHARD_NOT_FOUND: LazyLock = - LazyLock::new(|| counter!(parent: &*INGEST_RESULT_TOTAL, "result" => "shard_not_found")); + LazyLock::new(|| counter!(parent: INGEST_RESULT_TOTAL, "result" => "shard_not_found")); pub(super) static INGEST_RESULT_UNAVAILABLE: LazyLock = - LazyLock::new(|| counter!(parent: &*INGEST_RESULT_TOTAL, "result" => "unavailable")); + LazyLock::new(|| counter!(parent: INGEST_RESULT_TOTAL, "result" => "unavailable")); pub(super) static INGEST_ATTEMPTS: LazyLock = LazyLock::new(|| { counter!( @@ -98,10 +98,10 @@ static SHARDS: LazyLock = LazyLock::new(|| { }); pub(super) static OPEN_SHARDS: LazyLock = - LazyLock::new(|| gauge!(parent: &*SHARDS, "state" => "open")); + LazyLock::new(|| gauge!(parent: SHARDS, "state" => "open")); pub(super) static CLOSED_SHARDS: LazyLock = - LazyLock::new(|| gauge!(parent: &*SHARDS, "state" => "closed")); + LazyLock::new(|| gauge!(parent: SHARDS, "state" => "closed")); pub(super) static SHARD_LT_THROUGHPUT_MIB: LazyLock = LazyLock::new(|| { histogram!( diff --git a/quickwit/quickwit-search/src/metrics.rs b/quickwit/quickwit-search/src/metrics.rs index 761da4e27d3..7a63db2d038 100644 --- a/quickwit/quickwit-search/src/metrics.rs +++ b/quickwit/quickwit-search/src/metrics.rs @@ -171,10 +171,10 @@ static LEAF_SEARCH_SINGLE_SPLIT_TASKS_BASE: LazyLock = LazyLock::new(|| { }); pub(crate) static LEAF_SEARCH_SINGLE_SPLIT_TASKS_ONGOING: LazyLock = - LazyLock::new(|| gauge!(parent: &*LEAF_SEARCH_SINGLE_SPLIT_TASKS_BASE, "status" => "ongoing")); + LazyLock::new(|| gauge!(parent: LEAF_SEARCH_SINGLE_SPLIT_TASKS_BASE, "status" => "ongoing")); pub(crate) static LEAF_SEARCH_SINGLE_SPLIT_TASKS_PENDING: LazyLock = - LazyLock::new(|| gauge!(parent: &*LEAF_SEARCH_SINGLE_SPLIT_TASKS_BASE, "status" => "pending")); + LazyLock::new(|| gauge!(parent: 
LEAF_SEARCH_SINGLE_SPLIT_TASKS_BASE, "status" => "pending")); static ROOT_SEARCH_REQUESTS_TOTAL_BASE: LazyLock = LazyLock::new(|| { counter!( @@ -185,7 +185,7 @@ static ROOT_SEARCH_REQUESTS_TOTAL_BASE: LazyLock = LazyLock::new(|| { }); pub(crate) static ROOT_SEARCH_REQUESTS_TOTAL: LazyLock = - LazyLock::new(|| counter!(parent: &*ROOT_SEARCH_REQUESTS_TOTAL_BASE, "kind" => "server")); + LazyLock::new(|| counter!(parent: ROOT_SEARCH_REQUESTS_TOTAL_BASE, "kind" => "server")); static ROOT_SEARCH_REQUEST_DURATION_SECONDS_BASE: LazyLock = LazyLock::new(|| { histogram!( @@ -198,7 +198,7 @@ static ROOT_SEARCH_REQUEST_DURATION_SECONDS_BASE: LazyLock = LazyLock pub(crate) static ROOT_SEARCH_REQUEST_DURATION_SECONDS: LazyLock = LazyLock::new(|| { histogram!( - parent: &*ROOT_SEARCH_REQUEST_DURATION_SECONDS_BASE, + parent: ROOT_SEARCH_REQUEST_DURATION_SECONDS_BASE, "kind" => "server", ) }); @@ -221,7 +221,7 @@ static LEAF_SEARCH_REQUESTS_TOTAL_BASE: LazyLock = LazyLock::new(|| { }); pub(crate) static LEAF_SEARCH_REQUESTS_TOTAL: LazyLock = - LazyLock::new(|| counter!(parent: &*LEAF_SEARCH_REQUESTS_TOTAL_BASE, "kind" => "server")); + LazyLock::new(|| counter!(parent: LEAF_SEARCH_REQUESTS_TOTAL_BASE, "kind" => "server")); static LEAF_SEARCH_REQUEST_DURATION_SECONDS_BASE: LazyLock = LazyLock::new(|| { histogram!( @@ -234,7 +234,7 @@ static LEAF_SEARCH_REQUEST_DURATION_SECONDS_BASE: LazyLock = LazyLock pub(crate) static LEAF_SEARCH_REQUEST_DURATION_SECONDS: LazyLock = LazyLock::new(|| { histogram!( - parent: &*LEAF_SEARCH_REQUEST_DURATION_SECONDS_BASE, + parent: LEAF_SEARCH_REQUEST_DURATION_SECONDS_BASE, "kind" => "server", ) }); diff --git a/quickwit/quickwit-storage/src/metrics.rs b/quickwit/quickwit-storage/src/metrics.rs index ac0bf6b32d5..3173b6f80c8 100644 --- a/quickwit/quickwit-storage/src/metrics.rs +++ b/quickwit/quickwit-storage/src/metrics.rs @@ -31,14 +31,14 @@ static GET_SLICE_TIMEOUT_OUTCOME_TOTAL: LazyLock = LazyLock::new(|| { pub static 
GET_SLICE_TIMEOUT_SUCCESS_AFTER_0_TIMEOUT: LazyLock = LazyLock::new(|| { counter!( - parent: &*GET_SLICE_TIMEOUT_OUTCOME_TOTAL, + parent: GET_SLICE_TIMEOUT_OUTCOME_TOTAL, "outcome" => "success_after_0_timeout", ) }); pub static GET_SLICE_TIMEOUT_SUCCESS_AFTER_1_TIMEOUT: LazyLock = LazyLock::new(|| { counter!( - parent: &*GET_SLICE_TIMEOUT_OUTCOME_TOTAL, + parent: GET_SLICE_TIMEOUT_OUTCOME_TOTAL, "outcome" => "success_after_1_timeout", ) }); @@ -46,14 +46,14 @@ pub static GET_SLICE_TIMEOUT_SUCCESS_AFTER_1_TIMEOUT: LazyLock = LazyLo pub static GET_SLICE_TIMEOUT_SUCCESS_AFTER_2_PLUS_TIMEOUT: LazyLock = LazyLock::new(|| { counter!( - parent: &*GET_SLICE_TIMEOUT_OUTCOME_TOTAL, + parent: GET_SLICE_TIMEOUT_OUTCOME_TOTAL, "outcome" => "success_after_2+_timeout", ) }); pub static GET_SLICE_TIMEOUT_ALL_TIMEOUTS: LazyLock = LazyLock::new(|| { counter!( - parent: &*GET_SLICE_TIMEOUT_OUTCOME_TOTAL, + parent: GET_SLICE_TIMEOUT_OUTCOME_TOTAL, "outcome" => "all_timeouts", ) }); @@ -77,28 +77,28 @@ static OBJECT_STORAGE_REQUEST_DURATION: LazyLock = LazyLock::new(|| { pub static OBJECT_STORAGE_DELETE_REQUESTS_TOTAL: LazyLock = LazyLock::new(|| { counter!( - parent: &*OBJECT_STORAGE_REQUESTS_TOTAL, + parent: OBJECT_STORAGE_REQUESTS_TOTAL, "action" => "delete_object", ) }); pub static OBJECT_STORAGE_BULK_DELETE_REQUESTS_TOTAL: LazyLock = LazyLock::new(|| { counter!( - parent: &*OBJECT_STORAGE_REQUESTS_TOTAL, + parent: OBJECT_STORAGE_REQUESTS_TOTAL, "action" => "delete_objects", ) }); pub static OBJECT_STORAGE_DELETE_REQUEST_DURATION: LazyLock = LazyLock::new(|| { histogram!( - parent: &*OBJECT_STORAGE_REQUEST_DURATION, + parent: OBJECT_STORAGE_REQUEST_DURATION, "action" => "delete_object", ) }); pub static OBJECT_STORAGE_BULK_DELETE_REQUEST_DURATION: LazyLock = LazyLock::new(|| { histogram!( - parent: &*OBJECT_STORAGE_REQUEST_DURATION, + parent: OBJECT_STORAGE_REQUEST_DURATION, "action" => "delete_objects", ) }); @@ -194,13 +194,13 @@ impl CacheMetrics { CacheMetrics { component_name, 
cache_metrics: SingleCacheMetrics { - in_cache_count: gauge!(parent: &*CACHE_IN_CACHE_COUNT, labels: &labels), - in_cache_num_bytes: gauge!(parent: &*CACHE_IN_CACHE_NUM_BYTES, labels: &labels), - hits_num_items: counter!(parent: &*CACHE_HITS_TOTAL, labels: &labels), - hits_num_bytes: counter!(parent: &*CACHE_HITS_BYTES, labels: &labels), - misses_num_items: counter!(parent: &*CACHE_MISSES_TOTAL, labels: &labels), - evict_num_items: counter!(parent: &*CACHE_EVICT_TOTAL, labels: &labels), - evict_num_bytes: counter!(parent: &*CACHE_EVICT_BYTES, labels: &labels), + in_cache_count: gauge!(parent: CACHE_IN_CACHE_COUNT, labels: &labels), + in_cache_num_bytes: gauge!(parent: CACHE_IN_CACHE_NUM_BYTES, labels: &labels), + hits_num_items: counter!(parent: CACHE_HITS_TOTAL, labels: &labels), + hits_num_bytes: counter!(parent: CACHE_HITS_BYTES, labels: &labels), + misses_num_items: counter!(parent: CACHE_MISSES_TOTAL, labels: &labels), + evict_num_items: counter!(parent: CACHE_EVICT_TOTAL, labels: &labels), + evict_num_bytes: counter!(parent: CACHE_EVICT_BYTES, labels: &labels), }, virtual_caches_metrics: RwLock::default(), } @@ -218,31 +218,31 @@ impl CacheMetrics { VIRTUAL_CACHE_LABELS.with_values([self.component_name.clone(), capacity, policy]); let new_virtual_cache_metrics = SingleCacheMetrics { in_cache_count: gauge!( - parent: &*VIRTUAL_CACHE_IN_CACHE_COUNT, + parent: VIRTUAL_CACHE_IN_CACHE_COUNT, labels: &labels, ), in_cache_num_bytes: gauge!( - parent: &*VIRTUAL_CACHE_IN_CACHE_NUM_BYTES, + parent: VIRTUAL_CACHE_IN_CACHE_NUM_BYTES, labels: &labels, ), hits_num_items: counter!( - parent: &*VIRTUAL_CACHE_HITS_TOTAL, + parent: VIRTUAL_CACHE_HITS_TOTAL, labels: &labels, ), hits_num_bytes: counter!( - parent: &*VIRTUAL_CACHE_HITS_BYTES, + parent: VIRTUAL_CACHE_HITS_BYTES, labels: &labels, ), misses_num_items: counter!( - parent: &*VIRTUAL_CACHE_MISSES_TOTAL, + parent: VIRTUAL_CACHE_MISSES_TOTAL, labels: &labels, ), evict_num_items: counter!( - parent: 
&*VIRTUAL_CACHE_EVICT_TOTAL, + parent: VIRTUAL_CACHE_EVICT_TOTAL, labels: &labels, ), evict_num_bytes: counter!( - parent: &*VIRTUAL_CACHE_EVICT_BYTES, + parent: VIRTUAL_CACHE_EVICT_BYTES, labels: &labels, ), }; From 7143ffd05ded34fc1dd90421f63eee6f3b23dc5e Mon Sep 17 00:00:00 2001 From: Shuhei Kitagawa Date: Mon, 4 May 2026 17:36:38 +0200 Subject: [PATCH 27/54] Rename metric recorder builder helpers --- quickwit/quickwit-cli/src/logger.rs | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/quickwit/quickwit-cli/src/logger.rs b/quickwit/quickwit-cli/src/logger.rs index 558e168f575..af187ee0a02 100644 --- a/quickwit/quickwit-cli/src/logger.rs +++ b/quickwit/quickwit-cli/src/logger.rs @@ -421,11 +421,10 @@ fn init_metrics_provider( build_info: &BuildInfo, otlp_config: &OtlpExporterConfig, ) -> anyhow::Result> { - let prometheus_recorder = install_prometheus_recorder()?; + let prometheus_recorder = build_prometheus_recorder()?; let (quickwit_recorder, meter_provider) = if otlp_config.is_enabled() { - let (otlp_recorder, meter_provider) = - install_otlp_metrics_recorder(build_info, otlp_config)?; + let (otlp_recorder, meter_provider) = build_otlp_metrics_recorder(build_info, otlp_config)?; let recorder = FanoutBuilder::default() .add_recorder(prometheus_recorder) .add_recorder(otlp_recorder) @@ -438,7 +437,7 @@ fn init_metrics_provider( (recorder, None) }; - let dogstatsd_recorder = install_dogstatsd_recorder(build_info)?; + let dogstatsd_recorder = build_dogstatsd_recorder(build_info)?; let mut router = RouterBuilder::from_recorder(metrics::NoopRecorder); router @@ -451,7 +450,7 @@ fn init_metrics_provider( Ok(meter_provider) } -fn install_prometheus_recorder() -> anyhow::Result { +fn build_prometheus_recorder() -> anyhow::Result { let mut prometheus_builder = PrometheusBuilder::new(); for (name, buckets) in quickwit_metrics::histogram_buckets() { prometheus_builder = prometheus_builder @@ -467,7 +466,7 @@ fn install_prometheus_recorder() 
-> anyhow::Result { Ok(prometheus_recorder) } -fn install_otlp_metrics_recorder( +fn build_otlp_metrics_recorder( build_info: &BuildInfo, otlp_config: &OtlpExporterConfig, ) -> anyhow::Result<(OpenTelemetryRecorder, SdkMetricsProvider)> { @@ -491,7 +490,7 @@ fn install_otlp_metrics_recorder( Ok((recorder, metrics_provider)) } -fn install_dogstatsd_recorder(build_info: &BuildInfo) -> anyhow::Result { +fn build_dogstatsd_recorder(build_info: &BuildInfo) -> anyhow::Result { // Reading both `CLOUDPREM_*` and `CP_*` env vars for backward compatibility. The former is // deprecated and can be removed after 2026-04-01. let host: String = quickwit_common::get_from_env_opt("CLOUDPREM_DOGSTATSD_SERVER_HOST", false) From c6c45cf100cca01e9e65bcde581e4da594af129e Mon Sep 17 00:00:00 2001 From: Shuhei Kitagawa Date: Tue, 5 May 2026 08:56:24 +0200 Subject: [PATCH 28/54] Clarify ongoing task gauge guard usage --- quickwit/quickwit-common/src/thread_pool.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/quickwit/quickwit-common/src/thread_pool.rs b/quickwit/quickwit-common/src/thread_pool.rs index 00ef5dea3ad..23de936c16c 100644 --- a/quickwit/quickwit-common/src/thread_pool.rs +++ b/quickwit/quickwit-common/src/thread_pool.rs @@ -110,8 +110,8 @@ impl ThreadPool { return; } let _guard = span.enter(); - let _ongoing_task_guard = GaugeGuard::from_gauge(&ongoing_tasks); - _ongoing_task_guard.increment(1.0); + let ongoing_task_guard = GaugeGuard::from_gauge(&ongoing_tasks); + ongoing_task_guard.increment(1.0); let result = cpu_intensive_fn(); let _ = tx.send(result); }); From 7860749cf6f9cdd9b18c516392c207d0cf1928b9 Mon Sep 17 00:00:00 2001 From: Shuhei Kitagawa Date: Tue, 5 May 2026 09:03:22 +0200 Subject: [PATCH 29/54] Replace GaugeGuard from_gauge with new --- quickwit/quickwit-actors/src/mailbox.rs | 4 +--- quickwit/quickwit-common/src/stream_utils.rs | 3 +-- quickwit/quickwit-common/src/thread_pool.rs | 6 ++---- .../quickwit-indexing/src/actors/indexer.rs | 5 
++--- .../src/actors/indexing_pipeline.rs | 3 +-- .../src/actors/metrics_pipeline/pipeline.rs | 3 +-- .../processed_parquet_batch.rs | 7 ++++--- .../src/models/processed_doc.rs | 7 ++++--- .../src/models/raw_doc_batch.rs | 13 +++++++----- quickwit/quickwit-indexing/src/source/mod.rs | 2 +- .../quickwit-ingest/src/ingest_v2/ingester.rs | 7 ++++--- .../src/ingest_v2/replication.rs | 7 ++++--- .../quickwit-ingest/src/ingest_v2/router.rs | 7 ++++--- .../src/metastore/postgres/pool.rs | 3 +-- .../quickwit-metrics/examples/http_service.rs | 7 ++----- quickwit/quickwit-metrics/src/gauge.rs | 15 ++++++------- quickwit/quickwit-metrics/src/lib.rs | 3 +-- quickwit/quickwit-metrics/tests/gauge.rs | 21 +++++++------------ .../quickwit-search/src/scroll_context.rs | 7 ++++--- .../src/search_permit_provider.rs | 3 +-- quickwit/quickwit-serve/src/decompression.rs | 6 ++++-- quickwit/quickwit-serve/src/load_shield.rs | 6 ++---- quickwit/quickwit-storage/src/metrics.rs | 9 ++++---- 23 files changed, 73 insertions(+), 81 deletions(-) diff --git a/quickwit/quickwit-actors/src/mailbox.rs b/quickwit/quickwit-actors/src/mailbox.rs index a72a8cf8dc9..9534658a248 100644 --- a/quickwit/quickwit-actors/src/mailbox.rs +++ b/quickwit/quickwit-actors/src/mailbox.rs @@ -394,9 +394,7 @@ impl Inbox { } fn get_actor_inboxes_count_gauge_guard() -> GaugeGuard { - let gauge_guard = GaugeGuard::from_gauge(&INBOX_GAUGE); - gauge_guard.increment(1.0); - gauge_guard + GaugeGuard::new(&INBOX_GAUGE, 1.0) } pub(crate) fn create_mailbox( diff --git a/quickwit/quickwit-common/src/stream_utils.rs b/quickwit/quickwit-common/src/stream_utils.rs index 40c99f38f7c..e79ce60c350 100644 --- a/quickwit/quickwit-common/src/stream_utils.rs +++ b/quickwit/quickwit-common/src/stream_utils.rs @@ -239,8 +239,7 @@ where T: fmt::Debug impl InFlightValue { pub fn new(value: T, value_size: ByteSize, gauge: &'static Gauge) -> Self { - let gauge_guard = GaugeGuard::from_gauge(gauge); - gauge_guard.increment(value_size.as_u64() as 
f64); + let gauge_guard = GaugeGuard::new(gauge, value_size.as_u64() as f64); Self(value, gauge_guard) } diff --git a/quickwit/quickwit-common/src/thread_pool.rs b/quickwit/quickwit-common/src/thread_pool.rs index 23de936c16c..3d910238196 100644 --- a/quickwit/quickwit-common/src/thread_pool.rs +++ b/quickwit/quickwit-common/src/thread_pool.rs @@ -101,8 +101,7 @@ impl ThreadPool { { let span = tracing::Span::current(); let ongoing_tasks = self.ongoing_tasks.clone(); - let pending_tasks_guard = GaugeGuard::from_gauge(&self.pending_tasks); - pending_tasks_guard.increment(1.0); + let pending_tasks_guard = GaugeGuard::new(&self.pending_tasks, 1.0); let (tx, rx) = oneshot::channel(); self.thread_pool.spawn(move || { drop(pending_tasks_guard); @@ -110,8 +109,7 @@ impl ThreadPool { return; } let _guard = span.enter(); - let ongoing_task_guard = GaugeGuard::from_gauge(&ongoing_tasks); - ongoing_task_guard.increment(1.0); + let _ongoing_task_guard = GaugeGuard::new(&ongoing_tasks, 1.0); let result = cpu_intensive_fn(); let _ = tx.send(result); }); diff --git a/quickwit/quickwit-indexing/src/actors/indexer.rs b/quickwit/quickwit-indexing/src/actors/indexer.rs index 43673f86c12..f9edb347871 100644 --- a/quickwit/quickwit-indexing/src/actors/indexer.rs +++ b/quickwit/quickwit-indexing/src/actors/indexer.rs @@ -219,8 +219,7 @@ impl IndexerState { let publish_lock = self.publish_lock.clone(); let publish_token_opt = self.publish_token_opt.clone(); - let split_builders_guard = GaugeGuard::from_gauge(&crate::metrics::SPLIT_BUILDERS); - split_builders_guard.increment(1.0); + let split_builders_guard = GaugeGuard::new(&crate::metrics::SPLIT_BUILDERS, 1.0); let workbench = IndexingWorkbench { workbench_id, @@ -232,7 +231,7 @@ impl IndexerState { publish_lock, publish_token_opt, last_delete_opstamp, - memory_usage: GaugeGuard::from_gauge(&quickwit_common::metrics::IN_FLIGHT_INDEX_WRITER), + memory_usage: GaugeGuard::new(&quickwit_common::metrics::IN_FLIGHT_INDEX_WRITER, 0.0), 
cooperative_indexing_period, split_builders_guard, }; diff --git a/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs b/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs index 107869932e8..68ceb837229 100644 --- a/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs +++ b/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs @@ -132,8 +132,7 @@ impl IndexingPipeline { parent: &crate::metrics::INDEXING_PIPELINES, labels: &labels, ); - let indexing_pipelines_gauge_guard = GaugeGuard::from_gauge(&indexing_pipelines_gauge); - indexing_pipelines_gauge_guard.increment(1.0); + let indexing_pipelines_gauge_guard = GaugeGuard::new(&indexing_pipelines_gauge, 1.0); let params_fingerprint = params.params_fingerprint; IndexingPipeline { params, diff --git a/quickwit/quickwit-indexing/src/actors/metrics_pipeline/pipeline.rs b/quickwit/quickwit-indexing/src/actors/metrics_pipeline/pipeline.rs index 98b366861f6..d40eaaed3d1 100644 --- a/quickwit/quickwit-indexing/src/actors/metrics_pipeline/pipeline.rs +++ b/quickwit/quickwit-indexing/src/actors/metrics_pipeline/pipeline.rs @@ -153,8 +153,7 @@ impl MetricsPipeline { parent: &crate::metrics::INDEXING_PIPELINES, labels: &labels, ); - let indexing_pipelines_gauge_guard = GaugeGuard::from_gauge(&indexing_pipelines_gauge); - indexing_pipelines_gauge_guard.increment(1.0); + let indexing_pipelines_gauge_guard = GaugeGuard::new(&indexing_pipelines_gauge, 1.0); let params_fingerprint = params.params_fingerprint; MetricsPipeline { params, diff --git a/quickwit/quickwit-indexing/src/actors/metrics_pipeline/processed_parquet_batch.rs b/quickwit/quickwit-indexing/src/actors/metrics_pipeline/processed_parquet_batch.rs index 7b6831a5b6f..7078dd24f21 100644 --- a/quickwit/quickwit-indexing/src/actors/metrics_pipeline/processed_parquet_batch.rs +++ b/quickwit/quickwit-indexing/src/actors/metrics_pipeline/processed_parquet_batch.rs @@ -65,9 +65,10 @@ impl ProcessedParquetBatch { .map(|col| col.get_array_memory_size() as 
i64) .sum(); - let gauge_guard = - GaugeGuard::from_gauge(&quickwit_common::metrics::IN_FLIGHT_INDEXER_MAILBOX); - gauge_guard.increment(memory_size as f64); + let gauge_guard = GaugeGuard::new( + &quickwit_common::metrics::IN_FLIGHT_INDEXER_MAILBOX, + memory_size as f64, + ); Self { batches, diff --git a/quickwit/quickwit-indexing/src/models/processed_doc.rs b/quickwit/quickwit-indexing/src/models/processed_doc.rs index 424956e9d17..e6957734d0e 100644 --- a/quickwit/quickwit-indexing/src/models/processed_doc.rs +++ b/quickwit/quickwit-indexing/src/models/processed_doc.rs @@ -51,9 +51,10 @@ impl ProcessedDocBatch { force_commit: bool, ) -> Self { let delta = docs.iter().map(|doc| doc.num_bytes as i64).sum::(); - let gauge_guard = - GaugeGuard::from_gauge(&quickwit_common::metrics::IN_FLIGHT_INDEXER_MAILBOX); - gauge_guard.increment(delta as f64); + let gauge_guard = GaugeGuard::new( + &quickwit_common::metrics::IN_FLIGHT_INDEXER_MAILBOX, + delta as f64, + ); Self { docs, checkpoint_delta, diff --git a/quickwit/quickwit-indexing/src/models/raw_doc_batch.rs b/quickwit/quickwit-indexing/src/models/raw_doc_batch.rs index d5178afd123..901e51c527b 100644 --- a/quickwit/quickwit-indexing/src/models/raw_doc_batch.rs +++ b/quickwit/quickwit-indexing/src/models/raw_doc_batch.rs @@ -34,9 +34,10 @@ impl RawDocBatch { force_commit: bool, ) -> Self { let delta = docs.iter().map(|doc| doc.len() as i64).sum::(); - let gauge_guard = - GaugeGuard::from_gauge(&quickwit_common::metrics::IN_FLIGHT_DOC_PROCESSOR_MAILBOX); - gauge_guard.increment(delta as f64); + let gauge_guard = GaugeGuard::new( + &quickwit_common::metrics::IN_FLIGHT_DOC_PROCESSOR_MAILBOX, + delta as f64, + ); Self { docs, @@ -67,8 +68,10 @@ impl fmt::Debug for RawDocBatch { impl Default for RawDocBatch { fn default() -> Self { - let _gauge_guard = - GaugeGuard::from_gauge(&quickwit_common::metrics::IN_FLIGHT_DOC_PROCESSOR_MAILBOX); + let _gauge_guard = GaugeGuard::new( + 
&quickwit_common::metrics::IN_FLIGHT_DOC_PROCESSOR_MAILBOX, + 0.0, + ); Self { docs: Vec::new(), checkpoint_delta: SourceCheckpointDelta::default(), diff --git a/quickwit/quickwit-indexing/src/source/mod.rs b/quickwit/quickwit-indexing/src/source/mod.rs index a5f946ec7a5..78ed27c7dce 100644 --- a/quickwit/quickwit-indexing/src/source/mod.rs +++ b/quickwit/quickwit-indexing/src/source/mod.rs @@ -537,7 +537,7 @@ impl BatchBuilder { SourceType::Pulsar => &quickwit_common::metrics::IN_FLIGHT_PULSAR_SOURCE, _ => &quickwit_common::metrics::IN_FLIGHT_OTHER_SOURCE, }; - let gauge_guard = GaugeGuard::from_gauge(gauge); + let gauge_guard = GaugeGuard::new(gauge, 0.0); Self { docs: Vec::with_capacity(capacity), diff --git a/quickwit/quickwit-ingest/src/ingest_v2/ingester.rs b/quickwit/quickwit-ingest/src/ingest_v2/ingester.rs index ef4a4b1a6ad..6834d917a55 100644 --- a/quickwit/quickwit-ingest/src/ingest_v2/ingester.rs +++ b/quickwit/quickwit-ingest/src/ingest_v2/ingester.rs @@ -1125,9 +1125,10 @@ impl IngesterService for Ingester { _ => None, }) .sum::(); - let mut _gauge_guard = - GaugeGuard::from_gauge(&quickwit_common::metrics::IN_FLIGHT_INGESTER_PERSIST); - _gauge_guard.increment(request_size_bytes as f64); + let _gauge_guard = GaugeGuard::new( + &quickwit_common::metrics::IN_FLIGHT_INGESTER_PERSIST, + request_size_bytes as f64, + ); self.persist_inner(persist_request).await } diff --git a/quickwit/quickwit-ingest/src/ingest_v2/replication.rs b/quickwit/quickwit-ingest/src/ingest_v2/replication.rs index adf562f9bc1..93b99831c3d 100644 --- a/quickwit/quickwit-ingest/src/ingest_v2/replication.rs +++ b/quickwit/quickwit-ingest/src/ingest_v2/replication.rs @@ -503,9 +503,10 @@ impl ReplicationTask { ))); } let request_size_bytes = replicate_request.num_bytes(); - let mut _gauge_guard = - GaugeGuard::from_gauge(&quickwit_common::metrics::IN_FLIGHT_INGESTER_REPLICATE); - _gauge_guard.increment(request_size_bytes as f64); + let _gauge_guard = GaugeGuard::new( + 
&quickwit_common::metrics::IN_FLIGHT_INGESTER_REPLICATE, + request_size_bytes as f64, + ); self.current_replication_seqno += 1; diff --git a/quickwit/quickwit-ingest/src/ingest_v2/router.rs b/quickwit/quickwit-ingest/src/ingest_v2/router.rs index fb03c76609d..d5b2460b749 100644 --- a/quickwit/quickwit-ingest/src/ingest_v2/router.rs +++ b/quickwit/quickwit-ingest/src/ingest_v2/router.rs @@ -569,9 +569,10 @@ impl IngestRouterService for IngestRouter { async fn ingest(&self, ingest_request: IngestRequestV2) -> IngestV2Result { let request_size_bytes = ingest_request.num_bytes(); - let mut _gauge_guard = - GaugeGuard::from_gauge(&quickwit_common::metrics::IN_FLIGHT_INGEST_ROUTER); - _gauge_guard.increment(request_size_bytes as f64); + let _gauge_guard = GaugeGuard::new( + &quickwit_common::metrics::IN_FLIGHT_INGEST_ROUTER, + request_size_bytes as f64, + ); let num_subrequests = ingest_request.subrequests.len(); let _permit = self diff --git a/quickwit/quickwit-metastore/src/metastore/postgres/pool.rs b/quickwit/quickwit-metastore/src/metastore/postgres/pool.rs index 963df072afa..984a2724472 100644 --- a/quickwit/quickwit-metastore/src/metastore/postgres/pool.rs +++ b/quickwit/quickwit-metastore/src/metastore/postgres/pool.rs @@ -52,8 +52,7 @@ impl<'a, DB: Database> Acquire<'a> for &TrackedPool { super::metrics::IDLE_CONNECTIONS.set(self.inner_pool.num_idle() as f64); Box::pin(async move { - let _gauge_guard = GaugeGuard::from_gauge(&super::metrics::ACQUIRE_CONNECTIONS); - _gauge_guard.increment(1.0); + let _gauge_guard = GaugeGuard::new(&super::metrics::ACQUIRE_CONNECTIONS, 1.0); let conn = acquire_conn_fut.await?; Ok(conn) diff --git a/quickwit/quickwit-metrics/examples/http_service.rs b/quickwit/quickwit-metrics/examples/http_service.rs index 70e9012b112..12926e217cb 100644 --- a/quickwit/quickwit-metrics/examples/http_service.rs +++ b/quickwit/quickwit-metrics/examples/http_service.rs @@ -102,9 +102,7 @@ const REGION_LABEL: Labels<1> = Labels::new(["region"]); fn 
track_connection(region: &'static str) -> GaugeGuard { let lv = REGION_LABEL.with_values([region]); let g = gauge!(parent: HTTP_ACTIVE_CONNECTIONS, labels: &lv); - let guard = GaugeGuard::from_gauge(&g); - guard.increment(1.0); - guard + GaugeGuard::new(&g, 1.0) } // ─── Prometheus setup ─── @@ -160,8 +158,7 @@ fn handle_request(method: &'static str, path: &'static str, region: &'static str "method" => method, ); { - let _guard = GaugeGuard::from_gauge(&conn_gauge); - _guard.increment(1.0); + let _guard = GaugeGuard::new(&conn_gauge, 1.0); } println!(" [{region}] {method} {path} -> {status} ({duration_ms:.3}s)"); diff --git a/quickwit/quickwit-metrics/src/gauge.rs b/quickwit/quickwit-metrics/src/gauge.rs index 1d66ee36f63..3aed39f5408 100644 --- a/quickwit/quickwit-metrics/src/gauge.rs +++ b/quickwit/quickwit-metrics/src/gauge.rs @@ -205,8 +205,7 @@ impl GaugeFn for Gauge { /// a panic. /// /// ```ignore -/// let guard = GaugeGuard::from_gauge(&gauge); -/// guard.increment(1.0); +/// let guard = GaugeGuard::new(&gauge, 1.0); /// // gauge is incremented by 1.0 /// // ... do work ... /// // gauge is decremented by 1.0 when guard drops @@ -218,11 +217,14 @@ pub struct GaugeGuard { } impl GaugeGuard { - /// Creates a guard that tracks `gauge` without changing its value. - pub fn from_gauge(gauge: &Gauge) -> Self { + /// Creates a guard that adds `delta` to `gauge` and tracks it until drop. + pub fn new(gauge: &Gauge, delta: f64) -> Self { + if delta != 0.0 { + gauge.increment(delta); + } Self { gauge: gauge.clone(), - delta: AtomicF64::new(0.0), + delta: AtomicF64::new(delta), } } @@ -269,8 +271,7 @@ impl Drop for GaugeGuard { /// /// ```ignore /// let child = gauge!(parent: base, "method" => method); -/// let guard = GaugeGuard::from_gauge(&child); -/// guard.increment(1.0); +/// let guard = GaugeGuard::new(&child, 1.0); /// ``` #[macro_export] macro_rules! 
gauge { diff --git a/quickwit/quickwit-metrics/src/lib.rs b/quickwit/quickwit-metrics/src/lib.rs index 6dcc22b1517..f903d425afb 100644 --- a/quickwit/quickwit-metrics/src/lib.rs +++ b/quickwit/quickwit-metrics/src/lib.rs @@ -136,8 +136,7 @@ //! //! ```rust,ignore //! { -//! let _guard = GaugeGuard::from_gauge(&ACTIVE_CONNS); -//! _guard.increment(1.0); +//! let _guard = GaugeGuard::new(&ACTIVE_CONNS, 1.0); //! // ... connection is alive here ... //! } //! // gauge decremented automatically on drop diff --git a/quickwit/quickwit-metrics/tests/gauge.rs b/quickwit/quickwit-metrics/tests/gauge.rs index c365368a3b8..8cf29c086d1 100644 --- a/quickwit/quickwit-metrics/tests/gauge.rs +++ b/quickwit/quickwit-metrics/tests/gauge.rs @@ -91,8 +91,7 @@ fn guard_decrements_on_drop() { ); g.set(0.0); { - let _guard = GaugeGuard::from_gauge(&g); - _guard.increment(5.0); + let _guard = GaugeGuard::new(&g, 5.0); } }); @@ -110,8 +109,7 @@ fn guard_after_set() { ); g.set(10.0); { - let guard = GaugeGuard::from_gauge(&g); - guard.increment(3.0); + let guard = GaugeGuard::new(&g, 3.0); assert_eq!(guard.delta(), 3.0); } }); @@ -130,13 +128,13 @@ fn guard_tracks_delta() { ); g.set(0.0); { - let guard = GaugeGuard::from_gauge(&g); - assert_eq!(guard.delta(), 0.0); + let guard = GaugeGuard::new(&g, 2.0); + assert_eq!(guard.delta(), 2.0); guard.increment(5.0); guard.increment(-2.0); guard.increment(0.5); guard.increment(-1.5); - assert_eq!(guard.delta(), 2.0); + assert_eq!(guard.delta(), 4.0); } }); @@ -153,10 +151,8 @@ fn multiple_guards() { subsystem: "test", ); g.set(0.0); - let guard_a = GaugeGuard::from_gauge(&g); - guard_a.increment(2.0); - let guard_b = GaugeGuard::from_gauge(&g); - guard_b.increment(5.0); + let guard_a = GaugeGuard::new(&g, 2.0); + let guard_b = GaugeGuard::new(&g, 5.0); drop(guard_b); drop(guard_a); }); @@ -219,8 +215,7 @@ fn observable_guard_matches_recorder() { ); g.set(0.0); { - let _guard = GaugeGuard::from_gauge(&g); - _guard.increment(5.0); + let _guard = 
GaugeGuard::new(&g, 5.0); assert_eq!(g.get(), 5.0); } g diff --git a/quickwit/quickwit-search/src/scroll_context.rs b/quickwit/quickwit-search/src/scroll_context.rs index 3165534b1e9..89e8bac9fba 100644 --- a/quickwit/quickwit-search/src/scroll_context.rs +++ b/quickwit/quickwit-search/src/scroll_context.rs @@ -148,9 +148,10 @@ impl Default for MiniKV { impl MiniKV { pub async fn put(&self, key: Vec, payload: Vec, ttl: Duration) { - let metric_guard = - GaugeGuard::from_gauge(&crate::metrics::SEARCHER_LOCAL_KV_STORE_SIZE_BYTES); - metric_guard.increment(payload.len() as f64); + let metric_guard = GaugeGuard::new( + &crate::metrics::SEARCHER_LOCAL_KV_STORE_SIZE_BYTES, + payload.len() as f64, + ); let mut cache_lock = self.ttl_with_cache.write().await; cache_lock.insert( key, diff --git a/quickwit/quickwit-search/src/search_permit_provider.rs b/quickwit/quickwit-search/src/search_permit_provider.rs index e8cb09643f1..2bfcfaa0db8 100644 --- a/quickwit/quickwit-search/src/search_permit_provider.rs +++ b/quickwit/quickwit-search/src/search_permit_provider.rs @@ -333,8 +333,7 @@ impl SearchPermitActor { fn assign_available_permits(&mut self) { while let Some(permit_request) = self.pop_next_request_if_serviceable() { let ongoing_gauge_guard = - GaugeGuard::from_gauge(&crate::metrics::LEAF_SEARCH_SINGLE_SPLIT_TASKS_ONGOING); - ongoing_gauge_guard.increment(1.0); + GaugeGuard::new(&crate::metrics::LEAF_SEARCH_SINGLE_SPLIT_TASKS_ONGOING, 1.0); self.total_memory_allocated += permit_request.permit_size; self.num_warmup_slots_available -= 1; permit_request diff --git a/quickwit/quickwit-serve/src/decompression.rs b/quickwit/quickwit-serve/src/decompression.rs index c44bd8de7ea..e3fb7a833a0 100644 --- a/quickwit/quickwit-serve/src/decompression.rs +++ b/quickwit/quickwit-serve/src/decompression.rs @@ -114,8 +114,10 @@ pub(crate) struct Body { impl Body { pub fn new(content: Bytes, load_shield_permit: LoadShieldPermit) -> Body { - let gauge_guard = 
GaugeGuard::from_gauge(&quickwit_common::metrics::IN_FLIGHT_REST_SERVER); - gauge_guard.increment(content.len() as f64); + let gauge_guard = GaugeGuard::new( + &quickwit_common::metrics::IN_FLIGHT_REST_SERVER, + content.len() as f64, + ); Body { content, _gauge_guard: gauge_guard, diff --git a/quickwit/quickwit-serve/src/load_shield.rs b/quickwit/quickwit-serve/src/load_shield.rs index 37b95cedee0..f4545df6211 100644 --- a/quickwit/quickwit-serve/src/load_shield.rs +++ b/quickwit/quickwit-serve/src/load_shield.rs @@ -75,13 +75,11 @@ impl LoadShield { } pub async fn acquire_permit(&'static self) -> Result { - let pending_gauge_guard = GaugeGuard::from_gauge(&self.pending_gauge); - pending_gauge_guard.increment(1.0); + let pending_gauge_guard = GaugeGuard::new(&self.pending_gauge, 1.0); let in_flight_permit_opt = self.acquire_in_flight_permit().await?; let concurrency_permit_opt = self.acquire_concurrency_permit().await; drop(pending_gauge_guard); - let ongoing_gauge_guard = GaugeGuard::from_gauge(&self.ongoing_gauge); - ongoing_gauge_guard.increment(1.0); + let ongoing_gauge_guard = GaugeGuard::new(&self.ongoing_gauge, 1.0); Ok(LoadShieldPermit { _in_flight_permit_opt: in_flight_permit_opt, _concurrency_permit_opt: concurrency_permit_opt, diff --git a/quickwit/quickwit-storage/src/metrics.rs b/quickwit/quickwit-storage/src/metrics.rs index 3173b6f80c8..dd8451a1d4c 100644 --- a/quickwit/quickwit-storage/src/metrics.rs +++ b/quickwit/quickwit-storage/src/metrics.rs @@ -399,9 +399,10 @@ pub static CACHE_METRICS_FOR_TESTS: LazyLock = pub fn object_storage_get_slice_in_flight_guards( get_request_size: usize, ) -> (GaugeGuard, GaugeGuard) { - let bytes_guard = GaugeGuard::from_gauge(&OBJECT_STORAGE_GET_SLICE_IN_FLIGHT_NUM_BYTES); - bytes_guard.increment(get_request_size as f64); - let count_guard = GaugeGuard::from_gauge(&OBJECT_STORAGE_GET_SLICE_IN_FLIGHT_COUNT); - count_guard.increment(1.0); + let bytes_guard = GaugeGuard::new( + 
&OBJECT_STORAGE_GET_SLICE_IN_FLIGHT_NUM_BYTES, + get_request_size as f64, + ); + let count_guard = GaugeGuard::new(&OBJECT_STORAGE_GET_SLICE_IN_FLIGHT_COUNT, 1.0); (bytes_guard, count_guard) } From 4434af0011fce57762f849a594928159905704d3 Mon Sep 17 00:00:00 2001 From: Shuhei Kitagawa Date: Tue, 5 May 2026 09:09:32 +0200 Subject: [PATCH 30/54] Update histogram timer API --- quickwit/Cargo.lock | 1 + quickwit/Cargo.toml | 1 + quickwit/quickwit-metrics/Cargo.toml | 1 + quickwit/quickwit-metrics/src/histogram.rs | 12 ++++-------- quickwit/quickwit-metrics/tests/histogram.rs | 6 +++--- quickwit/quickwit-search/src/leaf.rs | 3 ++- quickwit/quickwit-search/src/list_terms.rs | 3 ++- .../src/object_storage/s3_compatible_storage.rs | 6 ++++-- .../quickwit-storage/src/opendal_storage/base.rs | 6 ++++-- 9 files changed, 22 insertions(+), 17 deletions(-) diff --git a/quickwit/Cargo.lock b/quickwit/Cargo.lock index 8b12b0809c3..1722f0b1bee 100644 --- a/quickwit/Cargo.lock +++ b/quickwit/Cargo.lock @@ -9064,6 +9064,7 @@ dependencies = [ "metrics-util", "papaya", "proptest", + "quanta", "rustc-hash", ] diff --git a/quickwit/Cargo.toml b/quickwit/Cargo.toml index 73d7367d521..da53067955f 100644 --- a/quickwit/Cargo.toml +++ b/quickwit/Cargo.toml @@ -212,6 +212,7 @@ pulsar = { version = "6.6", default-features = false, features = [ "compression", "tokio-runtime", ] } +quanta = "0.12" quick_cache = "0.6.18" quote = "1.0" rand = "0.10" diff --git a/quickwit/quickwit-metrics/Cargo.toml b/quickwit/quickwit-metrics/Cargo.toml index d328d063ab7..530149ae1d8 100644 --- a/quickwit/quickwit-metrics/Cargo.toml +++ b/quickwit/quickwit-metrics/Cargo.toml @@ -17,6 +17,7 @@ inventory = { workspace = true } const_format = { workspace = true } atomic_float = { workspace = true } dashmap = { workspace = true } +quanta = { workspace = true } rustc-hash = { workspace = true } [dev-dependencies] diff --git a/quickwit/quickwit-metrics/src/histogram.rs b/quickwit/quickwit-metrics/src/histogram.rs index 
2a99f3bc9d6..d67e77918f5 100644 --- a/quickwit/quickwit-metrics/src/histogram.rs +++ b/quickwit/quickwit-metrics/src/histogram.rs @@ -13,10 +13,10 @@ // limitations under the License. use std::sync::{Arc, LazyLock}; -use std::time::Instant; use dashmap::DashMap; use metrics::HistogramFn; +use quanta::Instant; use crate::MetricInfo; @@ -175,11 +175,6 @@ impl Histogram { pub fn record(&self, value: f64) { self.0.inner.record(value); } - - /// Starts a timer that records the elapsed time in seconds when dropped. - pub fn start_timer(&self) -> HistogramTimer { - HistogramTimer::new(self.clone()) - } } /// Bridges `Histogram` into the `metrics` recorder trait so it can be @@ -199,9 +194,10 @@ pub struct HistogramTimer { } impl HistogramTimer { - fn new(histogram: Histogram) -> Self { + /// Starts a timer that records the elapsed time in seconds when dropped. + pub fn new(histogram: &Histogram) -> Self { Self { - histogram, + histogram: histogram.clone(), start: Instant::now(), observed: false, } diff --git a/quickwit/quickwit-metrics/tests/histogram.rs b/quickwit/quickwit-metrics/tests/histogram.rs index 747e420422d..da42b5a7fd9 100644 --- a/quickwit/quickwit-metrics/tests/histogram.rs +++ b/quickwit/quickwit-metrics/tests/histogram.rs @@ -17,7 +17,7 @@ mod common; use common::with_recorder; use metrics::with_local_recorder; use metrics_util::debugging::{DebugValue, DebuggingRecorder}; -use quickwit_metrics::histogram; +use quickwit_metrics::{HistogramTimer, histogram}; #[test] fn base_records_value() { @@ -125,7 +125,7 @@ fn timer_records_value_on_drop() { subsystem: "test", buckets: vec![1.0, 5.0, 10.0] ); - let _timer = h.start_timer(); + let _timer = HistogramTimer::new(&h); }); let (name, labels, value) = &entries[0]; @@ -149,7 +149,7 @@ fn timer_observe_duration_records_once() { subsystem: "test", buckets: vec![1.0, 5.0, 10.0] ); - h.start_timer().observe_duration(); + HistogramTimer::new(&h).observe_duration(); }); let (name, labels, value) = &entries[0]; diff 
--git a/quickwit/quickwit-search/src/leaf.rs b/quickwit/quickwit-search/src/leaf.rs index 38fb94793e0..3cf38681a2b 100644 --- a/quickwit/quickwit-search/src/leaf.rs +++ b/quickwit/quickwit-search/src/leaf.rs @@ -29,6 +29,7 @@ use quickwit_common::pretty::PrettySample; use quickwit_common::uri::Uri; use quickwit_directories::{CachingDirectory, HotDirectory, StorageDirectory}; use quickwit_doc_mapper::{Automaton, DocMapper, FastFieldWarmupInfo, TermRange, WarmupInfo}; +use quickwit_metrics::HistogramTimer; use quickwit_proto::search::lambda_single_split_result::Outcome; use quickwit_proto::search::{ CountHits, LeafSearchRequest, LeafSearchResponse, PartialHit, ResourceStats, SearchRequest, @@ -1861,7 +1862,7 @@ async fn leaf_search_single_split_wrapper( split: SplitIdAndFooterOffsets, mut search_permit: SearchPermit, ) { - let timer = crate::metrics::LEAF_SEARCH_SPLIT_DURATION_SECS.start_timer(); + let timer = HistogramTimer::new(&crate::metrics::LEAF_SEARCH_SPLIT_DURATION_SECS); let leaf_search_single_split_opt_res: crate::Result> = leaf_search_single_split( request, diff --git a/quickwit/quickwit-search/src/list_terms.rs b/quickwit/quickwit-search/src/list_terms.rs index b3d6ef8bb6a..753fc384ef1 100644 --- a/quickwit/quickwit-search/src/list_terms.rs +++ b/quickwit/quickwit-search/src/list_terms.rs @@ -23,6 +23,7 @@ use itertools::{Either, Itertools}; use quickwit_common::pretty::PrettySample; use quickwit_config::build_doc_mapper; use quickwit_metastore::{ListSplitsRequestExt, MetastoreServiceStreamSplitsExt, SplitMetadata}; +use quickwit_metrics::HistogramTimer; use quickwit_proto::metastore::{ListSplitsRequest, MetastoreService, MetastoreServiceClient}; use quickwit_proto::search::{ LeafListTermsRequest, LeafListTermsResponse, ListTermsRequest, ListTermsResponse, @@ -354,7 +355,7 @@ pub async fn leaf_list_terms( let leaf_split_search_permit = search_permit_recv.await; // TODO dedicated counter and timer? 
crate::metrics::LEAF_LIST_TERMS_SPLITS_TOTAL.increment(1); - let timer = crate::metrics::LEAF_SEARCH_SPLIT_DURATION_SECS.start_timer(); + let timer = HistogramTimer::new(&crate::metrics::LEAF_SEARCH_SPLIT_DURATION_SECS); let leaf_search_single_split_res = leaf_list_terms_single_split( &searcher_context_clone, request, diff --git a/quickwit/quickwit-storage/src/object_storage/s3_compatible_storage.rs b/quickwit/quickwit-storage/src/object_storage/s3_compatible_storage.rs index f9e6d2e7e59..d8528a27922 100644 --- a/quickwit/quickwit-storage/src/object_storage/s3_compatible_storage.rs +++ b/quickwit/quickwit-storage/src/object_storage/s3_compatible_storage.rs @@ -43,6 +43,7 @@ use quickwit_common::retry::{Retry, RetryParams}; use quickwit_common::uri::Uri; use quickwit_common::{chunk_range, into_u64_range}; use quickwit_config::S3StorageConfig; +use quickwit_metrics::HistogramTimer; use regex::Regex; use tokio::io::{AsyncRead, AsyncReadExt, AsyncWriteExt, BufReader, ReadBuf}; use tokio::sync::Semaphore; @@ -648,7 +649,8 @@ impl S3CompatibleObjectStorage { let delete_objects_res: StorageResult = aws_retry(&self.retry_params, || async { crate::OBJECT_STORAGE_BULK_DELETE_REQUESTS_TOTAL.increment(1); - let _timer = crate::OBJECT_STORAGE_BULK_DELETE_REQUEST_DURATION.start_timer(); + let _timer = + HistogramTimer::new(&crate::OBJECT_STORAGE_BULK_DELETE_REQUEST_DURATION); self.s3_client .delete_objects() .bucket(self.bucket.clone()) @@ -795,7 +797,7 @@ impl Storage for S3CompatibleObjectStorage { let key = self.key(path); let delete_res = aws_retry(&self.retry_params, || async { crate::OBJECT_STORAGE_DELETE_REQUESTS_TOTAL.increment(1); - let _timer = crate::OBJECT_STORAGE_DELETE_REQUEST_DURATION.start_timer(); + let _timer = HistogramTimer::new(&crate::OBJECT_STORAGE_DELETE_REQUEST_DURATION); self.s3_client .delete_object() .bucket(&bucket) diff --git a/quickwit/quickwit-storage/src/opendal_storage/base.rs b/quickwit/quickwit-storage/src/opendal_storage/base.rs index 
a8a5be3ac82..88aec761265 100644 --- a/quickwit/quickwit-storage/src/opendal_storage/base.rs +++ b/quickwit/quickwit-storage/src/opendal_storage/base.rs @@ -20,6 +20,7 @@ use async_trait::async_trait; use futures::AsyncWriteExt as FuturesAsyncWriteExt; use opendal::{DeleteInput, IntoDeleteInput, Operator}; use quickwit_common::uri::Uri; +use quickwit_metrics::HistogramTimer; use tokio::io::{AsyncRead, AsyncWriteExt as TokioAsyncWriteExt}; use tokio_util::compat::{FuturesAsyncReadCompatExt, FuturesAsyncWriteCompatExt}; @@ -153,7 +154,7 @@ impl Storage for OpendalStorage { async fn delete(&self, path: &Path) -> StorageResult<()> { let path = path.as_os_str().to_string_lossy(); crate::OBJECT_STORAGE_DELETE_REQUESTS_TOTAL.increment(1); - let _timer = crate::OBJECT_STORAGE_DELETE_REQUEST_DURATION.start_timer(); + let _timer = HistogramTimer::new(&crate::OBJECT_STORAGE_DELETE_REQUEST_DURATION); self.op.delete(&path).await?; Ok(()) } @@ -168,7 +169,8 @@ impl Storage for OpendalStorage { let mut bulk_error = BulkDeleteError::default(); for (index, path) in paths.iter().enumerate() { crate::OBJECT_STORAGE_BULK_DELETE_REQUESTS_TOTAL.increment(1); - let _timer = crate::OBJECT_STORAGE_BULK_DELETE_REQUEST_DURATION.start_timer(); + let _timer = + HistogramTimer::new(&crate::OBJECT_STORAGE_BULK_DELETE_REQUEST_DURATION); let result = self.op.delete(&path.as_os_str().to_string_lossy()).await; if let Err(err) = result { let storage_error_kind = err.kind(); From eef3789bdab6e8b35ce327127b92e2a1168cb131 Mon Sep 17 00:00:00 2001 From: Luca Cominardi Date: Tue, 5 May 2026 10:07:42 +0200 Subject: [PATCH 31/54] Inline single-use Labels constants into direct "key" => value syntax Replace label_values! macro calls with inline "key" => value arguments in counter!/gauge!/histogram! macros wherever a Labels constant was defined statically and used in only one place. This eliminates 20+ Labels constants and their associated label_values! 
indirection, making metric call sites more readable and self-documenting. Co-authored-by: Cursor --- quickwit/quickwit-cli/src/lib.rs | 4 +- quickwit/quickwit-cluster/src/grpc_gossip.rs | 3 +- quickwit/quickwit-cluster/src/lib.rs | 12 +- .../example/src/codegen/hello.rs | 560 ++++++++--------- quickwit/quickwit-common/src/io.rs | 13 +- quickwit/quickwit-common/src/metrics.rs | 7 +- quickwit/quickwit-common/src/runtimes.rs | 13 +- quickwit/quickwit-common/src/thread_pool.rs | 9 +- .../src/tower/circuit_breaker.rs | 3 +- quickwit/quickwit-common/src/tower/metrics.rs | 37 +- .../src/source_config/serialize.rs | 4 +- .../src/control_plane.rs | 7 +- .../src/indexing_scheduler/mod.rs | 6 +- .../src/ingest/ingest_controller.rs | 7 +- .../quickwit-control-plane/src/model/mod.rs | 4 +- .../src/model/shard_table.rs | 31 +- .../src/actors/doc_processor.rs | 16 +- .../quickwit-indexing/src/actors/indexer.rs | 3 +- .../src/actors/indexing_pipeline.rs | 20 +- .../src/actors/merge_pipeline.rs | 9 +- .../src/actors/merge_scheduler_service.rs | 19 +- .../parquet_merge_pipeline.rs | 3 +- .../metrics_pipeline/parquet_uploader.rs | 3 +- .../src/actors/metrics_pipeline/pipeline.rs | 10 +- .../quickwit-indexing/src/actors/uploader.rs | 7 +- quickwit/quickwit-indexing/src/metrics.rs | 7 +- .../src/source/kafka_source.rs | 3 +- .../src/source/queue_sources/shared_state.rs | 4 +- .../src/codegen/ingest_service.rs | 561 ++++++++++-------- .../quickwit-ingest/src/ingest_api_service.rs | 6 +- .../src/ingest_v2/broadcast/local_shards.rs | 13 +- .../quickwit-ingest/src/ingest_v2/ingester.rs | 21 +- .../quickwit-ingest/src/ingest_v2/metrics.rs | 6 +- .../src/ingest_v2/replication.rs | 5 +- .../quickwit-ingest/src/ingest_v2/router.rs | 6 +- quickwit/quickwit-ingest/src/lib.rs | 17 +- quickwit/quickwit-jaeger/src/lib.rs | 67 ++- quickwit/quickwit-jaeger/src/v1.rs | 22 +- quickwit/quickwit-jaeger/src/v2.rs | 46 +- .../src/actors/delete_task_planner.rs | 7 +- .../src/actors/garbage_collector.rs | 41 
+- quickwit/quickwit-janitor/src/metrics.rs | 18 +- .../quickwit-lambda-client/src/invoker.rs | 19 +- .../quickwit-lambda-client/src/metrics.rs | 4 +- .../benches/quickwit_metrics.rs | 16 +- .../quickwit-metrics/examples/http_service.rs | 17 +- quickwit/quickwit-metrics/src/counter.rs | 2 +- quickwit/quickwit-metrics/src/gauge.rs | 2 +- quickwit/quickwit-metrics/src/histogram.rs | 2 +- quickwit/quickwit-metrics/src/labels.rs | 57 +- quickwit/quickwit-metrics/src/lib.rs | 12 +- .../quickwit-opentelemetry/src/otlp/logs.rs | 53 +- .../src/otlp/otel_metrics.rs | 53 +- .../quickwit-opentelemetry/src/otlp/traces.rs | 56 +- .../src/index/accumulator.rs | 7 +- .../src/ingest/processor.rs | 7 +- .../src/ingest/sketch_processor.rs | 9 +- quickwit/quickwit-search/src/leaf.rs | 12 +- quickwit/quickwit-search/src/list_terms.rs | 5 +- quickwit/quickwit-search/src/metrics.rs | 1 - .../quickwit-search/src/metrics_trackers.rs | 35 +- .../quickwit-search/src/scroll_context.rs | 4 +- .../quickwit-search/src/search_job_placer.rs | 4 +- .../src/search_permit_provider.rs | 13 +- quickwit/quickwit-serve/src/lib.rs | 3 +- quickwit/quickwit-serve/src/load_shield.rs | 6 +- quickwit/quickwit-serve/src/metrics.rs | 5 +- quickwit/quickwit-serve/src/rest.rs | 14 +- .../src/cache/quickwit_cache.rs | 4 +- .../src/file_descriptor_cache.rs | 3 +- quickwit/quickwit-storage/src/metrics.rs | 55 +- .../src/object_storage/error.rs | 9 +- .../src/split_cache/split_table.rs | 14 +- 73 files changed, 1102 insertions(+), 1061 deletions(-) diff --git a/quickwit/quickwit-cli/src/lib.rs b/quickwit/quickwit-cli/src/lib.rs index afdde568ac3..7bc4851000f 100644 --- a/quickwit/quickwit-cli/src/lib.rs +++ b/quickwit/quickwit-cli/src/lib.rs @@ -354,6 +354,8 @@ pub mod busy_detector { use tracing::debug; + use crate::metrics::THREAD_UNPARK_DURATION_MICROSECONDS; + // we need that time reference to use an atomic and not a mutex for LAST_UNPARK static TIME_REF: LazyLock = LazyLock::new(Instant::now); static ENABLED: 
AtomicBool = AtomicBool::new(false); @@ -391,7 +393,7 @@ pub mod busy_detector { .unwrap_or_default(); let now = now.as_micros() as u64; let delta = now - time.load(Ordering::Relaxed); - crate::metrics::THREAD_UNPARK_DURATION_MICROSECONDS.record(delta as f64); + THREAD_UNPARK_DURATION_MICROSECONDS.record(delta as f64); if delta > ALLOWED_DELAY_MICROS { emit_debug(delta, now); } diff --git a/quickwit/quickwit-cluster/src/grpc_gossip.rs b/quickwit/quickwit-cluster/src/grpc_gossip.rs index ddd6f14c7bf..cdbc76d4075 100644 --- a/quickwit/quickwit-cluster/src/grpc_gossip.rs +++ b/quickwit/quickwit-cluster/src/grpc_gossip.rs @@ -31,6 +31,7 @@ use tracing::{info, warn}; use crate::grpc_service::cluster_grpc_client; use crate::member::NodeStateExt; +use crate::metrics::GRPC_GOSSIP_ROUNDS_TOTAL; const MAX_GOSSIP_PEERS: usize = 3; @@ -107,7 +108,7 @@ async fn perform_grpc_gossip_rounds( warn!("failed to fetch cluster state from node `{node_id}`"); continue; }; - crate::metrics::GRPC_GOSSIP_ROUNDS_TOTAL.increment(1); + GRPC_GOSSIP_ROUNDS_TOTAL.increment(1); let mut chitchat_guard = chitchat.lock().await; diff --git a/quickwit/quickwit-cluster/src/lib.rs b/quickwit/quickwit-cluster/src/lib.rs index 0387b4e5123..0a0d532ee58 100644 --- a/quickwit/quickwit-cluster/src/lib.rs +++ b/quickwit/quickwit-cluster/src/lib.rs @@ -49,6 +49,10 @@ pub use crate::cluster::{ create_cluster_for_test, create_cluster_for_test_with_id, grpc_addr_from_listen_addr_for_test, }; pub use crate::member::{ClusterMember, INDEXING_CPU_CAPACITY_KEY}; +use crate::metrics::{ + GOSSIP_RECV_BYTES_TOTAL, GOSSIP_RECV_MESSAGES_TOTAL, GOSSIP_SENT_BYTES_TOTAL, + GOSSIP_SENT_MESSAGES_TOTAL, +}; pub use crate::node::ClusterNode; #[derive(Debug, Clone, Copy, Eq, PartialEq)] @@ -105,10 +109,10 @@ impl Transport for CountingUdpTransport { let socket = UdpSocket::open(listen_addr).await?; Ok(Box::new(CountingUdpSocket { socket, - gossip_recv: crate::metrics::GOSSIP_RECV_MESSAGES_TOTAL.clone(), - gossip_recv_bytes: 
crate::metrics::GOSSIP_RECV_BYTES_TOTAL.clone(), - gossip_send: crate::metrics::GOSSIP_SENT_MESSAGES_TOTAL.clone(), - gossip_send_bytes: crate::metrics::GOSSIP_SENT_BYTES_TOTAL.clone(), + gossip_recv: GOSSIP_RECV_MESSAGES_TOTAL.clone(), + gossip_recv_bytes: GOSSIP_RECV_BYTES_TOTAL.clone(), + gossip_send: GOSSIP_SENT_MESSAGES_TOTAL.clone(), + gossip_send_bytes: GOSSIP_SENT_BYTES_TOTAL.clone(), })) } } diff --git a/quickwit/quickwit-codegen/example/src/codegen/hello.rs b/quickwit/quickwit-codegen/example/src/codegen/hello.rs index bacf8b3edac..d5007fea29b 100644 --- a/quickwit/quickwit-codegen/example/src/codegen/hello.rs +++ b/quickwit/quickwit-codegen/example/src/codegen/hello.rs @@ -1,45 +1,93 @@ // This file is @generated by prost-build. -#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] -#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +#[derive( + serde::Serialize, + serde::Deserialize, + utoipa::ToSchema, + Clone, + PartialEq, + Eq, + Hash, + ::prost::Message, +)] pub struct HelloRequest { #[prost(string, tag = "1")] pub name: ::prost::alloc::string::String, } -#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] -#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +#[derive( + serde::Serialize, + serde::Deserialize, + utoipa::ToSchema, + Clone, + PartialEq, + Eq, + Hash, + ::prost::Message, +)] pub struct HelloResponse { #[prost(string, tag = "1")] pub message: ::prost::alloc::string::String, } -#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] -#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +#[derive( + serde::Serialize, + serde::Deserialize, + utoipa::ToSchema, + Clone, + PartialEq, + Eq, + Hash, + ::prost::Message, +)] pub struct GoodbyeRequest { #[prost(string, tag = "1")] pub name: ::prost::alloc::string::String, } -#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] -#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +#[derive( + serde::Serialize, + serde::Deserialize, + 
utoipa::ToSchema, + Clone, + PartialEq, + Eq, + Hash, + ::prost::Message, +)] pub struct GoodbyeResponse { #[prost(string, tag = "1")] pub message: ::prost::alloc::string::String, } -#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] -#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +#[derive( + serde::Serialize, + serde::Deserialize, + utoipa::ToSchema, + Clone, + PartialEq, + Eq, + Hash, + ::prost::Message, +)] pub struct PingRequest { #[prost(string, tag = "1")] pub name: ::prost::alloc::string::String, } -#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] -#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +#[derive( + serde::Serialize, + serde::Deserialize, + utoipa::ToSchema, + Clone, + PartialEq, + Eq, + Hash, + ::prost::Message, +)] pub struct PingResponse { #[prost(string, tag = "1")] pub message: ::prost::alloc::string::String, } +use quickwit_common::tower::RpcName; /// BEGIN quickwit-codegen #[allow(unused_imports)] use std::str::FromStr; use tower::{Layer, Service, ServiceExt}; -use quickwit_common::tower::RpcName; impl RpcName for HelloRequest { fn rpc_name() -> &'static str { "hello" @@ -62,10 +110,7 @@ pub trait Hello: std::fmt::Debug + Send + Sync + 'static { ///Says hello. async fn hello(&self, request: HelloRequest) -> crate::HelloResult; ///Says goodbye. - async fn goodbye( - &self, - request: GoodbyeRequest, - ) -> crate::HelloResult; + async fn goodbye(&self, request: GoodbyeRequest) -> crate::HelloResult; ///Ping pong. 
async fn ping( &self, @@ -87,7 +132,7 @@ impl HelloClient { { #[cfg(any(test, feature = "testsuite"))] assert!( - std::any::TypeId::of:: < T > () != std::any::TypeId::of:: < MockHello > (), + std::any::TypeId::of::() != std::any::TypeId::of::(), "`MockHello` must be wrapped in a `MockHelloWrapper`: use `HelloClient::from_mock(mock)` to instantiate the client" ); Self { @@ -113,9 +158,8 @@ impl HelloClient { max_message_size: bytesize::ByteSize, compression_encoding_opt: Option, ) -> Self { - let (_, connection_keys_watcher) = tokio::sync::watch::channel( - std::collections::HashSet::from_iter([addr]), - ); + let (_, connection_keys_watcher) = + tokio::sync::watch::channel(std::collections::HashSet::from_iter([addr])); let mut client = hello_grpc_client::HelloGrpcClient::new(channel) .max_decoding_message_size(max_message_size.0 as usize) .max_encoding_message_size(max_message_size.0 as usize); @@ -171,10 +215,7 @@ impl Hello for HelloClient { async fn hello(&self, request: HelloRequest) -> crate::HelloResult { self.inner.0.hello(request).await } - async fn goodbye( - &self, - request: GoodbyeRequest, - ) -> crate::HelloResult { + async fn goodbye(&self, request: GoodbyeRequest) -> crate::HelloResult { self.inner.0.goodbye(request).await } async fn ping( @@ -225,9 +266,8 @@ pub mod mock_hello { } } } -pub type BoxFuture = std::pin::Pin< - Box> + Send + 'static>, ->; +pub type BoxFuture = + std::pin::Pin> + Send + 'static>>; impl tower::Service for InnerHelloClient { type Response = HelloResponse; type Error = crate::HelloError; @@ -270,10 +310,7 @@ impl tower::Service> for InnerHelloC ) -> std::task::Poll> { std::task::Poll::Ready(Ok(())) } - fn call( - &mut self, - request: quickwit_common::ServiceStream, - ) -> Self::Future { + fn call(&mut self, request: quickwit_common::ServiceStream) -> Self::Future { let svc = self.clone(); let fut = async move { svc.0.ping(request).await }; Box::pin(fut) @@ -284,16 +321,9 @@ impl tower::Service> for InnerHelloC struct 
HelloTowerServiceStack { #[allow(dead_code)] inner: InnerHelloClient, - hello_svc: quickwit_common::tower::BoxService< - HelloRequest, - HelloResponse, - crate::HelloError, - >, - goodbye_svc: quickwit_common::tower::BoxService< - GoodbyeRequest, - GoodbyeResponse, - crate::HelloError, - >, + hello_svc: quickwit_common::tower::BoxService, + goodbye_svc: + quickwit_common::tower::BoxService, ping_svc: quickwit_common::tower::BoxService< quickwit_common::ServiceStream, HelloStream, @@ -305,10 +335,7 @@ impl Hello for HelloTowerServiceStack { async fn hello(&self, request: HelloRequest) -> crate::HelloResult { self.hello_svc.clone().ready().await?.call(request).await } - async fn goodbye( - &self, - request: GoodbyeRequest, - ) -> crate::HelloResult { + async fn goodbye(&self, request: GoodbyeRequest) -> crate::HelloResult { self.goodbye_svc.clone().ready().await?.call(request).await } async fn ping( @@ -331,11 +358,7 @@ type HelloLayer = quickwit_common::tower::BoxLayer< crate::HelloError, >; type GoodbyeLayer = quickwit_common::tower::BoxLayer< - quickwit_common::tower::BoxService< - GoodbyeRequest, - GoodbyeResponse, - crate::HelloError, - >, + quickwit_common::tower::BoxService, GoodbyeRequest, GoodbyeResponse, crate::HelloError, @@ -360,29 +383,20 @@ impl HelloTowerLayerStack { pub fn stack_layer(mut self, layer: L) -> Self where L: tower::Layer< - quickwit_common::tower::BoxService< - HelloRequest, - HelloResponse, - crate::HelloError, - >, - > + Clone + Send + Sync + 'static, + quickwit_common::tower::BoxService, + > + Clone + + Send + + Sync + + 'static, , - >>::Service: tower::Service< - HelloRequest, - Response = HelloResponse, - Error = crate::HelloError, - > + Clone + Send + Sync + 'static, + quickwit_common::tower::BoxService, + >>::Service: tower::Service + + Clone + + Send + + Sync + + 'static, <, + quickwit_common::tower::BoxService, >>::Service as tower::Service>::Future: Send + 'static, L: tower::Layer< quickwit_common::tower::BoxService< @@ -390,24 
+404,19 @@ impl HelloTowerLayerStack { GoodbyeResponse, crate::HelloError, >, - > + Clone + Send + Sync + 'static, + > + Clone + + Send + + Sync + + 'static, , - >>::Service: tower::Service< - GoodbyeRequest, - Response = GoodbyeResponse, - Error = crate::HelloError, - > + Clone + Send + Sync + 'static, + quickwit_common::tower::BoxService, + >>::Service: tower::Service + + Clone + + Send + + Sync + + 'static, <, + quickwit_common::tower::BoxService, >>::Service as tower::Service>::Future: Send + 'static, L: tower::Layer< quickwit_common::tower::BoxService< @@ -415,7 +424,10 @@ impl HelloTowerLayerStack { HelloStream, crate::HelloError, >, - > + Clone + Send + Sync + 'static, + > + Clone + + Send + + Sync + + 'static, , @@ -426,39 +438,43 @@ impl HelloTowerLayerStack { quickwit_common::ServiceStream, Response = HelloStream, Error = crate::HelloError, - > + Clone + Send + Sync + 'static, + > + Clone + + Send + + Sync + + 'static, <, HelloStream, crate::HelloError, >, - >>::Service as tower::Service< - quickwit_common::ServiceStream, - >>::Future: Send + 'static, + >>::Service as tower::Service>>::Future: + Send + 'static, { - self.hello_layers.push(quickwit_common::tower::BoxLayer::new(layer.clone())); - self.goodbye_layers.push(quickwit_common::tower::BoxLayer::new(layer.clone())); - self.ping_layers.push(quickwit_common::tower::BoxLayer::new(layer.clone())); + self.hello_layers + .push(quickwit_common::tower::BoxLayer::new(layer.clone())); + self.goodbye_layers + .push(quickwit_common::tower::BoxLayer::new(layer.clone())); + self.ping_layers + .push(quickwit_common::tower::BoxLayer::new(layer.clone())); self } pub fn stack_hello_layer(mut self, layer: L) -> Self where L: tower::Layer< - quickwit_common::tower::BoxService< - HelloRequest, - HelloResponse, - crate::HelloError, - >, - > + Send + Sync + 'static, - L::Service: tower::Service< - HelloRequest, - Response = HelloResponse, - Error = crate::HelloError, - > + Clone + Send + Sync + 'static, + 
quickwit_common::tower::BoxService, + > + Send + + Sync + + 'static, + L::Service: tower::Service + + Clone + + Send + + Sync + + 'static, >::Future: Send + 'static, { - self.hello_layers.push(quickwit_common::tower::BoxLayer::new(layer)); + self.hello_layers + .push(quickwit_common::tower::BoxLayer::new(layer)); self } pub fn stack_goodbye_layer(mut self, layer: L) -> Self @@ -469,15 +485,18 @@ impl HelloTowerLayerStack { GoodbyeResponse, crate::HelloError, >, - > + Send + Sync + 'static, - L::Service: tower::Service< - GoodbyeRequest, - Response = GoodbyeResponse, - Error = crate::HelloError, - > + Clone + Send + Sync + 'static, + > + Send + + Sync + + 'static, + L::Service: tower::Service + + Clone + + Send + + Sync + + 'static, >::Future: Send + 'static, { - self.goodbye_layers.push(quickwit_common::tower::BoxLayer::new(layer)); + self.goodbye_layers + .push(quickwit_common::tower::BoxLayer::new(layer)); self } pub fn stack_ping_layer(mut self, layer: L) -> Self @@ -488,17 +507,22 @@ impl HelloTowerLayerStack { HelloStream, crate::HelloError, >, - > + Send + Sync + 'static, + > + Send + + Sync + + 'static, L::Service: tower::Service< quickwit_common::ServiceStream, Response = HelloStream, Error = crate::HelloError, - > + Clone + Send + Sync + 'static, - , - >>::Future: Send + 'static, + > + Clone + + Send + + Sync + + 'static, + >>::Future: + Send + 'static, { - self.ping_layers.push(quickwit_common::tower::BoxLayer::new(layer)); + self.ping_layers + .push(quickwit_common::tower::BoxLayer::new(layer)); self } pub fn build(self, instance: T) -> HelloClient @@ -515,12 +539,8 @@ impl HelloTowerLayerStack { max_message_size: bytesize::ByteSize, compression_encoding_opt: Option, ) -> HelloClient { - let client = HelloClient::from_channel( - addr, - channel, - max_message_size, - compression_encoding_opt, - ); + let client = + HelloClient::from_channel(addr, channel, max_message_size, compression_encoding_opt); let inner_client = client.inner; 
self.build_from_inner_client(inner_client) } @@ -538,17 +558,12 @@ impl HelloTowerLayerStack { let inner_client = client.inner; self.build_from_inner_client(inner_client) } - pub fn build_from_mailbox( - self, - mailbox: quickwit_actors::Mailbox, - ) -> HelloClient + pub fn build_from_mailbox(self, mailbox: quickwit_actors::Mailbox) -> HelloClient where A: quickwit_actors::Actor + std::fmt::Debug + Send + 'static, HelloMailbox: Hello, { - let inner_client = InnerHelloClient( - std::sync::Arc::new(HelloMailbox::new(mailbox)), - ); + let inner_client = InnerHelloClient(std::sync::Arc::new(HelloMailbox::new(mailbox))); self.build_from_inner_client(inner_client) } #[cfg(any(test, feature = "testsuite"))] @@ -558,30 +573,18 @@ impl HelloTowerLayerStack { self.build_from_inner_client(inner_client) } fn build_from_inner_client(self, inner_client: InnerHelloClient) -> HelloClient { - let hello_svc = self - .hello_layers - .into_iter() - .rev() - .fold( - quickwit_common::tower::BoxService::new(inner_client.clone()), - |svc, layer| layer.layer(svc), - ); - let goodbye_svc = self - .goodbye_layers - .into_iter() - .rev() - .fold( - quickwit_common::tower::BoxService::new(inner_client.clone()), - |svc, layer| layer.layer(svc), - ); - let ping_svc = self - .ping_layers - .into_iter() - .rev() - .fold( - quickwit_common::tower::BoxService::new(inner_client.clone()), - |svc, layer| layer.layer(svc), - ); + let hello_svc = self.hello_layers.into_iter().rev().fold( + quickwit_common::tower::BoxService::new(inner_client.clone()), + |svc, layer| layer.layer(svc), + ); + let goodbye_svc = self.goodbye_layers.into_iter().rev().fold( + quickwit_common::tower::BoxService::new(inner_client.clone()), + |svc, layer| layer.layer(svc), + ); + let ping_svc = self.ping_layers.into_iter().rev().fold( + quickwit_common::tower::BoxService::new(inner_client.clone()), + |svc, layer| layer.layer(svc), + ); let tower_svc_stack = HelloTowerServiceStack { inner: inner_client, hello_svc, @@ -630,7 
+633,8 @@ impl Clone for HelloMailbox { impl tower::Service for HelloMailbox where A: quickwit_actors::Actor - + quickwit_actors::DeferableReplyHandler> + Send + + quickwit_actors::DeferableReplyHandler> + + Send + 'static, M: std::fmt::Debug + Send + 'static, T: Send + 'static, @@ -652,7 +656,10 @@ where fn call(&mut self, message: M) -> Self::Future { let mailbox = self.inner.clone(); let fut = async move { - mailbox.ask_for_res(message).await.map_err(|error| error.into()) + mailbox + .ask_for_res(message) + .await + .map_err(|error| error.into()) }; Box::pin(fut) } @@ -661,21 +668,17 @@ where impl Hello for HelloMailbox where A: quickwit_actors::Actor + std::fmt::Debug, - HelloMailbox< - A, - >: tower::Service< + HelloMailbox: tower::Service< HelloRequest, Response = HelloResponse, Error = crate::HelloError, Future = BoxFuture, - > - + tower::Service< + > + tower::Service< GoodbyeRequest, Response = GoodbyeResponse, Error = crate::HelloError, Future = BoxFuture, - > - + tower::Service< + > + tower::Service< quickwit_common::ServiceStream, Response = HelloStream, Error = crate::HelloError, @@ -685,10 +688,7 @@ where async fn hello(&self, request: HelloRequest) -> crate::HelloResult { self.clone().call(request).await } - async fn goodbye( - &self, - request: GoodbyeRequest, - ) -> crate::HelloResult { + async fn goodbye(&self, request: GoodbyeRequest) -> crate::HelloResult { self.clone().call(request).await } async fn ping( @@ -705,8 +705,11 @@ where } fn endpoints(&self) -> Vec { vec![ - quickwit_common::uri::Uri::from_str(& format!("actor://localhost/{}", self - .inner.actor_instance_id())).expect("URI should be valid") + quickwit_common::uri::Uri::from_str(&format!( + "actor://localhost/{}", + self.inner.actor_instance_id() + )) + .expect("URI should be valid"), ] } } @@ -714,9 +717,8 @@ where pub struct HelloGrpcClientAdapter { inner: T, #[allow(dead_code)] - connection_addrs_rx: tokio::sync::watch::Receiver< - std::collections::HashSet, - >, + 
connection_addrs_rx: + tokio::sync::watch::Receiver>, } impl HelloGrpcClientAdapter { pub fn new( @@ -734,11 +736,14 @@ impl HelloGrpcClientAdapter { #[async_trait::async_trait] impl Hello for HelloGrpcClientAdapter> where - T: tonic::client::GrpcService + std::fmt::Debug + Clone + Send - + Sync + 'static, + T: tonic::client::GrpcService + + std::fmt::Debug + + Clone + + Send + + Sync + + 'static, T::ResponseBody: tonic::codegen::Body + Send + 'static, - ::Error: Into - + Send, + ::Error: Into + Send, T::Future: Send, { async fn hello(&self, request: HelloRequest) -> crate::HelloResult { @@ -747,24 +752,19 @@ where .hello(request) .await .map(|response| response.into_inner()) - .map_err(|status| crate::error::grpc_status_to_service_error( - status, - HelloRequest::rpc_name(), - )) + .map_err(|status| { + crate::error::grpc_status_to_service_error(status, HelloRequest::rpc_name()) + }) } - async fn goodbye( - &self, - request: GoodbyeRequest, - ) -> crate::HelloResult { + async fn goodbye(&self, request: GoodbyeRequest) -> crate::HelloResult { self.inner .clone() .goodbye(request) .await .map(|response| response.into_inner()) - .map_err(|status| crate::error::grpc_status_to_service_error( - status, - GoodbyeRequest::rpc_name(), - )) + .map_err(|status| { + crate::error::grpc_status_to_service_error(status, GoodbyeRequest::rpc_name()) + }) } async fn ping( &self, @@ -777,16 +777,13 @@ where .map(|response| { let streaming: tonic::Streaming<_> = response.into_inner(); let stream = quickwit_common::ServiceStream::from(streaming); - stream - .map_err(|status| crate::error::grpc_status_to_service_error( - status, - PingRequest::rpc_name(), - )) + stream.map_err(|status| { + crate::error::grpc_status_to_service_error(status, PingRequest::rpc_name()) + }) + }) + .map_err(|status| { + crate::error::grpc_status_to_service_error(status, PingRequest::rpc_name()) }) - .map_err(|status| crate::error::grpc_status_to_service_error( - status, - PingRequest::rpc_name(), - )) } async 
fn check_connectivity(&self) -> anyhow::Result<()> { if self.connection_addrs_rx.borrow().is_empty() { @@ -798,9 +795,12 @@ where self.connection_addrs_rx .borrow() .iter() - .flat_map(|addr| quickwit_common::uri::Uri::from_str( - &format!("grpc://{addr}/{}.{}", "hello", "Hello"), - )) + .flat_map(|addr| { + quickwit_common::uri::Uri::from_str(&format!( + "grpc://{addr}/{}.{}", + "hello", "Hello" + )) + }) .collect() } } @@ -854,9 +854,9 @@ impl hello_grpc_server::HelloGrpc for HelloGrpcServerAdapter { quickwit_common::ServiceStream::from(streaming) }) .await - .map(|stream| tonic::Response::new( - stream.map_err(crate::error::grpc_error_to_grpc_status), - )) + .map(|stream| { + tonic::Response::new(stream.map_err(crate::error::grpc_error_to_grpc_status)) + }) .map_err(crate::error::grpc_error_to_grpc_status) } } @@ -867,10 +867,10 @@ pub mod hello_grpc_client { dead_code, missing_docs, clippy::wildcard_imports, - clippy::let_unit_value, + clippy::let_unit_value )] - use tonic::codegen::*; use tonic::codegen::http::Uri; + use tonic::codegen::*; #[derive(Debug, Clone)] pub struct HelloGrpcClient { inner: tonic::client::Grpc, @@ -909,14 +909,13 @@ pub mod hello_grpc_client { F: tonic::service::Interceptor, T::ResponseBody: Default, T: tonic::codegen::Service< - http::Request, - Response = http::Response< - >::ResponseBody, + http::Request, + Response = http::Response< + >::ResponseBody, + >, >, - >, - , - >>::Error: Into + std::marker::Send + std::marker::Sync, + >>::Error: + Into + std::marker::Send + std::marker::Sync, { HelloGrpcClient::new(InterceptedService::new(inner, interceptor)) } @@ -956,40 +955,29 @@ pub mod hello_grpc_client { &mut self, request: impl tonic::IntoRequest, ) -> std::result::Result, tonic::Status> { - self.inner - .ready() - .await - .map_err(|e| { - tonic::Status::unknown( - format!("Service was not ready: {}", e.into()), - ) - })?; + self.inner.ready().await.map_err(|e| { + tonic::Status::unknown(format!("Service was not ready: {}", 
e.into())) + })?; let codec = tonic_prost::ProstCodec::default(); let path = http::uri::PathAndQuery::from_static("/hello.Hello/Hello"); let mut req = request.into_request(); - req.extensions_mut().insert(GrpcMethod::new("hello.Hello", "Hello")); + req.extensions_mut() + .insert(GrpcMethod::new("hello.Hello", "Hello")); self.inner.unary(req, path, codec).await } /// Says goodbye. pub async fn goodbye( &mut self, request: impl tonic::IntoRequest, - ) -> std::result::Result< - tonic::Response, - tonic::Status, - > { - self.inner - .ready() - .await - .map_err(|e| { - tonic::Status::unknown( - format!("Service was not ready: {}", e.into()), - ) - })?; + ) -> std::result::Result, tonic::Status> { + self.inner.ready().await.map_err(|e| { + tonic::Status::unknown(format!("Service was not ready: {}", e.into())) + })?; let codec = tonic_prost::ProstCodec::default(); let path = http::uri::PathAndQuery::from_static("/hello.Hello/Goodbye"); let mut req = request.into_request(); - req.extensions_mut().insert(GrpcMethod::new("hello.Hello", "Goodbye")); + req.extensions_mut() + .insert(GrpcMethod::new("hello.Hello", "Goodbye")); self.inner.unary(req, path, codec).await } /// Ping pong. 
@@ -1000,18 +988,14 @@ pub mod hello_grpc_client { tonic::Response>, tonic::Status, > { - self.inner - .ready() - .await - .map_err(|e| { - tonic::Status::unknown( - format!("Service was not ready: {}", e.into()), - ) - })?; + self.inner.ready().await.map_err(|e| { + tonic::Status::unknown(format!("Service was not ready: {}", e.into())) + })?; let codec = tonic_prost::ProstCodec::default(); let path = http::uri::PathAndQuery::from_static("/hello.Hello/Ping"); let mut req = request.into_streaming_request(); - req.extensions_mut().insert(GrpcMethod::new("hello.Hello", "Ping")); + req.extensions_mut() + .insert(GrpcMethod::new("hello.Hello", "Ping")); self.inner.streaming(req, path, codec).await } } @@ -1023,7 +1007,7 @@ pub mod hello_grpc_server { dead_code, missing_docs, clippy::wildcard_imports, - clippy::let_unit_value, + clippy::let_unit_value )] use tonic::codegen::*; /// Generated trait containing gRPC methods that should be implemented for use with HelloGrpcServer. @@ -1042,8 +1026,7 @@ pub mod hello_grpc_server { /// Server streaming response type for the Ping method. type PingStream: tonic::codegen::tokio_stream::Stream< Item = std::result::Result, - > - + std::marker::Send + > + std::marker::Send + 'static; /// Ping pong. 
async fn ping( @@ -1072,10 +1055,7 @@ pub mod hello_grpc_server { max_encoding_message_size: None, } } - pub fn with_interceptor( - inner: T, - interceptor: F, - ) -> InterceptedService + pub fn with_interceptor(inner: T, interceptor: F) -> InterceptedService where F: tonic::service::Interceptor, { @@ -1130,21 +1110,15 @@ pub mod hello_grpc_server { "/hello.Hello/Hello" => { #[allow(non_camel_case_types)] struct HelloSvc(pub Arc); - impl tonic::server::UnaryService - for HelloSvc { + impl tonic::server::UnaryService for HelloSvc { type Response = super::HelloResponse; - type Future = BoxFuture< - tonic::Response, - tonic::Status, - >; + type Future = BoxFuture, tonic::Status>; fn call( &mut self, request: tonic::Request, ) -> Self::Future { let inner = Arc::clone(&self.0); - let fut = async move { - ::hello(&inner, request).await - }; + let fut = async move { ::hello(&inner, request).await }; Box::pin(fut) } } @@ -1173,21 +1147,16 @@ pub mod hello_grpc_server { "/hello.Hello/Goodbye" => { #[allow(non_camel_case_types)] struct GoodbyeSvc(pub Arc); - impl tonic::server::UnaryService - for GoodbyeSvc { + impl tonic::server::UnaryService for GoodbyeSvc { type Response = super::GoodbyeResponse; - type Future = BoxFuture< - tonic::Response, - tonic::Status, - >; + type Future = BoxFuture, tonic::Status>; fn call( &mut self, request: tonic::Request, ) -> Self::Future { let inner = Arc::clone(&self.0); - let fut = async move { - ::goodbye(&inner, request).await - }; + let fut = + async move { ::goodbye(&inner, request).await }; Box::pin(fut) } } @@ -1216,24 +1185,17 @@ pub mod hello_grpc_server { "/hello.Hello/Ping" => { #[allow(non_camel_case_types)] struct PingSvc(pub Arc); - impl< - T: HelloGrpc, - > tonic::server::StreamingService - for PingSvc { + impl tonic::server::StreamingService for PingSvc { type Response = super::PingResponse; type ResponseStream = T::PingStream; - type Future = BoxFuture< - tonic::Response, - tonic::Status, - >; + type Future = + BoxFuture, 
tonic::Status>; fn call( &mut self, request: tonic::Request>, ) -> Self::Future { let inner = Arc::clone(&self.0); - let fut = async move { - ::ping(&inner, request).await - }; + let fut = async move { ::ping(&inner, request).await }; Box::pin(fut) } } @@ -1259,25 +1221,19 @@ pub mod hello_grpc_server { }; Box::pin(fut) } - _ => { - Box::pin(async move { - let mut response = http::Response::new( - tonic::body::Body::default(), - ); - let headers = response.headers_mut(); - headers - .insert( - tonic::Status::GRPC_STATUS, - (tonic::Code::Unimplemented as i32).into(), - ); - headers - .insert( - http::header::CONTENT_TYPE, - tonic::metadata::GRPC_CONTENT_TYPE, - ); - Ok(response) - }) - } + _ => Box::pin(async move { + let mut response = http::Response::new(tonic::body::Body::default()); + let headers = response.headers_mut(); + headers.insert( + tonic::Status::GRPC_STATUS, + (tonic::Code::Unimplemented as i32).into(), + ); + headers.insert( + http::header::CONTENT_TYPE, + tonic::metadata::GRPC_CONTENT_TYPE, + ); + Ok(response) + }), } } } diff --git a/quickwit/quickwit-common/src/io.rs b/quickwit/quickwit-common/src/io.rs index 8509ee96b5b..5b1fc2d0429 100644 --- a/quickwit/quickwit-common/src/io.rs +++ b/quickwit/quickwit-common/src/io.rs @@ -34,7 +34,7 @@ use async_speed_limit::clock::StandardClock; use async_speed_limit::limiter::Consume; use bytesize::ByteSize; use pin_project::pin_project; -use quickwit_metrics::{Counter, Labels, counter}; +use quickwit_metrics::{Counter, counter}; use tokio::io::AsyncWrite; use crate::metrics::MaybeRegisteredCounter; @@ -56,8 +56,6 @@ static WRITE_BYTES: LazyLock = LazyLock::new(|| { ) }); -const COMPONENT_LABELS: Labels<1> = Labels::new(["component"]); - /// Parameter used in `async_speed_limit`. /// /// The default value is good and does not need to be tweaked. 
@@ -108,10 +106,11 @@ impl IoControls { Ok(guard) } - pub fn set_component(mut self, component: &str) -> Self { - let labels = COMPONENT_LABELS.with_values([component.to_string()]); - self.bytes_counter = - MaybeRegisteredCounter::registered(counter!(parent: WRITE_BYTES, labels: &labels)); + pub fn set_component(mut self, component: &'static str) -> Self { + self.bytes_counter = MaybeRegisteredCounter::registered(counter!( + parent: WRITE_BYTES, + "component" => component, + )); self } diff --git a/quickwit/quickwit-common/src/metrics.rs b/quickwit/quickwit-common/src/metrics.rs index 7c932843c02..37c71947f4a 100644 --- a/quickwit/quickwit-common/src/metrics.rs +++ b/quickwit/quickwit-common/src/metrics.rs @@ -20,7 +20,7 @@ use std::time::Duration; use metrics_exporter_prometheus::PrometheusHandle; pub use prometheus::{exponential_buckets, linear_buckets}; -use quickwit_metrics::{Counter, Gauge, Labels, gauge}; +use quickwit_metrics::{Counter, Gauge, gauge}; static PROMETHEUS_HANDLE: OnceLock = OnceLock::new(); @@ -163,8 +163,6 @@ static IN_FLIGHT_DATA_BYTES: LazyLock = LazyLock::new(|| { ) }); -const COMPONENT_LABELS: Labels<1> = Labels::new(["component"]); - pub static IN_FLIGHT_REST_SERVER: LazyLock = LazyLock::new(|| in_flight_data_gauge("rest_server")); @@ -216,8 +214,7 @@ pub static IN_FLIGHT_OTHER_SOURCE: LazyLock = LazyLock::new(|| in_flight_data_gauge("pulsar_source")); fn in_flight_data_gauge(component: &'static str) -> Gauge { - let labels = COMPONENT_LABELS.with_values([component]); - gauge!(parent: IN_FLIGHT_DATA_BYTES, labels: &labels) + gauge!(parent: IN_FLIGHT_DATA_BYTES, "component" => component) } #[cfg(test)] diff --git a/quickwit/quickwit-common/src/runtimes.rs b/quickwit/quickwit-common/src/runtimes.rs index 600f7e2f8a5..d2436b1f61d 100644 --- a/quickwit/quickwit-common/src/runtimes.rs +++ b/quickwit/quickwit-common/src/runtimes.rs @@ -17,7 +17,7 @@ use std::sync::OnceLock; use std::sync::atomic::{AtomicUsize, Ordering}; use 
std::time::Duration; -use quickwit_metrics::{Counter, Gauge, Labels, counter, gauge}; +use quickwit_metrics::{Counter, Gauge, counter, gauge}; use tokio::runtime::Runtime; use tokio_metrics::{RuntimeMetrics, RuntimeMonitor}; @@ -56,8 +56,6 @@ static TOKIO_WORKER_THREADS: std::sync::LazyLock = std::sync::LazyLock::n ) }); -const RUNTIME_TYPE_LABELS: Labels<1> = Labels::new(["runtime_type"]); - /// Describes which runtime an actor should run on. #[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)] pub enum RuntimeType { @@ -216,15 +214,14 @@ struct RuntimeMetricsRecorder { impl RuntimeMetricsRecorder { pub fn new(label: &'static str) -> Self { - let labels = RUNTIME_TYPE_LABELS.with_values([label]); Self { - scheduled_tasks: gauge!(parent: TOKIO_SCHEDULED_TASKS, labels: &labels), + scheduled_tasks: gauge!(parent: TOKIO_SCHEDULED_TASKS, "runtime_type" => label), worker_busy_duration_milliseconds_total: counter!( parent: TOKIO_WORKER_BUSY_DURATION_MILLISECONDS_TOTAL, - labels: &labels, + "runtime_type" => label, ), - worker_busy_ratio: gauge!(parent: TOKIO_WORKER_BUSY_RATIO, labels: &labels), - worker_threads: gauge!(parent: TOKIO_WORKER_THREADS, labels: &labels), + worker_busy_ratio: gauge!(parent: TOKIO_WORKER_BUSY_RATIO, "runtime_type" => label), + worker_threads: gauge!(parent: TOKIO_WORKER_THREADS, "runtime_type" => label), } } diff --git a/quickwit/quickwit-common/src/thread_pool.rs b/quickwit/quickwit-common/src/thread_pool.rs index 00ef5dea3ad..63b80894c15 100644 --- a/quickwit/quickwit-common/src/thread_pool.rs +++ b/quickwit/quickwit-common/src/thread_pool.rs @@ -16,7 +16,7 @@ use std::fmt; use std::sync::Arc; use futures::{Future, TryFutureExt}; -use quickwit_metrics::{Gauge, GaugeGuard, Labels, gauge}; +use quickwit_metrics::{Gauge, GaugeGuard, gauge}; use tokio::sync::oneshot; use tracing::error; @@ -36,8 +36,6 @@ static THREAD_POOL_PENDING_TASKS: std::sync::LazyLock = std::sync::LazyLo ) }); -const THREAD_POOL_LABELS: Labels<1> = Labels::new(["pool"]); - 
/// An executor backed by a thread pool to run CPU-intensive tasks. /// /// tokio::spawn_blocking should only used for IO-bound tasks, as it has not limit on its @@ -62,9 +60,8 @@ impl ThreadPool { let thread_pool = rayon_pool_builder .build() .expect("failed to spawn thread pool"); - let labels = THREAD_POOL_LABELS.with_values([name]); - let ongoing_tasks = gauge!(parent: THREAD_POOL_ONGOING_TASKS, labels: &labels); - let pending_tasks = gauge!(parent: THREAD_POOL_PENDING_TASKS, labels: &labels); + let ongoing_tasks = gauge!(parent: THREAD_POOL_ONGOING_TASKS, "pool" => name); + let pending_tasks = gauge!(parent: THREAD_POOL_PENDING_TASKS, "pool" => name); ThreadPool { thread_pool: Arc::new(thread_pool), ongoing_tasks, diff --git a/quickwit/quickwit-common/src/tower/circuit_breaker.rs b/quickwit/quickwit-common/src/tower/circuit_breaker.rs index c9e54750882..53bebeef413 100644 --- a/quickwit/quickwit-common/src/tower/circuit_breaker.rs +++ b/quickwit/quickwit-common/src/tower/circuit_breaker.rs @@ -268,6 +268,7 @@ where mod tests { use std::sync::atomic::{AtomicBool, Ordering}; + use quickwit_metrics::counter; use tower::{ServiceBuilder, ServiceExt}; use super::*; @@ -301,7 +302,7 @@ mod tests { const TIMEOUT: Duration = Duration::from_millis(500); - let int_counter = quickwit_metrics::counter!( + let int_counter = counter!( name: "circuit_break_total_test", description: "test circuit breaker counter", subsystem: "", diff --git a/quickwit/quickwit-common/src/tower/metrics.rs b/quickwit/quickwit-common/src/tower/metrics.rs index caf8353d7f0..a7ec95c029e 100644 --- a/quickwit/quickwit-common/src/tower/metrics.rs +++ b/quickwit/quickwit-common/src/tower/metrics.rs @@ -19,7 +19,7 @@ use std::time::Instant; use futures::{Future, ready}; use pin_project::{pin_project, pinned_drop}; -use quickwit_metrics::{Counter, Gauge, Histogram, Labels, counter, gauge, histogram}; +use quickwit_metrics::{Counter, Gauge, Histogram, counter, gauge, histogram}; use tower::{Layer, 
Service}; use crate::metrics::exponential_buckets; @@ -53,10 +53,6 @@ static GRPC_REQUEST_DURATION_SECONDS: LazyLock = LazyLock::new(|| { ) }); -const GRPC_SERVICE_LABELS: Labels<2> = Labels::new(["service", "kind"]); -const GRPC_RPC_LABELS: Labels<1> = Labels::new(["rpc"]); -const GRPC_RPC_STATUS_LABELS: Labels<2> = Labels::new(["rpc", "status"]); - #[derive(Clone)] pub struct GrpcMetrics { inner: S, @@ -83,8 +79,11 @@ where let rpc_name = R::rpc_name(); let inner = self.inner.call(request); - let labels = GRPC_RPC_LABELS.with_values([rpc_name]); - gauge!(parent: &self.requests_in_flight, labels: &labels).increment(1.0); + gauge!( + parent: self.requests_in_flight, + "rpc" => rpc_name, + ) + .increment(1.0); ResponseFuture { inner, @@ -107,13 +106,13 @@ pub struct GrpcMetricsLayer { impl GrpcMetricsLayer { pub fn new(subsystem: &'static str, kind: &'static str) -> Self { - let labels = GRPC_SERVICE_LABELS.with_values([subsystem, kind]); Self { - requests_total: counter!(parent: GRPC_REQUESTS_TOTAL, labels: &labels), - requests_in_flight: gauge!(parent: GRPC_REQUESTS_IN_FLIGHT, labels: &labels), + requests_total: counter!(parent: GRPC_REQUESTS_TOTAL, "service" => subsystem, "kind" => kind), + requests_in_flight: gauge!(parent: GRPC_REQUESTS_IN_FLIGHT, "service" => subsystem, "kind" => kind), request_duration_seconds: histogram!( parent: GRPC_REQUEST_DURATION_SECONDS, - labels: &labels, + "service" => subsystem, + "kind" => kind, ), } } @@ -149,15 +148,19 @@ pub struct ResponseFuture { impl PinnedDrop for ResponseFuture { fn drop(self: Pin<&mut Self>) { let elapsed = self.start.elapsed().as_secs_f64(); - let rpc_status_labels = GRPC_RPC_STATUS_LABELS.with_values([self.rpc_name, self.status]); - counter!(parent: &self.requests_total, labels: &rpc_status_labels).increment(1); + counter!(parent: self.requests_total, "rpc" => self.rpc_name, "status" => self.status) + .increment(1); histogram!( - parent: &self.request_duration_seconds, - labels: &rpc_status_labels, + 
parent: self.request_duration_seconds, + "rpc" => self.rpc_name, + "status" => self.status, ) .record(elapsed); - let rpc_labels = GRPC_RPC_LABELS.with_values([self.rpc_name]); - gauge!(parent: &self.requests_in_flight, labels: &rpc_labels).decrement(1.0); + gauge!( + parent: self.requests_in_flight, + "rpc" => self.rpc_name, + ) + .decrement(1.0); } } diff --git a/quickwit/quickwit-config/src/source_config/serialize.rs b/quickwit/quickwit-config/src/source_config/serialize.rs index 224689019d2..3f580ee8fa3 100644 --- a/quickwit/quickwit-config/src/source_config/serialize.rs +++ b/quickwit/quickwit-config/src/source_config/serialize.rs @@ -129,7 +129,9 @@ impl SourceConfigForSerialization { | SourceParams::File(FileSourceParams::Notifications(_)) => {} _ => { if self.num_pipelines > 1 { - bail!("Quickwit currently supports multiple pipelines only for GCP PubSub or Kafka sources. open an issue https://github.com/quickwit-oss/quickwit/issues if you need the feature for other source types"); + bail!( + "Quickwit currently supports multiple pipelines only for GCP PubSub or Kafka sources. open an issue https://github.com/quickwit-oss/quickwit/issues if you need the feature for other source types" + ); } } } diff --git a/quickwit/quickwit-control-plane/src/control_plane.rs b/quickwit/quickwit-control-plane/src/control_plane.rs index 4453c4596da..11cc76298af 100644 --- a/quickwit/quickwit-control-plane/src/control_plane.rs +++ b/quickwit/quickwit-control-plane/src/control_plane.rs @@ -61,6 +61,7 @@ use crate::debouncer::Debouncer; use crate::indexing_scheduler::{IndexingScheduler, IndexingSchedulerState}; use crate::ingest::IngestController; use crate::ingest::ingest_controller::{IngestControllerStats, RebalanceShardsCallback}; +use crate::metrics::{METASTORE_ERROR_ABORTED, METASTORE_ERROR_MAYBE_EXECUTED, RESTART_TOTAL}; use crate::model::ControlPlaneModel; /// Interval between two controls (or checks) of the desired plan VS running plan. 
@@ -219,7 +220,7 @@ impl Actor for ControlPlane { } async fn initialize(&mut self, ctx: &ActorContext) -> Result<(), ActorExitStatus> { - crate::metrics::RESTART_TOTAL.increment(1); + RESTART_TOTAL.increment(1); self.model .load_from_metastore(&mut self.metastore, ctx.progress()) @@ -568,13 +569,13 @@ fn convert_metastore_error( // It will be up to the client to decide what to do there. error!(err=?metastore_error, transaction_outcome="aborted", "metastore error"); } - crate::metrics::METASTORE_ERROR_ABORTED.increment(1); + METASTORE_ERROR_ABORTED.increment(1); Ok(Err(ControlPlaneError::Metastore(metastore_error))) } else { // If the metastore transaction may have been executed, we need to restart the control plane // so that it gets resynced with the metastore state. error!(error=?metastore_error, transaction_outcome="maybe-executed", "metastore error"); - crate::metrics::METASTORE_ERROR_MAYBE_EXECUTED.increment(1); + METASTORE_ERROR_MAYBE_EXECUTED.increment(1); Err(ActorExitStatus::from(anyhow::anyhow!(metastore_error))) } } diff --git a/quickwit/quickwit-control-plane/src/indexing_scheduler/mod.rs b/quickwit/quickwit-control-plane/src/indexing_scheduler/mod.rs index 9a395269a2a..7883ec80447 100644 --- a/quickwit/quickwit-control-plane/src/indexing_scheduler/mod.rs +++ b/quickwit/quickwit-control-plane/src/indexing_scheduler/mod.rs @@ -38,7 +38,7 @@ use tracing::{debug, info, warn}; use crate::indexing_plan::PhysicalIndexingPlan; use crate::indexing_scheduler::change_tracker::{NotifyChangeOnDrop, RebuildNotifier}; use crate::indexing_scheduler::scheduling::build_physical_indexing_plan; -use crate::metrics::ShardLocalityMetrics; +use crate::metrics::{APPLY_PLAN_TOTAL, SCHEDULE_TOTAL, ShardLocalityMetrics}; use crate::model::{ControlPlaneModel, ShardEntry, ShardLocations}; use crate::{IndexerNodeInfo, IndexerPool}; @@ -295,7 +295,7 @@ impl IndexingScheduler { // Prefer not calling this method directly, and instead call // 
`ControlPlane::rebuild_indexing_plan_debounced`. pub(crate) fn rebuild_plan(&mut self, model: &ControlPlaneModel) { - crate::metrics::SCHEDULE_TOTAL.increment(1); + SCHEDULE_TOTAL.increment(1); let notify_on_drop = self.next_rebuild_tracker.start_rebuild(); @@ -397,7 +397,7 @@ impl IndexingScheduler { notify_on_drop: Option>, ) { debug!(new_physical_plan=?new_physical_plan, "apply physical indexing plan"); - crate::metrics::APPLY_PLAN_TOTAL.increment(1); + APPLY_PLAN_TOTAL.increment(1); for (node_id, indexing_tasks) in new_physical_plan.indexing_tasks_per_indexer() { // We don't want to block on a slow indexer so we apply this change asynchronously // TODO not blocking is cool, but we need to make sure there is not accumulation diff --git a/quickwit/quickwit-control-plane/src/ingest/ingest_controller.rs b/quickwit/quickwit-control-plane/src/ingest/ingest_controller.rs index 5eafeb86296..8dc70b45b28 100644 --- a/quickwit/quickwit-control-plane/src/ingest/ingest_controller.rs +++ b/quickwit/quickwit-control-plane/src/ingest/ingest_controller.rs @@ -58,6 +58,7 @@ use ulid::Ulid; use super::scaling_arbiter::ScalingArbiter; use crate::control_plane::ControlPlane; use crate::ingest::wait_handle::WaitHandle; +use crate::metrics::REBALANCE_SHARDS; use crate::model::{ControlPlaneModel, ScalingMode, ShardEntry, ShardStats}; const CLOSE_SHARDS_REQUEST_TIMEOUT: Duration = if cfg!(test) { @@ -1024,7 +1025,7 @@ impl IngestController { let shards_to_rebalance: Vec = self.compute_shards_to_rebalance(model); - crate::metrics::REBALANCE_SHARDS.set(shards_to_rebalance.len() as f64); + REBALANCE_SHARDS.set(shards_to_rebalance.len() as f64); if shards_to_rebalance.is_empty() { debug!("skipping rebalance: no shards to rebalance"); @@ -1047,12 +1048,12 @@ impl IngestController { .await .inspect_err(|error| { error!(%error, "failed to open shards during rebalance"); - crate::metrics::REBALANCE_SHARDS.set(0.0); + REBALANCE_SHARDS.set(0.0); })?; let num_opened_shards: usize = 
per_source_num_opened_shards.values().sum(); - crate::metrics::REBALANCE_SHARDS.set(num_opened_shards as f64); + REBALANCE_SHARDS.set(num_opened_shards as f64); for source_uid in per_source_num_opened_shards.keys() { // We temporarily disable the ability the scale down the number of shards for diff --git a/quickwit/quickwit-control-plane/src/model/mod.rs b/quickwit/quickwit-control-plane/src/model/mod.rs index 8ffa677e315..38eb0ed6cbe 100644 --- a/quickwit/quickwit-control-plane/src/model/mod.rs +++ b/quickwit/quickwit-control-plane/src/model/mod.rs @@ -39,6 +39,8 @@ use quickwit_proto::types::{IndexId, IndexUid, NodeId, ShardId, SourceId, Source pub(super) use shard_table::{ScalingMode, ShardEntry, ShardLocations, ShardStats, ShardTable}; use tracing::{debug, error, info, instrument, warn}; +use crate::metrics::INDEXES_TOTAL; + /// The control plane maintains a model in sync with the metastore. /// /// The model stays consistent with the metastore, because all @@ -167,7 +169,7 @@ impl ControlPlaneModel { } fn update_metrics(&self) { - crate::metrics::INDEXES_TOTAL.set(self.index_table.len() as f64); + INDEXES_TOTAL.set(self.index_table.len() as f64); } pub(crate) fn source_configs(&self) -> impl Iterator + '_ { diff --git a/quickwit/quickwit-control-plane/src/model/shard_table.rs b/quickwit/quickwit-control-plane/src/model/shard_table.rs index aafa344c17d..300344d012b 100644 --- a/quickwit/quickwit-control-plane/src/model/shard_table.rs +++ b/quickwit/quickwit-control-plane/src/model/shard_table.rs @@ -22,10 +22,13 @@ use quickwit_common::metrics::index_label; use quickwit_common::rate_limiter::{RateLimiter, RateLimiterSettings}; use quickwit_common::tower::ConstantRate; use quickwit_ingest::{RateMibPerSec, ShardInfo, ShardInfos}; +use quickwit_metrics::{gauge, label_values}; use quickwit_proto::ingest::{Shard, ShardState}; use quickwit_proto::types::{IndexUid, NodeId, ShardId, SourceId, SourceUid}; use tracing::{error, info, warn}; +use 
crate::metrics::{CLOSED_SHARDS, INDEX_ID_LABELS, OPEN_SHARDS}; + /// Limits the number of scale up operations that can happen to a source to 5 per minute. const SCALING_UP_RATE_LIMITER_SETTINGS: RateLimiterSettings = RateLimiterSettings { burst_limit: 5, @@ -461,15 +464,15 @@ impl ShardTable { // can update the metrics for this specific index. if index_label == index_id { let shard_stats = table_entry.shards_stats(); - let labels = crate::metrics::INDEX_ID_LABELS.with_values([index_label.to_string()]); - quickwit_metrics::gauge!( - parent: &crate::metrics::OPEN_SHARDS, - labels: &labels, + let labels = label_values!(INDEX_ID_LABELS, [index_label.to_string()]); + gauge!( + parent: OPEN_SHARDS, + labels: labels, ) .set(shard_stats.num_open_shards as f64); - quickwit_metrics::gauge!( - parent: &crate::metrics::CLOSED_SHARDS, - labels: &labels, + gauge!( + parent: CLOSED_SHARDS, + labels: labels, ) .set(shard_stats.num_closed_shards as f64); return; @@ -485,15 +488,15 @@ impl ShardTable { num_closed_shards += 1; } } - let labels = crate::metrics::INDEX_ID_LABELS.with_values([index_label.to_string()]); - quickwit_metrics::gauge!( - parent: &crate::metrics::OPEN_SHARDS, - labels: &labels, + let labels = label_values!(INDEX_ID_LABELS, [index_label.to_string()]); + gauge!( + parent: OPEN_SHARDS, + labels: labels, ) .set(num_open_shards as f64); - quickwit_metrics::gauge!( - parent: &crate::metrics::CLOSED_SHARDS, - labels: &labels, + gauge!( + parent: CLOSED_SHARDS, + labels: labels, ) .set(num_closed_shards as f64); } diff --git a/quickwit/quickwit-indexing/src/actors/doc_processor.rs b/quickwit/quickwit-indexing/src/actors/doc_processor.rs index d0320023544..b2f5e4a95f8 100644 --- a/quickwit/quickwit-indexing/src/actors/doc_processor.rs +++ b/quickwit/quickwit-indexing/src/actors/doc_processor.rs @@ -40,6 +40,7 @@ use tokio::runtime::Handle; #[cfg(feature = "vrl")] use super::vrl_processing::*; use crate::actors::Indexer; +use crate::metrics::{PROCESSED_BYTES, 
PROCESSED_DOCS_TOTAL}; use crate::models::{ NewPublishLock, NewPublishToken, ProcessedDoc, ProcessedDocBatch, PublishLock, RawDocBatch, }; @@ -282,18 +283,19 @@ impl Serialize for DocProcessorCounter { impl DocProcessorCounter { fn for_index_and_doc_processor_outcome(index: &str, outcome: &str) -> DocProcessorCounter { - let index_label = quickwit_common::metrics::index_label(index); - let labels = crate::metrics::INDEX_DOCS_PROCESSED_STATUS_LABELS - .with_values([index_label.to_string(), outcome.to_string()]); + let index_label = quickwit_common::metrics::index_label(index).to_string(); + let outcome = outcome.to_string(); DocProcessorCounter { num_docs: Default::default(), num_docs_metric: counter!( - parent: &crate::metrics::PROCESSED_DOCS_TOTAL, - labels: &labels, + parent: PROCESSED_DOCS_TOTAL, + "index" => index_label.clone(), + "docs_processed_status" => outcome.clone(), ), num_bytes_metric: counter!( - parent: &crate::metrics::PROCESSED_BYTES, - labels: &labels, + parent: PROCESSED_BYTES, + "index" => index_label, + "docs_processed_status" => outcome, ), } } diff --git a/quickwit/quickwit-indexing/src/actors/indexer.rs b/quickwit/quickwit-indexing/src/actors/indexer.rs index 43673f86c12..b39f53345c1 100644 --- a/quickwit/quickwit-indexing/src/actors/indexer.rs +++ b/quickwit/quickwit-indexing/src/actors/indexer.rs @@ -51,6 +51,7 @@ use ulid::Ulid; use super::IndexSerializer; use super::cooperative_indexing::{CooperativeIndexingCycle, CooperativeIndexingPeriod}; +use crate::metrics::SPLIT_BUILDERS; use crate::models::{ CommitTrigger, EmptySplit, IndexedSplitBatchBuilder, IndexedSplitBuilder, NewPublishLock, NewPublishToken, ProcessedDoc, ProcessedDocBatch, PublishLock, @@ -219,7 +220,7 @@ impl IndexerState { let publish_lock = self.publish_lock.clone(); let publish_token_opt = self.publish_token_opt.clone(); - let split_builders_guard = GaugeGuard::from_gauge(&crate::metrics::SPLIT_BUILDERS); + let split_builders_guard = 
GaugeGuard::from_gauge(&SPLIT_BUILDERS); split_builders_guard.increment(1.0); let workbench = IndexingWorkbench { diff --git a/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs b/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs index 107869932e8..b7204de53bd 100644 --- a/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs +++ b/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs @@ -45,6 +45,7 @@ use crate::actors::sequencer::Sequencer; use crate::actors::uploader::UploaderType; use crate::actors::{Publisher, Uploader}; use crate::merge_policy::MergePolicy; +use crate::metrics::{BACKPRESSURE_MICROS, INDEXING_PIPELINES}; use crate::models::IndexingStatistics; use crate::source::{ AssignShards, Assignment, SourceActor, SourceRuntime, quickwit_supported_sources, @@ -123,14 +124,9 @@ impl Actor for IndexingPipeline { impl IndexingPipeline { pub fn new(params: IndexingPipelineParams) -> Self { - let labels = crate::metrics::INDEX_LABELS.with_values([params - .pipeline_id - .index_uid - .index_id - .clone()]); let indexing_pipelines_gauge = gauge!( - parent: &crate::metrics::INDEXING_PIPELINES, - labels: &labels, + parent: INDEXING_PIPELINES, + "index" => params.pipeline_id.index_uid.index_id.clone(), ); let indexing_pipelines_gauge_guard = GaugeGuard::from_gauge(&indexing_pipelines_gauge); indexing_pipelines_gauge_guard.increment(1.0); @@ -319,7 +315,7 @@ impl IndexingPipeline { .spawn_actor() .set_kill_switch(self.kill_switch.clone()) .set_backpressure_micros_counter(counter!( - parent: &crate::metrics::BACKPRESSURE_MICROS, + parent: BACKPRESSURE_MICROS, "actor_name" => "publisher", )) .spawn(publisher); @@ -328,7 +324,7 @@ impl IndexingPipeline { let (sequencer_mailbox, sequencer_handle) = ctx .spawn_actor() .set_backpressure_micros_counter(counter!( - parent: &crate::metrics::BACKPRESSURE_MICROS, + parent: BACKPRESSURE_MICROS, "actor_name" => "sequencer", )) .set_kill_switch(self.kill_switch.clone()) @@ -348,7 +344,7 @@ impl 
IndexingPipeline { let (uploader_mailbox, uploader_handle) = ctx .spawn_actor() .set_backpressure_micros_counter(counter!( - parent: &crate::metrics::BACKPRESSURE_MICROS, + parent: BACKPRESSURE_MICROS, "actor_name" => "uploader", )) .set_kill_switch(self.kill_switch.clone()) @@ -382,7 +378,7 @@ impl IndexingPipeline { let (indexer_mailbox, indexer_handle) = ctx .spawn_actor() .set_backpressure_micros_counter(counter!( - parent: &crate::metrics::BACKPRESSURE_MICROS, + parent: BACKPRESSURE_MICROS, "actor_name" => "indexer", )) .set_kill_switch(self.kill_switch.clone()) @@ -399,7 +395,7 @@ impl IndexingPipeline { let (doc_processor_mailbox, doc_processor_handle) = ctx .spawn_actor() .set_backpressure_micros_counter(counter!( - parent: &crate::metrics::BACKPRESSURE_MICROS, + parent: BACKPRESSURE_MICROS, "actor_name" => "doc_processor", )) .set_kill_switch(self.kill_switch.clone()) diff --git a/quickwit/quickwit-indexing/src/actors/merge_pipeline.rs b/quickwit/quickwit-indexing/src/actors/merge_pipeline.rs index 587b567ab30..ee399259d72 100644 --- a/quickwit/quickwit-indexing/src/actors/merge_pipeline.rs +++ b/quickwit/quickwit-indexing/src/actors/merge_pipeline.rs @@ -45,6 +45,7 @@ use crate::actors::pipeline_shared::wait_duration_before_retry; use crate::actors::publisher::DisconnectMergePlanner; use crate::actors::{MergeSchedulerService, Publisher, Uploader, UploaderType}; use crate::merge_policy::MergePolicy; +use crate::metrics::{BACKPRESSURE_MICROS, ONGOING_MERGE_OPERATIONS}; use crate::models::MergeStatistics; use crate::split_store::IndexingSplitStore; @@ -274,7 +275,7 @@ impl MergePipeline { .spawn_actor() .set_kill_switch(self.kill_switch.clone()) .set_backpressure_micros_counter(counter!( - parent: &crate::metrics::BACKPRESSURE_MICROS, + parent: BACKPRESSURE_MICROS, "actor_name" => "merge_publisher", )) .spawn(merge_publisher); @@ -323,7 +324,7 @@ impl MergePipeline { .spawn_actor() .set_kill_switch(self.kill_switch.clone()) 
.set_backpressure_micros_counter(counter!( - parent: &crate::metrics::BACKPRESSURE_MICROS, + parent: BACKPRESSURE_MICROS, "actor_name" => "merge_executor", )) .spawn(merge_executor); @@ -338,7 +339,7 @@ impl MergePipeline { .spawn_actor() .set_kill_switch(self.kill_switch.clone()) .set_backpressure_micros_counter(counter!( - parent: &crate::metrics::BACKPRESSURE_MICROS, + parent: BACKPRESSURE_MICROS, "actor_name" => "merge_split_downloader", )) .spawn(merge_split_downloader); @@ -395,7 +396,7 @@ impl MergePipeline { handles.merge_planner.refresh_observe(); handles.merge_uploader.refresh_observe(); handles.merge_publisher.refresh_observe(); - let num_ongoing_merges = crate::metrics::ONGOING_MERGE_OPERATIONS.get(); + let num_ongoing_merges = ONGOING_MERGE_OPERATIONS.get(); self.statistics = self .previous_generations_statistics .clone() diff --git a/quickwit/quickwit-indexing/src/actors/merge_scheduler_service.rs b/quickwit/quickwit-indexing/src/actors/merge_scheduler_service.rs index fc8bfff5567..ae77c982532 100644 --- a/quickwit/quickwit-indexing/src/actors/merge_scheduler_service.rs +++ b/quickwit/quickwit-indexing/src/actors/merge_scheduler_service.rs @@ -30,6 +30,7 @@ use super::MergeSplitDownloader; #[cfg(feature = "metrics")] use super::metrics_pipeline::{ParquetMergeSplitDownloader, ParquetMergeTask}; use crate::merge_policy::{MergeOperation, MergeTask}; +use crate::metrics::{ONGOING_MERGE_OPERATIONS, PENDING_MERGE_BYTES, PENDING_MERGE_OPERATIONS}; pub struct MergePermit { _semaphore_permit: Option, @@ -226,8 +227,8 @@ impl MergeSchedulerService { _merge_permit: merge_permit, }; self.pending_merge_bytes -= merge_task.merge_operation.total_num_bytes(); - crate::metrics::PENDING_MERGE_OPERATIONS.set(self.pending_merge_queue.len() as f64); - crate::metrics::PENDING_MERGE_BYTES.set(self.pending_merge_bytes as f64); + PENDING_MERGE_OPERATIONS.set(self.pending_merge_queue.len() as f64); + PENDING_MERGE_BYTES.set(self.pending_merge_bytes as f64); match 
split_downloader_mailbox.try_send_message(merge_task) { Ok(_) => {} Err(quickwit_actors::TrySendError::Full(_)) => { @@ -269,10 +270,10 @@ impl MergeSchedulerService { merge_permit, }; self.pending_merge_bytes -= parquet_merge_task.merge_operation.total_size_bytes(); - crate::metrics::PENDING_MERGE_OPERATIONS.set( + PENDING_MERGE_OPERATIONS.set( (self.pending_merge_queue.len() + self.pending_parquet_merge_queue.len()) as f64, ); - crate::metrics::PENDING_MERGE_BYTES.set(self.pending_merge_bytes as f64); + PENDING_MERGE_BYTES.set(self.pending_merge_bytes as f64); match split_downloader_mailbox.try_send_message(parquet_merge_task) { Ok(_) => {} Err(quickwit_actors::TrySendError::Full(_)) => { @@ -286,7 +287,7 @@ impl MergeSchedulerService { let num_merges = self.merge_concurrency as i64 - self.merge_semaphore.available_permits() as i64; - crate::metrics::ONGOING_MERGE_OPERATIONS.set(num_merges as f64); + ONGOING_MERGE_OPERATIONS.set(num_merges as f64); } } @@ -370,8 +371,8 @@ impl Handler for MergeSchedulerService { }; self.pending_merge_bytes += scheduled_merge.merge_operation.total_num_bytes(); self.pending_merge_queue.push(scheduled_merge); - crate::metrics::PENDING_MERGE_OPERATIONS.set(self.pending_merge_queue.len() as f64); - crate::metrics::PENDING_MERGE_BYTES.set(self.pending_merge_bytes as f64); + PENDING_MERGE_OPERATIONS.set(self.pending_merge_queue.len() as f64); + PENDING_MERGE_BYTES.set(self.pending_merge_bytes as f64); self.schedule_pending_merges(ctx); Ok(()) } @@ -452,9 +453,9 @@ impl Handler for MergeSchedulerService { }; self.pending_merge_bytes += scheduled.merge_operation.total_size_bytes(); self.pending_parquet_merge_queue.push(scheduled); - crate::metrics::PENDING_MERGE_OPERATIONS + PENDING_MERGE_OPERATIONS .set((self.pending_merge_queue.len() + self.pending_parquet_merge_queue.len()) as f64); - crate::metrics::PENDING_MERGE_BYTES.set(self.pending_merge_bytes as f64); + PENDING_MERGE_BYTES.set(self.pending_merge_bytes as f64); 
self.schedule_pending_merges(ctx); Ok(()) } diff --git a/quickwit/quickwit-indexing/src/actors/metrics_pipeline/parquet_merge_pipeline.rs b/quickwit/quickwit-indexing/src/actors/metrics_pipeline/parquet_merge_pipeline.rs index 7ce7b312d17..5567485f6d6 100644 --- a/quickwit/quickwit-indexing/src/actors/metrics_pipeline/parquet_merge_pipeline.rs +++ b/quickwit/quickwit-indexing/src/actors/metrics_pipeline/parquet_merge_pipeline.rs @@ -54,6 +54,7 @@ use super::{METRICS_PUBLISHER_NAME, ParquetUploader}; use crate::actors::pipeline_shared::wait_duration_before_retry; use crate::actors::publisher::DisconnectMergePlanner; use crate::actors::{MergeSchedulerService, Publisher, Sequencer, UploaderType}; +use crate::metrics::ONGOING_MERGE_OPERATIONS; use crate::models::MergeStatistics; /// Limits concurrent Parquet merge pipeline spawns to avoid overwhelming the @@ -367,7 +368,7 @@ impl ParquetMergePipeline { handles.merge_planner.refresh_observe(); handles.merge_uploader.refresh_observe(); handles.merge_publisher.refresh_observe(); - let num_ongoing_merges = crate::metrics::ONGOING_MERGE_OPERATIONS.get(); + let num_ongoing_merges = ONGOING_MERGE_OPERATIONS.get(); self.statistics = self .previous_generations_statistics .clone() diff --git a/quickwit/quickwit-indexing/src/actors/metrics_pipeline/parquet_uploader.rs b/quickwit/quickwit-indexing/src/actors/metrics_pipeline/parquet_uploader.rs index 5e77d9a2a05..8781e7570e9 100644 --- a/quickwit/quickwit-indexing/src/actors/metrics_pipeline/parquet_uploader.rs +++ b/quickwit/quickwit-indexing/src/actors/metrics_pipeline/parquet_uploader.rs @@ -37,6 +37,7 @@ use tracing::{Instrument, Span, debug, info, instrument, warn}; use super::{ParquetSplitBatch, ParquetSplitsUpdate}; use crate::actors::sequencer::{Sequencer, SequencerCommand}; use crate::actors::{Publisher, UploaderCounters, UploaderType}; +use crate::metrics::AVAILABLE_CONCURRENT_UPLOAD_PERMITS; /// Concurrent upload permits for metrics uploader. 
/// Uses same permit pool as indexer uploads. @@ -123,7 +124,7 @@ impl ParquetUploader { let concurrent_upload_permits = CONCURRENT_UPLOAD_PERMITS_METRICS .get_or_init(|| Semaphore::const_new(self.max_concurrent_uploads)); let gauge = gauge!( - parent: &crate::metrics::AVAILABLE_CONCURRENT_UPLOAD_PERMITS, + parent: AVAILABLE_CONCURRENT_UPLOAD_PERMITS, "component" => "metrics", ); gauge.set(concurrent_upload_permits.available_permits() as f64); diff --git a/quickwit/quickwit-indexing/src/actors/metrics_pipeline/pipeline.rs b/quickwit/quickwit-indexing/src/actors/metrics_pipeline/pipeline.rs index 98b366861f6..d2226efbcf2 100644 --- a/quickwit/quickwit-indexing/src/actors/metrics_pipeline/pipeline.rs +++ b/quickwit/quickwit-indexing/src/actors/metrics_pipeline/pipeline.rs @@ -50,6 +50,7 @@ use crate::actors::pipeline_shared::{ }; use crate::actors::sequencer::Sequencer; use crate::actors::{Publisher, UploaderType}; +use crate::metrics::INDEXING_PIPELINES; use crate::models::IndexingStatistics; use crate::source::{ AssignShards, Assignment, SourceActor, SourceRuntime, quickwit_supported_sources, @@ -144,14 +145,9 @@ impl Actor for MetricsPipeline { impl MetricsPipeline { pub fn new(params: MetricsPipelineParams) -> Self { - let labels = crate::metrics::INDEX_LABELS.with_values([params - .pipeline_id - .index_uid - .index_id - .clone()]); let indexing_pipelines_gauge = gauge!( - parent: &crate::metrics::INDEXING_PIPELINES, - labels: &labels, + parent: INDEXING_PIPELINES, + "index" => params.pipeline_id.index_uid.index_id.clone(), ); let indexing_pipelines_gauge_guard = GaugeGuard::from_gauge(&indexing_pipelines_gauge); indexing_pipelines_gauge_guard.increment(1.0); diff --git a/quickwit/quickwit-indexing/src/actors/uploader.rs b/quickwit/quickwit-indexing/src/actors/uploader.rs index e9c80e1b051..b8b13ddc606 100644 --- a/quickwit/quickwit-indexing/src/actors/uploader.rs +++ b/quickwit/quickwit-indexing/src/actors/uploader.rs @@ -41,6 +41,7 @@ use tracing::{Instrument, 
Span, debug, info, instrument, warn}; use crate::actors::Publisher; use crate::actors::sequencer::{Sequencer, SequencerCommand}; use crate::merge_policy::{MergePolicy, MergeTask}; +use crate::metrics::AVAILABLE_CONCURRENT_UPLOAD_PERMITS; use crate::models::{ EmptySplit, PackagedSplit, PackagedSplitBatch, PublishLock, SplitsUpdate, create_split_metadata, }; @@ -204,21 +205,21 @@ impl Uploader { UploaderType::IndexUploader => ( &CONCURRENT_UPLOAD_PERMITS_INDEX, gauge!( - parent: &crate::metrics::AVAILABLE_CONCURRENT_UPLOAD_PERMITS, + parent: AVAILABLE_CONCURRENT_UPLOAD_PERMITS, "component" => "indexer", ), ), UploaderType::MergeUploader => ( &CONCURRENT_UPLOAD_PERMITS_MERGE, gauge!( - parent: &crate::metrics::AVAILABLE_CONCURRENT_UPLOAD_PERMITS, + parent: AVAILABLE_CONCURRENT_UPLOAD_PERMITS, "component" => "merger", ), ), UploaderType::DeleteUploader => ( &CONCURRENT_UPLOAD_PERMITS_MERGE, gauge!( - parent: &crate::metrics::AVAILABLE_CONCURRENT_UPLOAD_PERMITS, + parent: AVAILABLE_CONCURRENT_UPLOAD_PERMITS, "component" => "merger", ), ), diff --git a/quickwit/quickwit-indexing/src/metrics.rs b/quickwit/quickwit-indexing/src/metrics.rs index 09eba98fec4..b15b0e6eb8d 100644 --- a/quickwit/quickwit-indexing/src/metrics.rs +++ b/quickwit/quickwit-indexing/src/metrics.rs @@ -14,10 +14,7 @@ use std::sync::LazyLock; -use quickwit_metrics::{Counter, Gauge, Labels, counter, gauge}; - -pub(crate) const INDEX_DOCS_PROCESSED_STATUS_LABELS: Labels<2> = - Labels::new(["index", "docs_processed_status"]); +use quickwit_metrics::{Counter, Gauge, counter, gauge}; pub(crate) static PROCESSED_DOCS_TOTAL: LazyLock = LazyLock::new(|| { counter!( @@ -43,8 +40,6 @@ pub(crate) static INDEXING_PIPELINES: LazyLock = LazyLock::new(|| { ) }); -pub(crate) const INDEX_LABELS: Labels<1> = Labels::new(["index"]); - pub(crate) static BACKPRESSURE_MICROS: LazyLock = LazyLock::new(|| { counter!( name: "backpressure_micros", diff --git a/quickwit/quickwit-indexing/src/source/kafka_source.rs 
b/quickwit/quickwit-indexing/src/source/kafka_source.rs index 9b654a6b02f..b15430a4909 100644 --- a/quickwit/quickwit-indexing/src/source/kafka_source.rs +++ b/quickwit/quickwit-indexing/src/source/kafka_source.rs @@ -40,6 +40,7 @@ use tokio::task::{JoinHandle, spawn_blocking}; use tokio::time; use tracing::{debug, info, warn}; +use crate::metrics::KAFKA_REBALANCE_TOTAL; use crate::models::{NewPublishLock, PublishLock}; use crate::source::{ BATCH_NUM_BYTES_LIMIT, BatchBuilder, EMIT_BATCHES_TIMEOUT, Source, SourceContext, @@ -127,7 +128,7 @@ macro_rules! return_if_err { /// impl ConsumerContext for RdKafkaContext { fn pre_rebalance(&self, _consumer: &BaseConsumer, rebalance: &Rebalance) { - crate::metrics::KAFKA_REBALANCE_TOTAL.increment(1); + KAFKA_REBALANCE_TOTAL.increment(1); quickwit_common::rate_limited_info!(limit_per_min = 3, topic = self.topic, "rebalance"); if let Rebalance::Revoke(tpl) = rebalance { let partitions = collect_partitions(tpl, &self.topic); diff --git a/quickwit/quickwit-indexing/src/source/queue_sources/shared_state.rs b/quickwit/quickwit-indexing/src/source/queue_sources/shared_state.rs index b839c968043..b3551186341 100644 --- a/quickwit/quickwit-indexing/src/source/queue_sources/shared_state.rs +++ b/quickwit/quickwit-indexing/src/source/queue_sources/shared_state.rs @@ -155,7 +155,9 @@ impl QueueSharedState { info!(previous_token = shard.publish_token, "shard re-acquired"); re_acquired_shards.push(shard.shard_id().clone()); } else if is_owned && !position.is_beginning() { - bail!("Partition is owned by this indexing pipeline but is not at the beginning. This should never happen! Please, report on https://github.com/quickwit-oss/quickwit/issues.") + bail!( + "Partition is owned by this indexing pipeline but is not at the beginning. This should never happen! Please, report on https://github.com/quickwit-oss/quickwit/issues." 
+ ) } } diff --git a/quickwit/quickwit-ingest/src/codegen/ingest_service.rs b/quickwit/quickwit-ingest/src/codegen/ingest_service.rs index 435c9b6d2f9..e0faef82d76 100644 --- a/quickwit/quickwit-ingest/src/codegen/ingest_service.rs +++ b/quickwit/quickwit-ingest/src/codegen/ingest_service.rs @@ -1,53 +1,111 @@ // This file is @generated by prost-build. -#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] -#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +#[derive( + serde::Serialize, + serde::Deserialize, + utoipa::ToSchema, + Clone, + PartialEq, + Eq, + Hash, + ::prost::Message, +)] pub struct QueueExistsRequest { #[prost(string, tag = "1")] pub queue_id: ::prost::alloc::string::String, } -#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] -#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +#[derive( + serde::Serialize, + serde::Deserialize, + utoipa::ToSchema, + Clone, + PartialEq, + Eq, + Hash, + ::prost::Message, +)] pub struct CreateQueueRequest { #[prost(string, tag = "1")] pub queue_id: ::prost::alloc::string::String, } -#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] -#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +#[derive( + serde::Serialize, + serde::Deserialize, + utoipa::ToSchema, + Clone, + PartialEq, + Eq, + Hash, + ::prost::Message, +)] pub struct CreateQueueIfNotExistsRequest { #[prost(string, tag = "1")] pub queue_id: ::prost::alloc::string::String, } -#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] -#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +#[derive( + serde::Serialize, + serde::Deserialize, + utoipa::ToSchema, + Clone, + PartialEq, + Eq, + Hash, + ::prost::Message, +)] pub struct CreateQueueIfNotExistsResponse { #[prost(string, tag = "1")] pub queue_id: ::prost::alloc::string::String, #[prost(bool, tag = "2")] pub created: bool, } -#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] -#[derive(Clone, PartialEq, Eq, Hash, 
::prost::Message)] +#[derive( + serde::Serialize, + serde::Deserialize, + utoipa::ToSchema, + Clone, + PartialEq, + Eq, + Hash, + ::prost::Message, +)] pub struct DropQueueRequest { #[prost(string, tag = "1")] pub queue_id: ::prost::alloc::string::String, } -#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] -#[derive(Clone, PartialEq, ::prost::Message)] +#[derive( + serde::Serialize, serde::Deserialize, utoipa::ToSchema, Clone, PartialEq, ::prost::Message, +)] pub struct IngestRequest { #[prost(message, repeated, tag = "1")] pub doc_batches: ::prost::alloc::vec::Vec, #[prost(enumeration = "CommitType", tag = "2")] pub commit: i32, } -#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] -#[derive(Clone, Copy, PartialEq, Eq, Hash, ::prost::Message)] +#[derive( + serde::Serialize, + serde::Deserialize, + utoipa::ToSchema, + Clone, + Copy, + PartialEq, + Eq, + Hash, + ::prost::Message, +)] pub struct IngestResponse { #[prost(uint64, tag = "1")] pub num_docs_for_processing: u64, } /// Fetch messages with position strictly after `start_after`. 
-#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] -#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +#[derive( + serde::Serialize, + serde::Deserialize, + utoipa::ToSchema, + Clone, + PartialEq, + Eq, + Hash, + ::prost::Message, +)] pub struct FetchRequest { #[prost(string, tag = "1")] pub index_id: ::prost::alloc::string::String, @@ -56,16 +114,32 @@ pub struct FetchRequest { #[prost(uint64, optional, tag = "3")] pub num_bytes_limit: ::core::option::Option, } -#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] -#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +#[derive( + serde::Serialize, + serde::Deserialize, + utoipa::ToSchema, + Clone, + PartialEq, + Eq, + Hash, + ::prost::Message, +)] pub struct FetchResponse { #[prost(uint64, optional, tag = "1")] pub first_position: ::core::option::Option, #[prost(message, optional, tag = "2")] pub doc_batch: ::core::option::Option, } -#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] -#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +#[derive( + serde::Serialize, + serde::Deserialize, + utoipa::ToSchema, + Clone, + PartialEq, + Eq, + Hash, + ::prost::Message, +)] pub struct DocBatch { #[prost(string, tag = "1")] pub index_id: ::prost::alloc::string::String, @@ -88,25 +162,58 @@ pub struct DocBatch { /// In other words, truncating from a position, and fetching records starting /// earlier than this position can yield undefined result: /// the truncated records may or may not be returned. 
-#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] -#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +#[derive( + serde::Serialize, + serde::Deserialize, + utoipa::ToSchema, + Clone, + PartialEq, + Eq, + Hash, + ::prost::Message, +)] pub struct SuggestTruncateRequest { #[prost(string, tag = "1")] pub index_id: ::prost::alloc::string::String, #[prost(uint64, tag = "2")] pub up_to_position_included: u64, } -#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] -#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +#[derive( + serde::Serialize, + serde::Deserialize, + utoipa::ToSchema, + Clone, + PartialEq, + Eq, + Hash, + ::prost::Message, +)] pub struct TailRequest { #[prost(string, tag = "1")] pub index_id: ::prost::alloc::string::String, } -#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] -#[derive(Clone, Copy, PartialEq, Eq, Hash, ::prost::Message)] +#[derive( + serde::Serialize, + serde::Deserialize, + utoipa::ToSchema, + Clone, + Copy, + PartialEq, + Eq, + Hash, + ::prost::Message, +)] pub struct ListQueuesRequest {} -#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] -#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +#[derive( + serde::Serialize, + serde::Deserialize, + utoipa::ToSchema, + Clone, + PartialEq, + Eq, + Hash, + ::prost::Message, +)] pub struct ListQueuesResponse { #[prost(string, repeated, tag = "1")] pub queues: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, @@ -147,11 +254,11 @@ impl CommitType { } } } +use quickwit_common::tower::RpcName; /// BEGIN quickwit-codegen #[allow(unused_imports)] use std::str::FromStr; use tower::{Layer, Service, ServiceExt}; -use quickwit_common::tower::RpcName; impl RpcName for IngestRequest { fn rpc_name() -> &'static str { "ingest" @@ -210,8 +317,7 @@ impl IngestServiceClient { { #[cfg(any(test, feature = "testsuite"))] assert!( - std::any::TypeId::of:: < T > () != std::any::TypeId::of:: < MockIngestService - > (), + 
std::any::TypeId::of::() != std::any::TypeId::of::(), "`MockIngestService` must be wrapped in a `MockIngestServiceWrapper`: use `IngestServiceClient::from_mock(mock)` to instantiate the client" ); Self { @@ -221,9 +327,7 @@ impl IngestServiceClient { pub fn as_grpc_service( &self, max_message_size: bytesize::ByteSize, - ) -> ingest_service_grpc_server::IngestServiceGrpcServer< - IngestServiceGrpcServerAdapter, - > { + ) -> ingest_service_grpc_server::IngestServiceGrpcServer { let adapter = IngestServiceGrpcServerAdapter::new(self.clone()); ingest_service_grpc_server::IngestServiceGrpcServer::new(adapter) .accept_compressed(tonic::codec::CompressionEncoding::Gzip) @@ -239,12 +343,9 @@ impl IngestServiceClient { max_message_size: bytesize::ByteSize, compression_encoding_opt: Option, ) -> Self { - let (_, connection_keys_watcher) = tokio::sync::watch::channel( - std::collections::HashSet::from_iter([addr]), - ); - let mut client = ingest_service_grpc_client::IngestServiceGrpcClient::new( - channel, - ) + let (_, connection_keys_watcher) = + tokio::sync::watch::channel(std::collections::HashSet::from_iter([addr])); + let mut client = ingest_service_grpc_client::IngestServiceGrpcClient::new(channel) .max_decoding_message_size(max_message_size.0 as usize) .max_encoding_message_size(max_message_size.0 as usize); if let Some(compression_encoding) = compression_encoding_opt { @@ -252,10 +353,7 @@ impl IngestServiceClient { .accept_compressed(compression_encoding) .send_compressed(compression_encoding); } - let adapter = IngestServiceGrpcClientAdapter::new( - client, - connection_keys_watcher, - ); + let adapter = IngestServiceGrpcClientAdapter::new(client, connection_keys_watcher); Self::new(adapter) } pub fn from_balance_channel( @@ -264,9 +362,7 @@ impl IngestServiceClient { compression_encoding_opt: Option, ) -> IngestServiceClient { let connection_keys_watcher = balance_channel.connection_keys_watcher(); - let mut client = 
ingest_service_grpc_client::IngestServiceGrpcClient::new( - balance_channel, - ) + let mut client = ingest_service_grpc_client::IngestServiceGrpcClient::new(balance_channel) .max_decoding_message_size(max_message_size.0 as usize) .max_encoding_message_size(max_message_size.0 as usize); if let Some(compression_encoding) = compression_encoding_opt { @@ -274,10 +370,7 @@ impl IngestServiceClient { .accept_compressed(compression_encoding) .send_compressed(compression_encoding); } - let adapter = IngestServiceGrpcClientAdapter::new( - client, - connection_keys_watcher, - ); + let adapter = IngestServiceGrpcClientAdapter::new(client, connection_keys_watcher); Self::new(adapter) } pub fn from_mailbox(mailbox: quickwit_actors::Mailbox) -> Self @@ -329,23 +422,16 @@ pub mod mock_ingest_service { ) -> crate::Result { self.inner.lock().await.ingest(request).await } - async fn fetch( - &self, - request: super::FetchRequest, - ) -> crate::Result { + async fn fetch(&self, request: super::FetchRequest) -> crate::Result { self.inner.lock().await.fetch(request).await } - async fn tail( - &self, - request: super::TailRequest, - ) -> crate::Result { + async fn tail(&self, request: super::TailRequest) -> crate::Result { self.inner.lock().await.tail(request).await } } } -pub type BoxFuture = std::pin::Pin< - Box> + Send + 'static>, ->; +pub type BoxFuture = + std::pin::Pin> + Send + 'static>>; impl tower::Service for InnerIngestServiceClient { type Response = IngestResponse; type Error = crate::IngestServiceError; @@ -404,16 +490,10 @@ struct IngestServiceTowerServiceStack { IngestResponse, crate::IngestServiceError, >, - fetch_svc: quickwit_common::tower::BoxService< - FetchRequest, - FetchResponse, - crate::IngestServiceError, - >, - tail_svc: quickwit_common::tower::BoxService< - TailRequest, - FetchResponse, - crate::IngestServiceError, - >, + fetch_svc: + quickwit_common::tower::BoxService, + tail_svc: + quickwit_common::tower::BoxService, } #[async_trait::async_trait] impl 
IngestService for IngestServiceTowerServiceStack { @@ -428,31 +508,19 @@ impl IngestService for IngestServiceTowerServiceStack { } } type IngestLayer = quickwit_common::tower::BoxLayer< - quickwit_common::tower::BoxService< - IngestRequest, - IngestResponse, - crate::IngestServiceError, - >, + quickwit_common::tower::BoxService, IngestRequest, IngestResponse, crate::IngestServiceError, >; type FetchLayer = quickwit_common::tower::BoxLayer< - quickwit_common::tower::BoxService< - FetchRequest, - FetchResponse, - crate::IngestServiceError, - >, + quickwit_common::tower::BoxService, FetchRequest, FetchResponse, crate::IngestServiceError, >; type TailLayer = quickwit_common::tower::BoxLayer< - quickwit_common::tower::BoxService< - TailRequest, - FetchResponse, - crate::IngestServiceError, - >, + quickwit_common::tower::BoxService, TailRequest, FetchResponse, crate::IngestServiceError, @@ -472,7 +540,10 @@ impl IngestServiceTowerLayerStack { IngestResponse, crate::IngestServiceError, >, - > + Clone + Send + Sync + 'static, + > + Clone + + Send + + Sync + + 'static, + Clone + Send + Sync + 'static, + > + Clone + + Send + + Sync + + 'static, <, - > + Clone + Send + Sync + 'static, + > + Clone + + Send + + Sync + + 'static, + Clone + Send + Sync + 'static, + > + Clone + + Send + + Sync + + 'static, <, - > + Clone + Send + Sync + 'static, + > + Clone + + Send + + Sync + + 'static, , - >>::Service: tower::Service< - TailRequest, - Response = FetchResponse, - Error = crate::IngestServiceError, - > + Clone + Send + Sync + 'static, + >>::Service: tower::Service + + Clone + + Send + + Sync + + 'static, <, >>::Service as tower::Service>::Future: Send + 'static, { - self.ingest_layers.push(quickwit_common::tower::BoxLayer::new(layer.clone())); - self.fetch_layers.push(quickwit_common::tower::BoxLayer::new(layer.clone())); - self.tail_layers.push(quickwit_common::tower::BoxLayer::new(layer.clone())); + self.ingest_layers + 
.push(quickwit_common::tower::BoxLayer::new(layer.clone())); + self.fetch_layers + .push(quickwit_common::tower::BoxLayer::new(layer.clone())); + self.tail_layers + .push(quickwit_common::tower::BoxLayer::new(layer.clone())); self } pub fn stack_ingest_layer(mut self, layer: L) -> Self @@ -555,15 +641,21 @@ impl IngestServiceTowerLayerStack { IngestResponse, crate::IngestServiceError, >, - > + Send + Sync + 'static, + > + Send + + Sync + + 'static, L::Service: tower::Service< IngestRequest, Response = IngestResponse, Error = crate::IngestServiceError, - > + Clone + Send + Sync + 'static, + > + Clone + + Send + + Sync + + 'static, >::Future: Send + 'static, { - self.ingest_layers.push(quickwit_common::tower::BoxLayer::new(layer)); + self.ingest_layers + .push(quickwit_common::tower::BoxLayer::new(layer)); self } pub fn stack_fetch_layer(mut self, layer: L) -> Self @@ -574,15 +666,21 @@ impl IngestServiceTowerLayerStack { FetchResponse, crate::IngestServiceError, >, - > + Send + Sync + 'static, + > + Send + + Sync + + 'static, L::Service: tower::Service< FetchRequest, Response = FetchResponse, Error = crate::IngestServiceError, - > + Clone + Send + Sync + 'static, + > + Clone + + Send + + Sync + + 'static, >::Future: Send + 'static, { - self.fetch_layers.push(quickwit_common::tower::BoxLayer::new(layer)); + self.fetch_layers + .push(quickwit_common::tower::BoxLayer::new(layer)); self } pub fn stack_tail_layer(mut self, layer: L) -> Self @@ -593,15 +691,18 @@ impl IngestServiceTowerLayerStack { FetchResponse, crate::IngestServiceError, >, - > + Send + Sync + 'static, - L::Service: tower::Service< - TailRequest, - Response = FetchResponse, - Error = crate::IngestServiceError, - > + Clone + Send + Sync + 'static, + > + Send + + Sync + + 'static, + L::Service: tower::Service + + Clone + + Send + + Sync + + 'static, >::Future: Send + 'static, { - self.tail_layers.push(quickwit_common::tower::BoxLayer::new(layer)); + self.tail_layers + 
.push(quickwit_common::tower::BoxLayer::new(layer)); self } pub fn build(self, instance: T) -> IngestServiceClient @@ -641,17 +742,13 @@ impl IngestServiceTowerLayerStack { let inner_client = client.inner; self.build_from_inner_client(inner_client) } - pub fn build_from_mailbox( - self, - mailbox: quickwit_actors::Mailbox, - ) -> IngestServiceClient + pub fn build_from_mailbox(self, mailbox: quickwit_actors::Mailbox) -> IngestServiceClient where A: quickwit_actors::Actor + std::fmt::Debug + Send + 'static, IngestServiceMailbox: IngestService, { - let inner_client = InnerIngestServiceClient( - std::sync::Arc::new(IngestServiceMailbox::new(mailbox)), - ); + let inner_client = + InnerIngestServiceClient(std::sync::Arc::new(IngestServiceMailbox::new(mailbox))); self.build_from_inner_client(inner_client) } #[cfg(any(test, feature = "testsuite"))] @@ -664,30 +761,18 @@ impl IngestServiceTowerLayerStack { self, inner_client: InnerIngestServiceClient, ) -> IngestServiceClient { - let ingest_svc = self - .ingest_layers - .into_iter() - .rev() - .fold( - quickwit_common::tower::BoxService::new(inner_client.clone()), - |svc, layer| layer.layer(svc), - ); - let fetch_svc = self - .fetch_layers - .into_iter() - .rev() - .fold( - quickwit_common::tower::BoxService::new(inner_client.clone()), - |svc, layer| layer.layer(svc), - ); - let tail_svc = self - .tail_layers - .into_iter() - .rev() - .fold( - quickwit_common::tower::BoxService::new(inner_client.clone()), - |svc, layer| layer.layer(svc), - ); + let ingest_svc = self.ingest_layers.into_iter().rev().fold( + quickwit_common::tower::BoxService::new(inner_client.clone()), + |svc, layer| layer.layer(svc), + ); + let fetch_svc = self.fetch_layers.into_iter().rev().fold( + quickwit_common::tower::BoxService::new(inner_client.clone()), + |svc, layer| layer.layer(svc), + ); + let tail_svc = self.tail_layers.into_iter().rev().fold( + quickwit_common::tower::BoxService::new(inner_client.clone()), + |svc, layer| layer.layer(svc), + ); 
let tower_svc_stack = IngestServiceTowerServiceStack { inner: inner_client, ingest_svc, @@ -736,7 +821,8 @@ impl Clone for IngestServiceMailbox { impl tower::Service for IngestServiceMailbox where A: quickwit_actors::Actor - + quickwit_actors::DeferableReplyHandler> + Send + + quickwit_actors::DeferableReplyHandler> + + Send + 'static, M: std::fmt::Debug + Send + 'static, T: Send + 'static, @@ -758,7 +844,10 @@ where fn call(&mut self, message: M) -> Self::Future { let mailbox = self.inner.clone(); let fut = async move { - mailbox.ask_for_res(message).await.map_err(|error| error.into()) + mailbox + .ask_for_res(message) + .await + .map_err(|error| error.into()) }; Box::pin(fut) } @@ -767,21 +856,17 @@ where impl IngestService for IngestServiceMailbox where A: quickwit_actors::Actor + std::fmt::Debug, - IngestServiceMailbox< - A, - >: tower::Service< + IngestServiceMailbox: tower::Service< IngestRequest, Response = IngestResponse, Error = crate::IngestServiceError, Future = BoxFuture, - > - + tower::Service< + > + tower::Service< FetchRequest, Response = FetchResponse, Error = crate::IngestServiceError, Future = BoxFuture, - > - + tower::Service< + > + tower::Service< TailRequest, Response = FetchResponse, Error = crate::IngestServiceError, @@ -802,9 +887,8 @@ where pub struct IngestServiceGrpcClientAdapter { inner: T, #[allow(dead_code)] - connection_addrs_rx: tokio::sync::watch::Receiver< - std::collections::HashSet, - >, + connection_addrs_rx: + tokio::sync::watch::Receiver>, } impl IngestServiceGrpcClientAdapter { pub fn new( @@ -821,15 +905,16 @@ impl IngestServiceGrpcClientAdapter { } #[async_trait::async_trait] impl IngestService -for IngestServiceGrpcClientAdapter< - ingest_service_grpc_client::IngestServiceGrpcClient, -> + for IngestServiceGrpcClientAdapter> where - T: tonic::client::GrpcService + std::fmt::Debug + Clone + Send - + Sync + 'static, + T: tonic::client::GrpcService + + std::fmt::Debug + + Clone + + Send + + Sync + + 'static, T::ResponseBody: 
tonic::codegen::Body + Send + 'static, - ::Error: Into - + Send, + ::Error: Into + Send, T::Future: Send, { async fn ingest(&self, request: IngestRequest) -> crate::Result { @@ -838,10 +923,9 @@ where .ingest(request) .await .map(|response| response.into_inner()) - .map_err(|status| crate::error::grpc_status_to_service_error( - status, - IngestRequest::rpc_name(), - )) + .map_err(|status| { + crate::error::grpc_status_to_service_error(status, IngestRequest::rpc_name()) + }) } async fn fetch(&self, request: FetchRequest) -> crate::Result { self.inner @@ -849,10 +933,9 @@ where .fetch(request) .await .map(|response| response.into_inner()) - .map_err(|status| crate::error::grpc_status_to_service_error( - status, - FetchRequest::rpc_name(), - )) + .map_err(|status| { + crate::error::grpc_status_to_service_error(status, FetchRequest::rpc_name()) + }) } async fn tail(&self, request: TailRequest) -> crate::Result { self.inner @@ -860,10 +943,9 @@ where .tail(request) .await .map(|response| response.into_inner()) - .map_err(|status| crate::error::grpc_status_to_service_error( - status, - TailRequest::rpc_name(), - )) + .map_err(|status| { + crate::error::grpc_status_to_service_error(status, TailRequest::rpc_name()) + }) } } #[derive(Debug)] @@ -923,10 +1005,10 @@ pub mod ingest_service_grpc_client { dead_code, missing_docs, clippy::wildcard_imports, - clippy::let_unit_value, + clippy::let_unit_value )] - use tonic::codegen::*; use tonic::codegen::http::Uri; + use tonic::codegen::*; #[derive(Debug, Clone)] pub struct IngestServiceGrpcClient { inner: tonic::client::Grpc, @@ -965,14 +1047,13 @@ pub mod ingest_service_grpc_client { F: tonic::service::Interceptor, T::ResponseBody: Default, T: tonic::codegen::Service< - http::Request, - Response = http::Response< - >::ResponseBody, + http::Request, + Response = http::Response< + >::ResponseBody, + >, >, - >, - , - >>::Error: Into + std::marker::Send + std::marker::Sync, + >>::Error: + Into + std::marker::Send + 
std::marker::Sync, { IngestServiceGrpcClient::new(InterceptedService::new(inner, interceptor)) } @@ -1019,18 +1100,11 @@ pub mod ingest_service_grpc_client { &mut self, request: impl tonic::IntoRequest, ) -> std::result::Result, tonic::Status> { - self.inner - .ready() - .await - .map_err(|e| { - tonic::Status::unknown( - format!("Service was not ready: {}", e.into()), - ) - })?; + self.inner.ready().await.map_err(|e| { + tonic::Status::unknown(format!("Service was not ready: {}", e.into())) + })?; let codec = tonic_prost::ProstCodec::default(); - let path = http::uri::PathAndQuery::from_static( - "/ingest_service.IngestService/Ingest", - ); + let path = http::uri::PathAndQuery::from_static("/ingest_service.IngestService/Ingest"); let mut req = request.into_request(); req.extensions_mut() .insert(GrpcMethod::new("ingest_service.IngestService", "Ingest")); @@ -1050,18 +1124,11 @@ pub mod ingest_service_grpc_client { &mut self, request: impl tonic::IntoRequest, ) -> std::result::Result, tonic::Status> { - self.inner - .ready() - .await - .map_err(|e| { - tonic::Status::unknown( - format!("Service was not ready: {}", e.into()), - ) - })?; + self.inner.ready().await.map_err(|e| { + tonic::Status::unknown(format!("Service was not ready: {}", e.into())) + })?; let codec = tonic_prost::ProstCodec::default(); - let path = http::uri::PathAndQuery::from_static( - "/ingest_service.IngestService/Fetch", - ); + let path = http::uri::PathAndQuery::from_static("/ingest_service.IngestService/Fetch"); let mut req = request.into_request(); req.extensions_mut() .insert(GrpcMethod::new("ingest_service.IngestService", "Fetch")); @@ -1076,18 +1143,11 @@ pub mod ingest_service_grpc_client { &mut self, request: impl tonic::IntoRequest, ) -> std::result::Result, tonic::Status> { - self.inner - .ready() - .await - .map_err(|e| { - tonic::Status::unknown( - format!("Service was not ready: {}", e.into()), - ) - })?; + self.inner.ready().await.map_err(|e| { + 
tonic::Status::unknown(format!("Service was not ready: {}", e.into())) + })?; let codec = tonic_prost::ProstCodec::default(); - let path = http::uri::PathAndQuery::from_static( - "/ingest_service.IngestService/Tail", - ); + let path = http::uri::PathAndQuery::from_static("/ingest_service.IngestService/Tail"); let mut req = request.into_request(); req.extensions_mut() .insert(GrpcMethod::new("ingest_service.IngestService", "Tail")); @@ -1102,7 +1162,7 @@ pub mod ingest_service_grpc_server { dead_code, missing_docs, clippy::wildcard_imports, - clippy::let_unit_value, + clippy::let_unit_value )] use tonic::codegen::*; /// Generated trait containing gRPC methods that should be implemented for use with IngestServiceGrpcServer. @@ -1165,10 +1225,7 @@ pub mod ingest_service_grpc_server { max_encoding_message_size: None, } } - pub fn with_interceptor( - inner: T, - interceptor: F, - ) -> InterceptedService + pub fn with_interceptor(inner: T, interceptor: F) -> InterceptedService where F: tonic::service::Interceptor, { @@ -1223,15 +1280,9 @@ pub mod ingest_service_grpc_server { "/ingest_service.IngestService/Ingest" => { #[allow(non_camel_case_types)] struct IngestSvc(pub Arc); - impl< - T: IngestServiceGrpc, - > tonic::server::UnaryService - for IngestSvc { + impl tonic::server::UnaryService for IngestSvc { type Response = super::IngestResponse; - type Future = BoxFuture< - tonic::Response, - tonic::Status, - >; + type Future = BoxFuture, tonic::Status>; fn call( &mut self, request: tonic::Request, @@ -1268,14 +1319,9 @@ pub mod ingest_service_grpc_server { "/ingest_service.IngestService/Fetch" => { #[allow(non_camel_case_types)] struct FetchSvc(pub Arc); - impl< - T: IngestServiceGrpc, - > tonic::server::UnaryService for FetchSvc { + impl tonic::server::UnaryService for FetchSvc { type Response = super::FetchResponse; - type Future = BoxFuture< - tonic::Response, - tonic::Status, - >; + type Future = BoxFuture, tonic::Status>; fn call( &mut self, request: tonic::Request, 
@@ -1312,14 +1358,9 @@ pub mod ingest_service_grpc_server { "/ingest_service.IngestService/Tail" => { #[allow(non_camel_case_types)] struct TailSvc(pub Arc); - impl< - T: IngestServiceGrpc, - > tonic::server::UnaryService for TailSvc { + impl tonic::server::UnaryService for TailSvc { type Response = super::FetchResponse; - type Future = BoxFuture< - tonic::Response, - tonic::Status, - >; + type Future = BoxFuture, tonic::Status>; fn call( &mut self, request: tonic::Request, @@ -1353,25 +1394,19 @@ pub mod ingest_service_grpc_server { }; Box::pin(fut) } - _ => { - Box::pin(async move { - let mut response = http::Response::new( - tonic::body::Body::default(), - ); - let headers = response.headers_mut(); - headers - .insert( - tonic::Status::GRPC_STATUS, - (tonic::Code::Unimplemented as i32).into(), - ); - headers - .insert( - http::header::CONTENT_TYPE, - tonic::metadata::GRPC_CONTENT_TYPE, - ); - Ok(response) - }) - } + _ => Box::pin(async move { + let mut response = http::Response::new(tonic::body::Body::default()); + let headers = response.headers_mut(); + headers.insert( + tonic::Status::GRPC_STATUS, + (tonic::Code::Unimplemented as i32).into(), + ); + headers.insert( + http::header::CONTENT_TYPE, + tonic::metadata::GRPC_CONTENT_TYPE, + ); + Ok(response) + }), } } } diff --git a/quickwit/quickwit-ingest/src/ingest_api_service.rs b/quickwit/quickwit-ingest/src/ingest_api_service.rs index 8dc08487459..c7d2d67d31b 100644 --- a/quickwit/quickwit-ingest/src/ingest_api_service.rs +++ b/quickwit/quickwit-ingest/src/ingest_api_service.rs @@ -27,6 +27,7 @@ use quickwit_proto::ingest::RateLimitingCause; use tracing::{error, info}; use ulid::Ulid; +use crate::metrics::{DOCS_BYTES_TOTAL, DOCS_TOTAL}; use crate::notifications::Notifications; use crate::{ CommitType, CreateQueueIfNotExistsRequest, CreateQueueIfNotExistsResponse, CreateQueueRequest, @@ -202,12 +203,11 @@ impl IngestApiService { num_docs += batch_num_docs; counter!( - parent: &crate::metrics::DOCS_BYTES_TOTAL, + 
parent: DOCS_BYTES_TOTAL, "validity" => "valid", ) .increment(batch_num_bytes as u64); - counter!(parent: &crate::metrics::DOCS_TOTAL, "validity" => "valid") - .increment(batch_num_docs as u64); + counter!(parent: DOCS_TOTAL, "validity" => "valid").increment(batch_num_docs as u64); } // TODO we could fsync here and disable autosync to have better i/o perfs. Ok(( diff --git a/quickwit/quickwit-ingest/src/ingest_v2/broadcast/local_shards.rs b/quickwit/quickwit-ingest/src/ingest_v2/broadcast/local_shards.rs index 6306399121a..c16689a9b41 100644 --- a/quickwit/quickwit-ingest/src/ingest_v2/broadcast/local_shards.rs +++ b/quickwit/quickwit-ingest/src/ingest_v2/broadcast/local_shards.rs @@ -30,6 +30,9 @@ use tracing::{debug, warn}; use super::{BROADCAST_INTERVAL_PERIOD, make_key, parse_key}; use crate::RateMibPerSec; +use crate::ingest_v2::metrics::{ + CLOSED_SHARDS, OPEN_SHARDS, SHARD_LT_THROUGHPUT_MIB, SHARD_ST_THROUGHPUT_MIB, +}; use crate::ingest_v2::state::WeakIngesterState; const ONE_MIB: ByteSize = ByteSize::mib(1); @@ -194,10 +197,8 @@ impl ShardThroughputTimeSeriesMap { .average() .as_u64() .div_ceil(ONE_MIB.as_u64()); - crate::ingest_v2::metrics::SHARD_ST_THROUGHPUT_MIB - .record(short_term_ingestion_rate_mib_per_sec_u64 as f64); - crate::ingest_v2::metrics::SHARD_LT_THROUGHPUT_MIB - .record(long_term_ingestion_rate_mib_per_sec_u64 as f64); + SHARD_ST_THROUGHPUT_MIB.record(short_term_ingestion_rate_mib_per_sec_u64 as f64); + SHARD_LT_THROUGHPUT_MIB.record(long_term_ingestion_rate_mib_per_sec_u64 as f64); let short_term_ingestion_rate = RateMibPerSec(short_term_ingestion_rate_mib_per_sec_u64 as u16); @@ -297,8 +298,8 @@ impl BroadcastLocalShardsTask { } } } - crate::ingest_v2::metrics::OPEN_SHARDS.set(num_open_shards as f64); - crate::ingest_v2::metrics::CLOSED_SHARDS.set(num_closed_shards as f64); + OPEN_SHARDS.set(num_open_shards as f64); + CLOSED_SHARDS.set(num_closed_shards as f64); let snapshot = LocalShardsSnapshot { per_source_shard_infos, diff --git 
a/quickwit/quickwit-ingest/src/ingest_v2/ingester.rs b/quickwit/quickwit-ingest/src/ingest_v2/ingester.rs index ef4a4b1a6ad..480e4dcdd74 100644 --- a/quickwit/quickwit-ingest/src/ingest_v2/ingester.rs +++ b/quickwit/quickwit-ingest/src/ingest_v2/ingester.rs @@ -62,8 +62,9 @@ use super::replication::{ }; use super::state::{IngesterState, InnerIngesterState, WeakIngesterState}; use crate::ingest_v2::doc_mapper::get_or_try_build_doc_mapper; -use crate::ingest_v2::metrics::report_wal_usage; +use crate::ingest_v2::metrics::{RESET_SHARDS_OPERATIONS_TOTAL, report_wal_usage}; use crate::ingest_v2::models::IngesterShardType; +use crate::metrics::{DOCS_BYTES_TOTAL, DOCS_TOTAL}; use crate::mrecordlog_async::MultiRecordLogAsync; use crate::{FollowerId, estimate_size, with_lock_metrics}; @@ -332,7 +333,7 @@ impl Ingester { now.elapsed().pretty_display() ); counter!( - parent: &crate::ingest_v2::metrics::RESET_SHARDS_OPERATIONS_TOTAL, + parent: RESET_SHARDS_OPERATIONS_TOTAL, "status" => "success", ) .increment(1); @@ -344,7 +345,7 @@ impl Ingester { warn!("advise reset shards request failed: {error}"); counter!( - parent: &crate::ingest_v2::metrics::RESET_SHARDS_OPERATIONS_TOTAL, + parent: RESET_SHARDS_OPERATIONS_TOTAL, "status" => "error", ) .increment(1); @@ -353,7 +354,7 @@ impl Ingester { warn!("advise reset shards request timed out"); counter!( - parent: &crate::ingest_v2::metrics::RESET_SHARDS_OPERATIONS_TOTAL, + parent: RESET_SHARDS_OPERATIONS_TOTAL, "status" => "timeout", ) .increment(1); @@ -570,12 +571,12 @@ impl Ingester { if valid_doc_batch.is_empty() { counter!( - parent: &crate::metrics::DOCS_TOTAL, + parent: DOCS_TOTAL, "validity" => "invalid", ) .increment(parse_failures.len() as u64); counter!( - parent: &crate::metrics::DOCS_BYTES_TOTAL, + parent: DOCS_BYTES_TOTAL, "validity" => "invalid", ) .increment(original_batch_num_bytes); @@ -593,23 +594,23 @@ impl Ingester { }; counter!( - parent: &crate::metrics::DOCS_TOTAL, + parent: DOCS_TOTAL, "validity" => "valid", 
) .increment(valid_doc_batch.num_docs() as u64); counter!( - parent: &crate::metrics::DOCS_BYTES_TOTAL, + parent: DOCS_BYTES_TOTAL, "validity" => "valid", ) .increment(valid_doc_batch.num_bytes() as u64); if !parse_failures.is_empty() { counter!( - parent: &crate::metrics::DOCS_TOTAL, + parent: DOCS_TOTAL, "validity" => "invalid", ) .increment(parse_failures.len() as u64); counter!( - parent: &crate::metrics::DOCS_BYTES_TOTAL, + parent: DOCS_BYTES_TOTAL, "validity" => "invalid", ) .increment(original_batch_num_bytes - valid_doc_batch.num_bytes() as u64); diff --git a/quickwit/quickwit-ingest/src/ingest_v2/metrics.rs b/quickwit/quickwit-ingest/src/ingest_v2/metrics.rs index 50b8504eb81..6e7bf351722 100644 --- a/quickwit/quickwit-ingest/src/ingest_v2/metrics.rs +++ b/quickwit/quickwit-ingest/src/ingest_v2/metrics.rs @@ -16,7 +16,7 @@ use std::sync::LazyLock; use mrecordlog::ResourceUsage; use quickwit_common::metrics::{exponential_buckets, linear_buckets}; -use quickwit_metrics::{Counter, Gauge, Histogram, Labels, counter, gauge, histogram}; +use quickwit_metrics::{Counter, Gauge, Histogram, counter, gauge, histogram}; static INGEST_RESULT_TOTAL: LazyLock = LazyLock::new(|| { counter!( @@ -79,8 +79,6 @@ pub(super) static INGEST_ATTEMPTS: LazyLock = LazyLock::new(|| { ) }); -pub(super) const AZ_ROUTING_LABELS: Labels<1> = Labels::new(["az_routing"]); - pub(super) static RESET_SHARDS_OPERATIONS_TOTAL: LazyLock = LazyLock::new(|| { counter!( name: "reset_shards_operations_total", @@ -139,8 +137,6 @@ pub(super) static WAL_ACQUIRE_LOCK_REQUEST_DURATION_SECS: LazyLock = ) }); -pub(super) const WAL_LOCK_METRIC_LABELS: Labels<2> = Labels::new(["operation", "type"]); - pub(super) static WAL_DISK_USED_BYTES: LazyLock = LazyLock::new(|| { gauge!( name: "wal_disk_used_bytes", diff --git a/quickwit/quickwit-ingest/src/ingest_v2/replication.rs b/quickwit/quickwit-ingest/src/ingest_v2/replication.rs index adf562f9bc1..5764120459a 100644 --- 
a/quickwit/quickwit-ingest/src/ingest_v2/replication.rs +++ b/quickwit/quickwit-ingest/src/ingest_v2/replication.rs @@ -38,6 +38,7 @@ use super::models::IngesterShard; use super::mrecordlog_utils::check_enough_capacity; use super::state::IngesterState; use crate::ingest_v2::mrecordlog_utils::{AppendDocBatchError, append_non_empty_doc_batch}; +use crate::metrics::{REPLICATED_NUM_BYTES_TOTAL, REPLICATED_NUM_DOCS_TOTAL}; use crate::{estimate_size, with_lock_metrics}; pub(super) const SYN_REPLICATION_STREAM_CAPACITY: usize = 5; @@ -667,8 +668,8 @@ impl ReplicationTask { .expect("replica shard should be initialized") .set_replication_position_inclusive(current_position_inclusive.clone(), now); - crate::metrics::REPLICATED_NUM_BYTES_TOTAL.increment(batch_num_bytes); - crate::metrics::REPLICATED_NUM_DOCS_TOTAL.increment(batch_num_docs); + REPLICATED_NUM_BYTES_TOTAL.increment(batch_num_bytes); + REPLICATED_NUM_DOCS_TOTAL.increment(batch_num_docs); let replicate_success = ReplicateSuccess { subrequest_id: subrequest.subrequest_id, diff --git a/quickwit/quickwit-ingest/src/ingest_v2/router.rs b/quickwit/quickwit-ingest/src/ingest_v2/router.rs index fb03c76609d..93bc4b8a1eb 100644 --- a/quickwit/quickwit-ingest/src/ingest_v2/router.rs +++ b/quickwit/quickwit-ingest/src/ingest_v2/router.rs @@ -49,6 +49,7 @@ use super::routing_table::RoutingTable; use super::workbench::IngestWorkbench; use super::{IngesterPool, metrics, pending_subrequests}; use crate::get_ingest_router_buffer_size; +use crate::ingest_v2::metrics::INGEST_ATTEMPTS; /// Duration after which ingest requests time out with [`IngestV2Error::Timeout`]. 
fn ingest_request_timeout() -> Duration { @@ -369,10 +370,9 @@ impl IngestRouter { let az_locality = state_guard .routing_table .classify_az_locality(&ingester_node.node_id, &self.ingester_pool); - let labels = crate::ingest_v2::metrics::AZ_ROUTING_LABELS.with_values([az_locality]); counter!( - parent: &crate::ingest_v2::metrics::INGEST_ATTEMPTS, - labels: &labels, + parent: INGEST_ATTEMPTS, + "az_routing" => az_locality, ) .increment(1); let persist_subrequest = PersistSubrequest { diff --git a/quickwit/quickwit-ingest/src/lib.rs b/quickwit/quickwit-ingest/src/lib.rs index 07e96f75731..e437fdb2704 100644 --- a/quickwit/quickwit-ingest/src/lib.rs +++ b/quickwit/quickwit-ingest/src/lib.rs @@ -109,11 +109,10 @@ pub async fn start_ingest_api_service( macro_rules! with_lock_metrics { ($future:expr, $operation:expr, $kind:expr) => { { - let labels = - $crate::ingest_v2::metrics::WAL_LOCK_METRIC_LABELS.with_values([$operation, $kind]); quickwit_metrics::gauge!( - parent: &$crate::ingest_v2::metrics::WAL_ACQUIRE_LOCK_REQUESTS_IN_FLIGHT, - labels: &labels, + parent: $crate::ingest_v2::metrics::WAL_ACQUIRE_LOCK_REQUESTS_IN_FLIGHT, + "operation" => $operation, + "type" => $kind, ) .increment(1.0); @@ -128,13 +127,15 @@ macro_rules! 
with_lock_metrics { ); } quickwit_metrics::gauge!( - parent: &$crate::ingest_v2::metrics::WAL_ACQUIRE_LOCK_REQUESTS_IN_FLIGHT, - labels: &labels, + parent: $crate::ingest_v2::metrics::WAL_ACQUIRE_LOCK_REQUESTS_IN_FLIGHT, + "operation" => $operation, + "type" => $kind, ) .decrement(1.0); quickwit_metrics::histogram!( - parent: &$crate::ingest_v2::metrics::WAL_ACQUIRE_LOCK_REQUEST_DURATION_SECS, - labels: &labels, + parent: $crate::ingest_v2::metrics::WAL_ACQUIRE_LOCK_REQUEST_DURATION_SECS, + "operation" => $operation, + "type" => $kind, ) .record(elapsed.as_secs_f64()); diff --git a/quickwit/quickwit-jaeger/src/lib.rs b/quickwit/quickwit-jaeger/src/lib.rs index caac5c4c2e9..8a89b6984a8 100644 --- a/quickwit/quickwit-jaeger/src/lib.rs +++ b/quickwit/quickwit-jaeger/src/lib.rs @@ -22,7 +22,7 @@ use itertools::{Either, Itertools}; use prost::Message; use prost_types::{Duration as WellKnownDuration, Timestamp as WellKnownTimestamp}; use quickwit_config::JaegerConfig; -use quickwit_metrics::{counter, histogram}; +use quickwit_metrics::{counter, histogram, label_values}; use quickwit_opentelemetry::otlp::{ Event as QwEvent, Link as QwLink, OTEL_TRACES_INDEX_ID, Span as QwSpan, SpanFingerprint, SpanId, SpanKind as QwSpanKind, SpanStatus as QwSpanStatus, TraceId, @@ -52,6 +52,12 @@ use tonic::Status; use tracing::field::Empty; use tracing::{Span as RuntimeSpan, debug, error, instrument, warn}; +use crate::metrics::{ + FETCHED_SPANS_TOTAL, FETCHED_TRACES_TOTAL, OPERATION_INDEX_ERROR_LABELS, + OPERATION_INDEX_LABELS, REQUEST_DURATION_SECONDS, REQUEST_ERRORS_TOTAL, + TRANSFERRED_BYTES_TOTAL, +}; + mod metrics; mod v1; mod v2; @@ -414,20 +420,22 @@ impl JaegerService { current_span.record("num_spans", num_spans_total); current_span.record("num_bytes", num_bytes_total); - let labels = crate::metrics::OPERATION_INDEX_LABELS - .with_values([operation_name, OTEL_TRACES_INDEX_ID]); - counter!(parent: &crate::metrics::FETCHED_TRACES_TOTAL, labels: &labels) - .increment(num_traces); + 
counter!( + parent: FETCHED_TRACES_TOTAL, + labels: label_values!( + OPERATION_INDEX_LABELS, + [operation_name, OTEL_TRACES_INDEX_ID] + ), + ) + .increment(num_traces); let elapsed = request_start.elapsed().as_secs_f64(); - let duration_labels = crate::metrics::OPERATION_INDEX_ERROR_LABELS.with_values([ - operation_name, - OTEL_TRACES_INDEX_ID, - "false", - ]); histogram!( - parent: &crate::metrics::REQUEST_DURATION_SECONDS, - labels: &duration_labels, + parent: REQUEST_DURATION_SECONDS, + labels: label_values!( + OPERATION_INDEX_ERROR_LABELS, + [operation_name, OTEL_TRACES_INDEX_ID, "false",] + ), ) .record(elapsed); }); @@ -436,30 +444,33 @@ impl JaegerService { } pub(crate) fn record_error(operation_name: &'static str, request_start: Instant) { - let labels = - crate::metrics::OPERATION_INDEX_LABELS.with_values([operation_name, OTEL_TRACES_INDEX_ID]); - counter!(parent: &crate::metrics::REQUEST_ERRORS_TOTAL, labels: &labels).increment(1); + counter!( + parent: REQUEST_ERRORS_TOTAL, + labels: label_values!( + OPERATION_INDEX_LABELS, + [operation_name, OTEL_TRACES_INDEX_ID] + ), + ) + .increment(1); let elapsed = request_start.elapsed().as_secs_f64(); - let duration_labels = crate::metrics::OPERATION_INDEX_ERROR_LABELS.with_values([ - operation_name, - OTEL_TRACES_INDEX_ID, - "true", - ]); histogram!( - parent: &crate::metrics::REQUEST_DURATION_SECONDS, - labels: &duration_labels, + parent: REQUEST_DURATION_SECONDS, + labels: label_values!( + OPERATION_INDEX_ERROR_LABELS, + [operation_name, OTEL_TRACES_INDEX_ID, "true",] + ), ) .record(elapsed); } pub(crate) fn record_send(operation_name: &'static str, num_spans: usize, num_bytes: usize) { - let labels = - crate::metrics::OPERATION_INDEX_LABELS.with_values([operation_name, OTEL_TRACES_INDEX_ID]); - counter!(parent: &crate::metrics::FETCHED_SPANS_TOTAL, labels: &labels) - .increment(num_spans as u64); - counter!(parent: &crate::metrics::TRANSFERRED_BYTES_TOTAL, labels: &labels) - .increment(num_bytes as u64); + let 
labels = label_values!( + OPERATION_INDEX_LABELS, + [operation_name, OTEL_TRACES_INDEX_ID] + ); + counter!(parent: FETCHED_SPANS_TOTAL, labels: labels).increment(num_spans as u64); + counter!(parent: TRANSFERRED_BYTES_TOTAL, labels: labels).increment(num_bytes as u64); } #[allow(deprecated)] diff --git a/quickwit/quickwit-jaeger/src/v1.rs b/quickwit/quickwit-jaeger/src/v1.rs index dff0b08e26b..ab9f0379db1 100644 --- a/quickwit/quickwit-jaeger/src/v1.rs +++ b/quickwit/quickwit-jaeger/src/v1.rs @@ -17,7 +17,7 @@ use std::time::Instant; use async_trait::async_trait; -use quickwit_metrics::{counter, histogram}; +use quickwit_metrics::{counter, histogram, label_values}; use quickwit_opentelemetry::otlp::{ OTEL_TRACES_INDEX_ID, extract_otel_traces_index_id_patterns_from_metadata, }; @@ -28,6 +28,10 @@ use quickwit_proto::jaeger::storage::v1::{ }; use tonic::{Request, Response, Status}; +use crate::metrics::{ + OPERATION_INDEX_ERROR_LABELS, OPERATION_INDEX_LABELS, REQUEST_DURATION_SECONDS, + REQUEST_ERRORS_TOTAL, REQUESTS_TOTAL, +}; use crate::{JaegerService, SpanStream}; macro_rules! metrics { @@ -35,10 +39,10 @@ macro_rules! metrics { let start = std::time::Instant::now(); let operation = stringify!($operation); let index = $index; - let labels = crate::metrics::OPERATION_INDEX_LABELS.with_values([operation, index]); + let labels = label_values!(OPERATION_INDEX_LABELS, [operation, index]); counter!( - parent: &crate::metrics::REQUESTS_TOTAL, - labels: &labels, + parent: REQUESTS_TOTAL, + labels: labels, ) .increment(1); let (res, is_error) = match $expr { @@ -47,19 +51,17 @@ macro_rules! 
metrics { }, err @ Err(_) => { counter!( - parent: &crate::metrics::REQUEST_ERRORS_TOTAL, - labels: &labels, + parent: REQUEST_ERRORS_TOTAL, + labels: labels, ) .increment(1); (err, "true") }, }; let elapsed = start.elapsed().as_secs_f64(); - let duration_labels = - crate::metrics::OPERATION_INDEX_ERROR_LABELS.with_values([operation, index, is_error]); histogram!( - parent: &crate::metrics::REQUEST_DURATION_SECONDS, - labels: &duration_labels, + parent: REQUEST_DURATION_SECONDS, + labels: label_values!(OPERATION_INDEX_ERROR_LABELS, [operation, index, is_error]), ) .record(elapsed); diff --git a/quickwit/quickwit-jaeger/src/v2.rs b/quickwit/quickwit-jaeger/src/v2.rs index 4e8f4cf8532..bee978dbd30 100644 --- a/quickwit/quickwit-jaeger/src/v2.rs +++ b/quickwit/quickwit-jaeger/src/v2.rs @@ -19,7 +19,7 @@ use std::time::Instant; use async_trait::async_trait; use prost_types::Timestamp as WellKnownTimestamp; -use quickwit_metrics::{counter, histogram}; +use quickwit_metrics::{counter, histogram, label_values}; use quickwit_opentelemetry::otlp::{ OTEL_TRACES_INDEX_ID, Span as QwSpan, TraceId, extract_otel_traces_index_id_patterns_from_metadata, @@ -51,6 +51,10 @@ use tonic::{Request, Response, Status}; use tracing::field::Empty; use tracing::{Span as RuntimeSpan, debug, error, instrument}; +use crate::metrics::{ + FETCHED_TRACES_TOTAL, OPERATION_INDEX_ERROR_LABELS, OPERATION_INDEX_LABELS, + REQUEST_DURATION_SECONDS, REQUEST_ERRORS_TOTAL, REQUESTS_TOTAL, +}; use crate::{ JaegerService, TimeIntervalSecs, TracesDataStream, get_operations_impl, get_services_impl, json_deserialize, record_error, record_send, to_duration_millis, @@ -61,10 +65,10 @@ macro_rules! 
metrics { let start = std::time::Instant::now(); let operation = stringify!($operation); let index = $index; - let labels = crate::metrics::OPERATION_INDEX_LABELS.with_values([operation, index]); + let labels = label_values!(OPERATION_INDEX_LABELS, [operation, index]); counter!( - parent: &crate::metrics::REQUESTS_TOTAL, - labels: &labels, + parent: REQUESTS_TOTAL, + labels: labels, ) .increment(1); let (res, is_error) = match $expr { @@ -73,19 +77,17 @@ macro_rules! metrics { }, err @ Err(_) => { counter!( - parent: &crate::metrics::REQUEST_ERRORS_TOTAL, - labels: &labels, + parent: REQUEST_ERRORS_TOTAL, + labels: labels, ) .increment(1); (err, "true") }, }; let elapsed = start.elapsed().as_secs_f64(); - let duration_labels = - crate::metrics::OPERATION_INDEX_ERROR_LABELS.with_values([operation, index, is_error]); histogram!( - parent: &crate::metrics::REQUEST_DURATION_SECONDS, - labels: &duration_labels, + parent: REQUEST_DURATION_SECONDS, + labels: label_values!(OPERATION_INDEX_ERROR_LABELS, [operation, index, is_error]), ) .record(elapsed); @@ -441,20 +443,22 @@ async fn stream_otel_spans_impl( record_send(operation_name, num_spans, num_bytes); - let labels = - crate::metrics::OPERATION_INDEX_LABELS.with_values([operation_name, OTEL_TRACES_INDEX_ID]); - counter!(parent: &crate::metrics::FETCHED_TRACES_TOTAL, labels: &labels) - .increment(trace_ids.len() as u64); + counter!( + parent: FETCHED_TRACES_TOTAL, + labels: label_values!( + OPERATION_INDEX_LABELS, + [operation_name, OTEL_TRACES_INDEX_ID] + ), + ) + .increment(trace_ids.len() as u64); let elapsed = request_start.elapsed().as_secs_f64(); - let duration_labels = crate::metrics::OPERATION_INDEX_ERROR_LABELS.with_values([ - operation_name, - OTEL_TRACES_INDEX_ID, - "false", - ]); histogram!( - parent: &crate::metrics::REQUEST_DURATION_SECONDS, - labels: &duration_labels, + parent: REQUEST_DURATION_SECONDS, + labels: label_values!( + OPERATION_INDEX_ERROR_LABELS, + [operation_name, OTEL_TRACES_INDEX_ID, 
"false",] + ), ) .record(elapsed); diff --git a/quickwit/quickwit-janitor/src/actors/delete_task_planner.rs b/quickwit/quickwit-janitor/src/actors/delete_task_planner.rs index b7b17845c57..bf42e949c36 100644 --- a/quickwit/quickwit-janitor/src/actors/delete_task_planner.rs +++ b/quickwit/quickwit-janitor/src/actors/delete_task_planner.rs @@ -38,6 +38,8 @@ use serde::Serialize; use tantivy::Inventory; use tracing::{debug, info}; +use crate::metrics::ONGOING_NUM_DELETE_OPERATIONS_TOTAL; + const PLANNER_REFRESH_INTERVAL: Duration = Duration::from_secs(60); const NUM_STALE_SPLITS_TO_FETCH: usize = 1000; @@ -206,10 +208,9 @@ impl DeleteTaskPlanner { let index_label = quickwit_common::metrics::index_label(self.index_uid.index_id.as_str()) .to_string(); - let labels = crate::metrics::INDEX_LABELS.with_values([index_label]); gauge!( - parent: &crate::metrics::ONGOING_NUM_DELETE_OPERATIONS_TOTAL, - labels: &labels, + parent: ONGOING_NUM_DELETE_OPERATIONS_TOTAL, + "index" => index_label, ) .set(self.ongoing_delete_operations_inventory.list().len() as f64); } diff --git a/quickwit/quickwit-janitor/src/actors/garbage_collector.rs b/quickwit/quickwit-janitor/src/actors/garbage_collector.rs index 7f0df9d89f6..00661c9741d 100644 --- a/quickwit/quickwit-janitor/src/actors/garbage_collector.rs +++ b/quickwit/quickwit-janitor/src/actors/garbage_collector.rs @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::borrow::Cow; use std::collections::HashMap; use std::sync::Arc; use std::time::{Duration, Instant}; @@ -33,6 +32,8 @@ use quickwit_storage::{Storage, StorageResolver}; use serde::Serialize; use tracing::{debug, error, info}; +use crate::metrics::{GC_DELETED_BYTES, GC_DELETED_SPLITS, GC_RUNS, GC_SECONDS_TOTAL}; + const RUN_INTERVAL: Duration = Duration::from_secs(10 * 60); // 10 minutes /// Result of a GC run (tantivy or parquet). 
@@ -54,25 +55,21 @@ impl GcRunResult { } } -fn gc_metrics(split_type: &str) -> GcMetrics { - let split_type = split_type.to_string(); - let success_labels = crate::metrics::GC_RESULT_SPLIT_TYPE_LABELS - .with_values([Cow::Borrowed("success"), Cow::Owned(split_type.clone())]); - let split_type_labels = crate::metrics::GC_SPLIT_TYPE_LABELS.with_values([split_type.clone()]); - let error_labels = crate::metrics::GC_RESULT_SPLIT_TYPE_LABELS - .with_values([Cow::Borrowed("error"), Cow::Owned(split_type)]); +fn gc_metrics(split_type: &'static str) -> GcMetrics { GcMetrics { deleted_splits: counter!( - parent: &crate::metrics::GC_DELETED_SPLITS, - labels: &success_labels, - ), - deleted_bytes: counter!( - parent: &crate::metrics::GC_DELETED_BYTES, - labels: &split_type_labels, + parent: GC_DELETED_SPLITS, + "result" => "success", + "split_type" => split_type, ), failed_splits: counter!( - parent: &crate::metrics::GC_DELETED_SPLITS, - labels: &error_labels, + parent: GC_DELETED_SPLITS, + "result" => "error", + "split_type" => split_type, + ), + deleted_bytes: counter!( + parent: GC_DELETED_BYTES, + "split_type" => split_type, ), } } @@ -209,7 +206,7 @@ impl GarbageCollector { let tantivy_run_duration = tantivy_start.elapsed().as_secs(); counter!( - parent: &crate::metrics::GC_SECONDS_TOTAL, + parent: GC_SECONDS_TOTAL, "split_type" => "tantivy", ) .increment(tantivy_run_duration); @@ -218,7 +215,7 @@ impl GarbageCollector { Ok(removal_info) => { self.counters.num_successful_gc_run += 1; counter!( - parent: &crate::metrics::GC_RUNS, + parent: GC_RUNS, "result" => "success", "split_type" => "tantivy", ) @@ -242,7 +239,7 @@ impl GarbageCollector { Err(error) => { self.counters.num_failed_gc_run += 1; counter!( - parent: &crate::metrics::GC_RUNS, + parent: GC_RUNS, "result" => "error", "split_type" => "tantivy", ) @@ -270,7 +267,7 @@ impl GarbageCollector { let parquet_run_duration = parquet_start.elapsed().as_secs(); counter!( - parent: &crate::metrics::GC_SECONDS_TOTAL, + 
parent: GC_SECONDS_TOTAL, "split_type" => "parquet", ) .increment(parquet_run_duration); @@ -279,7 +276,7 @@ impl GarbageCollector { Ok(removal_info) => { self.counters.num_successful_gc_run += 1; counter!( - parent: &crate::metrics::GC_RUNS, + parent: GC_RUNS, "result" => "success", "split_type" => "parquet", ) @@ -299,7 +296,7 @@ impl GarbageCollector { Err(error) => { self.counters.num_failed_gc_run += 1; counter!( - parent: &crate::metrics::GC_RUNS, + parent: GC_RUNS, "result" => "error", "split_type" => "parquet", ) diff --git a/quickwit/quickwit-janitor/src/metrics.rs b/quickwit/quickwit-janitor/src/metrics.rs index a6e33562dcb..46bc94bc012 100644 --- a/quickwit/quickwit-janitor/src/metrics.rs +++ b/quickwit/quickwit-janitor/src/metrics.rs @@ -14,43 +14,37 @@ use std::sync::LazyLock; -use quickwit_metrics::{Counter, Gauge, Labels, counter, gauge}; +use quickwit_metrics::{Counter, Gauge, counter, gauge}; pub(crate) static ONGOING_NUM_DELETE_OPERATIONS_TOTAL: LazyLock = LazyLock::new(|| { gauge!( name: "ongoing_num_delete_operations_total", description: "Num of ongoing delete operations (per index).", - subsystem: "quickwit_janitor", + subsystem: "janitor", ) }); -pub(crate) const INDEX_LABELS: Labels<1> = Labels::new(["index"]); - pub(crate) static GC_DELETED_SPLITS: LazyLock = LazyLock::new(|| { counter!( name: "gc_deleted_splits_total", description: "Total number of splits deleted by the garbage collector.", - subsystem: "quickwit_janitor", + subsystem: "janitor", ) }); -pub(crate) const GC_RESULT_SPLIT_TYPE_LABELS: Labels<2> = Labels::new(["result", "split_type"]); - pub(crate) static GC_DELETED_BYTES: LazyLock = LazyLock::new(|| { counter!( name: "gc_deleted_bytes_total", description: "Total number of bytes deleted by the garbage collector.", - subsystem: "quickwit_janitor", + subsystem: "janitor", ) }); -pub(crate) const GC_SPLIT_TYPE_LABELS: Labels<1> = Labels::new(["split_type"]); - pub(crate) static GC_RUNS: LazyLock = LazyLock::new(|| { counter!( 
name: "gc_runs_total", description: "Total number of garbage collector execition.", - subsystem: "quickwit_janitor", + subsystem: "janitor", ) }); @@ -58,6 +52,6 @@ pub(crate) static GC_SECONDS_TOTAL: LazyLock = LazyLock::new(|| { counter!( name: "gc_seconds_total", description: "Total time spent running the garbage collector", - subsystem: "quickwit_janitor", + subsystem: "janitor", ) }); diff --git a/quickwit/quickwit-lambda-client/src/invoker.rs b/quickwit/quickwit-lambda-client/src/invoker.rs index 6b67431fdc5..4035b3a23b4 100644 --- a/quickwit/quickwit-lambda-client/src/invoker.rs +++ b/quickwit/quickwit-lambda-client/src/invoker.rs @@ -30,6 +30,11 @@ use quickwit_proto::search::{LambdaSearchResponses, LambdaSingleSplitResult, Lea use quickwit_search::{LambdaLeafSearchInvoker, SearchError}; use tracing::{debug, info, instrument, warn}; +use crate::metrics::{ + LEAF_SEARCH_DURATION_SECONDS, LEAF_SEARCH_REQUEST_PAYLOAD_SIZE_BYTES, + LEAF_SEARCH_REQUESTS_TOTAL, LEAF_SEARCH_RESPONSE_PAYLOAD_SIZE_BYTES, +}; + /// Upper bound on the retry-after hint we will honor from Lambda rate-limit responses. 
const MAX_RETRY_AFTER: Duration = Duration::from_secs(10); @@ -170,15 +175,14 @@ impl LambdaLeafSearchInvoker for AwsLambdaInvoker { let result = self.invoke_leaf_search_with_retry(request).await; let elapsed = start.elapsed().as_secs_f64(); let status = if result.is_ok() { "success" } else { "error" }; - let labels = crate::metrics::STATUS_LABELS.with_values([status]); counter!( - parent: &crate::metrics::LEAF_SEARCH_REQUESTS_TOTAL, - labels: &labels, + parent: LEAF_SEARCH_REQUESTS_TOTAL, + "status" => status, ) .increment(1); histogram!( - parent: &crate::metrics::LEAF_SEARCH_DURATION_SECONDS, - labels: &labels, + parent: LEAF_SEARCH_DURATION_SECONDS, + "status" => status, ) .record(elapsed); result @@ -234,7 +238,7 @@ impl AwsLambdaInvoker { let payload_json = serde_json::to_vec(&payload) .map_err(|e| SearchError::Internal(format!("JSON serialization error: {}", e)))?; - crate::metrics::LEAF_SEARCH_REQUEST_PAYLOAD_SIZE_BYTES.record(payload_json.len() as f64); + LEAF_SEARCH_REQUEST_PAYLOAD_SIZE_BYTES.record(payload_json.len() as f64); debug!( payload_size = payload_json.len(), @@ -274,8 +278,7 @@ impl AwsLambdaInvoker { .payload() .ok_or_else(|| SearchError::Internal("no response payload from Lambda".into()))?; - crate::metrics::LEAF_SEARCH_RESPONSE_PAYLOAD_SIZE_BYTES - .record(response_payload.as_ref().len() as f64); + LEAF_SEARCH_RESPONSE_PAYLOAD_SIZE_BYTES.record(response_payload.as_ref().len() as f64); let lambda_response: LambdaSearchResponsePayload = serde_json::from_slice(response_payload.as_ref()) diff --git a/quickwit/quickwit-lambda-client/src/metrics.rs b/quickwit/quickwit-lambda-client/src/metrics.rs index dd2ff87f189..75e56577816 100644 --- a/quickwit/quickwit-lambda-client/src/metrics.rs +++ b/quickwit/quickwit-lambda-client/src/metrics.rs @@ -17,9 +17,7 @@ use std::sync::LazyLock; use quickwit_common::metrics::exponential_buckets; -use quickwit_metrics::{Counter, Histogram, Labels, counter, histogram}; - -pub(crate) const STATUS_LABELS: Labels<1> = 
Labels::new(["status"]); +use quickwit_metrics::{Counter, Histogram, counter, histogram}; /// From 100ms to 73 seconds fn duration_buckets() -> Vec { diff --git a/quickwit/quickwit-metrics/benches/quickwit_metrics.rs b/quickwit/quickwit-metrics/benches/quickwit_metrics.rs index 61e2b75bd50..07c0fb1e768 100644 --- a/quickwit/quickwit-metrics/benches/quickwit_metrics.rs +++ b/quickwit/quickwit-metrics/benches/quickwit_metrics.rs @@ -751,7 +751,7 @@ fn labels_counter(c: &mut Criterion) { b.iter(|| { counter!( parent: PARENT_COUNTER, - labels: LABELS_1.with_values(["GET"]) + labels: label_values!(LABELS_1, ["GET"]) ) .increment(1); }); @@ -761,7 +761,7 @@ fn labels_counter(c: &mut Criterion) { b.iter(|| { counter!( parent: PARENT_COUNTER, - labels: LABELS_3.with_values(["GET", "/health", "200"]) + labels: label_values!(LABELS_3, ["GET", "/health", "200"]) ) .increment(1); }); @@ -771,7 +771,7 @@ fn labels_counter(c: &mut Criterion) { b.iter(|| { counter!( parent: PARENT_COUNTER, - labels: LABELS_1.with_values(["GET".to_string()]) + labels: label_values!(LABELS_1, ["GET".to_string()]) ) .increment(1); }); @@ -787,7 +787,7 @@ fn labels_counter(c: &mut Criterion) { idx += 1; counter!( parent: PARENT_COUNTER, - labels: LABELS_1.with_values([m]) + labels: label_values!(LABELS_1, [m]) ) .increment(1); }); @@ -806,7 +806,7 @@ fn labels_gauge(c: &mut Criterion) { b.iter(|| { gauge!( parent: PARENT_GAUGE, - labels: LABELS_1.with_values(["GET"]) + labels: label_values!(LABELS_1, ["GET"]) ) .set(42.0); }); @@ -816,7 +816,7 @@ fn labels_gauge(c: &mut Criterion) { b.iter(|| { gauge!( parent: PARENT_GAUGE, - labels: LABELS_3.with_values(["GET", "/health", "200"]) + labels: label_values!(LABELS_3, ["GET", "/health", "200"]) ) .set(42.0); }); @@ -835,7 +835,7 @@ fn labels_histogram(c: &mut Criterion) { b.iter(|| { histogram!( parent: PARENT_HISTOGRAM, - labels: LABELS_1.with_values(["GET"]) + labels: label_values!(LABELS_1, ["GET"]) ) .record(0.123); }); @@ -845,7 +845,7 @@ fn
labels_histogram(c: &mut Criterion) { b.iter(|| { histogram!( parent: PARENT_HISTOGRAM, - labels: LABELS_3.with_values(["GET", "/health", "200"]) + labels: label_values!(LABELS_3, ["GET", "/health", "200"]) ) .record(0.123); }); diff --git a/quickwit/quickwit-metrics/examples/http_service.rs b/quickwit/quickwit-metrics/examples/http_service.rs index 70e9012b112..17c6cceeab9 100644 --- a/quickwit/quickwit-metrics/examples/http_service.rs +++ b/quickwit/quickwit-metrics/examples/http_service.rs @@ -86,22 +86,19 @@ static HTTP_ACTIVE_CONNECTIONS_BY_REGION: LazyLock = LazyLock::new(|| { const ROUTE_LABELS: Labels<2> = Labels::new(["method", "path"]); fn record_request(method: &'static str, path: &'static str, duration: f64, size: f64) { - let route = ROUTE_LABELS.with_values([method, path]); - histogram!(parent: HTTP_REQUEST_DURATION, labels: &route).record(duration); - histogram!(parent: HTTP_RESPONSE_SIZE, labels: &route).record(size); - counter!(parent: HTTP_REQUESTS_TOTAL, labels: &route).increment(1); + let route = label_values!(ROUTE_LABELS, [method, path]); + histogram!(parent: HTTP_REQUEST_DURATION, labels: route).record(duration); + histogram!(parent: HTTP_RESPONSE_SIZE, labels: route).record(size); + counter!(parent: HTTP_REQUESTS_TOTAL, labels: route).increment(1); } fn record_dynamic_request(method: String, path: String, duration: f64) { - let route = ROUTE_LABELS.with_values([method, path]); - histogram!(parent: HTTP_REQUEST_DURATION, labels: &route).record(duration); + let route = label_values!(ROUTE_LABELS, [method, path]); + histogram!(parent: HTTP_REQUEST_DURATION, labels: route).record(duration); } -const REGION_LABEL: Labels<1> = Labels::new(["region"]); - fn track_connection(region: &'static str) -> GaugeGuard { - let lv = REGION_LABEL.with_values([region]); - let g = gauge!(parent: HTTP_ACTIVE_CONNECTIONS, labels: &lv); + let g = gauge!(parent: HTTP_ACTIVE_CONNECTIONS, "region" => region); let guard = GaugeGuard::from_gauge(&g); 
guard.increment(1.0); guard diff --git a/quickwit/quickwit-metrics/src/counter.rs b/quickwit/quickwit-metrics/src/counter.rs index edb04d06b79..8e4ab12ea82 100644 --- a/quickwit/quickwit-metrics/src/counter.rs +++ b/quickwit/quickwit-metrics/src/counter.rs @@ -270,7 +270,7 @@ macro_rules! counter { parent: $parent:expr, labels: $labels:expr $(,)? ) => {{ - let label_values = $labels; + let label_values = &($labels); $crate::__metric_extension!( metric_type: $crate::Counter, register_fn: $crate::__counter_get_or_register, diff --git a/quickwit/quickwit-metrics/src/gauge.rs b/quickwit/quickwit-metrics/src/gauge.rs index 1d66ee36f63..4fc7f671144 100644 --- a/quickwit/quickwit-metrics/src/gauge.rs +++ b/quickwit/quickwit-metrics/src/gauge.rs @@ -321,7 +321,7 @@ macro_rules! gauge { parent: $parent:expr, labels: $labels:expr $(,)? ) => {{ - let label_values = $labels; + let label_values = &($labels); $crate::__metric_extension!( metric_type: $crate::Gauge, register_fn: $crate::__gauge_get_or_register, diff --git a/quickwit/quickwit-metrics/src/histogram.rs b/quickwit/quickwit-metrics/src/histogram.rs index 2a99f3bc9d6..47d86a77c1f 100644 --- a/quickwit/quickwit-metrics/src/histogram.rs +++ b/quickwit/quickwit-metrics/src/histogram.rs @@ -313,7 +313,7 @@ macro_rules! histogram { parent: $parent:expr, labels: $labels:expr $(,)? ) => {{ - let label_values = $labels; + let label_values = &($labels); $crate::__metric_extension!( metric_type: $crate::Histogram, register_fn: $crate::__histogram_get_or_register, diff --git a/quickwit/quickwit-metrics/src/labels.rs b/quickwit/quickwit-metrics/src/labels.rs index 147d7481bdb..b01edbf4e92 100644 --- a/quickwit/quickwit-metrics/src/labels.rs +++ b/quickwit/quickwit-metrics/src/labels.rs @@ -15,18 +15,43 @@ //! Reusable label templates for metric extension. //! //! [`Labels`] holds label *names* at compile time; pair them with -//! values via [`Labels::with_values`] to get a [`LabelValues`] that +//! 
values via the [`label_values!`] macro to get a [`LabelValues`] that //! the `labels:` macro arm can consume. This avoids repeating the same //! label names at every call site and lets a single `LabelValues` be //! shared across counter, gauge, and histogram extensions. use crate::__key_hash; +/// Pairs a [`Labels`] template with concrete values, one per label name. +/// +/// Each value is converted individually via `Into`, so you +/// can freely mix `&'static str`, `String`, `Cow<'static, str>`, etc. +/// +/// # Example +/// +/// ```rust,ignore +/// const GC_LABELS: Labels<2> = Labels::new(["status", "split_type"]); +/// +/// // All-static — zero allocation: +/// let lv = label_values!(GC_LABELS, ["success", "tantivy"]); +/// +/// // Mixed types — &'static str and String — just work: +/// let lv = label_values!(GC_LABELS, ["success", split_type.to_string()]); +/// +/// counter!(parent: *GC_COUNTER, labels: lv).increment(1); +/// ``` +#[macro_export] +macro_rules! label_values { + ($labels:expr, [$($val:expr),+ $(,)?]) => { + $labels.__with_values([$(Into::<$crate::__metrics::SharedString>::into($val)),+]) + }; +} + /// A label-name template with a fixed number of slots. /// /// `Labels` holds only the label *names* — it is `const`-constructible -/// and carries no runtime data. Call [`with_values`](Self::with_values) to -/// pair the names with concrete values, producing a [`LabelValues`] that +/// and carries no runtime data. Use the [`label_values!`] macro to pair +/// the names with concrete values, producing a [`LabelValues`] that /// the metric macros can consume. /// /// # Example @@ -34,15 +59,15 @@ use crate::__key_hash; /// ```rust,ignore /// const SPLIT_LABELS: Labels<2> = Labels::new(["source", "level"]); /// -/// // &'static str values — zero allocation (Cow::Borrowed). 
-/// let lv = SPLIT_LABELS.with_values(["prod", "info"]); +/// // All the same type: +/// let lv = label_values!(SPLIT_LABELS, ["prod", "info"]); /// -/// // Runtime String values — allocates (Cow::Owned). -/// let lv = SPLIT_LABELS.with_values([source_uid, level.to_string()]); +/// // Mixed types: +/// let lv = label_values!(SPLIT_LABELS, [source_uid, level.to_string()]); /// -/// // Pass by reference — reuse the same LabelValues across metrics. -/// let c = counter!(parent: BASE_COUNTER, labels: &lv); -/// let g = gauge!(parent: BASE_GAUGE, labels: &lv); +/// // Reuse the same LabelValues across metrics: +/// let c = counter!(parent: BASE_COUNTER, labels: lv); +/// let g = gauge!(parent: BASE_GAUGE, labels: lv); /// ``` pub struct Labels { names: [&'static str; N], @@ -54,13 +79,9 @@ impl Labels { Self { names } } - /// Pairs this template's names with values, returning a - /// [`LabelValues`] ready to be passed to a metric macro. - /// - /// Each value can be anything that converts to [`metrics::SharedString`] - /// (`Cow<'static, str>`): `&'static str` is zero-alloc, `String` or - /// non-`'static` `&str` allocates. - pub fn with_values>(&self, values: [V; N]) -> LabelValues { + /// Internal plumbing used by [`label_values!`]. Not part of the public API. + #[doc(hidden)] + pub fn __with_values>(&self, values: [V; N]) -> LabelValues { LabelValues { names: self.names, values: values.map(Into::into), @@ -68,7 +89,7 @@ impl Labels { } } -/// Concrete label names + values produced by [`Labels::with_values`]. +/// Concrete label names + values produced by [`label_values!`]. /// /// Passed by reference (`&LabelValues`) to the `labels:` macro arm so /// a single instance can be reused across multiple metric calls. Cloning diff --git a/quickwit/quickwit-metrics/src/lib.rs b/quickwit/quickwit-metrics/src/lib.rs index 6dcc22b1517..c67f01b665b 100644 --- a/quickwit/quickwit-metrics/src/lib.rs +++ b/quickwit/quickwit-metrics/src/lib.rs @@ -91,9 +91,15 @@ //! 
const ROUTE: Labels<2> = Labels::new(["method", "path"]); //! //! fn on_request(method: &'static str, path: &'static str, duration: f64) { -//! let route = ROUTE.with_values([method, path]); -//! histogram!(parent: REQUEST_DURATION, labels: &route).record(duration); -//! counter!(parent: HTTP_REQUESTS, labels: &route).increment(1); +//! let route = label_values!(ROUTE, [method, path]); +//! histogram!(parent: REQUEST_DURATION, labels: route).record(duration); +//! counter!(parent: HTTP_REQUESTS, labels: route).increment(1); +//! } +//! +//! // Mixed types work too — Into is called per-element: +//! fn on_dynamic_request(method: &'static str, path: String, duration: f64) { +//! let route = label_values!(ROUTE, [method, path]); +//! histogram!(parent: REQUEST_DURATION, labels: route).record(duration); //! } //! ``` //! diff --git a/quickwit/quickwit-opentelemetry/src/otlp/logs.rs b/quickwit/quickwit-opentelemetry/src/otlp/logs.rs index b452cc04267..6f6a07ecab5 100644 --- a/quickwit/quickwit-opentelemetry/src/otlp/logs.rs +++ b/quickwit/quickwit-opentelemetry/src/otlp/logs.rs @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use std::borrow::Cow; use std::collections::HashMap; use async_trait::async_trait; @@ -21,7 +20,7 @@ use quickwit_common::thread_pool::run_cpu_intensive; use quickwit_common::uri::Uri; use quickwit_config::{ConfigFormat, IndexConfig, load_index_config_from_user_config}; use quickwit_ingest::{CommitType, JsonDocBatchV2Builder}; -use quickwit_metrics::{counter, histogram}; +use quickwit_metrics::{counter, histogram, label_values}; use quickwit_proto::ingest::DocBatchV2; use quickwit_proto::ingest::router::IngestRouterServiceClient; use quickwit_proto::opentelemetry::proto::collector::logs::v1::logs_service_server::LogsService; @@ -41,6 +40,10 @@ use super::{ extract_otel_index_id_from_metadata, ingest_doc_batch_v2, is_zero, parse_log_record_body, }; use crate::otlp::extract_attributes; +use crate::otlp::metrics::{ + INGESTED_BYTES_TOTAL, INGESTED_LOG_RECORDS_TOTAL, OTLP_GRPC_ERROR_LABELS, OTLP_GRPC_LABELS, + REQUEST_DURATION_SECONDS, REQUEST_ERRORS_TOTAL, REQUESTS_TOTAL, +}; pub const OTEL_LOGS_INDEX_ID: &str = "otel-logs-v0_9"; @@ -240,19 +243,13 @@ impl OtlpGrpcLogsService { let num_bytes = doc_batch.num_bytes() as u64; self.store_logs(index_id.clone(), doc_batch).await?; - let labels = crate::otlp::metrics::OTLP_GRPC_LABELS.with_values([ - Cow::Borrowed("logs"), - Cow::Owned(index_id), - Cow::Borrowed("grpc"), - Cow::Borrowed("protobuf"), - ]); + let labels = label_values!(OTLP_GRPC_LABELS, ["logs", index_id, "grpc", "protobuf",]); counter!( - parent: &crate::otlp::metrics::INGESTED_LOG_RECORDS_TOTAL, - labels: &labels, + parent: INGESTED_LOG_RECORDS_TOTAL, + labels: labels, ) .increment(num_log_records); - counter!(parent: &crate::otlp::metrics::INGESTED_BYTES_TOTAL, labels: &labels) - .increment(num_bytes); + counter!(parent: INGESTED_BYTES_TOTAL, labels: labels).increment(num_bytes); let response = ExportLogsServiceResponse { // `rejected_log_records=0` and `error_message=""` is considered a "full" success.
@@ -323,39 +320,33 @@ impl OtlpGrpcLogsService { ) -> Result { let start = std::time::Instant::now(); - let labels = crate::otlp::metrics::OTLP_GRPC_LABELS.with_values([ - Cow::Borrowed("logs"), - Cow::Owned(index_id.clone()), - Cow::Borrowed("grpc"), - Cow::Borrowed("protobuf"), - ]); + let labels = label_values!( + OTLP_GRPC_LABELS, + ["logs", index_id.clone(), "grpc", "protobuf",] + ); counter!( - parent: &crate::otlp::metrics::REQUESTS_TOTAL, - labels: &labels, + parent: REQUESTS_TOTAL, + labels: labels, ) .increment(1); let (export_res, is_error) = match self.export_inner(request, index_id.clone()).await { ok @ Ok(_) => (ok, "false"), err @ Err(_) => { counter!( - parent: &crate::otlp::metrics::REQUEST_ERRORS_TOTAL, - labels: &labels, + parent: REQUEST_ERRORS_TOTAL, + labels: labels, ) .increment(1); (err, "true") } }; let elapsed = start.elapsed().as_secs_f64(); - let duration_labels = crate::otlp::metrics::OTLP_GRPC_ERROR_LABELS.with_values([ - Cow::Borrowed("logs"), - Cow::Owned(index_id), - Cow::Borrowed("grpc"), - Cow::Borrowed("protobuf"), - Cow::Borrowed(is_error), - ]); histogram!( - parent: &crate::otlp::metrics::REQUEST_DURATION_SECONDS, - labels: &duration_labels, + parent: REQUEST_DURATION_SECONDS, + labels: label_values!( + OTLP_GRPC_ERROR_LABELS, + ["logs", index_id, "grpc", "protobuf", is_error,] + ), ) .record(elapsed); diff --git a/quickwit/quickwit-opentelemetry/src/otlp/otel_metrics.rs b/quickwit/quickwit-opentelemetry/src/otlp/otel_metrics.rs index 60f59e889e0..67455bef9f2 100644 --- a/quickwit/quickwit-opentelemetry/src/otlp/otel_metrics.rs +++ b/quickwit/quickwit-opentelemetry/src/otlp/otel_metrics.rs @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use std::borrow::Cow; use std::collections::HashMap; use async_trait::async_trait; @@ -20,7 +19,7 @@ use quickwit_common::thread_pool::run_cpu_intensive; use quickwit_common::uri::Uri; use quickwit_config::{ConfigFormat, IndexConfig, load_index_config_from_user_config}; use quickwit_ingest::CommitType; -use quickwit_metrics::{counter, histogram}; +use quickwit_metrics::{counter, histogram, label_values}; use quickwit_parquet_engine::schema::REQUIRED_FIELDS; use quickwit_proto::ingest::DocBatchV2; use quickwit_proto::ingest::router::IngestRouterServiceClient; @@ -40,6 +39,10 @@ use tracing::{Span as RuntimeSpan, error, instrument, warn}; use super::arrow_metrics::{ArrowDocBatchV2Builder, ArrowMetricsBatchBuilder}; use super::{OtelSignal, extract_otel_index_id_from_metadata, ingest_doc_batch_v2}; use crate::otlp::extract_attributes; +use crate::otlp::metrics::{ + INGESTED_BYTES_TOTAL, INGESTED_DATA_POINTS_TOTAL, OTLP_GRPC_ERROR_LABELS, OTLP_GRPC_LABELS, + REQUEST_DURATION_SECONDS, REQUEST_ERRORS_TOTAL, REQUESTS_TOTAL, +}; pub const OTEL_METRICS_INDEX_ID: &str = "otel-metrics-v0_9"; @@ -236,19 +239,13 @@ impl OtlpGrpcMetricsService { let num_bytes = doc_batch.num_bytes() as u64; self.store_metrics(index_id.clone(), doc_batch).await?; - let labels = crate::otlp::metrics::OTLP_GRPC_LABELS.with_values([ - Cow::Borrowed("metrics"), - Cow::Owned(index_id), - Cow::Borrowed("grpc"), - Cow::Borrowed("protobuf"), - ]); + let labels = label_values!(OTLP_GRPC_LABELS, ["metrics", index_id, "grpc", "protobuf",]); counter!( - parent: &crate::otlp::metrics::INGESTED_DATA_POINTS_TOTAL, - labels: &labels, + parent: INGESTED_DATA_POINTS_TOTAL, + labels: labels, ) .increment(num_data_points - num_parse_errors); - counter!(parent: &crate::otlp::metrics::INGESTED_BYTES_TOTAL, labels: &labels) - .increment(num_bytes); + counter!(parent: INGESTED_BYTES_TOTAL, labels: labels).increment(num_bytes); let response = ExportMetricsServiceResponse { partial_success: 
Some(ExportMetricsPartialSuccess { @@ -337,15 +334,13 @@ impl OtlpGrpcMetricsService { ) -> Result { let start = std::time::Instant::now(); - let labels = crate::otlp::metrics::OTLP_GRPC_LABELS.with_values([ - Cow::Borrowed("metrics"), - Cow::Owned(index_id.clone()), - Cow::Borrowed("grpc"), - Cow::Borrowed("protobuf"), - ]); + let labels = label_values!( + OTLP_GRPC_LABELS, + ["metrics", index_id.clone(), "grpc", "protobuf"] + ); counter!( - parent: &crate::otlp::metrics::REQUESTS_TOTAL, - labels: &labels, + parent: REQUESTS_TOTAL, + labels: labels, ) .increment(1); @@ -353,8 +348,8 @@ impl OtlpGrpcMetricsService { ok @ Ok(_) => (ok, "false"), err @ Err(_) => { counter!( - parent: &crate::otlp::metrics::REQUEST_ERRORS_TOTAL, - labels: &labels, + parent: REQUEST_ERRORS_TOTAL, + labels: labels, ) .increment(1); (err, "true") @@ -362,16 +357,12 @@ impl OtlpGrpcMetricsService { }; let elapsed = start.elapsed().as_secs_f64(); - let duration_labels = crate::otlp::metrics::OTLP_GRPC_ERROR_LABELS.with_values([ - Cow::Borrowed("metrics"), - Cow::Owned(index_id), - Cow::Borrowed("grpc"), - Cow::Borrowed("protobuf"), - Cow::Borrowed(is_error), - ]); histogram!( - parent: &crate::otlp::metrics::REQUEST_DURATION_SECONDS, - labels: &duration_labels, + parent: REQUEST_DURATION_SECONDS, + labels: label_values!( + OTLP_GRPC_ERROR_LABELS, + ["metrics", index_id, "grpc", "protobuf", is_error] + ), ) .record(elapsed); diff --git a/quickwit/quickwit-opentelemetry/src/otlp/traces.rs b/quickwit/quickwit-opentelemetry/src/otlp/traces.rs index 097869df351..71a185eed37 100644 --- a/quickwit/quickwit-opentelemetry/src/otlp/traces.rs +++ b/quickwit/quickwit-opentelemetry/src/otlp/traces.rs @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use std::borrow::Cow; use std::cmp::PartialEq; use std::collections::HashMap; use std::str::FromStr; @@ -23,7 +22,7 @@ use quickwit_common::thread_pool::run_cpu_intensive; use quickwit_common::uri::Uri; use quickwit_config::{ConfigFormat, IndexConfig, load_index_config_from_user_config}; use quickwit_ingest::{CommitType, JsonDocBatchV2Builder}; -use quickwit_metrics::{counter, histogram}; +use quickwit_metrics::{counter, histogram, label_values}; use quickwit_proto::ingest::DocBatchV2; use quickwit_proto::ingest::router::IngestRouterServiceClient; use quickwit_proto::opentelemetry::proto::collector::trace::v1::trace_service_server::TraceService; @@ -46,6 +45,10 @@ use super::{ OtelSignal, TryFromSpanIdError, TryFromTraceIdError, extract_otel_index_id_from_metadata, ingest_doc_batch_v2, is_zero, }; +use crate::otlp::metrics::{ + INGESTED_BYTES_TOTAL, INGESTED_SPANS_TOTAL, OTLP_GRPC_ERROR_LABELS, OTLP_GRPC_LABELS, + REQUEST_DURATION_SECONDS, REQUEST_ERRORS_TOTAL, REQUESTS_TOTAL, +}; use crate::otlp::{SpanId, TraceId, extract_attributes}; pub const OTEL_TRACES_INDEX_ID: &str = "otel-traces-v0_9"; @@ -702,19 +705,16 @@ impl OtlpGrpcTracesService { let num_bytes = doc_batch.num_bytes() as u64; self.store_spans(index_id.clone(), doc_batch).await?; - let labels = crate::otlp::metrics::OTLP_GRPC_LABELS.with_values([ - Cow::Borrowed("trace"), - Cow::Owned(index_id), - Cow::Borrowed("grpc"), - Cow::Borrowed("protobuf"), - ]); + let labels = label_values!(OTLP_GRPC_LABELS, ["trace", index_id, "grpc", "protobuf",]); counter!( - parent: &crate::otlp::metrics::INGESTED_SPANS_TOTAL, - labels: &labels, + parent: INGESTED_SPANS_TOTAL, + labels: labels, ) .increment(num_spans); - counter!(parent: &crate::otlp::metrics::INGESTED_BYTES_TOTAL, labels: &labels) - .increment(num_bytes); + counter!(parent: INGESTED_BYTES_TOTAL, + labels: labels + ) + .increment(num_bytes); let response = ExportTraceServiceResponse { // `rejected_spans=0` and `error_message=""` is considered a "full" 
success. @@ -785,39 +785,33 @@ impl OtlpGrpcTracesService { ) -> Result { let start = std::time::Instant::now(); - let labels = crate::otlp::metrics::OTLP_GRPC_LABELS.with_values([ - Cow::Borrowed("trace"), - Cow::Owned(index_id.clone()), - Cow::Borrowed("grpc"), - Cow::Borrowed("protobuf"), - ]); + let labels = label_values!( + OTLP_GRPC_LABELS, + ["trace", index_id.clone(), "grpc", "protobuf",] + ); counter!( - parent: &crate::otlp::metrics::REQUESTS_TOTAL, - labels: &labels, + parent: REQUESTS_TOTAL, + labels: labels, ) .increment(1); let (export_res, is_error) = match self.export_inner(request, index_id.clone()).await { ok @ Ok(_) => (ok, "false"), err @ Err(_) => { counter!( - parent: &crate::otlp::metrics::REQUEST_ERRORS_TOTAL, - labels: &labels, + parent: REQUEST_ERRORS_TOTAL, + labels: labels, ) .increment(1); (err, "true") } }; let elapsed = start.elapsed().as_secs_f64(); - let duration_labels = crate::otlp::metrics::OTLP_GRPC_ERROR_LABELS.with_values([ - Cow::Borrowed("trace"), - Cow::Owned(index_id), - Cow::Borrowed("grpc"), - Cow::Borrowed("protobuf"), - Cow::Borrowed(is_error), - ]); histogram!( - parent: &crate::otlp::metrics::REQUEST_DURATION_SECONDS, - labels: &duration_labels, + parent: REQUEST_DURATION_SECONDS, + labels: label_values!( + OTLP_GRPC_ERROR_LABELS, + ["trace", index_id, "grpc", "protobuf", is_error,] + ), ) .record(elapsed); diff --git a/quickwit/quickwit-parquet-engine/src/index/accumulator.rs b/quickwit/quickwit-parquet-engine/src/index/accumulator.rs index 1c97bf894a2..16ab430a685 100644 --- a/quickwit/quickwit-parquet-engine/src/index/accumulator.rs +++ b/quickwit/quickwit-parquet-engine/src/index/accumulator.rs @@ -25,6 +25,7 @@ use arrow::record_batch::RecordBatch; use tracing::{debug, info}; use super::config::ParquetIndexingConfig; +use crate::metrics::{INDEX_BATCH_DURATION_SECONDS, INDEX_BATCHES_TOTAL, INDEX_ROWS_TOTAL}; /// Error type for index operations. 
#[derive(Debug, thiserror::Error)] @@ -88,8 +89,8 @@ impl ParquetBatchAccumulator { let batch_bytes = estimate_batch_bytes(&batch); // Record index metrics - crate::metrics::INDEX_BATCHES_TOTAL.increment(1); - crate::metrics::INDEX_ROWS_TOTAL.increment(batch_rows as u64); + INDEX_BATCHES_TOTAL.increment(1); + INDEX_ROWS_TOTAL.increment(batch_rows as u64); // Merge fields into union schema before pushing (we need the schema reference) for field in batch.schema().fields() { @@ -124,7 +125,7 @@ impl ParquetBatchAccumulator { }; // Record batch processing duration - crate::metrics::INDEX_BATCH_DURATION_SECONDS.record(start.elapsed().as_secs_f64()); + INDEX_BATCH_DURATION_SECONDS.record(start.elapsed().as_secs_f64()); Ok(flushed) } diff --git a/quickwit/quickwit-parquet-engine/src/ingest/processor.rs b/quickwit/quickwit-parquet-engine/src/ingest/processor.rs index f028e6b2e06..316d8be1dc6 100644 --- a/quickwit/quickwit-parquet-engine/src/ingest/processor.rs +++ b/quickwit/quickwit-parquet-engine/src/ingest/processor.rs @@ -21,6 +21,7 @@ use arrow::record_batch::RecordBatch; use quickwit_metrics::counter; use tracing::{debug, instrument, warn}; +use crate::metrics::{ERRORS_TOTAL, INGEST_BYTES_TOTAL}; use crate::schema::validate_required_fields; /// Error type for ingest operations. 
@@ -64,7 +65,7 @@ impl ParquetIngestProcessor { pub fn process_ipc(&self, ipc_bytes: &[u8]) -> Result { // Record bytes ingested counter!( - parent: &crate::metrics::INGEST_BYTES_TOTAL, + parent: INGEST_BYTES_TOTAL, "kind" => "points", ) .increment(ipc_bytes.len() as u64); @@ -73,7 +74,7 @@ impl ParquetIngestProcessor { Ok(batch) => batch, Err(e) => { counter!( - parent: &crate::metrics::ERRORS_TOTAL, + parent: ERRORS_TOTAL, "operation" => "ingest", "kind" => "points", ) @@ -84,7 +85,7 @@ impl ParquetIngestProcessor { if let Err(e) = self.validate_schema(&batch) { counter!( - parent: &crate::metrics::ERRORS_TOTAL, + parent: ERRORS_TOTAL, "operation" => "ingest", "kind" => "points", ) diff --git a/quickwit/quickwit-parquet-engine/src/ingest/sketch_processor.rs b/quickwit/quickwit-parquet-engine/src/ingest/sketch_processor.rs index 8913200d474..9a45a7c2d6a 100644 --- a/quickwit/quickwit-parquet-engine/src/ingest/sketch_processor.rs +++ b/quickwit/quickwit-parquet-engine/src/ingest/sketch_processor.rs @@ -20,6 +20,7 @@ use quickwit_metrics::counter; use tracing::{debug, instrument, warn}; use super::processor::IngestError; +use crate::metrics::{ERRORS_TOTAL, INGEST_BYTES_TOTAL}; use crate::schema::validate_required_sketch_fields; /// Processor that converts Arrow IPC bytes to RecordBatch for DDSketch data. 
@@ -42,7 +43,7 @@ impl SketchParquetIngestProcessor { #[instrument(skip(self, ipc_bytes), fields(bytes_len = ipc_bytes.len()))] pub fn process_ipc(&self, ipc_bytes: &[u8]) -> Result { counter!( - parent: &crate::metrics::INGEST_BYTES_TOTAL, + parent: INGEST_BYTES_TOTAL, "kind" => "sketches", ) .increment(ipc_bytes.len() as u64); @@ -51,7 +52,7 @@ impl SketchParquetIngestProcessor { Ok(batch) => batch, Err(err) => { counter!( - parent: &crate::metrics::ERRORS_TOTAL, + parent: ERRORS_TOTAL, "operation" => "ingest", "kind" => "sketches", ) @@ -62,7 +63,7 @@ impl SketchParquetIngestProcessor { if let Err(err) = self.validate_schema(&batch) { counter!( - parent: &crate::metrics::ERRORS_TOTAL, + parent: ERRORS_TOTAL, "operation" => "ingest", "kind" => "sketches", ) @@ -72,7 +73,7 @@ impl SketchParquetIngestProcessor { if let Err(err) = self.validate_sketch_arrays(&batch) { counter!( - parent: &crate::metrics::ERRORS_TOTAL, + parent: ERRORS_TOTAL, "operation" => "ingest", "kind" => "sketches", ) diff --git a/quickwit/quickwit-search/src/leaf.rs b/quickwit/quickwit-search/src/leaf.rs index 38fb94793e0..07b4f4be003 100644 --- a/quickwit/quickwit-search/src/leaf.rs +++ b/quickwit/quickwit-search/src/leaf.rs @@ -54,7 +54,10 @@ use tracing::*; use crate::collector::{IncrementalCollector, make_collector_for_split, make_merge_collector}; use crate::leaf_cache::LeafSearchCache; -use crate::metrics::SplitSearchOutcomeCounters; +use crate::metrics::{ + LEAF_SEARCH_SINGLE_SPLIT_WARMUP_NUM_BYTES, LEAF_SEARCH_SPLIT_DURATION_SECS, + SPLIT_SEARCH_OUTCOME_TOTAL, SplitSearchOutcomeCounters, +}; use crate::root::is_metadata_count_request_with_ast; use crate::search_permit_provider::{ SearchPermit, SearchPermitFuture, compute_initial_memory_allocation, @@ -591,7 +594,7 @@ async fn leaf_search_single_split( "current leaf search is consuming more memory than the initial allocation" ); } - crate::metrics::LEAF_SEARCH_SINGLE_SPLIT_WARMUP_NUM_BYTES.record(warmup_size.as_u64() as f64); + 
LEAF_SEARCH_SINGLE_SPLIT_WARMUP_NUM_BYTES.record(warmup_size.as_u64() as f64); search_permit.update_memory_usage(warmup_size); search_permit.free_warmup_slot(); @@ -1819,8 +1822,7 @@ impl SplitSearchState { impl Drop for SplitSearchStateGuard { fn drop(&mut self) { - self.state - .increment(&crate::metrics::SPLIT_SEARCH_OUTCOME_TOTAL); + self.state.increment(&SPLIT_SEARCH_OUTCOME_TOTAL); self.state .increment(&self.local_split_search_outcome_counters); } @@ -1861,7 +1863,7 @@ async fn leaf_search_single_split_wrapper( split: SplitIdAndFooterOffsets, mut search_permit: SearchPermit, ) { - let timer = crate::metrics::LEAF_SEARCH_SPLIT_DURATION_SECS.start_timer(); + let timer = LEAF_SEARCH_SPLIT_DURATION_SECS.start_timer(); let leaf_search_single_split_opt_res: crate::Result> = leaf_search_single_split( request, diff --git a/quickwit/quickwit-search/src/list_terms.rs b/quickwit/quickwit-search/src/list_terms.rs index b3d6ef8bb6a..42deafb550a 100644 --- a/quickwit/quickwit-search/src/list_terms.rs +++ b/quickwit/quickwit-search/src/list_terms.rs @@ -35,6 +35,7 @@ use tantivy::{ReloadPolicy, Term}; use tracing::{debug, error, info, instrument}; use crate::leaf::open_index_with_caches; +use crate::metrics::{LEAF_LIST_TERMS_SPLITS_TOTAL, LEAF_SEARCH_SPLIT_DURATION_SECS}; use crate::search_job_placer::group_jobs_by_index_id; use crate::search_permit_provider::compute_initial_memory_allocation; use crate::{ClusterClient, SearchError, SearchJob, SearcherContext, resolve_index_patterns}; @@ -353,8 +354,8 @@ pub async fn leaf_list_terms( async move { let leaf_split_search_permit = search_permit_recv.await; // TODO dedicated counter and timer? 
- crate::metrics::LEAF_LIST_TERMS_SPLITS_TOTAL.increment(1); - let timer = crate::metrics::LEAF_SEARCH_SPLIT_DURATION_SECS.start_timer(); + LEAF_LIST_TERMS_SPLITS_TOTAL.increment(1); + let timer = LEAF_SEARCH_SPLIT_DURATION_SECS.start_timer(); let leaf_search_single_split_res = leaf_list_terms_single_split( &searcher_context_clone, request, diff --git a/quickwit/quickwit-search/src/metrics.rs b/quickwit/quickwit-search/src/metrics.rs index 7a63db2d038..7c7d4c15851 100644 --- a/quickwit/quickwit-search/src/metrics.rs +++ b/quickwit/quickwit-search/src/metrics.rs @@ -22,7 +22,6 @@ use quickwit_common::metrics::{MaybeRegisteredCounter, exponential_buckets, line use quickwit_metrics::{Counter, Gauge, Histogram, Labels, counter, gauge, histogram}; pub(crate) const STATUS_LABELS: Labels<1> = Labels::new(["status"]); -pub(crate) const AFFINITY_LABELS: Labels<1> = Labels::new(["affinity"]); fn print_if_not_null( field_name: &'static str, diff --git a/quickwit/quickwit-search/src/metrics_trackers.rs b/quickwit/quickwit-search/src/metrics_trackers.rs index 032bcde5a22..8add6b1484f 100644 --- a/quickwit/quickwit-search/src/metrics_trackers.rs +++ b/quickwit/quickwit-search/src/metrics_trackers.rs @@ -19,10 +19,15 @@ use std::task::{Context, Poll, ready}; use std::time::Instant; use pin_project::{pin_project, pinned_drop}; -use quickwit_metrics::{counter, histogram}; +use quickwit_metrics::{counter, histogram, label_values}; use quickwit_proto::search::LeafSearchResponse; use crate::SearchError; +use crate::metrics::{ + LEAF_SEARCH_REQUEST_DURATION_SECONDS, LEAF_SEARCH_REQUESTS_TOTAL, LEAF_SEARCH_TARGETED_SPLITS, + ROOT_SEARCH_REQUEST_DURATION_SECONDS, ROOT_SEARCH_REQUESTS_TOTAL, ROOT_SEARCH_TARGETED_SPLITS, + STATUS_LABELS, +}; // root @@ -69,20 +74,20 @@ impl PinnedDrop for RootSearchMetricsFuture { ) => (*num_targeted_splits, "cancelled"), }; - let labels = crate::metrics::STATUS_LABELS.with_values([status]); + let labels = label_values!(STATUS_LABELS, [status]); counter!( 
- parent: &crate::metrics::ROOT_SEARCH_REQUESTS_TOTAL, - labels: &labels, + parent: ROOT_SEARCH_REQUESTS_TOTAL, + labels: labels, ) .increment(1); histogram!( - parent: &crate::metrics::ROOT_SEARCH_REQUEST_DURATION_SECONDS, - labels: &labels, + parent: ROOT_SEARCH_REQUEST_DURATION_SECONDS, + labels: labels, ) .record(self.start.elapsed().as_secs_f64()); histogram!( - parent: &crate::metrics::ROOT_SEARCH_TARGETED_SPLITS, - labels: &labels, + parent: ROOT_SEARCH_TARGETED_SPLITS, + labels: labels, ) .record(num_targeted_splits as f64); } @@ -121,20 +126,20 @@ where F: Future> { fn drop(self: Pin<&mut Self>) { let status = self.status.unwrap_or("cancelled"); - let labels = crate::metrics::STATUS_LABELS.with_values([status]); + let labels = label_values!(STATUS_LABELS, [status]); counter!( - parent: &crate::metrics::LEAF_SEARCH_REQUESTS_TOTAL, - labels: &labels, + parent: LEAF_SEARCH_REQUESTS_TOTAL, + labels: labels, ) .increment(1); histogram!( - parent: &crate::metrics::LEAF_SEARCH_REQUEST_DURATION_SECONDS, - labels: &labels, + parent: LEAF_SEARCH_REQUEST_DURATION_SECONDS, + labels: labels, ) .record(self.start.elapsed().as_secs_f64()); histogram!( - parent: &crate::metrics::LEAF_SEARCH_TARGETED_SPLITS, - labels: &labels, + parent: LEAF_SEARCH_TARGETED_SPLITS, + labels: labels, ) .record(self.targeted_splits as f64); } diff --git a/quickwit/quickwit-search/src/scroll_context.rs b/quickwit/quickwit-search/src/scroll_context.rs index 3165534b1e9..b8c85e07bfb 100644 --- a/quickwit/quickwit-search/src/scroll_context.rs +++ b/quickwit/quickwit-search/src/scroll_context.rs @@ -33,6 +33,7 @@ use ttl_cache::TtlCache; use ulid::Ulid; use crate::ClusterClient; +use crate::metrics::SEARCHER_LOCAL_KV_STORE_SIZE_BYTES; use crate::root::IndexMetasForLeafSearch; use crate::service::SearcherContext; @@ -148,8 +149,7 @@ impl Default for MiniKV { impl MiniKV { pub async fn put(&self, key: Vec, payload: Vec, ttl: Duration) { - let metric_guard = - 
GaugeGuard::from_gauge(&crate::metrics::SEARCHER_LOCAL_KV_STORE_SIZE_BYTES); + let metric_guard = GaugeGuard::from_gauge(&SEARCHER_LOCAL_KV_STORE_SIZE_BYTES); metric_guard.increment(payload.len() as f64); let mut cache_lock = self.ttl_with_cache.write().await; cache_lock.insert( diff --git a/quickwit/quickwit-search/src/search_job_placer.rs b/quickwit/quickwit-search/src/search_job_placer.rs index 8e60de3a6f8..f11dce3bd30 100644 --- a/quickwit/quickwit-search/src/search_job_placer.rs +++ b/quickwit/quickwit-search/src/search_job_placer.rs @@ -27,6 +27,7 @@ use quickwit_metrics::counter; use quickwit_proto::search::{ReportSplit, ReportSplitsRequest}; use tracing::{info, warn}; +use crate::metrics::JOB_ASSIGNED_TOTAL; use crate::{SearchJob, SearchServiceClient, SearcherPool}; /// Job. @@ -218,8 +219,7 @@ impl SearchJobPlacer { 1 => "1", _ => "> 1", }; - let labels = crate::metrics::AFFINITY_LABELS.with_values([metric_node_idx]); - counter!(parent: &crate::metrics::JOB_ASSIGNED_TOTAL, labels: &labels).increment(1); + counter!(parent: JOB_ASSIGNED_TOTAL, "affinity" => metric_node_idx).increment(1); chosen_node.load += job.cost(); job_assignments diff --git a/quickwit/quickwit-search/src/search_permit_provider.rs b/quickwit/quickwit-search/src/search_permit_provider.rs index e8cb09643f1..aba98e3a052 100644 --- a/quickwit/quickwit-search/src/search_permit_provider.rs +++ b/quickwit/quickwit-search/src/search_permit_provider.rs @@ -24,6 +24,10 @@ use quickwit_metrics::GaugeGuard; use quickwit_proto::search::SplitIdAndFooterOffsets; use tokio::sync::{mpsc, oneshot}; +use crate::metrics::{ + LEAF_SEARCH_SINGLE_SPLIT_TASKS_ONGOING, LEAF_SEARCH_SINGLE_SPLIT_TASKS_PENDING, +}; + /// Distributor of permits to perform split search operation. /// /// Requests are served in order. 
Each permit initially reserves a slot for the @@ -333,7 +337,7 @@ impl SearchPermitActor { fn assign_available_permits(&mut self) { while let Some(permit_request) = self.pop_next_request_if_serviceable() { let ongoing_gauge_guard = - GaugeGuard::from_gauge(&crate::metrics::LEAF_SEARCH_SINGLE_SPLIT_TASKS_ONGOING); + GaugeGuard::from_gauge(&LEAF_SEARCH_SINGLE_SPLIT_TASKS_ONGOING); ongoing_gauge_guard.increment(1.0); self.total_memory_allocated += permit_request.permit_size; self.num_warmup_slots_available -= 1; @@ -349,8 +353,7 @@ impl SearchPermitActor { // created SearchPermit which releases the resources .ok(); } - crate::metrics::LEAF_SEARCH_SINGLE_SPLIT_TASKS_PENDING - .set(self.permits_requests.len() as f64); + LEAF_SEARCH_SINGLE_SPLIT_TASKS_PENDING.set(self.permits_requests.len() as f64); } } @@ -415,7 +418,9 @@ impl Future for SearchPermitFuture { let receiver = Pin::new(&mut self.get_mut().0); match receiver.poll(cx) { Poll::Ready(Ok(search_permit)) => Poll::Ready(search_permit), - Poll::Ready(Err(_)) => panic!("Failed to acquire permit. This should never happen! Please, report on https://github.com/quickwit-oss/quickwit/issues."), + Poll::Ready(Err(_)) => panic!( + "Failed to acquire permit. This should never happen! Please, report on https://github.com/quickwit-oss/quickwit/issues." 
+ ), Poll::Pending => Poll::Pending, } } diff --git a/quickwit/quickwit-serve/src/lib.rs b/quickwit/quickwit-serve/src/lib.rs index cae1f3c37cc..2026f65dcf1 100644 --- a/quickwit/quickwit-serve/src/lib.rs +++ b/quickwit/quickwit-serve/src/lib.rs @@ -127,6 +127,7 @@ use warp::{Filter, Rejection}; pub use crate::build_info::{BuildInfo, RuntimeInfo}; pub use crate::index_api::{ListSplitsQueryParams, ListSplitsResponse}; pub use crate::ingest_api::{RestIngestResponse, RestParseFailure}; +use crate::metrics::CIRCUIT_BREAK_TOTAL; use crate::rate_modulator::RateModulator; #[cfg(test)] use crate::rest::recover_fn; @@ -925,7 +926,7 @@ fn ingester_service_layer_stack( PersistCircuitBreakerEvaluator.make_layer( 3, Duration::from_millis(500), - crate::metrics::CIRCUIT_BREAK_TOTAL.clone(), + CIRCUIT_BREAK_TOTAL.clone(), ), ) .stack_open_replication_stream_layer(quickwit_common::tower::OneTaskPerCallLayer) diff --git a/quickwit/quickwit-serve/src/load_shield.rs b/quickwit/quickwit-serve/src/load_shield.rs index 37b95cedee0..08339902525 100644 --- a/quickwit/quickwit-serve/src/load_shield.rs +++ b/quickwit/quickwit-serve/src/load_shield.rs @@ -17,6 +17,7 @@ use std::time::Duration; use quickwit_metrics::{Gauge, GaugeGuard, gauge}; use tokio::sync::{Semaphore, SemaphorePermit}; +use crate::metrics::{ONGOING_REQUESTS, PENDING_REQUESTS}; use crate::rest::TooManyRequests; pub struct LoadShield { @@ -43,9 +44,8 @@ impl LoadShield { quickwit_common::get_from_env_opt(&max_concurrency_env_key, false); let in_flight_semaphore_opt = max_in_flight_opt.map(Semaphore::new); let concurrency_semaphore_opt = max_concurrency_opt.map(Semaphore::new); - let labels = crate::metrics::ENDPOINT_GROUP_LABELS.with_values([endpoint_group]); - let pending_gauge = gauge!(parent: &crate::metrics::PENDING_REQUESTS, labels: &labels); - let ongoing_gauge = gauge!(parent: &crate::metrics::ONGOING_REQUESTS, labels: &labels); + let pending_gauge = gauge!(parent: PENDING_REQUESTS, "endpoint_group" => 
endpoint_group); + let ongoing_gauge = gauge!(parent: ONGOING_REQUESTS, "endpoint_group" => endpoint_group); LoadShield { in_flight_semaphore_opt, concurrency_semaphore_opt, diff --git a/quickwit/quickwit-serve/src/metrics.rs b/quickwit/quickwit-serve/src/metrics.rs index 798fbe56235..79e8aac6861 100644 --- a/quickwit/quickwit-serve/src/metrics.rs +++ b/quickwit/quickwit-serve/src/metrics.rs @@ -15,10 +15,7 @@ use std::sync::LazyLock; use quickwit_common::metrics::exponential_buckets; -use quickwit_metrics::{Counter, Gauge, Histogram, Labels, counter, gauge, histogram}; - -pub(crate) const HTTP_REQUEST_LABELS: Labels<2> = Labels::new(["method", "status_code"]); -pub(crate) const ENDPOINT_GROUP_LABELS: Labels<1> = Labels::new(["endpoint_group"]); +use quickwit_metrics::{Counter, Gauge, Histogram, counter, gauge, histogram}; pub(crate) static HTTP_REQUESTS_TOTAL: LazyLock = LazyLock::new(|| { counter!( diff --git a/quickwit/quickwit-serve/src/rest.rs b/quickwit/quickwit-serve/src/rest.rs index 6911671a2cd..84eb103dc60 100644 --- a/quickwit/quickwit-serve/src/rest.rs +++ b/quickwit/quickwit-serve/src/rest.rs @@ -48,6 +48,7 @@ use crate::index_api::index_management_handlers; use crate::indexing_api::indexing_get_handler; use crate::ingest_api::ingest_api_handlers; use crate::jaeger_api::jaeger_api_handlers; +use crate::metrics::{HTTP_REQUESTS_TOTAL, REQUEST_DURATION_SECS}; use crate::metrics_api::metrics_handler; use crate::node_info_handler::node_info_handler; use crate::otlp_api::otlp_ingest_api_handlers; @@ -140,13 +141,18 @@ pub(crate) async fn start_rest_server( let status = info.status(); let method = info.method().as_str().to_string(); let status_code = status.as_str().to_string(); - let labels = crate::metrics::HTTP_REQUEST_LABELS.with_values([method, status_code]); histogram!( - parent: &crate::metrics::REQUEST_DURATION_SECS, - labels: &labels, + parent: REQUEST_DURATION_SECS, + "method" => method.clone(), + "status_code" => status_code.clone(), ) 
.record(elapsed.as_secs_f64()); - counter!(parent: &crate::metrics::HTTP_REQUESTS_TOTAL, labels: &labels).increment(1); + counter!( + parent: HTTP_REQUESTS_TOTAL, + "method" => method, + "status_code" => status_code, + ) + .increment(1); }); // Docs routes let api_doc = warp::path("openapi.json") diff --git a/quickwit/quickwit-storage/src/cache/quickwit_cache.rs b/quickwit/quickwit-storage/src/cache/quickwit_cache.rs index f5bc0a5d5ae..37144164edd 100644 --- a/quickwit/quickwit-storage/src/cache/quickwit_cache.rs +++ b/quickwit/quickwit-storage/src/cache/quickwit_cache.rs @@ -19,9 +19,9 @@ use std::sync::Arc; use async_trait::async_trait; use quickwit_config::CacheConfig; -use crate::OwnedBytes; use crate::cache::{MemorySizedCache, StorageCache}; use crate::metrics::CacheMetrics; +use crate::{FAST_FIELD_CACHE, OwnedBytes}; const FULL_SLICE: Range = 0..usize::MAX; @@ -41,7 +41,7 @@ impl QuickwitCache { /// Creates a [`QuickwitCache`] with a cache on fast fields. pub fn new(cache_config: &CacheConfig) -> Self { let mut quickwit_cache = QuickwitCache::empty(); - let fast_field_cache_counters: &'static CacheMetrics = &crate::FAST_FIELD_CACHE; + let fast_field_cache_counters: &'static CacheMetrics = &FAST_FIELD_CACHE; quickwit_cache.add_route( ".fast", Arc::new(SimpleCache::from_config( diff --git a/quickwit/quickwit-storage/src/file_descriptor_cache.rs b/quickwit/quickwit-storage/src/file_descriptor_cache.rs index 28d37f19ae8..175e6f1f220 100644 --- a/quickwit/quickwit-storage/src/file_descriptor_cache.rs +++ b/quickwit/quickwit-storage/src/file_descriptor_cache.rs @@ -23,6 +23,7 @@ use tantivy::directory::OwnedBytes; use tokio::sync::{OwnedSemaphorePermit, Semaphore}; use ulid::Ulid; +use crate::FD_CACHE_METRICS; use crate::metrics::SingleCacheMetrics; pub struct FileDescriptorCache { @@ -88,7 +89,7 @@ impl FileDescriptorCache { Self::new( NonZeroU32::new(max_fd_limit).unwrap(), fd_cache_capacity, - crate::FD_CACHE_METRICS.cache_metrics.clone(), + 
FD_CACHE_METRICS.cache_metrics.clone(), ) } diff --git a/quickwit/quickwit-storage/src/metrics.rs b/quickwit/quickwit-storage/src/metrics.rs index 3173b6f80c8..149236eaf89 100644 --- a/quickwit/quickwit-storage/src/metrics.rs +++ b/quickwit/quickwit-storage/src/metrics.rs @@ -19,7 +19,7 @@ use std::collections::HashMap; use std::sync::{LazyLock, RwLock}; use quickwit_config::CacheConfig; -use quickwit_metrics::{Counter, Gauge, GaugeGuard, Histogram, Labels, counter, gauge, histogram}; +use quickwit_metrics::{Counter, Gauge, GaugeGuard, Histogram, counter, gauge, histogram}; static GET_SLICE_TIMEOUT_OUTCOME_TOTAL: LazyLock = LazyLock::new(|| { counter!( @@ -119,8 +119,6 @@ pub static OBJECT_STORAGE_GET_ERRORS_TOTAL: LazyLock = LazyLock::new(|| ) }); -pub(crate) const OBJECT_STORAGE_GET_ERROR_LABELS: Labels<1> = Labels::new(["code"]); - pub static OBJECT_STORAGE_GET_SLICE_IN_FLIGHT_COUNT: LazyLock = LazyLock::new(|| { gauge!( name: "object_storage_get_slice_in_flight_count", @@ -190,17 +188,16 @@ pub struct SingleCacheMetrics { impl CacheMetrics { pub fn for_component(component_name: &str) -> Self { let component_name = component_name.to_string(); - let labels = CACHE_LABELS.with_values([component_name.clone()]); CacheMetrics { - component_name, + component_name: component_name.clone(), cache_metrics: SingleCacheMetrics { - in_cache_count: gauge!(parent: CACHE_IN_CACHE_COUNT, labels: &labels), - in_cache_num_bytes: gauge!(parent: CACHE_IN_CACHE_NUM_BYTES, labels: &labels), - hits_num_items: counter!(parent: CACHE_HITS_TOTAL, labels: &labels), - hits_num_bytes: counter!(parent: CACHE_HITS_BYTES, labels: &labels), - misses_num_items: counter!(parent: CACHE_MISSES_TOTAL, labels: &labels), - evict_num_items: counter!(parent: CACHE_EVICT_TOTAL, labels: &labels), - evict_num_bytes: counter!(parent: CACHE_EVICT_BYTES, labels: &labels), + in_cache_count: gauge!(parent: CACHE_IN_CACHE_COUNT, "component_name" => component_name.clone()), + in_cache_num_bytes: gauge!(parent: 
CACHE_IN_CACHE_NUM_BYTES, "component_name" => component_name.clone()), + hits_num_items: counter!(parent: CACHE_HITS_TOTAL, "component_name" => component_name.clone()), + hits_num_bytes: counter!(parent: CACHE_HITS_BYTES, "component_name" => component_name.clone()), + misses_num_items: counter!(parent: CACHE_MISSES_TOTAL, "component_name" => component_name.clone()), + evict_num_items: counter!(parent: CACHE_EVICT_TOTAL, "component_name" => component_name.clone()), + evict_num_bytes: counter!(parent: CACHE_EVICT_BYTES, "component_name" => component_name), }, virtual_caches_metrics: RwLock::default(), } @@ -214,36 +211,49 @@ impl CacheMetrics { let capacity = config.capacity().as_u64().to_string(); let policy = config.policy().to_string(); - let labels = - VIRTUAL_CACHE_LABELS.with_values([self.component_name.clone(), capacity, policy]); + let component_name = self.component_name.clone(); let new_virtual_cache_metrics = SingleCacheMetrics { in_cache_count: gauge!( parent: VIRTUAL_CACHE_IN_CACHE_COUNT, - labels: &labels, + "component_name" => component_name.clone(), + "capacity" => capacity.clone(), + "policy" => policy.clone(), ), in_cache_num_bytes: gauge!( parent: VIRTUAL_CACHE_IN_CACHE_NUM_BYTES, - labels: &labels, + "component_name" => component_name.clone(), + "capacity" => capacity.clone(), + "policy" => policy.clone(), ), hits_num_items: counter!( parent: VIRTUAL_CACHE_HITS_TOTAL, - labels: &labels, + "component_name" => component_name.clone(), + "capacity" => capacity.clone(), + "policy" => policy.clone(), ), hits_num_bytes: counter!( parent: VIRTUAL_CACHE_HITS_BYTES, - labels: &labels, + "component_name" => component_name.clone(), + "capacity" => capacity.clone(), + "policy" => policy.clone(), ), misses_num_items: counter!( parent: VIRTUAL_CACHE_MISSES_TOTAL, - labels: &labels, + "component_name" => component_name.clone(), + "capacity" => capacity.clone(), + "policy" => policy.clone(), ), evict_num_items: counter!( parent: VIRTUAL_CACHE_EVICT_TOTAL, - 
labels: &labels, + "component_name" => component_name.clone(), + "capacity" => capacity.clone(), + "policy" => policy.clone(), ), evict_num_bytes: counter!( parent: VIRTUAL_CACHE_EVICT_BYTES, - labels: &labels, + "component_name" => component_name, + "capacity" => capacity, + "policy" => policy, ), }; @@ -256,9 +266,6 @@ impl CacheMetrics { } } -const CACHE_LABELS: Labels<1> = Labels::new(["component_name"]); -const VIRTUAL_CACHE_LABELS: Labels<3> = Labels::new(["component_name", "capacity", "policy"]); - static CACHE_IN_CACHE_COUNT: LazyLock = LazyLock::new(|| { gauge!( name: "in_cache_count", diff --git a/quickwit/quickwit-storage/src/object_storage/error.rs b/quickwit/quickwit-storage/src/object_storage/error.rs index 24b1e3b118e..9c80cbef34a 100644 --- a/quickwit/quickwit-storage/src/object_storage/error.rs +++ b/quickwit/quickwit-storage/src/object_storage/error.rs @@ -24,7 +24,7 @@ use aws_sdk_s3::operation::put_object::PutObjectError; use aws_sdk_s3::operation::upload_part::UploadPartError; use quickwit_metrics::counter; -use crate::{StorageError, StorageErrorKind}; +use crate::{OBJECT_STORAGE_GET_ERRORS_TOTAL, StorageError, StorageErrorKind}; impl From> for StorageError where E: std::error::Error + ToStorageErrorKind + Send + Sync + 'static @@ -64,8 +64,11 @@ pub trait ToStorageErrorKind { impl ToStorageErrorKind for GetObjectError { fn to_storage_error_kind(&self) -> StorageErrorKind { let error_code = self.code().unwrap_or("unknown").to_string(); - let labels = crate::metrics::OBJECT_STORAGE_GET_ERROR_LABELS.with_values([error_code]); - counter!(parent: &crate::OBJECT_STORAGE_GET_ERRORS_TOTAL, labels: &labels).increment(1); + counter!( + parent: OBJECT_STORAGE_GET_ERRORS_TOTAL, + "code" => error_code, + ) + .increment(1); match self { GetObjectError::InvalidObjectState(_) => StorageErrorKind::Service, GetObjectError::NoSuchKey(_) => StorageErrorKind::NotFound, diff --git a/quickwit/quickwit-storage/src/split_cache/split_table.rs 
b/quickwit/quickwit-storage/src/split_cache/split_table.rs index 50e841025d4..ec4d7c23bfe 100644 --- a/quickwit/quickwit-storage/src/split_cache/split_table.rs +++ b/quickwit/quickwit-storage/src/split_cache/split_table.rs @@ -21,6 +21,8 @@ use quickwit_common::uri::Uri; use quickwit_config::SplitCacheLimits; use ulid::Ulid; +use crate::metrics::SEARCHER_SPLIT_CACHE; + type LastAccessDate = u64; /// Maximum number of splits to track. @@ -152,19 +154,19 @@ impl SplitTable { Status::Downloading { .. } => &mut self.downloading_splits, Status::OnDisk { num_bytes } => { self.on_disk_bytes -= num_bytes; - crate::metrics::SEARCHER_SPLIT_CACHE + SEARCHER_SPLIT_CACHE .cache_metrics .in_cache_count .decrement(1.0); - crate::metrics::SEARCHER_SPLIT_CACHE + SEARCHER_SPLIT_CACHE .cache_metrics .in_cache_num_bytes .decrement(num_bytes as f64); - crate::metrics::SEARCHER_SPLIT_CACHE + SEARCHER_SPLIT_CACHE .cache_metrics .evict_num_items .increment(1); - crate::metrics::SEARCHER_SPLIT_CACHE + SEARCHER_SPLIT_CACHE .cache_metrics .evict_num_bytes .increment(num_bytes); @@ -216,11 +218,11 @@ impl SplitTable { Status::Downloading { .. 
} => self.downloading_splits.insert(split_info.split_key), Status::OnDisk { num_bytes } => { self.on_disk_bytes += num_bytes; - crate::metrics::SEARCHER_SPLIT_CACHE + SEARCHER_SPLIT_CACHE .cache_metrics .in_cache_count .increment(1.0); - crate::metrics::SEARCHER_SPLIT_CACHE + SEARCHER_SPLIT_CACHE .cache_metrics .in_cache_num_bytes .increment(num_bytes as f64); From 39da9cfb91d2a38868b45062c0856a23cec45e10 Mon Sep 17 00:00:00 2001 From: Luca Cominardi Date: Tue, 5 May 2026 10:46:21 +0200 Subject: [PATCH 32/54] Minor formatting cleanup after merge conflict resolution Co-authored-by: Cursor --- quickwit/quickwit-jaeger/src/lib.rs | 4 ++-- quickwit/quickwit-opentelemetry/src/otlp/logs.rs | 2 +- quickwit/quickwit-opentelemetry/src/otlp/otel_metrics.rs | 2 +- quickwit/quickwit-search/src/scroll_context.rs | 6 ++---- quickwit/quickwit-search/src/search_permit_provider.rs | 3 +-- 5 files changed, 7 insertions(+), 10 deletions(-) diff --git a/quickwit/quickwit-jaeger/src/lib.rs b/quickwit/quickwit-jaeger/src/lib.rs index 8a89b6984a8..9e781130210 100644 --- a/quickwit/quickwit-jaeger/src/lib.rs +++ b/quickwit/quickwit-jaeger/src/lib.rs @@ -434,7 +434,7 @@ impl JaegerService { parent: REQUEST_DURATION_SECONDS, labels: label_values!( OPERATION_INDEX_ERROR_LABELS, - [operation_name, OTEL_TRACES_INDEX_ID, "false",] + [operation_name, OTEL_TRACES_INDEX_ID, "false"] ), ) .record(elapsed); @@ -458,7 +458,7 @@ pub(crate) fn record_error(operation_name: &'static str, request_start: Instant) parent: REQUEST_DURATION_SECONDS, labels: label_values!( OPERATION_INDEX_ERROR_LABELS, - [operation_name, OTEL_TRACES_INDEX_ID, "true",] + [operation_name, OTEL_TRACES_INDEX_ID, "true"] ), ) .record(elapsed); diff --git a/quickwit/quickwit-opentelemetry/src/otlp/logs.rs b/quickwit/quickwit-opentelemetry/src/otlp/logs.rs index 6f6a07ecab5..0b2fb5b50bc 100644 --- a/quickwit/quickwit-opentelemetry/src/otlp/logs.rs +++ b/quickwit/quickwit-opentelemetry/src/otlp/logs.rs @@ -243,7 +243,7 @@ impl 
OtlpGrpcLogsService { let num_bytes = doc_batch.num_bytes() as u64; self.store_logs(index_id.clone(), doc_batch).await?; - let labels = label_values!(OTLP_GRPC_LABELS, ["logs", index_id, "grpc", "protobuf",]); + let labels = label_values!(OTLP_GRPC_LABELS, ["logs", index_id, "grpc", "protobuf"]); counter!( parent: INGESTED_LOG_RECORDS_TOTAL, labels: labels, diff --git a/quickwit/quickwit-opentelemetry/src/otlp/otel_metrics.rs b/quickwit/quickwit-opentelemetry/src/otlp/otel_metrics.rs index 67455bef9f2..95e9d0f9262 100644 --- a/quickwit/quickwit-opentelemetry/src/otlp/otel_metrics.rs +++ b/quickwit/quickwit-opentelemetry/src/otlp/otel_metrics.rs @@ -239,7 +239,7 @@ impl OtlpGrpcMetricsService { let num_bytes = doc_batch.num_bytes() as u64; self.store_metrics(index_id.clone(), doc_batch).await?; - let labels = label_values!(OTLP_GRPC_LABELS, ["metrics", index_id, "grpc", "protobuf",]); + let labels = label_values!(OTLP_GRPC_LABELS, ["metrics", index_id, "grpc", "protobuf"]); counter!( parent: INGESTED_DATA_POINTS_TOTAL, labels: labels, diff --git a/quickwit/quickwit-search/src/scroll_context.rs b/quickwit/quickwit-search/src/scroll_context.rs index 52121caf647..df23f91f9ca 100644 --- a/quickwit/quickwit-search/src/scroll_context.rs +++ b/quickwit/quickwit-search/src/scroll_context.rs @@ -149,10 +149,8 @@ impl Default for MiniKV { impl MiniKV { pub async fn put(&self, key: Vec, payload: Vec, ttl: Duration) { - let metric_guard = GaugeGuard::new( - &SEARCHER_LOCAL_KV_STORE_SIZE_BYTES, - payload.len() as f64, - ); + let metric_guard = + GaugeGuard::new(&SEARCHER_LOCAL_KV_STORE_SIZE_BYTES, payload.len() as f64); let mut cache_lock = self.ttl_with_cache.write().await; cache_lock.insert( key, diff --git a/quickwit/quickwit-search/src/search_permit_provider.rs b/quickwit/quickwit-search/src/search_permit_provider.rs index 9244f4e96c0..383047c5a1a 100644 --- a/quickwit/quickwit-search/src/search_permit_provider.rs +++ b/quickwit/quickwit-search/src/search_permit_provider.rs 
@@ -336,8 +336,7 @@ impl SearchPermitActor { fn assign_available_permits(&mut self) { while let Some(permit_request) = self.pop_next_request_if_serviceable() { - let ongoing_gauge_guard = - GaugeGuard::new(&LEAF_SEARCH_SINGLE_SPLIT_TASKS_ONGOING, 1.0); + let ongoing_gauge_guard = GaugeGuard::new(&LEAF_SEARCH_SINGLE_SPLIT_TASKS_ONGOING, 1.0); self.total_memory_allocated += permit_request.permit_size; self.num_warmup_slots_available -= 1; permit_request From 53250c8ddf5ee8a3e522e4f905798cf25a5f440f Mon Sep 17 00:00:00 2001 From: Luca Cominardi Date: Tue, 5 May 2026 11:01:44 +0200 Subject: [PATCH 33/54] Improve labels.rs docs and prefix doc(hidden) methods with __ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename LabelValues::hash → __hash and to_labels → __to_labels to match the __with_values convention for internal API. Update label_values! doc to recommend inline "key" => value for single-use labels and clarify that the labels: macro arm borrows internally. Co-authored-by: Cursor --- quickwit/quickwit-metrics/src/counter.rs | 4 ++-- quickwit/quickwit-metrics/src/gauge.rs | 4 ++-- quickwit/quickwit-metrics/src/histogram.rs | 4 ++-- quickwit/quickwit-metrics/src/labels.rs | 26 +++++++++++++--------- 4 files changed, 22 insertions(+), 16 deletions(-) diff --git a/quickwit/quickwit-metrics/src/counter.rs b/quickwit/quickwit-metrics/src/counter.rs index 8e4ab12ea82..ba209f9b0d1 100644 --- a/quickwit/quickwit-metrics/src/counter.rs +++ b/quickwit/quickwit-metrics/src/counter.rs @@ -276,9 +276,9 @@ macro_rules! 
counter { register_fn: $crate::__counter_get_or_register, parent: $parent, metric_info: $parent.__info(), - hash: label_values.hash($parent.get_hash()), + hash: label_values.__hash($parent.get_hash()), label_count: label_values.len(), - labels_iter: label_values.to_labels() + labels_iter: label_values.__to_labels() ) }}; } diff --git a/quickwit/quickwit-metrics/src/gauge.rs b/quickwit/quickwit-metrics/src/gauge.rs index 671ca29850c..ae441328416 100644 --- a/quickwit/quickwit-metrics/src/gauge.rs +++ b/quickwit/quickwit-metrics/src/gauge.rs @@ -329,9 +329,9 @@ macro_rules! gauge { parent: $parent, metric_info: $parent.__info(), // Seed with parent hash, fold in each (name, value) pair. - hash: label_values.hash($parent.get_hash()), + hash: label_values.__hash($parent.get_hash()), label_count: label_values.len(), - labels_iter: label_values.to_labels() + labels_iter: label_values.__to_labels() ) }}; } diff --git a/quickwit/quickwit-metrics/src/histogram.rs b/quickwit/quickwit-metrics/src/histogram.rs index fa713658c6d..2dee894c32a 100644 --- a/quickwit/quickwit-metrics/src/histogram.rs +++ b/quickwit/quickwit-metrics/src/histogram.rs @@ -317,9 +317,9 @@ macro_rules! histogram { // Unwrap HistogramConfig -> MetricInfo for the extension. metric_info: $parent.__info().info, // Seed with parent hash, fold in each (name, value) pair. - hash: label_values.hash($parent.get_hash()), + hash: label_values.__hash($parent.get_hash()), label_count: label_values.len(), - labels_iter: label_values.to_labels() + labels_iter: label_values.__to_labels() ) }}; } diff --git a/quickwit/quickwit-metrics/src/labels.rs b/quickwit/quickwit-metrics/src/labels.rs index b01edbf4e92..8e2a75f9d48 100644 --- a/quickwit/quickwit-metrics/src/labels.rs +++ b/quickwit/quickwit-metrics/src/labels.rs @@ -27,6 +27,10 @@ use crate::__key_hash; /// Each value is converted individually via `Into`, so you /// can freely mix `&'static str`, `String`, `Cow<'static, str>`, etc. 
/// +/// Use this macro when the same label names are shared across multiple +/// metrics or call sites. For single-use labels, prefer the inline +/// `"key" => value` syntax directly in the metric macro. +/// /// # Example /// /// ```rust,ignore @@ -38,7 +42,9 @@ use crate::__key_hash; /// // Mixed types — &'static str and String — just work: /// let lv = label_values!(GC_LABELS, ["success", split_type.to_string()]); /// -/// counter!(parent: *GC_COUNTER, labels: lv).increment(1); +/// // Reuse the same LabelValues across multiple metrics: +/// counter!(parent: GC_COUNTER, labels: lv).increment(1); +/// gauge!(parent: GC_GAUGE, labels: lv).set(42.0); /// ``` #[macro_export] macro_rules! label_values { @@ -91,10 +97,10 @@ impl Labels { /// Concrete label names + values produced by [`label_values!`]. /// -/// Passed by reference (`&LabelValues`) to the `labels:` macro arm so -/// a single instance can be reused across multiple metric calls. Cloning -/// of the inner `SharedString` values only happens on the cold path -/// (global DashMap miss). +/// The `labels:` macro arm borrows the value internally, so a single +/// instance can be reused across multiple metric calls. Cloning of the +/// inner `SharedString` values only happens on the cold path (cache miss +/// in the thread-local or global DashMap). #[derive(Clone)] pub struct LabelValues { names: [&'static str; N], @@ -102,11 +108,11 @@ pub struct LabelValues { } impl LabelValues { - /// Computes the order-independent cache-key hash, seeded with a - /// parent hash so the result is fully composable: - /// `hash(parent, [A,B]) == __key_hash(parent, [A,B])`. + /// Computes an order-independent cache-key hash by folding per-label + /// hashes into `seed` via commutative wrapping addition, so the result + /// is fully composable with the parent's hash. 
#[doc(hidden)] - pub fn hash(&self, seed: u64) -> u64 { + pub fn __hash(&self, seed: u64) -> u64 { __key_hash( seed, self.names @@ -119,7 +125,7 @@ impl LabelValues { /// Builds `metrics::Label`s by cloning the stored names and values. /// Only called on the cold path (global DashMap miss). #[doc(hidden)] - pub fn to_labels(&self) -> impl Iterator + '_ { + pub fn __to_labels(&self) -> impl Iterator + '_ { self.names .iter() .zip(self.values.iter()) From d75c1a24cdc8e8d03708c762d9597c6b0ff37ee7 Mon Sep 17 00:00:00 2001 From: Luca Cominardi Date: Tue, 5 May 2026 11:37:48 +0200 Subject: [PATCH 34/54] =?UTF-8?q?Rename=20Labels=E2=86=92LabelNames,=20Lab?= =?UTF-8?q?elValues=E2=86=92Labels;=20add=20const=20labels!=20macro?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Rename `Labels` to `LabelNames` (label-name template) - Rename `LabelValues` to `Labels` (concrete name+value pairs) - Add `labels!` macro for const-constructible all-static label pairs - Remove Counter fields from CountingUdpSocket, use statics directly - Inline get_actor_inboxes_count_gauge_guard into its single call site Co-authored-by: Cursor --- quickwit/quickwit-actors/src/mailbox.rs | 6 +- quickwit/quickwit-cluster/src/lib.rs | 21 ++---- quickwit/quickwit-common/src/io.rs | 15 ++-- quickwit/quickwit-common/src/runtimes.rs | 11 +-- .../src/tower/circuit_breaker.rs | 4 +- .../quickwit-control-plane/src/metrics.rs | 4 +- quickwit/quickwit-jaeger/src/metrics.rs | 8 +- .../benches/quickwit_metrics.rs | 4 +- .../quickwit-metrics/examples/http_service.rs | 6 +- quickwit/quickwit-metrics/src/counter.rs | 4 +- quickwit/quickwit-metrics/src/gauge.rs | 4 +- quickwit/quickwit-metrics/src/histogram.rs | 4 +- quickwit/quickwit-metrics/src/inner.rs | 2 +- quickwit/quickwit-metrics/src/labels.rs | 73 +++++++++++++------ quickwit/quickwit-metrics/src/lib.rs | 10 +-- .../src/otlp/metrics.rs | 10 +-- quickwit/quickwit-search/src/metrics.rs | 4 +- 17 files changed, 106 
insertions(+), 84 deletions(-) diff --git a/quickwit/quickwit-actors/src/mailbox.rs b/quickwit/quickwit-actors/src/mailbox.rs index 9534658a248..2cb173ba109 100644 --- a/quickwit/quickwit-actors/src/mailbox.rs +++ b/quickwit/quickwit-actors/src/mailbox.rs @@ -393,10 +393,6 @@ impl Inbox { } } -fn get_actor_inboxes_count_gauge_guard() -> GaugeGuard { - GaugeGuard::new(&INBOX_GAUGE, 1.0) -} - pub(crate) fn create_mailbox( actor_name: String, queue_capacity: QueueCapacity, @@ -414,7 +410,7 @@ pub(crate) fn create_mailbox( }; let inner = InboxInner { rx, - _inboxes_count_gauge_guard: get_actor_inboxes_count_gauge_guard(), + _inboxes_count_gauge_guard: GaugeGuard::new(&INBOX_GAUGE, 1.0), }; let inbox = Inbox { inner: Arc::new(inner), diff --git a/quickwit/quickwit-cluster/src/lib.rs b/quickwit/quickwit-cluster/src/lib.rs index 0a0d532ee58..0ac923dfc7f 100644 --- a/quickwit/quickwit-cluster/src/lib.rs +++ b/quickwit/quickwit-cluster/src/lib.rs @@ -34,7 +34,6 @@ pub use grpc_service::cluster_grpc_server; use quickwit_common::tower::ClientGrpcConfig; use quickwit_config::service::QuickwitService; use quickwit_config::{GrpcConfig, NodeConfig, TlsConfig}; -use quickwit_metrics::Counter; use quickwit_proto::indexing::CpuCapacity; use quickwit_proto::ingest::ingester::IngesterStatus; use quickwit_proto::tonic::transport::{Certificate, ClientTlsConfig, Identity}; @@ -78,10 +77,6 @@ struct CountingUdpTransport; struct CountingUdpSocket { socket: UdpSocket, - gossip_recv: Counter, - gossip_recv_bytes: Counter, - gossip_send: Counter, - gossip_send_bytes: Counter, } #[async_trait] @@ -89,16 +84,16 @@ impl Socket for CountingUdpSocket { async fn send(&mut self, to: SocketAddr, msg: ChitchatMessage) -> anyhow::Result<()> { let msg_len = msg.serialized_len() as u64; self.socket.send(to, msg).await?; - self.gossip_send.increment(1); - self.gossip_send_bytes.increment(msg_len); + GOSSIP_SENT_MESSAGES_TOTAL.increment(1); + GOSSIP_SENT_BYTES_TOTAL.increment(msg_len); Ok(()) } async fn 
recv(&mut self) -> anyhow::Result<(SocketAddr, ChitchatMessage)> { let (socket_addr, msg) = self.socket.recv().await?; - self.gossip_recv.increment(1); + GOSSIP_RECV_MESSAGES_TOTAL.increment(1); let msg_len = msg.serialized_len() as u64; - self.gossip_recv_bytes.increment(msg_len); + GOSSIP_RECV_BYTES_TOTAL.increment(msg_len); Ok((socket_addr, msg)) } } @@ -107,13 +102,7 @@ impl Socket for CountingUdpSocket { impl Transport for CountingUdpTransport { async fn open(&self, listen_addr: SocketAddr) -> anyhow::Result> { let socket = UdpSocket::open(listen_addr).await?; - Ok(Box::new(CountingUdpSocket { - socket, - gossip_recv: GOSSIP_RECV_MESSAGES_TOTAL.clone(), - gossip_recv_bytes: GOSSIP_RECV_BYTES_TOTAL.clone(), - gossip_send: GOSSIP_SENT_MESSAGES_TOTAL.clone(), - gossip_send_bytes: GOSSIP_SENT_BYTES_TOTAL.clone(), - })) + Ok(Box::new(CountingUdpSocket { socket })) } } diff --git a/quickwit/quickwit-common/src/io.rs b/quickwit/quickwit-common/src/io.rs index 5b1fc2d0429..adbdd57317d 100644 --- a/quickwit/quickwit-common/src/io.rs +++ b/quickwit/quickwit-common/src/io.rs @@ -52,7 +52,7 @@ static WRITE_BYTES: LazyLock = LazyLock::new(|| { counter!( name: "write_bytes", description: "Number of bytes written by a given component in [indexer, merger, deleter, split_downloader_{merge,delete}]", - subsystem: "", + subsystem: "io", ) }); @@ -272,18 +272,22 @@ pub trait IoControlsAccess: Sized { } fn apply(&self, f: F) -> R - where F: Fn(&IoControls) -> R; + where + F: Fn(&IoControls) -> R; } impl IoControlsAccess for IoControls { fn apply(&self, f: F) -> R - where F: Fn(&IoControls) -> R { + where + F: Fn(&IoControls) -> R, + { f(self) } } impl ControlledWrite -where A: IoControlsAccess +where + A: IoControlsAccess, { pub fn underlying_wrt(&mut self) -> &mut W { &mut self.underlying_wrt @@ -296,7 +300,8 @@ where A: IoControlsAccess } impl io::Write for ControlledWrite -where A: IoControlsAccess +where + A: IoControlsAccess, { fn write(&mut self, buf: &[u8]) -> io::Result { 
let buf = truncate_bytes(buf); diff --git a/quickwit/quickwit-common/src/runtimes.rs b/quickwit/quickwit-common/src/runtimes.rs index d2436b1f61d..451ff733b46 100644 --- a/quickwit/quickwit-common/src/runtimes.rs +++ b/quickwit/quickwit-common/src/runtimes.rs @@ -17,7 +17,7 @@ use std::sync::OnceLock; use std::sync::atomic::{AtomicUsize, Ordering}; use std::time::Duration; -use quickwit_metrics::{Counter, Gauge, counter, gauge}; +use quickwit_metrics::{Counter, Gauge, counter, gauge, labels}; use tokio::runtime::Runtime; use tokio_metrics::{RuntimeMetrics, RuntimeMonitor}; @@ -214,14 +214,15 @@ struct RuntimeMetricsRecorder { impl RuntimeMetricsRecorder { pub fn new(label: &'static str) -> Self { + let labels = labels!("runtime_type" => label); Self { - scheduled_tasks: gauge!(parent: TOKIO_SCHEDULED_TASKS, "runtime_type" => label), + scheduled_tasks: gauge!(parent: TOKIO_SCHEDULED_TASKS, labels: labels), worker_busy_duration_milliseconds_total: counter!( parent: TOKIO_WORKER_BUSY_DURATION_MILLISECONDS_TOTAL, - "runtime_type" => label, + labels: labels, ), - worker_busy_ratio: gauge!(parent: TOKIO_WORKER_BUSY_RATIO, "runtime_type" => label), - worker_threads: gauge!(parent: TOKIO_WORKER_THREADS, "runtime_type" => label), + worker_busy_ratio: gauge!(parent: TOKIO_WORKER_BUSY_RATIO, labels: labels), + worker_threads: gauge!(parent: TOKIO_WORKER_THREADS, labels: labels), } } diff --git a/quickwit/quickwit-common/src/tower/circuit_breaker.rs b/quickwit/quickwit-common/src/tower/circuit_breaker.rs index 53bebeef413..b42f68e8cd3 100644 --- a/quickwit/quickwit-common/src/tower/circuit_breaker.rs +++ b/quickwit/quickwit-common/src/tower/circuit_breaker.rs @@ -303,9 +303,9 @@ mod tests { const TIMEOUT: Duration = Duration::from_millis(500); let int_counter = counter!( - name: "circuit_break_total_test", + name: "circuit_break_total", description: "test circuit breaker counter", - subsystem: "", + subsystem: "test", ); let mut service = ServiceBuilder::new() 
.layer(TestCircuitBreakerEvaluator.make_layer(10, TIMEOUT, int_counter)) diff --git a/quickwit/quickwit-control-plane/src/metrics.rs b/quickwit/quickwit-control-plane/src/metrics.rs index 4875f2091a7..cde944f2e52 100644 --- a/quickwit/quickwit-control-plane/src/metrics.rs +++ b/quickwit/quickwit-control-plane/src/metrics.rs @@ -14,7 +14,7 @@ use std::sync::LazyLock; -use quickwit_metrics::{Counter, Gauge, Labels, counter, gauge}; +use quickwit_metrics::{Counter, Gauge, LabelNames, counter, gauge}; #[derive(Debug, Clone, Copy)] pub struct ShardLocalityMetrics { @@ -51,7 +51,7 @@ pub(crate) static OPEN_SHARDS: LazyLock = pub(crate) static CLOSED_SHARDS: LazyLock = LazyLock::new(|| gauge!(parent: SHARDS, "state" => "closed")); -pub(crate) const INDEX_ID_LABELS: Labels<1> = Labels::new(["index_id"]); +pub(crate) const INDEX_ID_LABELS: LabelNames<1> = LabelNames::new(["index_id"]); static INDEXED_SHARDS: LazyLock = LazyLock::new(|| { gauge!( diff --git a/quickwit/quickwit-jaeger/src/metrics.rs b/quickwit/quickwit-jaeger/src/metrics.rs index 4a53c75f41e..465a97c4c14 100644 --- a/quickwit/quickwit-jaeger/src/metrics.rs +++ b/quickwit/quickwit-jaeger/src/metrics.rs @@ -15,11 +15,11 @@ use std::sync::LazyLock; use quickwit_common::metrics::exponential_buckets; -use quickwit_metrics::{Counter, Histogram, Labels, counter, histogram}; +use quickwit_metrics::{Counter, Histogram, LabelNames, counter, histogram}; -pub(crate) const OPERATION_INDEX_LABELS: Labels<2> = Labels::new(["operation", "index"]); -pub(crate) const OPERATION_INDEX_ERROR_LABELS: Labels<3> = - Labels::new(["operation", "index", "error"]); +pub(crate) const OPERATION_INDEX_LABELS: LabelNames<2> = LabelNames::new(["operation", "index"]); +pub(crate) const OPERATION_INDEX_ERROR_LABELS: LabelNames<3> = + LabelNames::new(["operation", "index", "error"]); pub(crate) static REQUESTS_TOTAL: LazyLock = LazyLock::new(|| { counter!( diff --git a/quickwit/quickwit-metrics/benches/quickwit_metrics.rs 
b/quickwit/quickwit-metrics/benches/quickwit_metrics.rs index 07c0fb1e768..1b7ea379b30 100644 --- a/quickwit/quickwit-metrics/benches/quickwit_metrics.rs +++ b/quickwit/quickwit-metrics/benches/quickwit_metrics.rs @@ -738,8 +738,8 @@ fn observable_gauge(c: &mut Criterion) { // LABELS // --------------------------------------------------------------------------- -const LABELS_1: Labels<1> = Labels::new(["method"]); -const LABELS_3: Labels<3> = Labels::new(["method", "endpoint", "status"]); +const LABELS_1: LabelNames<1> = LabelNames::new(["method"]); +const LABELS_3: LabelNames<3> = LabelNames::new(["method", "endpoint", "status"]); fn labels_counter(c: &mut Criterion) { install_recorder(); diff --git a/quickwit/quickwit-metrics/examples/http_service.rs b/quickwit/quickwit-metrics/examples/http_service.rs index d98d263b24e..84fbe019b46 100644 --- a/quickwit/quickwit-metrics/examples/http_service.rs +++ b/quickwit/quickwit-metrics/examples/http_service.rs @@ -81,9 +81,9 @@ static HTTP_ACTIVE_CONNECTIONS_BY_REGION: LazyLock = LazyLock::new(|| { ) }); -// ─── Labels examples ─── +// ─── LabelNames examples ─── -const ROUTE_LABELS: Labels<2> = Labels::new(["method", "path"]); +const ROUTE_LABELS: LabelNames<2> = LabelNames::new(["method", "path"]); fn record_request(method: &'static str, path: &'static str, duration: f64, size: f64) { let route = label_values!(ROUTE_LABELS, [method, path]); @@ -180,7 +180,7 @@ fn main() { } println!(); - println!("=== Labels usage ==="); + println!("=== LabelNames usage ==="); for &(method, path, _) in &requests { let duration_ms: f64 = (path.len() as f64) * 0.013; let size: f64 = (path.len() * 100) as f64; diff --git a/quickwit/quickwit-metrics/src/counter.rs b/quickwit/quickwit-metrics/src/counter.rs index ba209f9b0d1..fae6d090a7e 100644 --- a/quickwit/quickwit-metrics/src/counter.rs +++ b/quickwit/quickwit-metrics/src/counter.rs @@ -264,8 +264,8 @@ macro_rules! counter { ) }; - // Parent extension via a pre-built LabelValues bundle. 
- // Same as the inline arm but hash and labels come from a LabelValues. + // Parent extension via a pre-built Labels bundle. + // Same as the inline arm but hash and labels come from a Labels. ( parent: $parent:expr, labels: $labels:expr $(,)? diff --git a/quickwit/quickwit-metrics/src/gauge.rs b/quickwit/quickwit-metrics/src/gauge.rs index ae441328416..9eeeabb737c 100644 --- a/quickwit/quickwit-metrics/src/gauge.rs +++ b/quickwit/quickwit-metrics/src/gauge.rs @@ -316,8 +316,8 @@ macro_rules! gauge { ) }; - // Parent extension via a pre-built LabelValues bundle. - // Same as the inline arm but hash and labels come from a LabelValues. + // Parent extension via a pre-built Labels bundle. + // Same as the inline arm but hash and labels come from a Labels. ( parent: $parent:expr, labels: $labels:expr $(,)? diff --git a/quickwit/quickwit-metrics/src/histogram.rs b/quickwit/quickwit-metrics/src/histogram.rs index 2dee894c32a..224d03e32b0 100644 --- a/quickwit/quickwit-metrics/src/histogram.rs +++ b/quickwit/quickwit-metrics/src/histogram.rs @@ -303,8 +303,8 @@ macro_rules! histogram { ) }; - // Parent extension via a pre-built LabelValues bundle. - // Same as the inline arm but hash and labels come from a LabelValues. + // Parent extension via a pre-built Labels bundle. + // Same as the inline arm but hash and labels come from a Labels. ( parent: $parent:expr, labels: $labels:expr $(,)? diff --git a/quickwit/quickwit-metrics/src/inner.rs b/quickwit/quickwit-metrics/src/inner.rs index 5db979403a9..06b1f124760 100644 --- a/quickwit/quickwit-metrics/src/inner.rs +++ b/quickwit/quickwit-metrics/src/inner.rs @@ -190,7 +190,7 @@ macro_rules! __metric_declaration { // ─── Metric extension helper ─── // -// Shared implementation for both metric extension arms (LabelValues and +// Shared implementation for both metric extension arms (Labels and // inline labels). Each metric macro pre-computes the hash and passes the // label-building logic as expression fragments. 
diff --git a/quickwit/quickwit-metrics/src/labels.rs b/quickwit/quickwit-metrics/src/labels.rs index 8e2a75f9d48..c6bbbf4d5d0 100644 --- a/quickwit/quickwit-metrics/src/labels.rs +++ b/quickwit/quickwit-metrics/src/labels.rs @@ -14,15 +14,17 @@ //! Reusable label templates for metric extension. //! -//! [`Labels`] holds label *names* at compile time; pair them with -//! values via the [`label_values!`] macro to get a [`LabelValues`] that +//! [`LabelNames`] holds label *names* at compile time; pair them with +//! values via the [`label_values!`] macro to get a [`Labels`] that //! the `labels:` macro arm can consume. This avoids repeating the same -//! label names at every call site and lets a single `LabelValues` be +//! label names at every call site and lets a single `Labels` be //! shared across counter, gauge, and histogram extensions. +use metrics::SharedString; + use crate::__key_hash; -/// Pairs a [`Labels`] template with concrete values, one per label name. +/// Pairs a [`LabelNames`] template with concrete values, one per label name. /// /// Each value is converted individually via `Into`, so you /// can freely mix `&'static str`, `String`, `Cow<'static, str>`, etc. 
@@ -34,15 +36,15 @@ use crate::__key_hash; /// # Example /// /// ```rust,ignore -/// const GC_LABELS: Labels<2> = Labels::new(["status", "split_type"]); +/// const GC_KEYS: LabelNames<2> = LabelNames::new(["status", "split_type"]); /// /// // All-static — zero allocation: -/// let lv = label_values!(GC_LABELS, ["success", "tantivy"]); +/// let lv = label_values!(GC_KEYS, ["success", "tantivy"]); /// /// // Mixed types — &'static str and String — just work: -/// let lv = label_values!(GC_LABELS, ["success", split_type.to_string()]); +/// let lv = label_values!(GC_KEYS, ["success", split_type.to_string()]); /// -/// // Reuse the same LabelValues across multiple metrics: +/// // Reuse the same Labels across multiple metrics: /// counter!(parent: GC_COUNTER, labels: lv).increment(1); /// gauge!(parent: GC_GAUGE, labels: lv).set(42.0); /// ``` @@ -53,33 +55,53 @@ macro_rules! label_values { }; } +/// Creates a const `Labels` from all-static key-value pairs. +/// +/// Every key and value must be `&'static str` literals. The result is a +/// `const` value — zero allocation, zero runtime cost. +/// +/// # Example +/// +/// ```rust,ignore +/// const LABELS: Labels<2> = labels!("env" => "prod", "region" => "us-east-1"); +/// ``` +#[macro_export] +macro_rules! labels { + ($($key:expr => $val:expr),+ $(,)?) => { + $crate::Labels::__from_parts( + [$($key),+], + [$($crate::__metrics::SharedString::from($val)),+], + ) + }; +} + /// A label-name template with a fixed number of slots. /// -/// `Labels` holds only the label *names* — it is `const`-constructible +/// `LabelNames` holds only the label *names* — it is `const`-constructible /// and carries no runtime data. Use the [`label_values!`] macro to pair -/// the names with concrete values, producing a [`LabelValues`] that +/// the names with concrete values, producing a [`Labels`] that /// the metric macros can consume. 
/// /// # Example /// /// ```rust,ignore -/// const SPLIT_LABELS: Labels<2> = Labels::new(["source", "level"]); +/// const SPLIT_KEYS: LabelNames<2> = LabelNames::new(["source", "level"]); /// /// // All the same type: -/// let lv = label_values!(SPLIT_LABELS, ["prod", "info"]); +/// let lv = label_values!(SPLIT_KEYS, ["prod", "info"]); /// /// // Mixed types: -/// let lv = label_values!(SPLIT_LABELS, [source_uid, level.to_string()]); +/// let lv = label_values!(SPLIT_KEYS, [source_uid, level.to_string()]); /// -/// // Reuse the same LabelValues across metrics: +/// // Reuse the same Labels across metrics: /// let c = counter!(parent: BASE_COUNTER, labels: lv); /// let g = gauge!(parent: BASE_GAUGE, labels: lv); /// ``` -pub struct Labels { +pub struct LabelNames { names: [&'static str; N], } -impl Labels { +impl LabelNames { /// Creates a label template from an array of label names. pub const fn new(names: [&'static str; N]) -> Self { Self { names } @@ -87,27 +109,36 @@ impl Labels { /// Internal plumbing used by [`label_values!`]. Not part of the public API. #[doc(hidden)] - pub fn __with_values>(&self, values: [V; N]) -> LabelValues { - LabelValues { + pub fn __with_values>(&self, values: [V; N]) -> Labels { + Labels { names: self.names, values: values.map(Into::into), } } } -/// Concrete label names + values produced by [`label_values!`]. +/// Concrete label names + values produced by [`label_values!`] or [`labels!`]. /// /// The `labels:` macro arm borrows the value internally, so a single /// instance can be reused across multiple metric calls. Cloning of the /// inner `SharedString` values only happens on the cold path (cache miss /// in the thread-local or global DashMap). #[derive(Clone)] -pub struct LabelValues { +pub struct Labels { names: [&'static str; N], values: [metrics::SharedString; N], } -impl LabelValues { +impl Labels { + /// Creates a fully-static `Labels` at compile time. 
+ /// + /// Prefer the [`labels!`] macro which calls this constructor and + /// avoids repeating the array-index boilerplate. + #[doc(hidden)] + pub const fn __from_parts(names: [&'static str; N], values: [SharedString; N]) -> Self { + Self { names, values } + } + /// Computes an order-independent cache-key hash by folding per-label /// hashes into `seed` via commutative wrapping addition, so the result /// is fully composable with the parent's hash. diff --git a/quickwit/quickwit-metrics/src/lib.rs b/quickwit/quickwit-metrics/src/lib.rs index e674abbf0cc..0dd1875a3e6 100644 --- a/quickwit/quickwit-metrics/src/lib.rs +++ b/quickwit/quickwit-metrics/src/lib.rs @@ -78,17 +78,17 @@ //! by_method.increment(1); //! ``` //! -//! ### 3. Reusable label templates with `Labels` +//! ### 3. Reusable label templates with `LabelNames` //! -//! When several metrics share the same dynamic label names, use [`Labels`] +//! When several metrics share the same dynamic label names, use [`LabelNames`] //! to define the template once and pair it with values at each call site. -//! The resulting [`LabelValues`] is passed via the `labels:` arm of +//! The resulting [`Labels`] is passed via the `labels:` arm of //! any metric macro. //! //! ```rust,ignore //! use quickwit_metrics::*; //! -//! const ROUTE: Labels<2> = Labels::new(["method", "path"]); +//! const ROUTE: LabelNames<2> = LabelNames::new(["method", "path"]); //! //! fn on_request(method: &'static str, path: &'static str, duration: f64) { //! 
let route = label_values!(ROUTE, [method, path]); @@ -313,7 +313,7 @@ pub mod __inventory { pub use counter::Counter; pub use gauge::{Gauge, GaugeGuard}; pub use histogram::{Histogram, HistogramConfig, HistogramTimer}; -pub use labels::{LabelValues, Labels}; +pub use labels::{LabelNames, Labels}; // ─── metrics-rs re-exports ─── pub use metrics::{CounterFn, GaugeFn, HistogramFn}; pub use metrics_util::MetricKind; diff --git a/quickwit/quickwit-opentelemetry/src/otlp/metrics.rs b/quickwit/quickwit-opentelemetry/src/otlp/metrics.rs index 0f7c6c4d939..b13fdee8630 100644 --- a/quickwit/quickwit-opentelemetry/src/otlp/metrics.rs +++ b/quickwit/quickwit-opentelemetry/src/otlp/metrics.rs @@ -15,12 +15,12 @@ use std::sync::LazyLock; use quickwit_common::metrics::exponential_buckets; -use quickwit_metrics::{Counter, Histogram, Labels, counter, histogram}; +use quickwit_metrics::{Counter, Histogram, LabelNames, counter, histogram}; -pub(crate) const OTLP_GRPC_LABELS: Labels<4> = - Labels::new(["service", "index", "transport", "format"]); -pub(crate) const OTLP_GRPC_ERROR_LABELS: Labels<5> = - Labels::new(["service", "index", "transport", "format", "error"]); +pub(crate) const OTLP_GRPC_LABELS: LabelNames<4> = + LabelNames::new(["service", "index", "transport", "format"]); +pub(crate) const OTLP_GRPC_ERROR_LABELS: LabelNames<5> = + LabelNames::new(["service", "index", "transport", "format", "error"]); pub(crate) static REQUESTS_TOTAL: LazyLock = LazyLock::new(|| { counter!( diff --git a/quickwit/quickwit-search/src/metrics.rs b/quickwit/quickwit-search/src/metrics.rs index 7c7d4c15851..33ab4078415 100644 --- a/quickwit/quickwit-search/src/metrics.rs +++ b/quickwit/quickwit-search/src/metrics.rs @@ -19,9 +19,9 @@ use std::sync::LazyLock; use bytesize::ByteSize; use quickwit_common::metrics::{MaybeRegisteredCounter, exponential_buckets, linear_buckets}; -use quickwit_metrics::{Counter, Gauge, Histogram, Labels, counter, gauge, histogram}; +use quickwit_metrics::{Counter, Gauge, 
Histogram, LabelNames, counter, gauge, histogram}; -pub(crate) const STATUS_LABELS: Labels<1> = Labels::new(["status"]); +pub(crate) const STATUS_LABELS: LabelNames<1> = LabelNames::new(["status"]); fn print_if_not_null( field_name: &'static str, From 8a44574eeaaf3b67dfbbba609eee714f4b604e44 Mon Sep 17 00:00:00 2001 From: Luca Cominardi Date: Tue, 5 May 2026 12:19:27 +0200 Subject: [PATCH 35/54] Replace fully qualified metric paths with use imports; add LabelNames for repeated keys - Add ACTOR_NAME and COMPONENT LabelNames constants in quickwit-indexing metrics - Use label_values!(ACTOR_NAME, [...]) and label_values!(COMPONENT, [...]) instead of repeating "actor_name" and "component" string literals - Replace all &quickwit_common::metrics::IN_FLIGHT_* qualified paths with direct use imports across quickwit-indexing, quickwit-ingest, and quickwit-serve Co-authored-by: Cursor --- quickwit/quickwit-common/src/thread_pool.rs | 7 ++--- .../src/actors/doc_processor.rs | 26 ++++++++++++------- .../quickwit-indexing/src/actors/indexer.rs | 3 ++- .../src/actors/indexing_pipeline.rs | 14 +++++----- .../src/actors/merge_pipeline.rs | 10 +++---- .../metrics_pipeline/parquet_uploader.rs | 6 ++--- .../processed_parquet_batch.rs | 3 ++- .../quickwit-indexing/src/actors/uploader.rs | 10 +++---- quickwit/quickwit-indexing/src/metrics.rs | 5 +++- .../src/models/processed_doc.rs | 3 ++- .../src/models/raw_doc_batch.rs | 5 ++-- .../src/source/ingest/mod.rs | 9 ++++--- quickwit/quickwit-indexing/src/source/mod.rs | 19 +++++++++----- .../quickwit-ingest/src/ingest_v2/fetch.rs | 7 ++--- .../quickwit-ingest/src/ingest_v2/ingester.rs | 3 ++- .../src/ingest_v2/replication.rs | 3 ++- .../quickwit-ingest/src/ingest_v2/router.rs | 3 ++- quickwit/quickwit-serve/src/decompression.rs | 3 ++- 18 files changed, 82 insertions(+), 57 deletions(-) diff --git a/quickwit/quickwit-common/src/thread_pool.rs b/quickwit/quickwit-common/src/thread_pool.rs index 25ef8122069..509459b3d93 100644 --- 
a/quickwit/quickwit-common/src/thread_pool.rs +++ b/quickwit/quickwit-common/src/thread_pool.rs @@ -16,7 +16,7 @@ use std::fmt; use std::sync::Arc; use futures::{Future, TryFutureExt}; -use quickwit_metrics::{Gauge, GaugeGuard, gauge}; +use quickwit_metrics::{Gauge, GaugeGuard, gauge, labels}; use tokio::sync::oneshot; use tracing::error; @@ -60,8 +60,9 @@ impl ThreadPool { let thread_pool = rayon_pool_builder .build() .expect("failed to spawn thread pool"); - let ongoing_tasks = gauge!(parent: THREAD_POOL_ONGOING_TASKS, "pool" => name); - let pending_tasks = gauge!(parent: THREAD_POOL_PENDING_TASKS, "pool" => name); + let labels = labels!("pool" => name); + let ongoing_tasks = gauge!(parent: THREAD_POOL_ONGOING_TASKS, labels: labels); + let pending_tasks = gauge!(parent: THREAD_POOL_PENDING_TASKS, labels: labels); ThreadPool { thread_pool: Arc::new(thread_pool), ongoing_tasks, diff --git a/quickwit/quickwit-indexing/src/actors/doc_processor.rs b/quickwit/quickwit-indexing/src/actors/doc_processor.rs index b2f5e4a95f8..f9e4b555b54 100644 --- a/quickwit/quickwit-indexing/src/actors/doc_processor.rs +++ b/quickwit/quickwit-indexing/src/actors/doc_processor.rs @@ -24,7 +24,7 @@ use quickwit_common::rate_limited_tracing::rate_limited_warn; use quickwit_common::runtimes::RuntimeType; use quickwit_config::{SourceInputFormat, TransformConfig}; use quickwit_doc_mapper::{DocMapper, DocParsingError, JsonObject}; -use quickwit_metrics::{Counter, counter}; +use quickwit_metrics::{Counter, counter, labels}; use quickwit_opentelemetry::otlp::{ JsonLogIterator, JsonSpanIterator, OtlpLogsError, OtlpTracesError, parse_otlp_logs_json, parse_otlp_logs_protobuf, parse_otlp_spans_json, parse_otlp_spans_protobuf, @@ -239,7 +239,8 @@ impl Iterator for JsonDocIterator { } impl From> for JsonDocIterator -where E: Into +where + E: Into, { fn from(result: Result) -> Self { match result { @@ -276,26 +277,31 @@ pub struct DocProcessorCounter { impl Serialize for DocProcessorCounter { fn 
serialize(&self, serializer: S) -> Result - where S: serde::Serializer { + where + S: serde::Serializer, + { serializer.serialize_u64(self.get_num_docs()) } } impl DocProcessorCounter { - fn for_index_and_doc_processor_outcome(index: &str, outcome: &str) -> DocProcessorCounter { - let index_label = quickwit_common::metrics::index_label(index).to_string(); - let outcome = outcome.to_string(); + fn for_index_and_doc_processor_outcome( + index: &str, + outcome: &'static str, + ) -> DocProcessorCounter { + let labels = labels!( + "index" => quickwit_common::metrics::index_label(index).to_string(), + "docs_processed_status" => outcome + ); DocProcessorCounter { num_docs: Default::default(), num_docs_metric: counter!( parent: PROCESSED_DOCS_TOTAL, - "index" => index_label.clone(), - "docs_processed_status" => outcome.clone(), + labels: labels, ), num_bytes_metric: counter!( parent: PROCESSED_BYTES, - "index" => index_label, - "docs_processed_status" => outcome, + labels: labels, ), } } diff --git a/quickwit/quickwit-indexing/src/actors/indexer.rs b/quickwit/quickwit-indexing/src/actors/indexer.rs index 8525fc1550a..0d49e6db3af 100644 --- a/quickwit/quickwit-indexing/src/actors/indexer.rs +++ b/quickwit/quickwit-indexing/src/actors/indexer.rs @@ -27,6 +27,7 @@ use quickwit_actors::{ Actor, ActorContext, ActorExitStatus, Command, Handler, Mailbox, QueueCapacity, }; use quickwit_common::io::IoControls; +use quickwit_common::metrics::IN_FLIGHT_INDEX_WRITER; use quickwit_common::runtimes::RuntimeType; use quickwit_common::temp_dir::TempDirectory; use quickwit_config::IndexingSettings; @@ -232,7 +233,7 @@ impl IndexerState { publish_lock, publish_token_opt, last_delete_opstamp, - memory_usage: GaugeGuard::new(&quickwit_common::metrics::IN_FLIGHT_INDEX_WRITER, 0.0), + memory_usage: GaugeGuard::new(&IN_FLIGHT_INDEX_WRITER, 0.0), cooperative_indexing_period, split_builders_guard, }; diff --git a/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs 
b/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs index c4a18218ed7..67572bc48dc 100644 --- a/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs +++ b/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs @@ -28,7 +28,7 @@ use quickwit_common::temp_dir::TempDirectory; use quickwit_config::{IndexingSettings, RetentionPolicy, SourceConfig}; use quickwit_doc_mapper::DocMapper; use quickwit_ingest::IngesterPool; -use quickwit_metrics::{GaugeGuard, counter, gauge}; +use quickwit_metrics::{GaugeGuard, counter, gauge, label_values}; use quickwit_proto::indexing::IndexingPipelineId; use quickwit_proto::metastore::{MetastoreError, MetastoreServiceClient}; use quickwit_proto::types::ShardId; @@ -45,7 +45,7 @@ use crate::actors::sequencer::Sequencer; use crate::actors::uploader::UploaderType; use crate::actors::{Publisher, Uploader}; use crate::merge_policy::MergePolicy; -use crate::metrics::{BACKPRESSURE_MICROS, INDEXING_PIPELINES}; +use crate::metrics::{ACTOR_NAME, BACKPRESSURE_MICROS, INDEXING_PIPELINES}; use crate::models::IndexingStatistics; use crate::source::{ AssignShards, Assignment, SourceActor, SourceRuntime, quickwit_supported_sources, @@ -315,7 +315,7 @@ impl IndexingPipeline { .set_kill_switch(self.kill_switch.clone()) .set_backpressure_micros_counter(counter!( parent: BACKPRESSURE_MICROS, - "actor_name" => "publisher", + labels: label_values!(ACTOR_NAME, ["publisher"]), )) .spawn(publisher); @@ -324,7 +324,7 @@ impl IndexingPipeline { .spawn_actor() .set_backpressure_micros_counter(counter!( parent: BACKPRESSURE_MICROS, - "actor_name" => "sequencer", + labels: label_values!(ACTOR_NAME, ["sequencer"]), )) .set_kill_switch(self.kill_switch.clone()) .spawn(sequencer); @@ -344,7 +344,7 @@ impl IndexingPipeline { .spawn_actor() .set_backpressure_micros_counter(counter!( parent: BACKPRESSURE_MICROS, - "actor_name" => "uploader", + labels: label_values!(ACTOR_NAME, ["uploader"]), )) .set_kill_switch(self.kill_switch.clone()) .spawn(uploader); 
@@ -378,7 +378,7 @@ impl IndexingPipeline { .spawn_actor() .set_backpressure_micros_counter(counter!( parent: BACKPRESSURE_MICROS, - "actor_name" => "indexer", + labels: label_values!(ACTOR_NAME, ["indexer"]), )) .set_kill_switch(self.kill_switch.clone()) .spawn(indexer); @@ -395,7 +395,7 @@ impl IndexingPipeline { .spawn_actor() .set_backpressure_micros_counter(counter!( parent: BACKPRESSURE_MICROS, - "actor_name" => "doc_processor", + labels: label_values!(ACTOR_NAME, ["doc_processor"]), )) .set_kill_switch(self.kill_switch.clone()) .spawn(doc_processor); diff --git a/quickwit/quickwit-indexing/src/actors/merge_pipeline.rs b/quickwit/quickwit-indexing/src/actors/merge_pipeline.rs index ee399259d72..f2889b3bc19 100644 --- a/quickwit/quickwit-indexing/src/actors/merge_pipeline.rs +++ b/quickwit/quickwit-indexing/src/actors/merge_pipeline.rs @@ -30,7 +30,7 @@ use quickwit_metastore::{ ListSplitsQuery, ListSplitsRequestExt, MetastoreServiceStreamSplitsExt, SplitMetadata, SplitState, }; -use quickwit_metrics::counter; +use quickwit_metrics::{counter, label_values}; use quickwit_proto::indexing::MergePipelineId; use quickwit_proto::metastore::{ ListSplitsRequest, MetastoreError, MetastoreResult, MetastoreService, MetastoreServiceClient, @@ -45,7 +45,7 @@ use crate::actors::pipeline_shared::wait_duration_before_retry; use crate::actors::publisher::DisconnectMergePlanner; use crate::actors::{MergeSchedulerService, Publisher, Uploader, UploaderType}; use crate::merge_policy::MergePolicy; -use crate::metrics::{BACKPRESSURE_MICROS, ONGOING_MERGE_OPERATIONS}; +use crate::metrics::{ACTOR_NAME, BACKPRESSURE_MICROS, ONGOING_MERGE_OPERATIONS}; use crate::models::MergeStatistics; use crate::split_store::IndexingSplitStore; @@ -276,7 +276,7 @@ impl MergePipeline { .set_kill_switch(self.kill_switch.clone()) .set_backpressure_micros_counter(counter!( parent: BACKPRESSURE_MICROS, - "actor_name" => "merge_publisher", + labels: label_values!(ACTOR_NAME, ["merge_publisher"]), )) 
.spawn(merge_publisher); @@ -325,7 +325,7 @@ impl MergePipeline { .set_kill_switch(self.kill_switch.clone()) .set_backpressure_micros_counter(counter!( parent: BACKPRESSURE_MICROS, - "actor_name" => "merge_executor", + labels: label_values!(ACTOR_NAME, ["merge_executor"]), )) .spawn(merge_executor); @@ -340,7 +340,7 @@ impl MergePipeline { .set_kill_switch(self.kill_switch.clone()) .set_backpressure_micros_counter(counter!( parent: BACKPRESSURE_MICROS, - "actor_name" => "merge_split_downloader", + labels: label_values!(ACTOR_NAME, ["merge_split_downloader"]), )) .spawn(merge_split_downloader); diff --git a/quickwit/quickwit-indexing/src/actors/metrics_pipeline/parquet_uploader.rs b/quickwit/quickwit-indexing/src/actors/metrics_pipeline/parquet_uploader.rs index 8781e7570e9..c06c5660815 100644 --- a/quickwit/quickwit-indexing/src/actors/metrics_pipeline/parquet_uploader.rs +++ b/quickwit/quickwit-indexing/src/actors/metrics_pipeline/parquet_uploader.rs @@ -27,7 +27,7 @@ use async_trait::async_trait; use quickwit_actors::{Actor, ActorContext, ActorExitStatus, Handler, Mailbox, QueueCapacity}; use quickwit_common::spawn_named_task; use quickwit_metastore::StageParquetSplitsRequestExt; -use quickwit_metrics::gauge; +use quickwit_metrics::{gauge, label_values}; use quickwit_parquet_engine::split::{ParquetSplitKind, ParquetSplitMetadata}; use quickwit_proto::metastore::{MetastoreService, MetastoreServiceClient}; use quickwit_storage::Storage; @@ -37,7 +37,7 @@ use tracing::{Instrument, Span, debug, info, instrument, warn}; use super::{ParquetSplitBatch, ParquetSplitsUpdate}; use crate::actors::sequencer::{Sequencer, SequencerCommand}; use crate::actors::{Publisher, UploaderCounters, UploaderType}; -use crate::metrics::AVAILABLE_CONCURRENT_UPLOAD_PERMITS; +use crate::metrics::{AVAILABLE_CONCURRENT_UPLOAD_PERMITS, COMPONENT}; /// Concurrent upload permits for metrics uploader. /// Uses same permit pool as indexer uploads. 
@@ -125,7 +125,7 @@ impl ParquetUploader { .get_or_init(|| Semaphore::const_new(self.max_concurrent_uploads)); let gauge = gauge!( parent: AVAILABLE_CONCURRENT_UPLOAD_PERMITS, - "component" => "metrics", + labels: label_values!(COMPONENT, ["metrics"]), ); gauge.set(concurrent_upload_permits.available_permits() as f64); concurrent_upload_permits diff --git a/quickwit/quickwit-indexing/src/actors/metrics_pipeline/processed_parquet_batch.rs b/quickwit/quickwit-indexing/src/actors/metrics_pipeline/processed_parquet_batch.rs index 7078dd24f21..f0493e2c3a0 100644 --- a/quickwit/quickwit-indexing/src/actors/metrics_pipeline/processed_parquet_batch.rs +++ b/quickwit/quickwit-indexing/src/actors/metrics_pipeline/processed_parquet_batch.rs @@ -21,6 +21,7 @@ use std::fmt; use arrow::record_batch::RecordBatch; use quickwit_metastore::checkpoint::SourceCheckpointDelta; +use quickwit_common::metrics::IN_FLIGHT_INDEXER_MAILBOX; use quickwit_metrics::GaugeGuard; /// Batch of parquet data as Arrow RecordBatch for the parquet indexing pipeline. 
@@ -66,7 +67,7 @@ impl ProcessedParquetBatch { .sum(); let gauge_guard = GaugeGuard::new( - &quickwit_common::metrics::IN_FLIGHT_INDEXER_MAILBOX, + &IN_FLIGHT_INDEXER_MAILBOX, memory_size as f64, ); diff --git a/quickwit/quickwit-indexing/src/actors/uploader.rs b/quickwit/quickwit-indexing/src/actors/uploader.rs index b8b13ddc606..b9fb880da7d 100644 --- a/quickwit/quickwit-indexing/src/actors/uploader.rs +++ b/quickwit/quickwit-indexing/src/actors/uploader.rs @@ -28,7 +28,7 @@ use quickwit_common::spawn_named_task; use quickwit_config::RetentionPolicy; use quickwit_metastore::checkpoint::IndexCheckpointDelta; use quickwit_metastore::{SplitMetadata, StageSplitsRequestExt}; -use quickwit_metrics::gauge; +use quickwit_metrics::{gauge, label_values}; use quickwit_proto::metastore::{MetastoreService, MetastoreServiceClient, StageSplitsRequest}; use quickwit_proto::search::{ReportSplit, ReportSplitsRequest}; use quickwit_proto::types::{IndexUid, PublishToken}; @@ -41,7 +41,7 @@ use tracing::{Instrument, Span, debug, info, instrument, warn}; use crate::actors::Publisher; use crate::actors::sequencer::{Sequencer, SequencerCommand}; use crate::merge_policy::{MergePolicy, MergeTask}; -use crate::metrics::AVAILABLE_CONCURRENT_UPLOAD_PERMITS; +use crate::metrics::{AVAILABLE_CONCURRENT_UPLOAD_PERMITS, COMPONENT}; use crate::models::{ EmptySplit, PackagedSplit, PackagedSplitBatch, PublishLock, SplitsUpdate, create_split_metadata, }; @@ -206,21 +206,21 @@ impl Uploader { &CONCURRENT_UPLOAD_PERMITS_INDEX, gauge!( parent: AVAILABLE_CONCURRENT_UPLOAD_PERMITS, - "component" => "indexer", + labels: label_values!(COMPONENT, ["indexer"]), ), ), UploaderType::MergeUploader => ( &CONCURRENT_UPLOAD_PERMITS_MERGE, gauge!( parent: AVAILABLE_CONCURRENT_UPLOAD_PERMITS, - "component" => "merger", + labels: label_values!(COMPONENT, ["merger"]), ), ), UploaderType::DeleteUploader => ( &CONCURRENT_UPLOAD_PERMITS_MERGE, gauge!( parent: AVAILABLE_CONCURRENT_UPLOAD_PERMITS, - "component" => "merger", 
+ labels: label_values!(COMPONENT, ["merger"]), ), ), }; diff --git a/quickwit/quickwit-indexing/src/metrics.rs b/quickwit/quickwit-indexing/src/metrics.rs index b15b0e6eb8d..08c3c0f3911 100644 --- a/quickwit/quickwit-indexing/src/metrics.rs +++ b/quickwit/quickwit-indexing/src/metrics.rs @@ -14,7 +14,10 @@ use std::sync::LazyLock; -use quickwit_metrics::{Counter, Gauge, counter, gauge}; +use quickwit_metrics::{Counter, Gauge, LabelNames, counter, gauge}; + +pub(crate) const ACTOR_NAME: LabelNames<1> = LabelNames::new(["actor_name"]); +pub(crate) const COMPONENT: LabelNames<1> = LabelNames::new(["component"]); pub(crate) static PROCESSED_DOCS_TOTAL: LazyLock = LazyLock::new(|| { counter!( diff --git a/quickwit/quickwit-indexing/src/models/processed_doc.rs b/quickwit/quickwit-indexing/src/models/processed_doc.rs index e6957734d0e..f7906a9c4ff 100644 --- a/quickwit/quickwit-indexing/src/models/processed_doc.rs +++ b/quickwit/quickwit-indexing/src/models/processed_doc.rs @@ -15,6 +15,7 @@ use std::fmt; use quickwit_metastore::checkpoint::SourceCheckpointDelta; +use quickwit_common::metrics::IN_FLIGHT_INDEXER_MAILBOX; use quickwit_metrics::GaugeGuard; use tantivy::{DateTime, TantivyDocument}; @@ -52,7 +53,7 @@ impl ProcessedDocBatch { ) -> Self { let delta = docs.iter().map(|doc| doc.num_bytes as i64).sum::(); let gauge_guard = GaugeGuard::new( - &quickwit_common::metrics::IN_FLIGHT_INDEXER_MAILBOX, + &IN_FLIGHT_INDEXER_MAILBOX, delta as f64, ); Self { diff --git a/quickwit/quickwit-indexing/src/models/raw_doc_batch.rs b/quickwit/quickwit-indexing/src/models/raw_doc_batch.rs index 901e51c527b..8a272b3d414 100644 --- a/quickwit/quickwit-indexing/src/models/raw_doc_batch.rs +++ b/quickwit/quickwit-indexing/src/models/raw_doc_batch.rs @@ -16,6 +16,7 @@ use std::fmt; use bytes::Bytes; use quickwit_metastore::checkpoint::SourceCheckpointDelta; +use quickwit_common::metrics::IN_FLIGHT_DOC_PROCESSOR_MAILBOX; use quickwit_metrics::GaugeGuard; pub struct RawDocBatch { @@ -35,7 
+36,7 @@ impl RawDocBatch { ) -> Self { let delta = docs.iter().map(|doc| doc.len() as i64).sum::(); let gauge_guard = GaugeGuard::new( - &quickwit_common::metrics::IN_FLIGHT_DOC_PROCESSOR_MAILBOX, + &IN_FLIGHT_DOC_PROCESSOR_MAILBOX, delta as f64, ); @@ -69,7 +70,7 @@ impl fmt::Debug for RawDocBatch { impl Default for RawDocBatch { fn default() -> Self { let _gauge_guard = GaugeGuard::new( - &quickwit_common::metrics::IN_FLIGHT_DOC_PROCESSOR_MAILBOX, + &IN_FLIGHT_DOC_PROCESSOR_MAILBOX, 0.0, ); Self { diff --git a/quickwit/quickwit-indexing/src/source/ingest/mod.rs b/quickwit/quickwit-indexing/src/source/ingest/mod.rs index cc7c0f1772c..783f0ef28c8 100644 --- a/quickwit/quickwit-indexing/src/source/ingest/mod.rs +++ b/quickwit/quickwit-indexing/src/source/ingest/mod.rs @@ -665,6 +665,7 @@ mod tests { use itertools::Itertools; use quickwit_actors::{ActorContext, Universe}; use quickwit_common::ServiceStream; + use quickwit_common::metrics::IN_FLIGHT_FETCH_STREAM; use quickwit_common::stream_utils::InFlightValue; use quickwit_config::{IndexingSettings, SourceConfig, SourceParams}; use quickwit_ingest::IngesterPoolEntry; @@ -1436,7 +1437,7 @@ mod tests { let in_flight_value = InFlightValue::new( fetch_message, batch_size, - &quickwit_common::metrics::IN_FLIGHT_FETCH_STREAM, + &IN_FLIGHT_FETCH_STREAM, ); fetch_message_tx.send(Ok(in_flight_value)).await.unwrap(); @@ -1453,7 +1454,7 @@ mod tests { let in_flight_value = InFlightValue::new( fetch_message, batch_size, - &quickwit_common::metrics::IN_FLIGHT_FETCH_STREAM, + &IN_FLIGHT_FETCH_STREAM, ); fetch_message_tx.send(Ok(in_flight_value)).await.unwrap(); @@ -1467,7 +1468,7 @@ mod tests { let in_flight_value = InFlightValue::new( fetch_message, ByteSize(0), - &quickwit_common::metrics::IN_FLIGHT_FETCH_STREAM, + &IN_FLIGHT_FETCH_STREAM, ); fetch_message_tx.send(Ok(in_flight_value)).await.unwrap(); @@ -1528,7 +1529,7 @@ mod tests { let in_flight_value = InFlightValue::new( fetch_message, batch_size, - 
&quickwit_common::metrics::IN_FLIGHT_FETCH_STREAM, + &IN_FLIGHT_FETCH_STREAM, ); fetch_message_tx.send(Ok(in_flight_value)).await.unwrap(); diff --git a/quickwit/quickwit-indexing/src/source/mod.rs b/quickwit/quickwit-indexing/src/source/mod.rs index 78ed27c7dce..e2d5e1910fa 100644 --- a/quickwit/quickwit-indexing/src/source/mod.rs +++ b/quickwit/quickwit-indexing/src/source/mod.rs @@ -100,6 +100,11 @@ use quickwit_config::{ use quickwit_ingest::IngesterPool; use quickwit_metastore::IndexMetadataResponseExt; use quickwit_metastore::checkpoint::{SourceCheckpoint, SourceCheckpointDelta}; +use quickwit_common::metrics::{ + IN_FLIGHT_FILE_SOURCE, IN_FLIGHT_INGEST_SOURCE, IN_FLIGHT_KAFKA_SOURCE, + IN_FLIGHT_KINESIS_SOURCE, IN_FLIGHT_OTHER_SOURCE, IN_FLIGHT_PUBSUB_SOURCE, + IN_FLIGHT_PULSAR_SOURCE, +}; use quickwit_metrics::GaugeGuard; use quickwit_proto::indexing::IndexingPipelineId; use quickwit_proto::metastore::{ @@ -529,13 +534,13 @@ impl BatchBuilder { pub fn with_capacity(capacity: usize, source_type: SourceType) -> Self { let gauge = match source_type { - SourceType::File => &quickwit_common::metrics::IN_FLIGHT_FILE_SOURCE, - SourceType::IngestV2 => &quickwit_common::metrics::IN_FLIGHT_INGEST_SOURCE, - SourceType::Kafka => &quickwit_common::metrics::IN_FLIGHT_KAFKA_SOURCE, - SourceType::Kinesis => &quickwit_common::metrics::IN_FLIGHT_KINESIS_SOURCE, - SourceType::PubSub => &quickwit_common::metrics::IN_FLIGHT_PUBSUB_SOURCE, - SourceType::Pulsar => &quickwit_common::metrics::IN_FLIGHT_PULSAR_SOURCE, - _ => &quickwit_common::metrics::IN_FLIGHT_OTHER_SOURCE, + SourceType::File => &IN_FLIGHT_FILE_SOURCE, + SourceType::IngestV2 => &IN_FLIGHT_INGEST_SOURCE, + SourceType::Kafka => &IN_FLIGHT_KAFKA_SOURCE, + SourceType::Kinesis => &IN_FLIGHT_KINESIS_SOURCE, + SourceType::PubSub => &IN_FLIGHT_PUBSUB_SOURCE, + SourceType::Pulsar => &IN_FLIGHT_PULSAR_SOURCE, + _ => &IN_FLIGHT_OTHER_SOURCE, }; let gauge_guard = GaugeGuard::new(gauge, 0.0); diff --git 
a/quickwit/quickwit-ingest/src/ingest_v2/fetch.rs b/quickwit/quickwit-ingest/src/ingest_v2/fetch.rs index 3d45ef7f571..3eb35698fb7 100644 --- a/quickwit/quickwit-ingest/src/ingest_v2/fetch.rs +++ b/quickwit/quickwit-ingest/src/ingest_v2/fetch.rs @@ -22,6 +22,7 @@ use bytes::{BufMut, BytesMut}; use bytesize::ByteSize; use futures::StreamExt; use mrecordlog::Record; +use quickwit_common::metrics::{IN_FLIGHT_FETCH_STREAM, IN_FLIGHT_MULTI_FETCH_STREAM}; use quickwit_common::retry::RetryParams; use quickwit_common::stream_utils::{InFlightValue, TrackedSender}; use quickwit_common::{ServiceStream, spawn_named_task}; @@ -82,7 +83,7 @@ impl FetchStreamTask { .unwrap_or_default(); let (fetch_message_tx, fetch_stream) = ServiceStream::new_bounded_with_gauge( 3, - &quickwit_common::metrics::IN_FLIGHT_FETCH_STREAM, + &IN_FLIGHT_FETCH_STREAM, ); let mut fetch_task = Self { shard_id: open_fetch_stream_request.shard_id().clone(), @@ -560,7 +561,7 @@ async fn fault_tolerant_fetch_stream( let in_flight_value = InFlightValue::new( fetch_message, batch_size, - &quickwit_common::metrics::IN_FLIGHT_MULTI_FETCH_STREAM, + &IN_FLIGHT_MULTI_FETCH_STREAM, ); if fetch_message_tx.send(Ok(in_flight_value)).await.is_err() { // The consumer was dropped. @@ -573,7 +574,7 @@ async fn fault_tolerant_fetch_stream( let in_flight_value = InFlightValue::new( fetch_message, ByteSize(0), - &quickwit_common::metrics::IN_FLIGHT_MULTI_FETCH_STREAM, + &IN_FLIGHT_MULTI_FETCH_STREAM, ); // We ignore the send error if the consumer was dropped because we're going // to return anyway. 
diff --git a/quickwit/quickwit-ingest/src/ingest_v2/ingester.rs b/quickwit/quickwit-ingest/src/ingest_v2/ingester.rs index f3a66cadc33..1c0c09e6b93 100644 --- a/quickwit/quickwit-ingest/src/ingest_v2/ingester.rs +++ b/quickwit/quickwit-ingest/src/ingest_v2/ingester.rs @@ -25,6 +25,7 @@ use futures::StreamExt; use futures::stream::FuturesUnordered; use mrecordlog::error::CreateQueueError; use quickwit_cluster::Cluster; +use quickwit_common::metrics::IN_FLIGHT_INGESTER_PERSIST; use quickwit_common::pretty::PrettyDisplay; use quickwit_common::pubsub::{EventBroker, EventSubscriber}; use quickwit_common::rate_limiter::{RateLimiter, RateLimiterSettings}; @@ -1127,7 +1128,7 @@ impl IngesterService for Ingester { }) .sum::(); let _gauge_guard = GaugeGuard::new( - &quickwit_common::metrics::IN_FLIGHT_INGESTER_PERSIST, + &IN_FLIGHT_INGESTER_PERSIST, request_size_bytes as f64, ); diff --git a/quickwit/quickwit-ingest/src/ingest_v2/replication.rs b/quickwit/quickwit-ingest/src/ingest_v2/replication.rs index 3542acca6f0..e7b5ca5fdca 100644 --- a/quickwit/quickwit-ingest/src/ingest_v2/replication.rs +++ b/quickwit/quickwit-ingest/src/ingest_v2/replication.rs @@ -18,6 +18,7 @@ use std::time::{Duration, Instant}; use bytesize::ByteSize; use futures::{Future, StreamExt}; use mrecordlog::error::CreateQueueError; +use quickwit_common::metrics::IN_FLIGHT_INGESTER_REPLICATE; use quickwit_common::{ServiceStream, rate_limited_warn}; use quickwit_metrics::GaugeGuard; use quickwit_proto::ingest::ingester::{ @@ -505,7 +506,7 @@ impl ReplicationTask { } let request_size_bytes = replicate_request.num_bytes(); let _gauge_guard = GaugeGuard::new( - &quickwit_common::metrics::IN_FLIGHT_INGESTER_REPLICATE, + &IN_FLIGHT_INGESTER_REPLICATE, request_size_bytes as f64, ); diff --git a/quickwit/quickwit-ingest/src/ingest_v2/router.rs b/quickwit/quickwit-ingest/src/ingest_v2/router.rs index 26fd917c609..2fe4adf5f25 100644 --- a/quickwit/quickwit-ingest/src/ingest_v2/router.rs +++ 
b/quickwit/quickwit-ingest/src/ingest_v2/router.rs @@ -20,6 +20,7 @@ use std::time::Duration; use async_trait::async_trait; use futures::stream::FuturesUnordered; use futures::{Future, StreamExt}; +use quickwit_common::metrics::IN_FLIGHT_INGEST_ROUTER; use quickwit_common::pubsub::{EventBroker, EventSubscriber}; use quickwit_common::{rate_limited_error, rate_limited_warn}; use quickwit_metrics::{GaugeGuard, counter}; @@ -570,7 +571,7 @@ impl IngestRouterService for IngestRouter { let request_size_bytes = ingest_request.num_bytes(); let _gauge_guard = GaugeGuard::new( - &quickwit_common::metrics::IN_FLIGHT_INGEST_ROUTER, + &IN_FLIGHT_INGEST_ROUTER, request_size_bytes as f64, ); let num_subrequests = ingest_request.subrequests.len(); diff --git a/quickwit/quickwit-serve/src/decompression.rs b/quickwit/quickwit-serve/src/decompression.rs index e3fb7a833a0..03f8eb3b00f 100644 --- a/quickwit/quickwit-serve/src/decompression.rs +++ b/quickwit/quickwit-serve/src/decompression.rs @@ -17,6 +17,7 @@ use std::sync::LazyLock; use bytes::Bytes; use flate2::read::{MultiGzDecoder, ZlibDecoder}; +use quickwit_common::metrics::IN_FLIGHT_REST_SERVER; use quickwit_common::thread_pool::run_cpu_intensive; use quickwit_metrics::GaugeGuard; use thiserror::Error; @@ -115,7 +116,7 @@ pub(crate) struct Body { impl Body { pub fn new(content: Bytes, load_shield_permit: LoadShieldPermit) -> Body { let gauge_guard = GaugeGuard::new( - &quickwit_common::metrics::IN_FLIGHT_REST_SERVER, + &IN_FLIGHT_REST_SERVER, content.len() as f64, ); Body { From fe626ebce99765c27d7aaff76be81aecd219ada2 Mon Sep 17 00:00:00 2001 From: Luca Cominardi Date: Tue, 5 May 2026 12:35:53 +0200 Subject: [PATCH 36/54] Refactor quickwit-ingest metrics: use direct imports and LabelNames constants - Replace all `metrics::INGEST_RESULT_*` qualified paths in router.rs with direct imports - Move VALIDITY LabelNames from ingest_v2/metrics.rs to crate-level metrics.rs for shared use across ingest_api_service and ingester - Replace 
raw "validity" => "value" with label_values!(VALIDITY, [...]) in ingest_api_service.rs - Use labels! macro in with_lock_metrics! macro for operation/type labels - Import IN_FLIGHT_WAL directly in ingest_v2/metrics.rs Co-authored-by: Cursor --- .../quickwit-ingest/src/ingest_api_service.rs | 9 +-- .../quickwit-ingest/src/ingest_v2/ingester.rs | 24 ++++---- .../quickwit-ingest/src/ingest_v2/metrics.rs | 7 ++- .../quickwit-ingest/src/ingest_v2/router.rs | 55 +++++++++++-------- quickwit/quickwit-ingest/src/lib.rs | 10 ++-- quickwit/quickwit-ingest/src/metrics.rs | 4 +- 6 files changed, 60 insertions(+), 49 deletions(-) diff --git a/quickwit/quickwit-ingest/src/ingest_api_service.rs b/quickwit/quickwit-ingest/src/ingest_api_service.rs index c7d2d67d31b..fef3119f1bd 100644 --- a/quickwit/quickwit-ingest/src/ingest_api_service.rs +++ b/quickwit/quickwit-ingest/src/ingest_api_service.rs @@ -22,12 +22,12 @@ use quickwit_actors::{ }; use quickwit_common::runtimes::RuntimeType; use quickwit_common::tower::Cost; -use quickwit_metrics::counter; +use quickwit_metrics::{counter, label_values}; use quickwit_proto::ingest::RateLimitingCause; use tracing::{error, info}; use ulid::Ulid; -use crate::metrics::{DOCS_BYTES_TOTAL, DOCS_TOTAL}; +use crate::metrics::{DOCS_BYTES_TOTAL, DOCS_TOTAL, VALIDITY}; use crate::notifications::Notifications; use crate::{ CommitType, CreateQueueIfNotExistsRequest, CreateQueueIfNotExistsResponse, CreateQueueRequest, @@ -202,12 +202,13 @@ impl IngestApiService { } num_docs += batch_num_docs; + let labels = label_values!(VALIDITY, ["valid"]); counter!( parent: DOCS_BYTES_TOTAL, - "validity" => "valid", + labels: labels, ) .increment(batch_num_bytes as u64); - counter!(parent: DOCS_TOTAL, "validity" => "valid").increment(batch_num_docs as u64); + counter!(parent: DOCS_TOTAL, labels: labels).increment(batch_num_docs as u64); } // TODO we could fsync here and disable autosync to have better i/o perfs. 
Ok(( diff --git a/quickwit/quickwit-ingest/src/ingest_v2/ingester.rs b/quickwit/quickwit-ingest/src/ingest_v2/ingester.rs index 1c0c09e6b93..12e7c6b3a4f 100644 --- a/quickwit/quickwit-ingest/src/ingest_v2/ingester.rs +++ b/quickwit/quickwit-ingest/src/ingest_v2/ingester.rs @@ -30,7 +30,7 @@ use quickwit_common::pretty::PrettyDisplay; use quickwit_common::pubsub::{EventBroker, EventSubscriber}; use quickwit_common::rate_limiter::{RateLimiter, RateLimiterSettings}; use quickwit_common::{ServiceStream, rate_limited_error, rate_limited_warn}; -use quickwit_metrics::{GaugeGuard, counter}; +use quickwit_metrics::{GaugeGuard, counter, label_values}; use quickwit_proto::control_plane::{ AdviseResetShardsRequest, ControlPlaneService, ControlPlaneServiceClient, }; @@ -63,9 +63,9 @@ use super::replication::{ }; use super::state::{IngesterState, InnerIngesterState, WeakIngesterState}; use crate::ingest_v2::doc_mapper::get_or_try_build_doc_mapper; -use crate::ingest_v2::metrics::{RESET_SHARDS_OPERATIONS_TOTAL, report_wal_usage}; +use crate::ingest_v2::metrics::{RESET_SHARDS_OPERATIONS_TOTAL, STATUS, report_wal_usage}; use crate::ingest_v2::models::IngesterShardType; -use crate::metrics::{DOCS_BYTES_TOTAL, DOCS_TOTAL}; +use crate::metrics::{DOCS_BYTES_TOTAL, DOCS_TOTAL, VALIDITY}; use crate::mrecordlog_async::MultiRecordLogAsync; use crate::{FollowerId, estimate_size, with_lock_metrics}; @@ -335,7 +335,7 @@ impl Ingester { ); counter!( parent: RESET_SHARDS_OPERATIONS_TOTAL, - "status" => "success", + labels: label_values!(STATUS, ["success"]), ) .increment(1); @@ -347,7 +347,7 @@ impl Ingester { counter!( parent: RESET_SHARDS_OPERATIONS_TOTAL, - "status" => "error", + labels: label_values!(STATUS, ["error"]), ) .increment(1); } @@ -356,7 +356,7 @@ impl Ingester { counter!( parent: RESET_SHARDS_OPERATIONS_TOTAL, - "status" => "timeout", + labels: label_values!(STATUS, ["timeout"]), ) .increment(1); } @@ -573,12 +573,12 @@ impl Ingester { if valid_doc_batch.is_empty() { counter!( 
parent: DOCS_TOTAL, - "validity" => "invalid", + labels: label_values!(VALIDITY, ["invalid"]), ) .increment(parse_failures.len() as u64); counter!( parent: DOCS_BYTES_TOTAL, - "validity" => "invalid", + labels: label_values!(VALIDITY, ["invalid"]), ) .increment(original_batch_num_bytes); let persist_success = PersistSuccess { @@ -596,23 +596,23 @@ impl Ingester { counter!( parent: DOCS_TOTAL, - "validity" => "valid", + labels: label_values!(VALIDITY, ["valid"]), ) .increment(valid_doc_batch.num_docs() as u64); counter!( parent: DOCS_BYTES_TOTAL, - "validity" => "valid", + labels: label_values!(VALIDITY, ["valid"]), ) .increment(valid_doc_batch.num_bytes() as u64); if !parse_failures.is_empty() { counter!( parent: DOCS_TOTAL, - "validity" => "invalid", + labels: label_values!(VALIDITY, ["invalid"]), ) .increment(parse_failures.len() as u64); counter!( parent: DOCS_BYTES_TOTAL, - "validity" => "invalid", + labels: label_values!(VALIDITY, ["invalid"]), ) .increment(original_batch_num_bytes - valid_doc_batch.num_bytes() as u64); } diff --git a/quickwit/quickwit-ingest/src/ingest_v2/metrics.rs b/quickwit/quickwit-ingest/src/ingest_v2/metrics.rs index 6e7bf351722..3cf40467e98 100644 --- a/quickwit/quickwit-ingest/src/ingest_v2/metrics.rs +++ b/quickwit/quickwit-ingest/src/ingest_v2/metrics.rs @@ -16,7 +16,10 @@ use std::sync::LazyLock; use mrecordlog::ResourceUsage; use quickwit_common::metrics::{exponential_buckets, linear_buckets}; -use quickwit_metrics::{Counter, Gauge, Histogram, counter, gauge, histogram}; +use quickwit_common::metrics::IN_FLIGHT_WAL; +use quickwit_metrics::{Counter, Gauge, Histogram, LabelNames, counter, gauge, histogram}; + +pub(super) const STATUS: LabelNames<1> = LabelNames::new(["status"]); static INGEST_RESULT_TOTAL: LazyLock = LazyLock::new(|| { counter!( @@ -155,6 +158,6 @@ pub(super) static WAL_MEMORY_USED_BYTES: LazyLock = LazyLock::new(|| { pub(super) fn report_wal_usage(wal_usage: ResourceUsage) { 
WAL_DISK_USED_BYTES.set(wal_usage.disk_used_bytes as f64); - quickwit_common::metrics::IN_FLIGHT_WAL.set(wal_usage.memory_allocated_bytes as f64); + IN_FLIGHT_WAL.set(wal_usage.memory_allocated_bytes as f64); WAL_MEMORY_USED_BYTES.set(wal_usage.memory_used_bytes as f64); } diff --git a/quickwit/quickwit-ingest/src/ingest_v2/router.rs b/quickwit/quickwit-ingest/src/ingest_v2/router.rs index 2fe4adf5f25..69273ae27d7 100644 --- a/quickwit/quickwit-ingest/src/ingest_v2/router.rs +++ b/quickwit/quickwit-ingest/src/ingest_v2/router.rs @@ -48,9 +48,16 @@ use super::debouncing::{ use super::ingester::PERSIST_REQUEST_TIMEOUT; use super::routing_table::RoutingTable; use super::workbench::IngestWorkbench; -use super::{IngesterPool, metrics, pending_subrequests}; +use super::{IngesterPool, pending_subrequests}; use crate::get_ingest_router_buffer_size; -use crate::ingest_v2::metrics::INGEST_ATTEMPTS; +use crate::ingest_v2::metrics::{ + INGEST_ATTEMPTS, INGEST_RESULT_CIRCUIT_BREAKER, INGEST_RESULT_INDEX_NOT_FOUND, + INGEST_RESULT_INTERNAL, INGEST_RESULT_LOAD_SHEDDING, INGEST_RESULT_NO_SHARDS_AVAILABLE, + INGEST_RESULT_ROUTER_LOAD_SHEDDING, INGEST_RESULT_ROUTER_TIMEOUT, + INGEST_RESULT_SHARD_NOT_FOUND, INGEST_RESULT_SHARD_RATE_LIMITED, INGEST_RESULT_SOURCE_NOT_FOUND, + INGEST_RESULT_SUCCESS, INGEST_RESULT_TIMEOUT, INGEST_RESULT_UNAVAILABLE, + INGEST_RESULT_UNSPECIFIED, INGEST_RESULT_WAL_FULL, +}; /// Duration after which ingest requests time out with [`IngestV2Error::Timeout`]. 
fn ingest_request_timeout() -> Duration { @@ -495,35 +502,35 @@ fn update_ingest_metrics(ingest_result: &IngestV2Result, num_s let num_subrequests = num_subrequests as u64; match ingest_result { Ok(ingest_response) => { - metrics::INGEST_RESULT_SUCCESS.increment(ingest_response.successes.len() as u64); + INGEST_RESULT_SUCCESS.increment(ingest_response.successes.len() as u64); for ingest_failure in &ingest_response.failures { match ingest_failure.reason() { IngestFailureReason::CircuitBreaker => { - metrics::INGEST_RESULT_CIRCUIT_BREAKER.increment(1); + INGEST_RESULT_CIRCUIT_BREAKER.increment(1); } IngestFailureReason::Unspecified => { - metrics::INGEST_RESULT_UNSPECIFIED.increment(1) + INGEST_RESULT_UNSPECIFIED.increment(1) } IngestFailureReason::IndexNotFound => { - metrics::INGEST_RESULT_INDEX_NOT_FOUND.increment(1) + INGEST_RESULT_INDEX_NOT_FOUND.increment(1) } IngestFailureReason::SourceNotFound => { - metrics::INGEST_RESULT_SOURCE_NOT_FOUND.increment(1) + INGEST_RESULT_SOURCE_NOT_FOUND.increment(1) } - IngestFailureReason::Internal => metrics::INGEST_RESULT_INTERNAL.increment(1), + IngestFailureReason::Internal => INGEST_RESULT_INTERNAL.increment(1), IngestFailureReason::NoShardsAvailable => { - metrics::INGEST_RESULT_NO_SHARDS_AVAILABLE.increment(1) + INGEST_RESULT_NO_SHARDS_AVAILABLE.increment(1) } IngestFailureReason::ShardRateLimited => { - metrics::INGEST_RESULT_SHARD_RATE_LIMITED.increment(1) + INGEST_RESULT_SHARD_RATE_LIMITED.increment(1) } - IngestFailureReason::WalFull => metrics::INGEST_RESULT_WAL_FULL.increment(1), - IngestFailureReason::Timeout => metrics::INGEST_RESULT_TIMEOUT.increment(1), + IngestFailureReason::WalFull => INGEST_RESULT_WAL_FULL.increment(1), + IngestFailureReason::Timeout => INGEST_RESULT_TIMEOUT.increment(1), IngestFailureReason::RouterLoadShedding => { - metrics::INGEST_RESULT_ROUTER_LOAD_SHEDDING.increment(1) + INGEST_RESULT_ROUTER_LOAD_SHEDDING.increment(1) } IngestFailureReason::LoadShedding => { - 
metrics::INGEST_RESULT_LOAD_SHEDDING.increment(1) + INGEST_RESULT_LOAD_SHEDDING.increment(1) } } } @@ -531,35 +538,35 @@ fn update_ingest_metrics(ingest_result: &IngestV2Result, num_s Err(ingest_error) => match ingest_error { IngestV2Error::TooManyRequests(rate_limiting_cause) => match rate_limiting_cause { RateLimitingCause::RouterLoadShedding => { - metrics::INGEST_RESULT_ROUTER_LOAD_SHEDDING.increment(num_subrequests); + INGEST_RESULT_ROUTER_LOAD_SHEDDING.increment(num_subrequests); } RateLimitingCause::LoadShedding => { - metrics::INGEST_RESULT_LOAD_SHEDDING.increment(num_subrequests) + INGEST_RESULT_LOAD_SHEDDING.increment(num_subrequests) } RateLimitingCause::WalFull => { - metrics::INGEST_RESULT_WAL_FULL.increment(num_subrequests); + INGEST_RESULT_WAL_FULL.increment(num_subrequests); } RateLimitingCause::CircuitBreaker => { - metrics::INGEST_RESULT_CIRCUIT_BREAKER.increment(num_subrequests); + INGEST_RESULT_CIRCUIT_BREAKER.increment(num_subrequests); } RateLimitingCause::ShardRateLimiting => { - metrics::INGEST_RESULT_SHARD_RATE_LIMITED.increment(num_subrequests); + INGEST_RESULT_SHARD_RATE_LIMITED.increment(num_subrequests); } RateLimitingCause::Unknown => { - metrics::INGEST_RESULT_UNSPECIFIED.increment(num_subrequests); + INGEST_RESULT_UNSPECIFIED.increment(num_subrequests); } }, IngestV2Error::Timeout(_) => { - metrics::INGEST_RESULT_ROUTER_TIMEOUT.increment(num_subrequests); + INGEST_RESULT_ROUTER_TIMEOUT.increment(num_subrequests); } IngestV2Error::ShardNotFound { .. 
} => { - metrics::INGEST_RESULT_SHARD_NOT_FOUND.increment(num_subrequests); + INGEST_RESULT_SHARD_NOT_FOUND.increment(num_subrequests); } IngestV2Error::Unavailable(_) => { - metrics::INGEST_RESULT_UNAVAILABLE.increment(num_subrequests); + INGEST_RESULT_UNAVAILABLE.increment(num_subrequests); } IngestV2Error::Internal(_) => { - metrics::INGEST_RESULT_INTERNAL.increment(num_subrequests); + INGEST_RESULT_INTERNAL.increment(num_subrequests); } }, } diff --git a/quickwit/quickwit-ingest/src/lib.rs b/quickwit/quickwit-ingest/src/lib.rs index e437fdb2704..4d0838f9ac0 100644 --- a/quickwit/quickwit-ingest/src/lib.rs +++ b/quickwit/quickwit-ingest/src/lib.rs @@ -109,10 +109,10 @@ pub async fn start_ingest_api_service( macro_rules! with_lock_metrics { ($future:expr, $operation:expr, $kind:expr) => { { + let labels = quickwit_metrics::labels!("operation" => $operation, "type" => $kind); quickwit_metrics::gauge!( parent: $crate::ingest_v2::metrics::WAL_ACQUIRE_LOCK_REQUESTS_IN_FLIGHT, - "operation" => $operation, - "type" => $kind, + labels: labels, ) .increment(1.0); @@ -128,14 +128,12 @@ macro_rules! 
with_lock_metrics { } quickwit_metrics::gauge!( parent: $crate::ingest_v2::metrics::WAL_ACQUIRE_LOCK_REQUESTS_IN_FLIGHT, - "operation" => $operation, - "type" => $kind, + labels: labels, ) .decrement(1.0); quickwit_metrics::histogram!( parent: $crate::ingest_v2::metrics::WAL_ACQUIRE_LOCK_REQUEST_DURATION_SECS, - "operation" => $operation, - "type" => $kind, + labels: labels, ) .record(elapsed.as_secs_f64()); diff --git a/quickwit/quickwit-ingest/src/metrics.rs b/quickwit/quickwit-ingest/src/metrics.rs index fbb63e47df2..c5e211fac62 100644 --- a/quickwit/quickwit-ingest/src/metrics.rs +++ b/quickwit/quickwit-ingest/src/metrics.rs @@ -14,7 +14,9 @@ use std::sync::LazyLock; -use quickwit_metrics::{Counter, Gauge, counter, gauge}; +use quickwit_metrics::{Counter, Gauge, LabelNames, counter, gauge}; + +pub(crate) const VALIDITY: LabelNames<1> = LabelNames::new(["validity"]); pub(crate) static DOCS_BYTES_TOTAL: LazyLock = LazyLock::new(|| { counter!( From fc6e896249c4a0a5c37df5d72ef6030b7661fa84 Mon Sep 17 00:00:00 2001 From: Luca Cominardi Date: Tue, 5 May 2026 12:49:38 +0200 Subject: [PATCH 37/54] Add label_names! and label_values!(names:) macros; make constructors private - Add label_names! macro to replace LabelNames::new([...]) - Make LabelNames::new private (__new, doc(hidden)) - Change label_values! 
syntax from (NAMES, [v1, v2]) to (names: NAMES, v1, v2) - Update all call sites across the workspace - Update docs and examples Co-authored-by: Cursor --- quickwit/quickwit-common/src/io.rs | 13 +++----- .../quickwit-control-plane/src/metrics.rs | 4 +-- .../src/model/shard_table.rs | 4 +-- .../src/actors/doc_processor.rs | 7 ++-- .../src/actors/indexing_pipeline.rs | 10 +++--- .../src/actors/merge_pipeline.rs | 6 ++-- .../metrics_pipeline/parquet_uploader.rs | 2 +- .../processed_parquet_batch.rs | 7 ++-- .../quickwit-indexing/src/actors/uploader.rs | 6 ++-- quickwit/quickwit-indexing/src/metrics.rs | 6 ++-- .../src/models/processed_doc.rs | 7 ++-- .../src/models/raw_doc_batch.rs | 12 ++----- .../src/source/ingest/mod.rs | 28 +++++----------- quickwit/quickwit-indexing/src/source/mod.rs | 10 +++--- .../quickwit-ingest/src/ingest_api_service.rs | 2 +- .../quickwit-ingest/src/ingest_v2/fetch.rs | 6 ++-- .../quickwit-ingest/src/ingest_v2/ingester.rs | 23 ++++++------- .../quickwit-ingest/src/ingest_v2/metrics.rs | 9 ++--- .../src/ingest_v2/replication.rs | 6 ++-- .../quickwit-ingest/src/ingest_v2/router.rs | 19 ++++------- quickwit/quickwit-ingest/src/metrics.rs | 4 +-- quickwit/quickwit-jaeger/src/lib.rs | 20 +++++------ quickwit/quickwit-jaeger/src/metrics.rs | 6 ++-- quickwit/quickwit-jaeger/src/v1.rs | 4 +-- quickwit/quickwit-jaeger/src/v2.rs | 12 +++---- .../benches/quickwit_metrics.rs | 20 +++++------ .../quickwit-metrics/examples/http_service.rs | 6 ++-- quickwit/quickwit-metrics/src/labels.rs | 33 ++++++++++++++----- quickwit/quickwit-metrics/src/lib.rs | 6 ++-- .../quickwit-opentelemetry/src/otlp/logs.rs | 10 +++--- .../src/otlp/metrics.rs | 6 ++-- .../src/otlp/otel_metrics.rs | 11 ++++--- .../quickwit-opentelemetry/src/otlp/traces.rs | 10 +++--- quickwit/quickwit-search/src/metrics.rs | 6 ++-- .../quickwit-search/src/metrics_trackers.rs | 4 +-- quickwit/quickwit-serve/src/decompression.rs | 5 +-- 36 files changed, 160 insertions(+), 190 deletions(-) diff 
--git a/quickwit/quickwit-common/src/io.rs b/quickwit/quickwit-common/src/io.rs index adbdd57317d..76e5e4271b9 100644 --- a/quickwit/quickwit-common/src/io.rs +++ b/quickwit/quickwit-common/src/io.rs @@ -272,22 +272,18 @@ pub trait IoControlsAccess: Sized { } fn apply(&self, f: F) -> R - where - F: Fn(&IoControls) -> R; + where F: Fn(&IoControls) -> R; } impl IoControlsAccess for IoControls { fn apply(&self, f: F) -> R - where - F: Fn(&IoControls) -> R, - { + where F: Fn(&IoControls) -> R { f(self) } } impl ControlledWrite -where - A: IoControlsAccess, +where A: IoControlsAccess { pub fn underlying_wrt(&mut self) -> &mut W { &mut self.underlying_wrt @@ -300,8 +296,7 @@ where } impl io::Write for ControlledWrite -where - A: IoControlsAccess, +where A: IoControlsAccess { fn write(&mut self, buf: &[u8]) -> io::Result { let buf = truncate_bytes(buf); diff --git a/quickwit/quickwit-control-plane/src/metrics.rs b/quickwit/quickwit-control-plane/src/metrics.rs index cde944f2e52..fb2eb4153f4 100644 --- a/quickwit/quickwit-control-plane/src/metrics.rs +++ b/quickwit/quickwit-control-plane/src/metrics.rs @@ -14,7 +14,7 @@ use std::sync::LazyLock; -use quickwit_metrics::{Counter, Gauge, LabelNames, counter, gauge}; +use quickwit_metrics::{Counter, Gauge, LabelNames, counter, gauge, label_names}; #[derive(Debug, Clone, Copy)] pub struct ShardLocalityMetrics { @@ -51,7 +51,7 @@ pub(crate) static OPEN_SHARDS: LazyLock = pub(crate) static CLOSED_SHARDS: LazyLock = LazyLock::new(|| gauge!(parent: SHARDS, "state" => "closed")); -pub(crate) const INDEX_ID_LABELS: LabelNames<1> = LabelNames::new(["index_id"]); +pub(crate) const INDEX_ID_LABELS: LabelNames<1> = label_names!("index_id"); static INDEXED_SHARDS: LazyLock = LazyLock::new(|| { gauge!( diff --git a/quickwit/quickwit-control-plane/src/model/shard_table.rs b/quickwit/quickwit-control-plane/src/model/shard_table.rs index 300344d012b..046a679c9b4 100644 --- a/quickwit/quickwit-control-plane/src/model/shard_table.rs +++ 
b/quickwit/quickwit-control-plane/src/model/shard_table.rs @@ -464,7 +464,7 @@ impl ShardTable { // can update the metrics for this specific index. if index_label == index_id { let shard_stats = table_entry.shards_stats(); - let labels = label_values!(INDEX_ID_LABELS, [index_label.to_string()]); + let labels = label_values!(names: INDEX_ID_LABELS, index_label.to_string()); gauge!( parent: OPEN_SHARDS, labels: labels, @@ -488,7 +488,7 @@ impl ShardTable { num_closed_shards += 1; } } - let labels = label_values!(INDEX_ID_LABELS, [index_label.to_string()]); + let labels = label_values!(names: INDEX_ID_LABELS, index_label.to_string()); gauge!( parent: OPEN_SHARDS, labels: labels, diff --git a/quickwit/quickwit-indexing/src/actors/doc_processor.rs b/quickwit/quickwit-indexing/src/actors/doc_processor.rs index f9e4b555b54..4abefc441d5 100644 --- a/quickwit/quickwit-indexing/src/actors/doc_processor.rs +++ b/quickwit/quickwit-indexing/src/actors/doc_processor.rs @@ -239,8 +239,7 @@ impl Iterator for JsonDocIterator { } impl From> for JsonDocIterator -where - E: Into, +where E: Into { fn from(result: Result) -> Self { match result { @@ -277,9 +276,7 @@ pub struct DocProcessorCounter { impl Serialize for DocProcessorCounter { fn serialize(&self, serializer: S) -> Result - where - S: serde::Serializer, - { + where S: serde::Serializer { serializer.serialize_u64(self.get_num_docs()) } } diff --git a/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs b/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs index 67572bc48dc..eae5218cf8c 100644 --- a/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs +++ b/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs @@ -315,7 +315,7 @@ impl IndexingPipeline { .set_kill_switch(self.kill_switch.clone()) .set_backpressure_micros_counter(counter!( parent: BACKPRESSURE_MICROS, - labels: label_values!(ACTOR_NAME, ["publisher"]), + labels: label_values!(names: ACTOR_NAME, "publisher"), )) .spawn(publisher); @@ -324,7 
+324,7 @@ impl IndexingPipeline { .spawn_actor() .set_backpressure_micros_counter(counter!( parent: BACKPRESSURE_MICROS, - labels: label_values!(ACTOR_NAME, ["sequencer"]), + labels: label_values!(names: ACTOR_NAME, "sequencer"), )) .set_kill_switch(self.kill_switch.clone()) .spawn(sequencer); @@ -344,7 +344,7 @@ impl IndexingPipeline { .spawn_actor() .set_backpressure_micros_counter(counter!( parent: BACKPRESSURE_MICROS, - labels: label_values!(ACTOR_NAME, ["uploader"]), + labels: label_values!(names: ACTOR_NAME, "uploader"), )) .set_kill_switch(self.kill_switch.clone()) .spawn(uploader); @@ -378,7 +378,7 @@ impl IndexingPipeline { .spawn_actor() .set_backpressure_micros_counter(counter!( parent: BACKPRESSURE_MICROS, - labels: label_values!(ACTOR_NAME, ["indexer"]), + labels: label_values!(names: ACTOR_NAME, "indexer"), )) .set_kill_switch(self.kill_switch.clone()) .spawn(indexer); @@ -395,7 +395,7 @@ impl IndexingPipeline { .spawn_actor() .set_backpressure_micros_counter(counter!( parent: BACKPRESSURE_MICROS, - labels: label_values!(ACTOR_NAME, ["doc_processor"]), + labels: label_values!(names: ACTOR_NAME, "doc_processor"), )) .set_kill_switch(self.kill_switch.clone()) .spawn(doc_processor); diff --git a/quickwit/quickwit-indexing/src/actors/merge_pipeline.rs b/quickwit/quickwit-indexing/src/actors/merge_pipeline.rs index f2889b3bc19..2d150c7893d 100644 --- a/quickwit/quickwit-indexing/src/actors/merge_pipeline.rs +++ b/quickwit/quickwit-indexing/src/actors/merge_pipeline.rs @@ -276,7 +276,7 @@ impl MergePipeline { .set_kill_switch(self.kill_switch.clone()) .set_backpressure_micros_counter(counter!( parent: BACKPRESSURE_MICROS, - labels: label_values!(ACTOR_NAME, ["merge_publisher"]), + labels: label_values!(names: ACTOR_NAME, "merge_publisher"), )) .spawn(merge_publisher); @@ -325,7 +325,7 @@ impl MergePipeline { .set_kill_switch(self.kill_switch.clone()) .set_backpressure_micros_counter(counter!( parent: BACKPRESSURE_MICROS, - labels: label_values!(ACTOR_NAME, 
["merge_executor"]), + labels: label_values!(names: ACTOR_NAME, "merge_executor"), )) .spawn(merge_executor); @@ -340,7 +340,7 @@ impl MergePipeline { .set_kill_switch(self.kill_switch.clone()) .set_backpressure_micros_counter(counter!( parent: BACKPRESSURE_MICROS, - labels: label_values!(ACTOR_NAME, ["merge_split_downloader"]), + labels: label_values!(names: ACTOR_NAME, "merge_split_downloader"), )) .spawn(merge_split_downloader); diff --git a/quickwit/quickwit-indexing/src/actors/metrics_pipeline/parquet_uploader.rs b/quickwit/quickwit-indexing/src/actors/metrics_pipeline/parquet_uploader.rs index c06c5660815..3607a304241 100644 --- a/quickwit/quickwit-indexing/src/actors/metrics_pipeline/parquet_uploader.rs +++ b/quickwit/quickwit-indexing/src/actors/metrics_pipeline/parquet_uploader.rs @@ -125,7 +125,7 @@ impl ParquetUploader { .get_or_init(|| Semaphore::const_new(self.max_concurrent_uploads)); let gauge = gauge!( parent: AVAILABLE_CONCURRENT_UPLOAD_PERMITS, - labels: label_values!(COMPONENT, ["metrics"]), + labels: label_values!(names: COMPONENT, "metrics"), ); gauge.set(concurrent_upload_permits.available_permits() as f64); concurrent_upload_permits diff --git a/quickwit/quickwit-indexing/src/actors/metrics_pipeline/processed_parquet_batch.rs b/quickwit/quickwit-indexing/src/actors/metrics_pipeline/processed_parquet_batch.rs index f0493e2c3a0..64f23e08b7e 100644 --- a/quickwit/quickwit-indexing/src/actors/metrics_pipeline/processed_parquet_batch.rs +++ b/quickwit/quickwit-indexing/src/actors/metrics_pipeline/processed_parquet_batch.rs @@ -20,8 +20,8 @@ use std::fmt; use arrow::record_batch::RecordBatch; -use quickwit_metastore::checkpoint::SourceCheckpointDelta; use quickwit_common::metrics::IN_FLIGHT_INDEXER_MAILBOX; +use quickwit_metastore::checkpoint::SourceCheckpointDelta; use quickwit_metrics::GaugeGuard; /// Batch of parquet data as Arrow RecordBatch for the parquet indexing pipeline. 
@@ -66,10 +66,7 @@ impl ProcessedParquetBatch { .map(|col| col.get_array_memory_size() as i64) .sum(); - let gauge_guard = GaugeGuard::new( - &IN_FLIGHT_INDEXER_MAILBOX, - memory_size as f64, - ); + let gauge_guard = GaugeGuard::new(&IN_FLIGHT_INDEXER_MAILBOX, memory_size as f64); Self { batches, diff --git a/quickwit/quickwit-indexing/src/actors/uploader.rs b/quickwit/quickwit-indexing/src/actors/uploader.rs index b9fb880da7d..d56f6f8a1b3 100644 --- a/quickwit/quickwit-indexing/src/actors/uploader.rs +++ b/quickwit/quickwit-indexing/src/actors/uploader.rs @@ -206,21 +206,21 @@ impl Uploader { &CONCURRENT_UPLOAD_PERMITS_INDEX, gauge!( parent: AVAILABLE_CONCURRENT_UPLOAD_PERMITS, - labels: label_values!(COMPONENT, ["indexer"]), + labels: label_values!(names: COMPONENT, "indexer"), ), ), UploaderType::MergeUploader => ( &CONCURRENT_UPLOAD_PERMITS_MERGE, gauge!( parent: AVAILABLE_CONCURRENT_UPLOAD_PERMITS, - labels: label_values!(COMPONENT, ["merger"]), + labels: label_values!(names: COMPONENT, "merger"), ), ), UploaderType::DeleteUploader => ( &CONCURRENT_UPLOAD_PERMITS_MERGE, gauge!( parent: AVAILABLE_CONCURRENT_UPLOAD_PERMITS, - labels: label_values!(COMPONENT, ["merger"]), + labels: label_values!(names: COMPONENT, "merger"), ), ), }; diff --git a/quickwit/quickwit-indexing/src/metrics.rs b/quickwit/quickwit-indexing/src/metrics.rs index 08c3c0f3911..2d936a00cb9 100644 --- a/quickwit/quickwit-indexing/src/metrics.rs +++ b/quickwit/quickwit-indexing/src/metrics.rs @@ -14,10 +14,10 @@ use std::sync::LazyLock; -use quickwit_metrics::{Counter, Gauge, LabelNames, counter, gauge}; +use quickwit_metrics::{Counter, Gauge, LabelNames, counter, gauge, label_names}; -pub(crate) const ACTOR_NAME: LabelNames<1> = LabelNames::new(["actor_name"]); -pub(crate) const COMPONENT: LabelNames<1> = LabelNames::new(["component"]); +pub(crate) const ACTOR_NAME: LabelNames<1> = label_names!("actor_name"); +pub(crate) const COMPONENT: LabelNames<1> = label_names!("component"); pub(crate) 
static PROCESSED_DOCS_TOTAL: LazyLock = LazyLock::new(|| { counter!( diff --git a/quickwit/quickwit-indexing/src/models/processed_doc.rs b/quickwit/quickwit-indexing/src/models/processed_doc.rs index f7906a9c4ff..eb9c7179342 100644 --- a/quickwit/quickwit-indexing/src/models/processed_doc.rs +++ b/quickwit/quickwit-indexing/src/models/processed_doc.rs @@ -14,8 +14,8 @@ use std::fmt; -use quickwit_metastore::checkpoint::SourceCheckpointDelta; use quickwit_common::metrics::IN_FLIGHT_INDEXER_MAILBOX; +use quickwit_metastore::checkpoint::SourceCheckpointDelta; use quickwit_metrics::GaugeGuard; use tantivy::{DateTime, TantivyDocument}; @@ -52,10 +52,7 @@ impl ProcessedDocBatch { force_commit: bool, ) -> Self { let delta = docs.iter().map(|doc| doc.num_bytes as i64).sum::(); - let gauge_guard = GaugeGuard::new( - &IN_FLIGHT_INDEXER_MAILBOX, - delta as f64, - ); + let gauge_guard = GaugeGuard::new(&IN_FLIGHT_INDEXER_MAILBOX, delta as f64); Self { docs, checkpoint_delta, diff --git a/quickwit/quickwit-indexing/src/models/raw_doc_batch.rs b/quickwit/quickwit-indexing/src/models/raw_doc_batch.rs index 8a272b3d414..f84226bfaa6 100644 --- a/quickwit/quickwit-indexing/src/models/raw_doc_batch.rs +++ b/quickwit/quickwit-indexing/src/models/raw_doc_batch.rs @@ -15,8 +15,8 @@ use std::fmt; use bytes::Bytes; -use quickwit_metastore::checkpoint::SourceCheckpointDelta; use quickwit_common::metrics::IN_FLIGHT_DOC_PROCESSOR_MAILBOX; +use quickwit_metastore::checkpoint::SourceCheckpointDelta; use quickwit_metrics::GaugeGuard; pub struct RawDocBatch { @@ -35,10 +35,7 @@ impl RawDocBatch { force_commit: bool, ) -> Self { let delta = docs.iter().map(|doc| doc.len() as i64).sum::(); - let gauge_guard = GaugeGuard::new( - &IN_FLIGHT_DOC_PROCESSOR_MAILBOX, - delta as f64, - ); + let gauge_guard = GaugeGuard::new(&IN_FLIGHT_DOC_PROCESSOR_MAILBOX, delta as f64); Self { docs, @@ -69,10 +66,7 @@ impl fmt::Debug for RawDocBatch { impl Default for RawDocBatch { fn default() -> Self { - let 
_gauge_guard = GaugeGuard::new( - &IN_FLIGHT_DOC_PROCESSOR_MAILBOX, - 0.0, - ); + let _gauge_guard = GaugeGuard::new(&IN_FLIGHT_DOC_PROCESSOR_MAILBOX, 0.0); Self { docs: Vec::new(), checkpoint_delta: SourceCheckpointDelta::default(), diff --git a/quickwit/quickwit-indexing/src/source/ingest/mod.rs b/quickwit/quickwit-indexing/src/source/ingest/mod.rs index 783f0ef28c8..c815d2bc1e7 100644 --- a/quickwit/quickwit-indexing/src/source/ingest/mod.rs +++ b/quickwit/quickwit-indexing/src/source/ingest/mod.rs @@ -1434,11 +1434,8 @@ mod tests { }; let batch_size = fetch_payload.estimate_size(); let fetch_message = FetchMessage::new_payload(fetch_payload); - let in_flight_value = InFlightValue::new( - fetch_message, - batch_size, - &IN_FLIGHT_FETCH_STREAM, - ); + let in_flight_value = + InFlightValue::new(fetch_message, batch_size, &IN_FLIGHT_FETCH_STREAM); fetch_message_tx.send(Ok(in_flight_value)).await.unwrap(); let fetch_payload = FetchPayload { @@ -1451,11 +1448,8 @@ mod tests { }; let batch_size = fetch_payload.estimate_size(); let fetch_message = FetchMessage::new_payload(fetch_payload); - let in_flight_value = InFlightValue::new( - fetch_message, - batch_size, - &IN_FLIGHT_FETCH_STREAM, - ); + let in_flight_value = + InFlightValue::new(fetch_message, batch_size, &IN_FLIGHT_FETCH_STREAM); fetch_message_tx.send(Ok(in_flight_value)).await.unwrap(); let fetch_eof = FetchEof { @@ -1465,11 +1459,8 @@ mod tests { eof_position: Some(Position::eof(23u64)), }; let fetch_message = FetchMessage::new_eof(fetch_eof); - let in_flight_value = InFlightValue::new( - fetch_message, - ByteSize(0), - &IN_FLIGHT_FETCH_STREAM, - ); + let in_flight_value = + InFlightValue::new(fetch_message, ByteSize(0), &IN_FLIGHT_FETCH_STREAM); fetch_message_tx.send(Ok(in_flight_value)).await.unwrap(); source.emit_batches(&source_sink, &ctx).await.unwrap(); @@ -1526,11 +1517,8 @@ mod tests { }; let batch_size = fetch_payload.estimate_size(); let fetch_message = FetchMessage::new_payload(fetch_payload); - 
let in_flight_value = InFlightValue::new( - fetch_message, - batch_size, - &IN_FLIGHT_FETCH_STREAM, - ); + let in_flight_value = + InFlightValue::new(fetch_message, batch_size, &IN_FLIGHT_FETCH_STREAM); fetch_message_tx.send(Ok(in_flight_value)).await.unwrap(); source.emit_batches(&source_sink, &ctx).await.unwrap(); diff --git a/quickwit/quickwit-indexing/src/source/mod.rs b/quickwit/quickwit-indexing/src/source/mod.rs index e2d5e1910fa..a936eacf8ca 100644 --- a/quickwit/quickwit-indexing/src/source/mod.rs +++ b/quickwit/quickwit-indexing/src/source/mod.rs @@ -92,6 +92,11 @@ pub use pulsar_source::{PulsarSource, PulsarSourceFactory}; #[cfg(feature = "sqs")] pub use queue_sources::sqs_queue; use quickwit_actors::{Actor, ActorContext, ActorExitStatus, Handler}; +use quickwit_common::metrics::{ + IN_FLIGHT_FILE_SOURCE, IN_FLIGHT_INGEST_SOURCE, IN_FLIGHT_KAFKA_SOURCE, + IN_FLIGHT_KINESIS_SOURCE, IN_FLIGHT_OTHER_SOURCE, IN_FLIGHT_PUBSUB_SOURCE, + IN_FLIGHT_PULSAR_SOURCE, +}; use quickwit_common::pubsub::EventBroker; use quickwit_common::runtimes::RuntimeType; use quickwit_config::{ @@ -100,11 +105,6 @@ use quickwit_config::{ use quickwit_ingest::IngesterPool; use quickwit_metastore::IndexMetadataResponseExt; use quickwit_metastore::checkpoint::{SourceCheckpoint, SourceCheckpointDelta}; -use quickwit_common::metrics::{ - IN_FLIGHT_FILE_SOURCE, IN_FLIGHT_INGEST_SOURCE, IN_FLIGHT_KAFKA_SOURCE, - IN_FLIGHT_KINESIS_SOURCE, IN_FLIGHT_OTHER_SOURCE, IN_FLIGHT_PUBSUB_SOURCE, - IN_FLIGHT_PULSAR_SOURCE, -}; use quickwit_metrics::GaugeGuard; use quickwit_proto::indexing::IndexingPipelineId; use quickwit_proto::metastore::{ diff --git a/quickwit/quickwit-ingest/src/ingest_api_service.rs b/quickwit/quickwit-ingest/src/ingest_api_service.rs index fef3119f1bd..1c1eceaaf26 100644 --- a/quickwit/quickwit-ingest/src/ingest_api_service.rs +++ b/quickwit/quickwit-ingest/src/ingest_api_service.rs @@ -202,7 +202,7 @@ impl IngestApiService { } num_docs += batch_num_docs; - let labels = 
label_values!(VALIDITY, ["valid"]); + let labels = label_values!(names: VALIDITY, "valid"); counter!( parent: DOCS_BYTES_TOTAL, labels: labels, diff --git a/quickwit/quickwit-ingest/src/ingest_v2/fetch.rs b/quickwit/quickwit-ingest/src/ingest_v2/fetch.rs index 3eb35698fb7..0c1178678b9 100644 --- a/quickwit/quickwit-ingest/src/ingest_v2/fetch.rs +++ b/quickwit/quickwit-ingest/src/ingest_v2/fetch.rs @@ -81,10 +81,8 @@ impl FetchStreamTask { .as_u64() .map(|offset| offset + 1) .unwrap_or_default(); - let (fetch_message_tx, fetch_stream) = ServiceStream::new_bounded_with_gauge( - 3, - &IN_FLIGHT_FETCH_STREAM, - ); + let (fetch_message_tx, fetch_stream) = + ServiceStream::new_bounded_with_gauge(3, &IN_FLIGHT_FETCH_STREAM); let mut fetch_task = Self { shard_id: open_fetch_stream_request.shard_id().clone(), queue_id: open_fetch_stream_request.queue_id(), diff --git a/quickwit/quickwit-ingest/src/ingest_v2/ingester.rs b/quickwit/quickwit-ingest/src/ingest_v2/ingester.rs index 12e7c6b3a4f..f2bf2dc169c 100644 --- a/quickwit/quickwit-ingest/src/ingest_v2/ingester.rs +++ b/quickwit/quickwit-ingest/src/ingest_v2/ingester.rs @@ -335,7 +335,7 @@ impl Ingester { ); counter!( parent: RESET_SHARDS_OPERATIONS_TOTAL, - labels: label_values!(STATUS, ["success"]), + labels: label_values!(names: STATUS, "success"), ) .increment(1); @@ -347,7 +347,7 @@ impl Ingester { counter!( parent: RESET_SHARDS_OPERATIONS_TOTAL, - labels: label_values!(STATUS, ["error"]), + labels: label_values!(names: STATUS, "error"), ) .increment(1); } @@ -356,7 +356,7 @@ impl Ingester { counter!( parent: RESET_SHARDS_OPERATIONS_TOTAL, - labels: label_values!(STATUS, ["timeout"]), + labels: label_values!(names: STATUS, "timeout"), ) .increment(1); } @@ -573,12 +573,12 @@ impl Ingester { if valid_doc_batch.is_empty() { counter!( parent: DOCS_TOTAL, - labels: label_values!(VALIDITY, ["invalid"]), + labels: label_values!(names: VALIDITY, "invalid"), ) .increment(parse_failures.len() as u64); counter!( parent: 
DOCS_BYTES_TOTAL, - labels: label_values!(VALIDITY, ["invalid"]), + labels: label_values!(names: VALIDITY, "invalid"), ) .increment(original_batch_num_bytes); let persist_success = PersistSuccess { @@ -596,23 +596,23 @@ impl Ingester { counter!( parent: DOCS_TOTAL, - labels: label_values!(VALIDITY, ["valid"]), + labels: label_values!(names: VALIDITY, "valid"), ) .increment(valid_doc_batch.num_docs() as u64); counter!( parent: DOCS_BYTES_TOTAL, - labels: label_values!(VALIDITY, ["valid"]), + labels: label_values!(names: VALIDITY, "valid"), ) .increment(valid_doc_batch.num_bytes() as u64); if !parse_failures.is_empty() { counter!( parent: DOCS_TOTAL, - labels: label_values!(VALIDITY, ["invalid"]), + labels: label_values!(names: VALIDITY, "invalid"), ) .increment(parse_failures.len() as u64); counter!( parent: DOCS_BYTES_TOTAL, - labels: label_values!(VALIDITY, ["invalid"]), + labels: label_values!(names: VALIDITY, "invalid"), ) .increment(original_batch_num_bytes - valid_doc_batch.num_bytes() as u64); } @@ -1127,10 +1127,7 @@ impl IngesterService for Ingester { _ => None, }) .sum::(); - let _gauge_guard = GaugeGuard::new( - &IN_FLIGHT_INGESTER_PERSIST, - request_size_bytes as f64, - ); + let _gauge_guard = GaugeGuard::new(&IN_FLIGHT_INGESTER_PERSIST, request_size_bytes as f64); self.persist_inner(persist_request).await } diff --git a/quickwit/quickwit-ingest/src/ingest_v2/metrics.rs b/quickwit/quickwit-ingest/src/ingest_v2/metrics.rs index 3cf40467e98..57e4e62cf00 100644 --- a/quickwit/quickwit-ingest/src/ingest_v2/metrics.rs +++ b/quickwit/quickwit-ingest/src/ingest_v2/metrics.rs @@ -15,11 +15,12 @@ use std::sync::LazyLock; use mrecordlog::ResourceUsage; -use quickwit_common::metrics::{exponential_buckets, linear_buckets}; -use quickwit_common::metrics::IN_FLIGHT_WAL; -use quickwit_metrics::{Counter, Gauge, Histogram, LabelNames, counter, gauge, histogram}; +use quickwit_common::metrics::{IN_FLIGHT_WAL, exponential_buckets, linear_buckets}; +use quickwit_metrics::{ 
+ Counter, Gauge, Histogram, LabelNames, counter, gauge, histogram, label_names, +}; -pub(super) const STATUS: LabelNames<1> = LabelNames::new(["status"]); +pub(super) const STATUS: LabelNames<1> = label_names!("status"); static INGEST_RESULT_TOTAL: LazyLock = LazyLock::new(|| { counter!( diff --git a/quickwit/quickwit-ingest/src/ingest_v2/replication.rs b/quickwit/quickwit-ingest/src/ingest_v2/replication.rs index e7b5ca5fdca..12050c66655 100644 --- a/quickwit/quickwit-ingest/src/ingest_v2/replication.rs +++ b/quickwit/quickwit-ingest/src/ingest_v2/replication.rs @@ -505,10 +505,8 @@ impl ReplicationTask { ))); } let request_size_bytes = replicate_request.num_bytes(); - let _gauge_guard = GaugeGuard::new( - &IN_FLIGHT_INGESTER_REPLICATE, - request_size_bytes as f64, - ); + let _gauge_guard = + GaugeGuard::new(&IN_FLIGHT_INGESTER_REPLICATE, request_size_bytes as f64); self.current_replication_seqno += 1; diff --git a/quickwit/quickwit-ingest/src/ingest_v2/router.rs b/quickwit/quickwit-ingest/src/ingest_v2/router.rs index 69273ae27d7..f18d441af6c 100644 --- a/quickwit/quickwit-ingest/src/ingest_v2/router.rs +++ b/quickwit/quickwit-ingest/src/ingest_v2/router.rs @@ -54,9 +54,9 @@ use crate::ingest_v2::metrics::{ INGEST_ATTEMPTS, INGEST_RESULT_CIRCUIT_BREAKER, INGEST_RESULT_INDEX_NOT_FOUND, INGEST_RESULT_INTERNAL, INGEST_RESULT_LOAD_SHEDDING, INGEST_RESULT_NO_SHARDS_AVAILABLE, INGEST_RESULT_ROUTER_LOAD_SHEDDING, INGEST_RESULT_ROUTER_TIMEOUT, - INGEST_RESULT_SHARD_NOT_FOUND, INGEST_RESULT_SHARD_RATE_LIMITED, INGEST_RESULT_SOURCE_NOT_FOUND, - INGEST_RESULT_SUCCESS, INGEST_RESULT_TIMEOUT, INGEST_RESULT_UNAVAILABLE, - INGEST_RESULT_UNSPECIFIED, INGEST_RESULT_WAL_FULL, + INGEST_RESULT_SHARD_NOT_FOUND, INGEST_RESULT_SHARD_RATE_LIMITED, + INGEST_RESULT_SOURCE_NOT_FOUND, INGEST_RESULT_SUCCESS, INGEST_RESULT_TIMEOUT, + INGEST_RESULT_UNAVAILABLE, INGEST_RESULT_UNSPECIFIED, INGEST_RESULT_WAL_FULL, }; /// Duration after which ingest requests time out with 
[`IngestV2Error::Timeout`]. @@ -508,9 +508,7 @@ fn update_ingest_metrics(ingest_result: &IngestV2Result, num_s IngestFailureReason::CircuitBreaker => { INGEST_RESULT_CIRCUIT_BREAKER.increment(1); } - IngestFailureReason::Unspecified => { - INGEST_RESULT_UNSPECIFIED.increment(1) - } + IngestFailureReason::Unspecified => INGEST_RESULT_UNSPECIFIED.increment(1), IngestFailureReason::IndexNotFound => { INGEST_RESULT_INDEX_NOT_FOUND.increment(1) } @@ -529,9 +527,7 @@ fn update_ingest_metrics(ingest_result: &IngestV2Result, num_s IngestFailureReason::RouterLoadShedding => { INGEST_RESULT_ROUTER_LOAD_SHEDDING.increment(1) } - IngestFailureReason::LoadShedding => { - INGEST_RESULT_LOAD_SHEDDING.increment(1) - } + IngestFailureReason::LoadShedding => INGEST_RESULT_LOAD_SHEDDING.increment(1), } } } @@ -577,10 +573,7 @@ impl IngestRouterService for IngestRouter { async fn ingest(&self, ingest_request: IngestRequestV2) -> IngestV2Result { let request_size_bytes = ingest_request.num_bytes(); - let _gauge_guard = GaugeGuard::new( - &IN_FLIGHT_INGEST_ROUTER, - request_size_bytes as f64, - ); + let _gauge_guard = GaugeGuard::new(&IN_FLIGHT_INGEST_ROUTER, request_size_bytes as f64); let num_subrequests = ingest_request.subrequests.len(); let _permit = self diff --git a/quickwit/quickwit-ingest/src/metrics.rs b/quickwit/quickwit-ingest/src/metrics.rs index c5e211fac62..6c6946437ab 100644 --- a/quickwit/quickwit-ingest/src/metrics.rs +++ b/quickwit/quickwit-ingest/src/metrics.rs @@ -14,9 +14,9 @@ use std::sync::LazyLock; -use quickwit_metrics::{Counter, Gauge, LabelNames, counter, gauge}; +use quickwit_metrics::{Counter, Gauge, LabelNames, counter, gauge, label_names}; -pub(crate) const VALIDITY: LabelNames<1> = LabelNames::new(["validity"]); +pub(crate) const VALIDITY: LabelNames<1> = label_names!("validity"); pub(crate) static DOCS_BYTES_TOTAL: LazyLock = LazyLock::new(|| { counter!( diff --git a/quickwit/quickwit-jaeger/src/lib.rs b/quickwit/quickwit-jaeger/src/lib.rs index 
9e781130210..f501b1eb282 100644 --- a/quickwit/quickwit-jaeger/src/lib.rs +++ b/quickwit/quickwit-jaeger/src/lib.rs @@ -423,8 +423,8 @@ impl JaegerService { counter!( parent: FETCHED_TRACES_TOTAL, labels: label_values!( - OPERATION_INDEX_LABELS, - [operation_name, OTEL_TRACES_INDEX_ID] + names: OPERATION_INDEX_LABELS, + operation_name, OTEL_TRACES_INDEX_ID ), ) .increment(num_traces); @@ -433,8 +433,8 @@ impl JaegerService { histogram!( parent: REQUEST_DURATION_SECONDS, labels: label_values!( - OPERATION_INDEX_ERROR_LABELS, - [operation_name, OTEL_TRACES_INDEX_ID, "false"] + names: OPERATION_INDEX_ERROR_LABELS, + operation_name, OTEL_TRACES_INDEX_ID, "false" ), ) .record(elapsed); @@ -447,8 +447,8 @@ pub(crate) fn record_error(operation_name: &'static str, request_start: Instant) counter!( parent: REQUEST_ERRORS_TOTAL, labels: label_values!( - OPERATION_INDEX_LABELS, - [operation_name, OTEL_TRACES_INDEX_ID] + names: OPERATION_INDEX_LABELS, + operation_name, OTEL_TRACES_INDEX_ID ), ) .increment(1); @@ -457,8 +457,8 @@ pub(crate) fn record_error(operation_name: &'static str, request_start: Instant) histogram!( parent: REQUEST_DURATION_SECONDS, labels: label_values!( - OPERATION_INDEX_ERROR_LABELS, - [operation_name, OTEL_TRACES_INDEX_ID, "true"] + names: OPERATION_INDEX_ERROR_LABELS, + operation_name, OTEL_TRACES_INDEX_ID, "true" ), ) .record(elapsed); @@ -466,8 +466,8 @@ pub(crate) fn record_error(operation_name: &'static str, request_start: Instant) pub(crate) fn record_send(operation_name: &'static str, num_spans: usize, num_bytes: usize) { let labels = label_values!( - OPERATION_INDEX_LABELS, - [operation_name, OTEL_TRACES_INDEX_ID] + names: OPERATION_INDEX_LABELS, + operation_name, OTEL_TRACES_INDEX_ID ); counter!(parent: FETCHED_SPANS_TOTAL, labels: labels).increment(num_spans as u64); counter!(parent: TRANSFERRED_BYTES_TOTAL, labels: labels).increment(num_bytes as u64); diff --git a/quickwit/quickwit-jaeger/src/metrics.rs 
b/quickwit/quickwit-jaeger/src/metrics.rs index 465a97c4c14..b2b91ccfdd3 100644 --- a/quickwit/quickwit-jaeger/src/metrics.rs +++ b/quickwit/quickwit-jaeger/src/metrics.rs @@ -15,11 +15,11 @@ use std::sync::LazyLock; use quickwit_common::metrics::exponential_buckets; -use quickwit_metrics::{Counter, Histogram, LabelNames, counter, histogram}; +use quickwit_metrics::{Counter, Histogram, LabelNames, counter, histogram, label_names}; -pub(crate) const OPERATION_INDEX_LABELS: LabelNames<2> = LabelNames::new(["operation", "index"]); +pub(crate) const OPERATION_INDEX_LABELS: LabelNames<2> = label_names!("operation", "index"); pub(crate) const OPERATION_INDEX_ERROR_LABELS: LabelNames<3> = - LabelNames::new(["operation", "index", "error"]); + label_names!("operation", "index", "error"); pub(crate) static REQUESTS_TOTAL: LazyLock = LazyLock::new(|| { counter!( diff --git a/quickwit/quickwit-jaeger/src/v1.rs b/quickwit/quickwit-jaeger/src/v1.rs index ab9f0379db1..aac38578725 100644 --- a/quickwit/quickwit-jaeger/src/v1.rs +++ b/quickwit/quickwit-jaeger/src/v1.rs @@ -39,7 +39,7 @@ macro_rules! metrics { let start = std::time::Instant::now(); let operation = stringify!($operation); let index = $index; - let labels = label_values!(OPERATION_INDEX_LABELS, [operation, index]); + let labels = label_values!(names: OPERATION_INDEX_LABELS, operation, index); counter!( parent: REQUESTS_TOTAL, labels: labels, @@ -61,7 +61,7 @@ macro_rules! metrics { let elapsed = start.elapsed().as_secs_f64(); histogram!( parent: REQUEST_DURATION_SECONDS, - labels: label_values!(OPERATION_INDEX_ERROR_LABELS, [operation, index, is_error]), + labels: label_values!(names: OPERATION_INDEX_ERROR_LABELS, operation, index, is_error), ) .record(elapsed); diff --git a/quickwit/quickwit-jaeger/src/v2.rs b/quickwit/quickwit-jaeger/src/v2.rs index bee978dbd30..54ed4b8b844 100644 --- a/quickwit/quickwit-jaeger/src/v2.rs +++ b/quickwit/quickwit-jaeger/src/v2.rs @@ -65,7 +65,7 @@ macro_rules! 
metrics { let start = std::time::Instant::now(); let operation = stringify!($operation); let index = $index; - let labels = label_values!(OPERATION_INDEX_LABELS, [operation, index]); + let labels = label_values!(names: OPERATION_INDEX_LABELS, operation, index); counter!( parent: REQUESTS_TOTAL, labels: labels, @@ -87,7 +87,7 @@ macro_rules! metrics { let elapsed = start.elapsed().as_secs_f64(); histogram!( parent: REQUEST_DURATION_SECONDS, - labels: label_values!(OPERATION_INDEX_ERROR_LABELS, [operation, index, is_error]), + labels: label_values!(names: OPERATION_INDEX_ERROR_LABELS, operation, index, is_error), ) .record(elapsed); @@ -446,8 +446,8 @@ async fn stream_otel_spans_impl( counter!( parent: FETCHED_TRACES_TOTAL, labels: label_values!( - OPERATION_INDEX_LABELS, - [operation_name, OTEL_TRACES_INDEX_ID] + names: OPERATION_INDEX_LABELS, + operation_name, OTEL_TRACES_INDEX_ID ), ) .increment(trace_ids.len() as u64); @@ -456,8 +456,8 @@ async fn stream_otel_spans_impl( histogram!( parent: REQUEST_DURATION_SECONDS, labels: label_values!( - OPERATION_INDEX_ERROR_LABELS, - [operation_name, OTEL_TRACES_INDEX_ID, "false",] + names: OPERATION_INDEX_ERROR_LABELS, + operation_name, OTEL_TRACES_INDEX_ID, "false" ), ) .record(elapsed); diff --git a/quickwit/quickwit-metrics/benches/quickwit_metrics.rs b/quickwit/quickwit-metrics/benches/quickwit_metrics.rs index 1b7ea379b30..e6500078ed3 100644 --- a/quickwit/quickwit-metrics/benches/quickwit_metrics.rs +++ b/quickwit/quickwit-metrics/benches/quickwit_metrics.rs @@ -738,8 +738,8 @@ fn observable_gauge(c: &mut Criterion) { // LABELS // --------------------------------------------------------------------------- -const LABELS_1: LabelNames<1> = LabelNames::new(["method"]); -const LABELS_3: LabelNames<3> = LabelNames::new(["method", "endpoint", "status"]); +const LABELS_1: LabelNames<1> = label_names!("method"); +const LABELS_3: LabelNames<3> = label_names!("method", "endpoint", "status"); fn labels_counter(c: &mut Criterion) 
{ install_recorder(); @@ -751,7 +751,7 @@ fn labels_counter(c: &mut Criterion) { b.iter(|| { counter!( parent: PARENT_COUNTER, - labels: label_values!(LABELS_1, ["GET"]) + labels: label_values!(names: LABELS_1, "GET") ) .increment(1); }); @@ -761,7 +761,7 @@ fn labels_counter(c: &mut Criterion) { b.iter(|| { counter!( parent: PARENT_COUNTER, - labels: label_values!(LABELS_3, ["GET", "/health", "200"]) + labels: label_values!(names: LABELS_3, "GET", "/health", "200") ) .increment(1); }); @@ -771,7 +771,7 @@ fn labels_counter(c: &mut Criterion) { b.iter(|| { counter!( parent: PARENT_COUNTER, - labels: label_values!(LABELS_1, ["GET".to_string()]) + labels: label_values!(names: LABELS_1, "GET".to_string()) ) .increment(1); }); @@ -787,7 +787,7 @@ fn labels_counter(c: &mut Criterion) { idx += 1; counter!( parent: PARENT_COUNTER, - labels: label_values!(LABELS_1, [m]) + labels: label_values!(names: LABELS_1, m) ) .increment(1); }); @@ -806,7 +806,7 @@ fn labels_gauge(c: &mut Criterion) { b.iter(|| { gauge!( parent: PARENT_GAUGE, - labels: label_values!(LABELS_1, ["GET"]) + labels: label_values!(names: LABELS_1, "GET") ) .set(42.0); }); @@ -816,7 +816,7 @@ fn labels_gauge(c: &mut Criterion) { b.iter(|| { gauge!( parent: PARENT_GAUGE, - labels: label_values!(LABELS_3, ["GET", "/health", "200"]) + labels: label_values!(names: LABELS_3, "GET", "/health", "200") ) .set(42.0); }); @@ -835,7 +835,7 @@ fn labels_histogram(c: &mut Criterion) { b.iter(|| { histogram!( parent: PARENT_HISTOGRAM, - labels: label_values!(LABELS_1, ["GET"]) + labels: label_values!(names: LABELS_1, "GET") ) .record(0.123); }); @@ -845,7 +845,7 @@ fn labels_histogram(c: &mut Criterion) { b.iter(|| { histogram!( parent: PARENT_HISTOGRAM, - labels: label_values!(LABELS_3, ["GET", "/health", "200"]) + labels: label_values!(names: LABELS_3, "GET", "/health", "200") ) .record(0.123); }); diff --git a/quickwit/quickwit-metrics/examples/http_service.rs b/quickwit/quickwit-metrics/examples/http_service.rs index 
84fbe019b46..aeeecfdad83 100644 --- a/quickwit/quickwit-metrics/examples/http_service.rs +++ b/quickwit/quickwit-metrics/examples/http_service.rs @@ -83,17 +83,17 @@ static HTTP_ACTIVE_CONNECTIONS_BY_REGION: LazyLock = LazyLock::new(|| { // ─── LabelNames examples ─── -const ROUTE_LABELS: LabelNames<2> = LabelNames::new(["method", "path"]); +const ROUTE_LABELS: LabelNames<2> = label_names!("method", "path"); fn record_request(method: &'static str, path: &'static str, duration: f64, size: f64) { - let route = label_values!(ROUTE_LABELS, [method, path]); + let route = label_values!(names: ROUTE_LABELS, method, path); histogram!(parent: HTTP_REQUEST_DURATION, labels: route).record(duration); histogram!(parent: HTTP_RESPONSE_SIZE, labels: route).record(size); counter!(parent: HTTP_REQUESTS_TOTAL, labels: route).increment(1); } fn record_dynamic_request(method: String, path: String, duration: f64) { - let route = label_values!(ROUTE_LABELS, [method, path]); + let route = label_values!(names: ROUTE_LABELS, method, path); histogram!(parent: HTTP_REQUEST_DURATION, labels: route).record(duration); } diff --git a/quickwit/quickwit-metrics/src/labels.rs b/quickwit/quickwit-metrics/src/labels.rs index c6bbbf4d5d0..aaaf16c5c14 100644 --- a/quickwit/quickwit-metrics/src/labels.rs +++ b/quickwit/quickwit-metrics/src/labels.rs @@ -24,6 +24,20 @@ use metrics::SharedString; use crate::__key_hash; +/// Creates a const [`LabelNames`] from a list of label name literals. +/// +/// # Example +/// +/// ```rust,ignore +/// const ROUTE: LabelNames<2> = label_names!("method", "path"); +/// ``` +#[macro_export] +macro_rules! label_names { + ($($name:expr),+ $(,)?) => { + $crate::LabelNames::__new([$($name),+]) + }; +} + /// Pairs a [`LabelNames`] template with concrete values, one per label name. 
/// /// Each value is converted individually via `Into`, so you @@ -36,13 +50,13 @@ use crate::__key_hash; /// # Example /// /// ```rust,ignore -/// const GC_KEYS: LabelNames<2> = LabelNames::new(["status", "split_type"]); +/// const GC_KEYS: LabelNames<2> = label_names!("status", "split_type"); /// /// // All-static — zero allocation: -/// let lv = label_values!(GC_KEYS, ["success", "tantivy"]); +/// let lv = label_values!(names: GC_KEYS, "success", "tantivy"); /// /// // Mixed types — &'static str and String — just work: -/// let lv = label_values!(GC_KEYS, ["success", split_type.to_string()]); +/// let lv = label_values!(names: GC_KEYS, "success", split_type.to_string()); /// /// // Reuse the same Labels across multiple metrics: /// counter!(parent: GC_COUNTER, labels: lv).increment(1); @@ -50,7 +64,7 @@ use crate::__key_hash; /// ``` #[macro_export] macro_rules! label_values { - ($labels:expr, [$($val:expr),+ $(,)?]) => { + (names: $labels:expr, $($val:expr),+ $(,)?) => { $labels.__with_values([$(Into::<$crate::__metrics::SharedString>::into($val)),+]) }; } @@ -85,13 +99,13 @@ macro_rules! labels { /// # Example /// /// ```rust,ignore -/// const SPLIT_KEYS: LabelNames<2> = LabelNames::new(["source", "level"]); +/// const SPLIT_KEYS: LabelNames<2> = label_names!("source", "level"); /// /// // All the same type: -/// let lv = label_values!(SPLIT_KEYS, ["prod", "info"]); +/// let lv = label_values!(names: SPLIT_KEYS, "prod", "info"); /// /// // Mixed types: -/// let lv = label_values!(SPLIT_KEYS, [source_uid, level.to_string()]); +/// let lv = label_values!(names: SPLIT_KEYS, source_uid, level.to_string()); /// /// // Reuse the same Labels across metrics: /// let c = counter!(parent: BASE_COUNTER, labels: lv); @@ -102,8 +116,9 @@ pub struct LabelNames { } impl LabelNames { - /// Creates a label template from an array of label names. - pub const fn new(names: [&'static str; N]) -> Self { + /// Internal plumbing used by [`label_names!`]. Not part of the public API. 
+ #[doc(hidden)] + pub const fn __new(names: [&'static str; N]) -> Self { Self { names } } diff --git a/quickwit/quickwit-metrics/src/lib.rs b/quickwit/quickwit-metrics/src/lib.rs index 0dd1875a3e6..02ef67614fd 100644 --- a/quickwit/quickwit-metrics/src/lib.rs +++ b/quickwit/quickwit-metrics/src/lib.rs @@ -88,17 +88,17 @@ //! ```rust,ignore //! use quickwit_metrics::*; //! -//! const ROUTE: LabelNames<2> = LabelNames::new(["method", "path"]); +//! const ROUTE: LabelNames<2> = label_names!("method", "path"); //! //! fn on_request(method: &'static str, path: &'static str, duration: f64) { -//! let route = label_values!(ROUTE, [method, path]); +//! let route = label_values!(names: ROUTE, method, path); //! histogram!(parent: REQUEST_DURATION, labels: route).record(duration); //! counter!(parent: HTTP_REQUESTS, labels: route).increment(1); //! } //! //! // Mixed types work too — Into is called per-element: //! fn on_dynamic_request(method: &'static str, path: String, duration: f64) { -//! let route = label_values!(ROUTE, [method, path]); +//! let route = label_values!(names: ROUTE, method, path); //! histogram!(parent: REQUEST_DURATION, labels: route).record(duration); //! } //! 
``` diff --git a/quickwit/quickwit-opentelemetry/src/otlp/logs.rs b/quickwit/quickwit-opentelemetry/src/otlp/logs.rs index 0b2fb5b50bc..e59d13e7ec0 100644 --- a/quickwit/quickwit-opentelemetry/src/otlp/logs.rs +++ b/quickwit/quickwit-opentelemetry/src/otlp/logs.rs @@ -243,7 +243,7 @@ impl OtlpGrpcLogsService { let num_bytes = doc_batch.num_bytes() as u64; self.store_logs(index_id.clone(), doc_batch).await?; - let labels = label_values!(OTLP_GRPC_LABELS, ["logs", index_id, "grpc", "protobuf"]); + let labels = label_values!(names: OTLP_GRPC_LABELS, "logs", index_id, "grpc", "protobuf"); counter!( parent: INGESTED_LOG_RECORDS_TOTAL, labels: labels, @@ -321,8 +321,8 @@ impl OtlpGrpcLogsService { let start = std::time::Instant::now(); let labels = label_values!( - OTLP_GRPC_LABELS, - ["logs", index_id.clone(), "grpc", "protobuf",] + names: OTLP_GRPC_LABELS, + "logs", index_id.clone(), "grpc", "protobuf" ); counter!( parent: REQUESTS_TOTAL, @@ -344,8 +344,8 @@ impl OtlpGrpcLogsService { histogram!( parent: REQUEST_DURATION_SECONDS, labels: label_values!( - OTLP_GRPC_ERROR_LABELS, - ["logs", index_id, "grpc", "protobuf", is_error,] + names: OTLP_GRPC_ERROR_LABELS, + "logs", index_id, "grpc", "protobuf", is_error ), ) .record(elapsed); diff --git a/quickwit/quickwit-opentelemetry/src/otlp/metrics.rs b/quickwit/quickwit-opentelemetry/src/otlp/metrics.rs index b13fdee8630..7fcad60a9f5 100644 --- a/quickwit/quickwit-opentelemetry/src/otlp/metrics.rs +++ b/quickwit/quickwit-opentelemetry/src/otlp/metrics.rs @@ -15,12 +15,12 @@ use std::sync::LazyLock; use quickwit_common::metrics::exponential_buckets; -use quickwit_metrics::{Counter, Histogram, LabelNames, counter, histogram}; +use quickwit_metrics::{Counter, Histogram, LabelNames, counter, histogram, label_names}; pub(crate) const OTLP_GRPC_LABELS: LabelNames<4> = - LabelNames::new(["service", "index", "transport", "format"]); + label_names!("service", "index", "transport", "format"); pub(crate) const OTLP_GRPC_ERROR_LABELS: 
LabelNames<5> = - LabelNames::new(["service", "index", "transport", "format", "error"]); + label_names!("service", "index", "transport", "format", "error"); pub(crate) static REQUESTS_TOTAL: LazyLock<Counter> = LazyLock::new(|| { counter!( diff --git a/quickwit/quickwit-opentelemetry/src/otlp/otel_metrics.rs b/quickwit/quickwit-opentelemetry/src/otlp/otel_metrics.rs index 95e9d0f9262..c272011da01 100644 --- a/quickwit/quickwit-opentelemetry/src/otlp/otel_metrics.rs +++ b/quickwit/quickwit-opentelemetry/src/otlp/otel_metrics.rs @@ -239,7 +239,8 @@ impl OtlpGrpcMetricsService { let num_bytes = doc_batch.num_bytes() as u64; self.store_metrics(index_id.clone(), doc_batch).await?; - let labels = label_values!(OTLP_GRPC_LABELS, ["metrics", index_id, "grpc", "protobuf"]); + let labels = + label_values!(names: OTLP_GRPC_LABELS, "metrics", index_id, "grpc", "protobuf"); counter!( parent: INGESTED_DATA_POINTS_TOTAL, labels: labels, @@ -335,8 +336,8 @@ impl OtlpGrpcMetricsService { let start = std::time::Instant::now(); let labels = label_values!( - OTLP_GRPC_LABELS, - ["metrics", index_id.clone(), "grpc", "protobuf"] + names: OTLP_GRPC_LABELS, + "metrics", index_id.clone(), "grpc", "protobuf" ); counter!( parent: REQUESTS_TOTAL, @@ -360,8 +361,8 @@ histogram!( parent: REQUEST_DURATION_SECONDS, labels: label_values!( - OTLP_GRPC_ERROR_LABELS, - ["metrics", index_id, "grpc", "protobuf", is_error] + names: OTLP_GRPC_ERROR_LABELS, + "metrics", index_id, "grpc", "protobuf", is_error ), ) .record(elapsed); diff --git a/quickwit/quickwit-opentelemetry/src/otlp/traces.rs b/quickwit/quickwit-opentelemetry/src/otlp/traces.rs index 71a185eed37..01309105538 100644 --- a/quickwit/quickwit-opentelemetry/src/otlp/traces.rs +++ b/quickwit/quickwit-opentelemetry/src/otlp/traces.rs @@ -705,7 +705,7 @@ impl OtlpGrpcTracesService { let num_bytes = doc_batch.num_bytes() as u64; self.store_spans(index_id.clone(), doc_batch).await?; - let labels = 
label_values!(OTLP_GRPC_LABELS, ["trace", index_id, "grpc", "protobuf",]); + let labels = label_values!(names: OTLP_GRPC_LABELS, "trace", index_id, "grpc", "protobuf"); counter!( parent: INGESTED_SPANS_TOTAL, labels: labels, @@ -786,8 +786,8 @@ impl OtlpGrpcTracesService { let start = std::time::Instant::now(); let labels = label_values!( - OTLP_GRPC_LABELS, - ["trace", index_id.clone(), "grpc", "protobuf",] + names: OTLP_GRPC_LABELS, + "trace", index_id.clone(), "grpc", "protobuf" ); counter!( parent: REQUESTS_TOTAL, @@ -809,8 +809,8 @@ impl OtlpGrpcTracesService { histogram!( parent: REQUEST_DURATION_SECONDS, labels: label_values!( - OTLP_GRPC_ERROR_LABELS, - ["trace", index_id, "grpc", "protobuf", is_error,] + names: OTLP_GRPC_ERROR_LABELS, + "trace", index_id, "grpc", "protobuf", is_error ), ) .record(elapsed); diff --git a/quickwit/quickwit-search/src/metrics.rs b/quickwit/quickwit-search/src/metrics.rs index 33ab4078415..047a537059b 100644 --- a/quickwit/quickwit-search/src/metrics.rs +++ b/quickwit/quickwit-search/src/metrics.rs @@ -19,9 +19,11 @@ use std::sync::LazyLock; use bytesize::ByteSize; use quickwit_common::metrics::{MaybeRegisteredCounter, exponential_buckets, linear_buckets}; -use quickwit_metrics::{Counter, Gauge, Histogram, LabelNames, counter, gauge, histogram}; +use quickwit_metrics::{ + Counter, Gauge, Histogram, LabelNames, counter, gauge, histogram, label_names, +}; -pub(crate) const STATUS_LABELS: LabelNames<1> = LabelNames::new(["status"]); +pub(crate) const STATUS_LABELS: LabelNames<1> = label_names!("status"); fn print_if_not_null( field_name: &'static str, diff --git a/quickwit/quickwit-search/src/metrics_trackers.rs b/quickwit/quickwit-search/src/metrics_trackers.rs index 8add6b1484f..a431d781979 100644 --- a/quickwit/quickwit-search/src/metrics_trackers.rs +++ b/quickwit/quickwit-search/src/metrics_trackers.rs @@ -74,7 +74,7 @@ impl PinnedDrop for RootSearchMetricsFuture { ) => (*num_targeted_splits, "cancelled"), }; - let labels = 
label_values!(STATUS_LABELS, [status]); + let labels = label_values!(names: STATUS_LABELS, status); counter!( parent: ROOT_SEARCH_REQUESTS_TOTAL, labels: labels, @@ -126,7 +126,7 @@ where F: Future> { fn drop(self: Pin<&mut Self>) { let status = self.status.unwrap_or("cancelled"); - let labels = label_values!(STATUS_LABELS, [status]); + let labels = label_values!(names: STATUS_LABELS, status); counter!( parent: LEAF_SEARCH_REQUESTS_TOTAL, labels: labels, diff --git a/quickwit/quickwit-serve/src/decompression.rs b/quickwit/quickwit-serve/src/decompression.rs index 03f8eb3b00f..8ac126c9233 100644 --- a/quickwit/quickwit-serve/src/decompression.rs +++ b/quickwit/quickwit-serve/src/decompression.rs @@ -115,10 +115,7 @@ pub(crate) struct Body { impl Body { pub fn new(content: Bytes, load_shield_permit: LoadShieldPermit) -> Body { - let gauge_guard = GaugeGuard::new( - &IN_FLIGHT_REST_SERVER, - content.len() as f64, - ); + let gauge_guard = GaugeGuard::new(&IN_FLIGHT_REST_SERVER, content.len() as f64); Body { content, _gauge_guard: gauge_guard, From 5fa06d5079265ff00e0a28eea91429dacc9a3f9e Mon Sep 17 00:00:00 2001 From: Luca Cominardi Date: Tue, 5 May 2026 12:53:42 +0200 Subject: [PATCH 38/54] Add GaugeGuard::decrement method for clarity Replace negative increment pattern with explicit decrement call in source batch clearing. 
Co-authored-by: Cursor --- quickwit/quickwit-indexing/src/source/mod.rs | 2 +- quickwit/quickwit-metrics/src/gauge.rs | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/quickwit/quickwit-indexing/src/source/mod.rs b/quickwit/quickwit-indexing/src/source/mod.rs index a936eacf8ca..8abd92e7114 100644 --- a/quickwit/quickwit-indexing/src/source/mod.rs +++ b/quickwit/quickwit-indexing/src/source/mod.rs @@ -572,7 +572,7 @@ impl BatchBuilder { pub fn clear(&mut self) { self.docs.clear(); self.checkpoint_delta = SourceCheckpointDelta::default(); - self.gauge_guard.increment(-(self.num_bytes as f64)); + self.gauge_guard.decrement(self.num_bytes as f64); self.num_bytes = 0; } } diff --git a/quickwit/quickwit-metrics/src/gauge.rs b/quickwit/quickwit-metrics/src/gauge.rs index 9eeeabb737c..2b954760447 100644 --- a/quickwit/quickwit-metrics/src/gauge.rs +++ b/quickwit/quickwit-metrics/src/gauge.rs @@ -234,6 +234,12 @@ impl GaugeGuard { self.gauge.increment(delta); } + /// Subtracts `delta` from the gauge and from the value this guard tracks. + pub fn decrement(&self, delta: f64) { + self.delta.fetch_sub(delta, Ordering::Relaxed); + self.gauge.decrement(delta); + } + /// Returns the value this guard is tracking. 
pub fn delta(&self) -> f64 { self.delta.load(Ordering::Relaxed) From 3db7f43ea296c28c0b3d895bf629c1c3a6778d94 Mon Sep 17 00:00:00 2001 From: Luca Cominardi Date: Tue, 5 May 2026 13:04:02 +0200 Subject: [PATCH 39/54] Reuse shared labels across counter and histogram in lambda invoker Co-authored-by: Cursor --- quickwit/quickwit-lambda-client/src/invoker.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/quickwit/quickwit-lambda-client/src/invoker.rs b/quickwit/quickwit-lambda-client/src/invoker.rs index 4035b3a23b4..ea851e152b7 100644 --- a/quickwit/quickwit-lambda-client/src/invoker.rs +++ b/quickwit/quickwit-lambda-client/src/invoker.rs @@ -25,7 +25,7 @@ use base64::prelude::*; use prost::Message; use quickwit_common::retry::RetryParams; use quickwit_lambda_server::{LambdaSearchRequestPayload, LambdaSearchResponsePayload}; -use quickwit_metrics::{counter, histogram}; +use quickwit_metrics::{counter, histogram, labels}; use quickwit_proto::search::{LambdaSearchResponses, LambdaSingleSplitResult, LeafSearchRequest}; use quickwit_search::{LambdaLeafSearchInvoker, SearchError}; use tracing::{debug, info, instrument, warn}; @@ -175,14 +175,15 @@ impl LambdaLeafSearchInvoker for AwsLambdaInvoker { let result = self.invoke_leaf_search_with_retry(request).await; let elapsed = start.elapsed().as_secs_f64(); let status = if result.is_ok() { "success" } else { "error" }; + let labels = labels!("status" => status); counter!( parent: LEAF_SEARCH_REQUESTS_TOTAL, - "status" => status, + labels: labels, ) .increment(1); histogram!( parent: LEAF_SEARCH_DURATION_SECONDS, - "status" => status, + labels: labels, ) .record(elapsed); result From 0e0493bfbb5bc643763e44b0aafad75b7248ef41 Mon Sep 17 00:00:00 2001 From: Luca Cominardi Date: Tue, 5 May 2026 14:11:04 +0200 Subject: [PATCH 40/54] Add multi-label composition in metric macros via __bind_labels! 
tt-muncher MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The labels: arm in counter!, gauge!, and histogram! now accepts multiple Labels expressions (e.g. `labels: region_labels, status_labels`). A recursive __bind_labels! macro binds each expression once, folds hash and count, and chains iterators — zero allocation on the hot path. Also adds Labels::iter() returning (&str, &str) pairs, and tests verifying two/three-label composition and hash equivalence with single Labels. Co-authored-by: Cursor --- .../benches/quickwit_metrics.rs | 145 +++++++++++++++++- quickwit/quickwit-metrics/src/counter.rs | 17 +- quickwit/quickwit-metrics/src/gauge.rs | 18 +-- quickwit/quickwit-metrics/src/histogram.rs | 19 ++- quickwit/quickwit-metrics/src/inner.rs | 59 +++++++ quickwit/quickwit-metrics/src/labels.rs | 16 +- quickwit/quickwit-metrics/tests/counter.rs | 96 +++++++++++- quickwit/quickwit-metrics/tests/gauge.rs | 66 +++++++- quickwit/quickwit-metrics/tests/histogram.rs | 75 ++++++++- 9 files changed, 473 insertions(+), 38 deletions(-) diff --git a/quickwit/quickwit-metrics/benches/quickwit_metrics.rs b/quickwit/quickwit-metrics/benches/quickwit_metrics.rs index e6500078ed3..8ba92986c73 100644 --- a/quickwit/quickwit-metrics/benches/quickwit_metrics.rs +++ b/quickwit/quickwit-metrics/benches/quickwit_metrics.rs @@ -854,6 +854,141 @@ fn labels_histogram(c: &mut Criterion) { group.finish(); } +// --------------------------------------------------------------------------- +// COMPOSITE LABELS +// --------------------------------------------------------------------------- + +const COMP_METHOD: LabelNames<1> = label_names!("method"); +const COMP_ENDPOINT: LabelNames<1> = label_names!("endpoint"); +const COMP_STATUS: LabelNames<1> = label_names!("status"); +const COMP_ALL_3: LabelNames<3> = label_names!("method", "endpoint", "status"); + +fn composite_counter(c: &mut Criterion) { + install_recorder(); + let _ = &*PARENT_COUNTER; + + let 
mut group = c.benchmark_group("macros/composite/counter"); + + group.bench_function("single_3", |b| { + b.iter(|| { + counter!( + parent: PARENT_COUNTER, + labels: label_values!(names: COMP_ALL_3, "GET", "/health", "200"), + ) + .increment(1); + }); + }); + + group.bench_function("compose_1x3", |b| { + b.iter(|| { + counter!( + parent: PARENT_COUNTER, + labels: label_values!(names: COMP_METHOD, "GET"), + label_values!(names: COMP_ENDPOINT, "/health"), + label_values!(names: COMP_STATUS, "200"), + ) + .increment(1); + }); + }); + + group.bench_function("compose_1x2", |b| { + b.iter(|| { + counter!( + parent: PARENT_COUNTER, + labels: label_values!(names: COMP_METHOD, "GET"), + label_values!(names: COMP_ENDPOINT, "/health"), + ) + .increment(1); + }); + }); + + group.finish(); +} + +fn composite_gauge(c: &mut Criterion) { + install_recorder(); + let _ = &*PARENT_GAUGE; + + let mut group = c.benchmark_group("macros/composite/gauge"); + + group.bench_function("single_3", |b| { + b.iter(|| { + gauge!( + parent: PARENT_GAUGE, + labels: label_values!(names: COMP_ALL_3, "GET", "/health", "200"), + ) + .set(42.0); + }); + }); + + group.bench_function("compose_1x3", |b| { + b.iter(|| { + gauge!( + parent: PARENT_GAUGE, + labels: label_values!(names: COMP_METHOD, "GET"), + label_values!(names: COMP_ENDPOINT, "/health"), + label_values!(names: COMP_STATUS, "200"), + ) + .set(42.0); + }); + }); + + group.bench_function("compose_1x2", |b| { + b.iter(|| { + gauge!( + parent: PARENT_GAUGE, + labels: label_values!(names: COMP_METHOD, "GET"), + label_values!(names: COMP_ENDPOINT, "/health"), + ) + .set(42.0); + }); + }); + + group.finish(); +} + +fn composite_histogram(c: &mut Criterion) { + install_recorder(); + let _ = &*PARENT_HISTOGRAM; + + let mut group = c.benchmark_group("macros/composite/histogram"); + + group.bench_function("single_3", |b| { + b.iter(|| { + histogram!( + parent: PARENT_HISTOGRAM, + labels: label_values!(names: COMP_ALL_3, "GET", "/health", "200"), + ) + 
.record(0.123); + }); + }); + + group.bench_function("compose_1x3", |b| { + b.iter(|| { + histogram!( + parent: PARENT_HISTOGRAM, + labels: label_values!(names: COMP_METHOD, "GET"), + label_values!(names: COMP_ENDPOINT, "/health"), + label_values!(names: COMP_STATUS, "200"), + ) + .record(0.123); + }); + }); + + group.bench_function("compose_1x2", |b| { + b.iter(|| { + histogram!( + parent: PARENT_HISTOGRAM, + labels: label_values!(names: COMP_METHOD, "GET"), + label_values!(names: COMP_ENDPOINT, "/health"), + ) + .record(0.123); + }); + }); + + group.finish(); +} + // --------------------------------------------------------------------------- criterion_group!( @@ -888,11 +1023,19 @@ criterion_group!( labels_histogram, ); +criterion_group!( + composite_benches, + composite_counter, + composite_gauge, + composite_histogram, +); + criterion_main!( on_the_fly_benches, static_benches, parent_benches, dynamic_benches, observable_benches, - labels_benches + labels_benches, + composite_benches, ); diff --git a/quickwit/quickwit-metrics/src/counter.rs b/quickwit/quickwit-metrics/src/counter.rs index fae6d090a7e..3a491ff6d26 100644 --- a/quickwit/quickwit-metrics/src/counter.rs +++ b/quickwit/quickwit-metrics/src/counter.rs @@ -264,21 +264,22 @@ macro_rules! counter { ) }; - // Parent extension via a pre-built Labels bundle. - // Same as the inline arm but hash and labels come from a Labels. + // Parent extension via one or more pre-built Labels bundles. + // Composes hash, count, and label iterators across all labels via the + // __bind_labels! tt-muncher — zero allocation on the hot path. ( parent: $parent:expr, - labels: $labels:expr $(,)? + labels: $($labels:expr),+ $(,)? 
) => {{ - let label_values = &($labels); - $crate::__metric_extension!( + $crate::__bind_labels!( metric_type: $crate::Counter, register_fn: $crate::__counter_get_or_register, parent: $parent, metric_info: $parent.__info(), - hash: label_values.__hash($parent.get_hash()), - label_count: label_values.len(), - labels_iter: label_values.__to_labels() + hash: $parent.get_hash(), + count: 0usize, + iter: std::iter::empty::<$crate::__metrics::Label>(), + $(next: $labels,)+ ) }}; } diff --git a/quickwit/quickwit-metrics/src/gauge.rs b/quickwit/quickwit-metrics/src/gauge.rs index 2b954760447..1db5e412560 100644 --- a/quickwit/quickwit-metrics/src/gauge.rs +++ b/quickwit/quickwit-metrics/src/gauge.rs @@ -322,22 +322,22 @@ macro_rules! gauge { ) }; - // Parent extension via a pre-built Labels bundle. - // Same as the inline arm but hash and labels come from a Labels. + // Parent extension via one or more pre-built Labels bundles. + // Composes hash, count, and label iterators across all labels via the + // __bind_labels! tt-muncher — zero allocation on the hot path. ( parent: $parent:expr, - labels: $labels:expr $(,)? + labels: $($labels:expr),+ $(,)? ) => {{ - let label_values = &($labels); - $crate::__metric_extension!( + $crate::__bind_labels!( metric_type: $crate::Gauge, register_fn: $crate::__gauge_get_or_register, parent: $parent, metric_info: $parent.__info(), - // Seed with parent hash, fold in each (name, value) pair. - hash: label_values.__hash($parent.get_hash()), - label_count: label_values.len(), - labels_iter: label_values.__to_labels() + hash: $parent.get_hash(), + count: 0usize, + iter: std::iter::empty::<$crate::__metrics::Label>(), + $(next: $labels,)+ ) }}; } diff --git a/quickwit/quickwit-metrics/src/histogram.rs b/quickwit/quickwit-metrics/src/histogram.rs index 224d03e32b0..8b0a0e829c9 100644 --- a/quickwit/quickwit-metrics/src/histogram.rs +++ b/quickwit/quickwit-metrics/src/histogram.rs @@ -303,23 +303,22 @@ macro_rules! 
histogram { ) }; - // Parent extension via a pre-built Labels bundle. - // Same as the inline arm but hash and labels come from a Labels. + // Parent extension via one or more pre-built Labels bundles. + // Composes hash, count, and label iterators across all labels via the + // __bind_labels! tt-muncher — zero allocation on the hot path. ( parent: $parent:expr, - labels: $labels:expr $(,)? + labels: $($labels:expr),+ $(,)? ) => {{ - let label_values = &($labels); - $crate::__metric_extension!( + $crate::__bind_labels!( metric_type: $crate::Histogram, register_fn: $crate::__histogram_get_or_register, parent: $parent, - // Unwrap HistogramConfig -> MetricInfo for the extension. metric_info: $parent.__info().info, - // Seed with parent hash, fold in each (name, value) pair. - hash: label_values.__hash($parent.get_hash()), - label_count: label_values.len(), - labels_iter: label_values.__to_labels() + hash: $parent.get_hash(), + count: 0usize, + iter: std::iter::empty::<$crate::__metrics::Label>(), + $(next: $labels,)+ ) }}; } diff --git a/quickwit/quickwit-metrics/src/inner.rs b/quickwit/quickwit-metrics/src/inner.rs index 06b1f124760..39f894171cb 100644 --- a/quickwit/quickwit-metrics/src/inner.rs +++ b/quickwit/quickwit-metrics/src/inner.rs @@ -251,6 +251,65 @@ macro_rules! __metric_extension { }}; } +/// Recursive tt-muncher that binds each `Labels` expression exactly once, +/// then folds hash, count, and iterator chain across all of them before +/// delegating to [`__metric_extension!`]. +/// +/// Each recursion step creates a nested scope so that earlier bindings remain +/// live when the base case finally emits the extension. Zero allocation on +/// the hot path. +#[doc(hidden)] +#[macro_export] +macro_rules! __bind_labels { + // Base case: no more labels to peel. Emit __metric_extension!. 
+ ( + metric_type: $metric_type:ty, + register_fn: $register_fn:path, + parent: $parent:expr, + metric_info: $metric_info:expr, + hash: $hash:expr, + count: $count:expr, + iter: $iter:expr, + ) => { + $crate::__metric_extension!( + metric_type: $metric_type, + register_fn: $register_fn, + parent: $parent, + metric_info: $metric_info, + hash: $hash, + label_count: $count, + labels_iter: $iter + ) + }; + + // Recursive case: bind the next labels expr, fold into hash/count/iter, + // then recurse with remaining labels. + ( + metric_type: $metric_type:ty, + register_fn: $register_fn:path, + parent: $parent:expr, + metric_info: $metric_info:expr, + hash: $hash:expr, + count: $count:expr, + iter: $iter:expr, + next: $next:expr $(, next: $rest:expr)* $(,)? + ) => {{ + let __ref = &$next; + let hash = $crate::__key_hash($hash, __ref.iter()); + let count = $count + __ref.len(); + $crate::__bind_labels!( + metric_type: $metric_type, + register_fn: $register_fn, + parent: $parent, + metric_info: $metric_info, + hash: hash, + count: count, + iter: $iter.chain(__ref.__to_labels()), + $(next: $rest,)* + ) + }}; +} + // ─── Tests ─── #[cfg(test)] diff --git a/quickwit/quickwit-metrics/src/labels.rs b/quickwit/quickwit-metrics/src/labels.rs index aaaf16c5c14..b52004af8d9 100644 --- a/quickwit/quickwit-metrics/src/labels.rs +++ b/quickwit/quickwit-metrics/src/labels.rs @@ -159,13 +159,7 @@ impl Labels { /// is fully composable with the parent's hash. #[doc(hidden)] pub fn __hash(&self, seed: u64) -> u64 { - __key_hash( - seed, - self.names - .iter() - .zip(self.values.iter()) - .map(|(n, v)| (*n, v.as_ref())), - ) + __key_hash(seed, self.iter()) } /// Builds `metrics::Label`s by cloning the stored names and values. @@ -178,6 +172,14 @@ impl Labels { .map(|(n, v)| metrics::Label::new(*n, v.clone())) } + /// Returns an iterator of `(&str, &str)` name-value pairs. 
+ pub fn iter(&self) -> impl Iterator<Item = (&str, &str)> { + self.names + .iter() + .zip(self.values.iter()) + .map(|(n, v)| (*n, v.as_ref())) + } + /// Number of labels. pub const fn len(&self) -> usize { N diff --git a/quickwit/quickwit-metrics/tests/counter.rs b/quickwit/quickwit-metrics/tests/counter.rs index 2a772f4a841..096b563a342 100644 --- a/quickwit/quickwit-metrics/tests/counter.rs +++ b/quickwit/quickwit-metrics/tests/counter.rs @@ -17,7 +17,7 @@ mod common; use common::with_recorder; use metrics::with_local_recorder; use metrics_util::debugging::{DebugValue, DebuggingRecorder}; -use quickwit_metrics::counter; +use quickwit_metrics::{counter, label_names, label_values, labels}; #[test] fn base_increments() { @@ -205,6 +205,100 @@ fn observable_parent_children_share_shadow() { }); } +#[test] +fn label_composition_two_labels() { + let entries = with_recorder(|| { + let parent = counter!( + name: "c_compose_two", + description: "two-label composition", + subsystem: "test", + "env" => "prod", + ); + const REGION: quickwit_metrics::LabelNames<1> = label_names!("region"); + const STATUS: quickwit_metrics::LabelNames<1> = label_names!("status"); + let child = counter!( + parent: parent, + labels: label_values!(names: REGION, "us-east"), + label_values!(names: STATUS, "ok"), + ); + child.increment(3); + }); + + let child_entry = entries.iter().find(|(_, labels, _)| labels.len() == 3); + assert!(child_entry.is_some(), "composed child not found"); + let (name, labels, value) = child_entry.unwrap(); + assert_eq!(name, "quickwit_test_c_compose_two"); + assert_eq!( + labels, + &[ + ("env".to_string(), "prod".to_string()), + ("region".to_string(), "us-east".to_string()), + ("status".to_string(), "ok".to_string()), + ] + ); + assert_eq!(*value, DebugValue::Counter(3)); +} + +#[test] +fn label_composition_three_labels() { + let entries = with_recorder(|| { + let parent = counter!( + name: "c_compose_three", + description: "three-label composition", + subsystem: "test", + ); + let child = 
counter!( + parent: parent, + labels: labels!("env" => "staging"), + labels!("region" => "eu"), + labels!("az" => "eu-1a"), + ); + child.increment(7); + }); + + let child_entry = entries.iter().find(|(_, labels, _)| labels.len() == 3); + assert!(child_entry.is_some(), "composed child not found"); + let (name, labels, value) = child_entry.unwrap(); + assert_eq!(name, "quickwit_test_c_compose_three"); + assert_eq!( + labels, + &[ + ("env".to_string(), "staging".to_string()), + ("region".to_string(), "eu".to_string()), + ("az".to_string(), "eu-1a".to_string()), + ] + ); + assert_eq!(*value, DebugValue::Counter(7)); +} + +#[test] +fn label_composition_same_hash_as_single() { + with_recorder(|| { + let parent = counter!( + name: "c_compose_hash", + description: "hash equivalence", + subsystem: "test", + ); + const REGION: quickwit_metrics::LabelNames<1> = label_names!("region"); + const STATUS: quickwit_metrics::LabelNames<1> = label_names!("status"); + + let via_compose = counter!( + parent: parent, + labels: label_values!(names: REGION, "us"), + label_values!(names: STATUS, "ok"), + ); + let via_single = counter!( + parent: parent, + labels: labels!("region" => "us", "status" => "ok"), + ); + via_compose.increment(1); + via_single.increment(2); + + assert_eq!(via_compose.get(), 3); + assert_eq!(via_single.get(), 3); + }); +} + #[test] fn observable_parent_distinct_labels_separate_shadow() { with_recorder(|| { diff --git a/quickwit/quickwit-metrics/tests/gauge.rs b/quickwit/quickwit-metrics/tests/gauge.rs index 8cf29c086d1..fb203cfd7b3 100644 --- a/quickwit/quickwit-metrics/tests/gauge.rs +++ b/quickwit/quickwit-metrics/tests/gauge.rs @@ -17,7 +17,7 @@ mod common; use common::with_recorder; use metrics::with_local_recorder; use metrics_util::debugging::{DebugValue, DebuggingRecorder}; -use quickwit_metrics::{GaugeGuard, gauge}; +use quickwit_metrics::{GaugeGuard, gauge, label_names, label_values, labels}; #[test] fn set() { @@ -161,6 +161,70 @@ fn multiple_guards() { 
assert_eq!(entries[0].2, DebugValue::Gauge(0.0.into())); } +// ── Label composition ── + +#[test] +fn label_composition_two_labels() { + let entries = with_recorder(|| { + let parent = gauge!( + name: "g_compose_two", + description: "two-label composition", + subsystem: "test", + "env" => "prod", + ); + const REGION: quickwit_metrics::LabelNames<1> = label_names!("region"); + const STATUS: quickwit_metrics::LabelNames<1> = label_names!("status"); + let child = gauge!( + parent: parent, + labels: label_values!(names: REGION, "us-east"), + label_values!(names: STATUS, "ok"), + ); + child.set(42.0); + }); + + let child_entry = entries.iter().find(|(_, labels, _)| labels.len() == 3); + assert!(child_entry.is_some(), "composed child not found"); + let (name, labels, value) = child_entry.unwrap(); + assert_eq!(name, "quickwit_test_g_compose_two"); + assert_eq!( + labels, + &[ + ("env".to_string(), "prod".to_string()), + ("region".to_string(), "us-east".to_string()), + ("status".to_string(), "ok".to_string()), + ] + ); + assert_eq!(*value, DebugValue::Gauge(42.0.into())); +} + +#[test] +fn label_composition_same_hash_as_single() { + with_recorder(|| { + let parent = gauge!( + name: "g_compose_hash", + description: "hash equivalence", + subsystem: "test", + ); + const REGION: quickwit_metrics::LabelNames<1> = label_names!("region"); + const STATUS: quickwit_metrics::LabelNames<1> = label_names!("status"); + + let via_compose = gauge!( + parent: parent, + labels: label_values!(names: REGION, "us"), + label_values!(names: STATUS, "ok"), + ); + let via_single = gauge!( + parent: parent, + labels: labels!("region" => "us", "status" => "ok"), + ); + via_compose.set(5.0); + via_single.increment(3.0); + + assert_eq!(via_compose.get(), 8.0); + assert_eq!(via_single.get(), 8.0); + }); +} + // ── Observable gauge ── #[test] diff --git a/quickwit/quickwit-metrics/tests/histogram.rs b/quickwit/quickwit-metrics/tests/histogram.rs index da42b5a7fd9..3f4e000f110 100644 --- 
a/quickwit/quickwit-metrics/tests/histogram.rs +++ b/quickwit/quickwit-metrics/tests/histogram.rs @@ -17,7 +17,7 @@ mod common; use common::with_recorder; use metrics::with_local_recorder; use metrics_util::debugging::{DebugValue, DebuggingRecorder}; -use quickwit_metrics::{HistogramTimer, histogram}; +use quickwit_metrics::{HistogramTimer, histogram, label_names, label_values, labels}; #[test] fn base_records_value() { @@ -97,6 +97,79 @@ fn parent_extends_labels() { ); } +#[test] +fn label_composition_two_labels() { + let entries = with_recorder(|| { + let parent = histogram!( + name: "h_compose_two", + description: "two-label composition", + subsystem: "test", + buckets: vec![1.0, 5.0], + "env" => "prod", + ); + const REGION: quickwit_metrics::LabelNames<1> = label_names!("region"); + const STATUS: quickwit_metrics::LabelNames<1> = label_names!("status"); + let child = histogram!( + parent: parent, + labels: label_values!(names: REGION, "us-east"), + label_values!(names: STATUS, "ok"), + ); + child.record(2.5); + }); + + let child_entry = entries.iter().find(|(_, labels, _)| labels.len() == 3); + assert!(child_entry.is_some(), "composed child not found"); + let (name, labels, value) = child_entry.unwrap(); + assert_eq!(name, "quickwit_test_h_compose_two"); + assert_eq!( + labels, + &[ + ("env".to_string(), "prod".to_string()), + ("region".to_string(), "us-east".to_string()), + ("status".to_string(), "ok".to_string()), + ] + ); + match value { + DebugValue::Histogram(vals) => { + assert_eq!(vals.len(), 1); + assert_eq!(vals[0].into_inner(), 2.5); + } + other => panic!("expected Histogram, got {other:?}"), + } +} + +#[test] +fn label_composition_three_labels() { + let entries = with_recorder(|| { + let parent = histogram!( + name: "h_compose_three", + description: "three-label composition", + subsystem: "test", + buckets: vec![1.0], + ); + let child = histogram!( + parent: parent, + labels: labels!("env" => "staging"), + labels!("region" => "eu"), + labels!("az" => 
"eu-1a"), + ); + child.record(0.1); + }); + + let child_entry = entries.iter().find(|(_, labels, _)| labels.len() == 3); + assert!(child_entry.is_some(), "composed child not found"); + let (name, labels, _) = child_entry.unwrap(); + assert_eq!(name, "quickwit_test_h_compose_three"); + assert_eq!( + labels, + &[ + ("env".to_string(), "staging".to_string()), + ("region".to_string(), "eu".to_string()), + ("az".to_string(), "eu-1a".to_string()), + ] + ); +} + #[test] fn config_stored() { let recorder = DebuggingRecorder::new(); From 1528d15b36492757ecc27dd49901a43253a03b35 Mon Sep 17 00:00:00 2001 From: Luca Cominardi Date: Tue, 5 May 2026 14:39:48 +0200 Subject: [PATCH 41/54] Use explicit [brackets] for labels: arm in metric macros The labels: argument now requires square brackets to visually distinguish the label list from other macro parameters: counter!(parent: FOO, labels: [labels_a, labels_b]) Also migrates sketch_processor.rs to use label composition. Co-authored-by: Cursor --- quickwit/quickwit-common/src/runtimes.rs | 8 +-- quickwit/quickwit-common/src/thread_pool.rs | 4 +- .../src/model/shard_table.rs | 8 +-- .../src/actors/doc_processor.rs | 4 +- .../src/actors/indexing_pipeline.rs | 10 +-- .../src/actors/merge_pipeline.rs | 6 +- .../metrics_pipeline/parquet_uploader.rs | 2 +- .../quickwit-indexing/src/actors/uploader.rs | 6 +- .../quickwit-ingest/src/ingest_api_service.rs | 4 +- .../quickwit-ingest/src/ingest_v2/ingester.rs | 18 +++--- quickwit/quickwit-ingest/src/lib.rs | 6 +- quickwit/quickwit-jaeger/src/lib.rs | 20 +++--- quickwit/quickwit-jaeger/src/v1.rs | 6 +- quickwit/quickwit-jaeger/src/v2.rs | 14 ++-- .../src/actors/garbage_collector.rs | 25 ++++---- .../quickwit-lambda-client/src/invoker.rs | 4 +- .../benches/quickwit_metrics.rs | 64 +++++++++++-------- .../quickwit-metrics/examples/http_service.rs | 8 +-- quickwit/quickwit-metrics/src/counter.rs | 2 +- quickwit/quickwit-metrics/src/gauge.rs | 2 +- quickwit/quickwit-metrics/src/histogram.rs | 2 +- 
quickwit/quickwit-metrics/src/lib.rs | 6 +- quickwit/quickwit-metrics/tests/counter.rs | 12 ++-- quickwit/quickwit-metrics/tests/gauge.rs | 8 +-- quickwit/quickwit-metrics/tests/histogram.rs | 7 +- .../quickwit-opentelemetry/src/otlp/logs.rs | 12 ++-- .../src/otlp/otel_metrics.rs | 12 ++-- .../quickwit-opentelemetry/src/otlp/traces.rs | 12 ++-- .../src/ingest/processor.rs | 12 ++-- .../src/ingest/sketch_processor.rs | 36 ++++------- .../quickwit-search/src/metrics_trackers.rs | 12 ++-- 31 files changed, 173 insertions(+), 179 deletions(-) diff --git a/quickwit/quickwit-common/src/runtimes.rs b/quickwit/quickwit-common/src/runtimes.rs index 451ff733b46..18a59b9b6c9 100644 --- a/quickwit/quickwit-common/src/runtimes.rs +++ b/quickwit/quickwit-common/src/runtimes.rs @@ -216,13 +216,13 @@ impl RuntimeMetricsRecorder { pub fn new(label: &'static str) -> Self { let labels = labels!("runtime_type" => label); Self { - scheduled_tasks: gauge!(parent: TOKIO_SCHEDULED_TASKS, labels: labels), + scheduled_tasks: gauge!(parent: TOKIO_SCHEDULED_TASKS, labels: [labels]), worker_busy_duration_milliseconds_total: counter!( parent: TOKIO_WORKER_BUSY_DURATION_MILLISECONDS_TOTAL, - labels: labels, + labels: [labels], ), - worker_busy_ratio: gauge!(parent: TOKIO_WORKER_BUSY_RATIO, labels: labels), - worker_threads: gauge!(parent: TOKIO_WORKER_THREADS, labels: labels), + worker_busy_ratio: gauge!(parent: TOKIO_WORKER_BUSY_RATIO, labels: [labels]), + worker_threads: gauge!(parent: TOKIO_WORKER_THREADS, labels: [labels]), } } diff --git a/quickwit/quickwit-common/src/thread_pool.rs b/quickwit/quickwit-common/src/thread_pool.rs index 509459b3d93..a57ee56d6f8 100644 --- a/quickwit/quickwit-common/src/thread_pool.rs +++ b/quickwit/quickwit-common/src/thread_pool.rs @@ -61,8 +61,8 @@ impl ThreadPool { .build() .expect("failed to spawn thread pool"); let labels = labels!("pool" => name); - let ongoing_tasks = gauge!(parent: THREAD_POOL_ONGOING_TASKS, labels: labels); - let pending_tasks = 
gauge!(parent: THREAD_POOL_PENDING_TASKS, labels: labels); + let ongoing_tasks = gauge!(parent: THREAD_POOL_ONGOING_TASKS, labels: [labels]); + let pending_tasks = gauge!(parent: THREAD_POOL_PENDING_TASKS, labels: [labels]); ThreadPool { thread_pool: Arc::new(thread_pool), ongoing_tasks, diff --git a/quickwit/quickwit-control-plane/src/model/shard_table.rs b/quickwit/quickwit-control-plane/src/model/shard_table.rs index 046a679c9b4..bfb3c12440c 100644 --- a/quickwit/quickwit-control-plane/src/model/shard_table.rs +++ b/quickwit/quickwit-control-plane/src/model/shard_table.rs @@ -467,12 +467,12 @@ impl ShardTable { let labels = label_values!(names: INDEX_ID_LABELS, index_label.to_string()); gauge!( parent: OPEN_SHARDS, - labels: labels, + labels: [labels], ) .set(shard_stats.num_open_shards as f64); gauge!( parent: CLOSED_SHARDS, - labels: labels, + labels: [labels], ) .set(shard_stats.num_closed_shards as f64); return; @@ -491,12 +491,12 @@ impl ShardTable { let labels = label_values!(names: INDEX_ID_LABELS, index_label.to_string()); gauge!( parent: OPEN_SHARDS, - labels: labels, + labels: [labels], ) .set(num_open_shards as f64); gauge!( parent: CLOSED_SHARDS, - labels: labels, + labels: [labels], ) .set(num_closed_shards as f64); } diff --git a/quickwit/quickwit-indexing/src/actors/doc_processor.rs b/quickwit/quickwit-indexing/src/actors/doc_processor.rs index 4abefc441d5..b1d293f4195 100644 --- a/quickwit/quickwit-indexing/src/actors/doc_processor.rs +++ b/quickwit/quickwit-indexing/src/actors/doc_processor.rs @@ -294,11 +294,11 @@ impl DocProcessorCounter { num_docs: Default::default(), num_docs_metric: counter!( parent: PROCESSED_DOCS_TOTAL, - labels: labels, + labels: [labels], ), num_bytes_metric: counter!( parent: PROCESSED_BYTES, - labels: labels, + labels: [labels], ), } } diff --git a/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs b/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs index eae5218cf8c..d6f84d83d2f 100644 --- 
a/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs +++ b/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs @@ -315,7 +315,7 @@ impl IndexingPipeline { .set_kill_switch(self.kill_switch.clone()) .set_backpressure_micros_counter(counter!( parent: BACKPRESSURE_MICROS, - labels: label_values!(names: ACTOR_NAME, "publisher"), + labels: [label_values!(names: ACTOR_NAME, "publisher")], )) .spawn(publisher); @@ -324,7 +324,7 @@ impl IndexingPipeline { .spawn_actor() .set_backpressure_micros_counter(counter!( parent: BACKPRESSURE_MICROS, - labels: label_values!(names: ACTOR_NAME, "sequencer"), + labels: [label_values!(names: ACTOR_NAME, "sequencer")], )) .set_kill_switch(self.kill_switch.clone()) .spawn(sequencer); @@ -344,7 +344,7 @@ impl IndexingPipeline { .spawn_actor() .set_backpressure_micros_counter(counter!( parent: BACKPRESSURE_MICROS, - labels: label_values!(names: ACTOR_NAME, "uploader"), + labels: [label_values!(names: ACTOR_NAME, "uploader")], )) .set_kill_switch(self.kill_switch.clone()) .spawn(uploader); @@ -378,7 +378,7 @@ impl IndexingPipeline { .spawn_actor() .set_backpressure_micros_counter(counter!( parent: BACKPRESSURE_MICROS, - labels: label_values!(names: ACTOR_NAME, "indexer"), + labels: [label_values!(names: ACTOR_NAME, "indexer")], )) .set_kill_switch(self.kill_switch.clone()) .spawn(indexer); @@ -395,7 +395,7 @@ impl IndexingPipeline { .spawn_actor() .set_backpressure_micros_counter(counter!( parent: BACKPRESSURE_MICROS, - labels: label_values!(names: ACTOR_NAME, "doc_processor"), + labels: [label_values!(names: ACTOR_NAME, "doc_processor")], )) .set_kill_switch(self.kill_switch.clone()) .spawn(doc_processor); diff --git a/quickwit/quickwit-indexing/src/actors/merge_pipeline.rs b/quickwit/quickwit-indexing/src/actors/merge_pipeline.rs index 2d150c7893d..b59d8e5d642 100644 --- a/quickwit/quickwit-indexing/src/actors/merge_pipeline.rs +++ b/quickwit/quickwit-indexing/src/actors/merge_pipeline.rs @@ -276,7 +276,7 @@ impl 
MergePipeline { .set_kill_switch(self.kill_switch.clone()) .set_backpressure_micros_counter(counter!( parent: BACKPRESSURE_MICROS, - labels: label_values!(names: ACTOR_NAME, "merge_publisher"), + labels: [label_values!(names: ACTOR_NAME, "merge_publisher")], )) .spawn(merge_publisher); @@ -325,7 +325,7 @@ impl MergePipeline { .set_kill_switch(self.kill_switch.clone()) .set_backpressure_micros_counter(counter!( parent: BACKPRESSURE_MICROS, - labels: label_values!(names: ACTOR_NAME, "merge_executor"), + labels: [label_values!(names: ACTOR_NAME, "merge_executor")], )) .spawn(merge_executor); @@ -340,7 +340,7 @@ impl MergePipeline { .set_kill_switch(self.kill_switch.clone()) .set_backpressure_micros_counter(counter!( parent: BACKPRESSURE_MICROS, - labels: label_values!(names: ACTOR_NAME, "merge_split_downloader"), + labels: [label_values!(names: ACTOR_NAME, "merge_split_downloader")], )) .spawn(merge_split_downloader); diff --git a/quickwit/quickwit-indexing/src/actors/metrics_pipeline/parquet_uploader.rs b/quickwit/quickwit-indexing/src/actors/metrics_pipeline/parquet_uploader.rs index 3607a304241..fc01c39e29c 100644 --- a/quickwit/quickwit-indexing/src/actors/metrics_pipeline/parquet_uploader.rs +++ b/quickwit/quickwit-indexing/src/actors/metrics_pipeline/parquet_uploader.rs @@ -125,7 +125,7 @@ impl ParquetUploader { .get_or_init(|| Semaphore::const_new(self.max_concurrent_uploads)); let gauge = gauge!( parent: AVAILABLE_CONCURRENT_UPLOAD_PERMITS, - labels: label_values!(names: COMPONENT, "metrics"), + labels: [label_values!(names: COMPONENT, "metrics")], ); gauge.set(concurrent_upload_permits.available_permits() as f64); concurrent_upload_permits diff --git a/quickwit/quickwit-indexing/src/actors/uploader.rs b/quickwit/quickwit-indexing/src/actors/uploader.rs index d56f6f8a1b3..4cc3a8f3399 100644 --- a/quickwit/quickwit-indexing/src/actors/uploader.rs +++ b/quickwit/quickwit-indexing/src/actors/uploader.rs @@ -206,21 +206,21 @@ impl Uploader { 
&CONCURRENT_UPLOAD_PERMITS_INDEX, gauge!( parent: AVAILABLE_CONCURRENT_UPLOAD_PERMITS, - labels: label_values!(names: COMPONENT, "indexer"), + labels: [label_values!(names: COMPONENT, "indexer")], ), ), UploaderType::MergeUploader => ( &CONCURRENT_UPLOAD_PERMITS_MERGE, gauge!( parent: AVAILABLE_CONCURRENT_UPLOAD_PERMITS, - labels: label_values!(names: COMPONENT, "merger"), + labels: [label_values!(names: COMPONENT, "merger")], ), ), UploaderType::DeleteUploader => ( &CONCURRENT_UPLOAD_PERMITS_MERGE, gauge!( parent: AVAILABLE_CONCURRENT_UPLOAD_PERMITS, - labels: label_values!(names: COMPONENT, "merger"), + labels: [label_values!(names: COMPONENT, "merger")], ), ), }; diff --git a/quickwit/quickwit-ingest/src/ingest_api_service.rs b/quickwit/quickwit-ingest/src/ingest_api_service.rs index 1c1eceaaf26..5123e958c43 100644 --- a/quickwit/quickwit-ingest/src/ingest_api_service.rs +++ b/quickwit/quickwit-ingest/src/ingest_api_service.rs @@ -205,10 +205,10 @@ impl IngestApiService { let labels = label_values!(names: VALIDITY, "valid"); counter!( parent: DOCS_BYTES_TOTAL, - labels: labels, + labels: [labels], ) .increment(batch_num_bytes as u64); - counter!(parent: DOCS_TOTAL, labels: labels).increment(batch_num_docs as u64); + counter!(parent: DOCS_TOTAL, labels: [labels]).increment(batch_num_docs as u64); } // TODO we could fsync here and disable autosync to have better i/o perfs. 
Ok(( diff --git a/quickwit/quickwit-ingest/src/ingest_v2/ingester.rs b/quickwit/quickwit-ingest/src/ingest_v2/ingester.rs index f2bf2dc169c..b6fd8662a0a 100644 --- a/quickwit/quickwit-ingest/src/ingest_v2/ingester.rs +++ b/quickwit/quickwit-ingest/src/ingest_v2/ingester.rs @@ -335,7 +335,7 @@ impl Ingester { ); counter!( parent: RESET_SHARDS_OPERATIONS_TOTAL, - labels: label_values!(names: STATUS, "success"), + labels: [label_values!(names: STATUS, "success")], ) .increment(1); @@ -347,7 +347,7 @@ impl Ingester { counter!( parent: RESET_SHARDS_OPERATIONS_TOTAL, - labels: label_values!(names: STATUS, "error"), + labels: [label_values!(names: STATUS, "error")], ) .increment(1); } @@ -356,7 +356,7 @@ impl Ingester { counter!( parent: RESET_SHARDS_OPERATIONS_TOTAL, - labels: label_values!(names: STATUS, "timeout"), + labels: [label_values!(names: STATUS, "timeout")], ) .increment(1); } @@ -573,12 +573,12 @@ impl Ingester { if valid_doc_batch.is_empty() { counter!( parent: DOCS_TOTAL, - labels: label_values!(names: VALIDITY, "invalid"), + labels: [label_values!(names: VALIDITY, "invalid")], ) .increment(parse_failures.len() as u64); counter!( parent: DOCS_BYTES_TOTAL, - labels: label_values!(names: VALIDITY, "invalid"), + labels: [label_values!(names: VALIDITY, "invalid")], ) .increment(original_batch_num_bytes); let persist_success = PersistSuccess { @@ -596,23 +596,23 @@ impl Ingester { counter!( parent: DOCS_TOTAL, - labels: label_values!(names: VALIDITY, "valid"), + labels: [label_values!(names: VALIDITY, "valid")], ) .increment(valid_doc_batch.num_docs() as u64); counter!( parent: DOCS_BYTES_TOTAL, - labels: label_values!(names: VALIDITY, "valid"), + labels: [label_values!(names: VALIDITY, "valid")], ) .increment(valid_doc_batch.num_bytes() as u64); if !parse_failures.is_empty() { counter!( parent: DOCS_TOTAL, - labels: label_values!(names: VALIDITY, "invalid"), + labels: [label_values!(names: VALIDITY, "invalid")], ) .increment(parse_failures.len() as u64); 
counter!( parent: DOCS_BYTES_TOTAL, - labels: label_values!(names: VALIDITY, "invalid"), + labels: [label_values!(names: VALIDITY, "invalid")], ) .increment(original_batch_num_bytes - valid_doc_batch.num_bytes() as u64); } diff --git a/quickwit/quickwit-ingest/src/lib.rs b/quickwit/quickwit-ingest/src/lib.rs index 4d0838f9ac0..bde0bd8db0c 100644 --- a/quickwit/quickwit-ingest/src/lib.rs +++ b/quickwit/quickwit-ingest/src/lib.rs @@ -112,7 +112,7 @@ macro_rules! with_lock_metrics { let labels = quickwit_metrics::labels!("operation" => $operation, "type" => $kind); quickwit_metrics::gauge!( parent: $crate::ingest_v2::metrics::WAL_ACQUIRE_LOCK_REQUESTS_IN_FLIGHT, - labels: labels, + labels: [labels], ) .increment(1.0); @@ -128,12 +128,12 @@ macro_rules! with_lock_metrics { } quickwit_metrics::gauge!( parent: $crate::ingest_v2::metrics::WAL_ACQUIRE_LOCK_REQUESTS_IN_FLIGHT, - labels: labels, + labels: [labels], ) .decrement(1.0); quickwit_metrics::histogram!( parent: $crate::ingest_v2::metrics::WAL_ACQUIRE_LOCK_REQUEST_DURATION_SECS, - labels: labels, + labels: [labels], ) .record(elapsed.as_secs_f64()); diff --git a/quickwit/quickwit-jaeger/src/lib.rs b/quickwit/quickwit-jaeger/src/lib.rs index f501b1eb282..bd2511f52ae 100644 --- a/quickwit/quickwit-jaeger/src/lib.rs +++ b/quickwit/quickwit-jaeger/src/lib.rs @@ -422,20 +422,20 @@ impl JaegerService { counter!( parent: FETCHED_TRACES_TOTAL, - labels: label_values!( + labels: [label_values!( names: OPERATION_INDEX_LABELS, operation_name, OTEL_TRACES_INDEX_ID - ), + )], ) .increment(num_traces); let elapsed = request_start.elapsed().as_secs_f64(); histogram!( parent: REQUEST_DURATION_SECONDS, - labels: label_values!( + labels: [label_values!( names: OPERATION_INDEX_ERROR_LABELS, operation_name, OTEL_TRACES_INDEX_ID, "false" - ), + )], ) .record(elapsed); }); @@ -446,20 +446,20 @@ impl JaegerService { pub(crate) fn record_error(operation_name: &'static str, request_start: Instant) { counter!( parent: REQUEST_ERRORS_TOTAL, - 
labels: label_values!( + labels: [label_values!( names: OPERATION_INDEX_LABELS, operation_name, OTEL_TRACES_INDEX_ID - ), + )], ) .increment(1); let elapsed = request_start.elapsed().as_secs_f64(); histogram!( parent: REQUEST_DURATION_SECONDS, - labels: label_values!( + labels: [label_values!( names: OPERATION_INDEX_ERROR_LABELS, operation_name, OTEL_TRACES_INDEX_ID, "true" - ), + )], ) .record(elapsed); } @@ -469,8 +469,8 @@ pub(crate) fn record_send(operation_name: &'static str, num_spans: usize, num_by names: OPERATION_INDEX_LABELS, operation_name, OTEL_TRACES_INDEX_ID ); - counter!(parent: FETCHED_SPANS_TOTAL, labels: labels).increment(num_spans as u64); - counter!(parent: TRANSFERRED_BYTES_TOTAL, labels: labels).increment(num_bytes as u64); + counter!(parent: FETCHED_SPANS_TOTAL, labels: [labels]).increment(num_spans as u64); + counter!(parent: TRANSFERRED_BYTES_TOTAL, labels: [labels]).increment(num_bytes as u64); } #[allow(deprecated)] diff --git a/quickwit/quickwit-jaeger/src/v1.rs b/quickwit/quickwit-jaeger/src/v1.rs index aac38578725..c5040a3919d 100644 --- a/quickwit/quickwit-jaeger/src/v1.rs +++ b/quickwit/quickwit-jaeger/src/v1.rs @@ -42,7 +42,7 @@ macro_rules! metrics { let labels = label_values!(names: OPERATION_INDEX_LABELS, operation, index); counter!( parent: REQUESTS_TOTAL, - labels: labels, + labels: [labels], ) .increment(1); let (res, is_error) = match $expr { @@ -52,7 +52,7 @@ macro_rules! metrics { err @ Err(_) => { counter!( parent: REQUEST_ERRORS_TOTAL, - labels: labels, + labels: [labels], ) .increment(1); (err, "true") @@ -61,7 +61,7 @@ macro_rules! 
metrics { let elapsed = start.elapsed().as_secs_f64(); histogram!( parent: REQUEST_DURATION_SECONDS, - labels: label_values!(names: OPERATION_INDEX_ERROR_LABELS, operation, index, is_error), + labels: [label_values!(names: OPERATION_INDEX_ERROR_LABELS, operation, index, is_error)], ) .record(elapsed); diff --git a/quickwit/quickwit-jaeger/src/v2.rs b/quickwit/quickwit-jaeger/src/v2.rs index 54ed4b8b844..4a0984398dc 100644 --- a/quickwit/quickwit-jaeger/src/v2.rs +++ b/quickwit/quickwit-jaeger/src/v2.rs @@ -68,7 +68,7 @@ macro_rules! metrics { let labels = label_values!(names: OPERATION_INDEX_LABELS, operation, index); counter!( parent: REQUESTS_TOTAL, - labels: labels, + labels: [labels], ) .increment(1); let (res, is_error) = match $expr { @@ -78,7 +78,7 @@ macro_rules! metrics { err @ Err(_) => { counter!( parent: REQUEST_ERRORS_TOTAL, - labels: labels, + labels: [labels], ) .increment(1); (err, "true") @@ -87,7 +87,7 @@ macro_rules! metrics { let elapsed = start.elapsed().as_secs_f64(); histogram!( parent: REQUEST_DURATION_SECONDS, - labels: label_values!(names: OPERATION_INDEX_ERROR_LABELS, operation, index, is_error), + labels: [label_values!(names: OPERATION_INDEX_ERROR_LABELS, operation, index, is_error)], ) .record(elapsed); @@ -445,20 +445,20 @@ async fn stream_otel_spans_impl( counter!( parent: FETCHED_TRACES_TOTAL, - labels: label_values!( + labels: [label_values!( names: OPERATION_INDEX_LABELS, operation_name, OTEL_TRACES_INDEX_ID - ), + )], ) .increment(trace_ids.len() as u64); let elapsed = request_start.elapsed().as_secs_f64(); histogram!( parent: REQUEST_DURATION_SECONDS, - labels: label_values!( + labels: [label_values!( names: OPERATION_INDEX_ERROR_LABELS, operation_name, OTEL_TRACES_INDEX_ID, "false" - ), + )], ) .record(elapsed); diff --git a/quickwit/quickwit-janitor/src/actors/garbage_collector.rs b/quickwit/quickwit-janitor/src/actors/garbage_collector.rs index 00661c9741d..1a05e447ca1 100644 --- 
a/quickwit/quickwit-janitor/src/actors/garbage_collector.rs +++ b/quickwit/quickwit-janitor/src/actors/garbage_collector.rs @@ -23,7 +23,7 @@ use quickwit_common::is_parquet_pipeline_index; use quickwit_common::shared_consts::split_deletion_grace_period; use quickwit_index_management::{GcMetrics, run_garbage_collect, run_parquet_garbage_collect}; use quickwit_metastore::ListIndexesMetadataResponseExt; -use quickwit_metrics::counter; +use quickwit_metrics::{counter, label_names, label_values}; use quickwit_proto::metastore::{ ListIndexesMetadataRequest, MetastoreService, MetastoreServiceClient, }; @@ -191,7 +191,12 @@ impl GarbageCollector { } // Run Tantivy GC + let labels_result = label_names!("result"); + let labels_split = label_names!("split_type"); + if !tantivy_storages.is_empty() { + let labels_split = label_values!(names: labels_split, "tantivy"); + let tantivy_start = Instant::now(); let gc_res = run_garbage_collect( tantivy_storages, @@ -207,7 +212,7 @@ impl GarbageCollector { let tantivy_run_duration = tantivy_start.elapsed().as_secs(); counter!( parent: GC_SECONDS_TOTAL, - "split_type" => "tantivy", + labels: [labels_split], ) .increment(tantivy_run_duration); @@ -216,8 +221,7 @@ impl GarbageCollector { self.counters.num_successful_gc_run += 1; counter!( parent: GC_RUNS, - "result" => "success", - "split_type" => "tantivy", + labels: [labels_split, label_values!(names: labels_result, "success")], ) .increment(1); GcRunResult { @@ -240,8 +244,7 @@ impl GarbageCollector { self.counters.num_failed_gc_run += 1; counter!( parent: GC_RUNS, - "result" => "error", - "split_type" => "tantivy", + labels: [labels_split, label_values!(names: labels_result, "error")], ) .increment(1); error!(error=?error, "failed to run garbage collection"); @@ -253,6 +256,8 @@ impl GarbageCollector { // Run Parquet GC if !parquet_storages.is_empty() { + let labels_split = label_values!(names: labels_split, "parquet"); + let parquet_start = Instant::now(); let gc_res = 
run_parquet_garbage_collect( parquet_storages, @@ -268,7 +273,7 @@ impl GarbageCollector { let parquet_run_duration = parquet_start.elapsed().as_secs(); counter!( parent: GC_SECONDS_TOTAL, - "split_type" => "parquet", + labels: [labels_split], ) .increment(parquet_run_duration); @@ -277,8 +282,7 @@ impl GarbageCollector { self.counters.num_successful_gc_run += 1; counter!( parent: GC_RUNS, - "result" => "success", - "split_type" => "parquet", + labels: [labels_split, label_values!(names: labels_result, "success")], ) .increment(1); GcRunResult { @@ -297,8 +301,7 @@ impl GarbageCollector { self.counters.num_failed_gc_run += 1; counter!( parent: GC_RUNS, - "result" => "error", - "split_type" => "parquet", + labels: [labels_split, label_values!(names: labels_result, "error")], ) .increment(1); error!(error=?error, "failed to run parquet garbage collection"); diff --git a/quickwit/quickwit-lambda-client/src/invoker.rs b/quickwit/quickwit-lambda-client/src/invoker.rs index ea851e152b7..f2395b7e5e6 100644 --- a/quickwit/quickwit-lambda-client/src/invoker.rs +++ b/quickwit/quickwit-lambda-client/src/invoker.rs @@ -178,12 +178,12 @@ impl LambdaLeafSearchInvoker for AwsLambdaInvoker { let labels = labels!("status" => status); counter!( parent: LEAF_SEARCH_REQUESTS_TOTAL, - labels: labels, + labels: [labels], ) .increment(1); histogram!( parent: LEAF_SEARCH_DURATION_SECONDS, - labels: labels, + labels: [labels], ) .record(elapsed); result diff --git a/quickwit/quickwit-metrics/benches/quickwit_metrics.rs b/quickwit/quickwit-metrics/benches/quickwit_metrics.rs index 8ba92986c73..55746585b8e 100644 --- a/quickwit/quickwit-metrics/benches/quickwit_metrics.rs +++ b/quickwit/quickwit-metrics/benches/quickwit_metrics.rs @@ -751,7 +751,7 @@ fn labels_counter(c: &mut Criterion) { b.iter(|| { counter!( parent: PARENT_COUNTER, - labels: label_values!(names: LABELS_1, "GET") + labels: [label_values!(names: LABELS_1, "GET")] ) .increment(1); }); @@ -761,7 +761,7 @@ fn labels_counter(c: 
&mut Criterion) { b.iter(|| { counter!( parent: PARENT_COUNTER, - labels: label_values!(names: LABELS_3, "GET", "/health", "200") + labels: [label_values!(names: LABELS_3, "GET", "/health", "200")] ) .increment(1); }); @@ -771,7 +771,7 @@ fn labels_counter(c: &mut Criterion) { b.iter(|| { counter!( parent: PARENT_COUNTER, - labels: label_values!(names: LABELS_1, "GET".to_string()) + labels: [label_values!(names: LABELS_1, "GET".to_string())] ) .increment(1); }); @@ -787,7 +787,7 @@ fn labels_counter(c: &mut Criterion) { idx += 1; counter!( parent: PARENT_COUNTER, - labels: label_values!(names: LABELS_1, m) + labels: [label_values!(names: LABELS_1, m)] ) .increment(1); }); @@ -806,7 +806,7 @@ fn labels_gauge(c: &mut Criterion) { b.iter(|| { gauge!( parent: PARENT_GAUGE, - labels: label_values!(names: LABELS_1, "GET") + labels: [label_values!(names: LABELS_1, "GET")] ) .set(42.0); }); @@ -816,7 +816,7 @@ fn labels_gauge(c: &mut Criterion) { b.iter(|| { gauge!( parent: PARENT_GAUGE, - labels: label_values!(names: LABELS_3, "GET", "/health", "200") + labels: [label_values!(names: LABELS_3, "GET", "/health", "200")] ) .set(42.0); }); @@ -835,7 +835,7 @@ fn labels_histogram(c: &mut Criterion) { b.iter(|| { histogram!( parent: PARENT_HISTOGRAM, - labels: label_values!(names: LABELS_1, "GET") + labels: [label_values!(names: LABELS_1, "GET")] ) .record(0.123); }); @@ -845,7 +845,7 @@ fn labels_histogram(c: &mut Criterion) { b.iter(|| { histogram!( parent: PARENT_HISTOGRAM, - labels: label_values!(names: LABELS_3, "GET", "/health", "200") + labels: [label_values!(names: LABELS_3, "GET", "/health", "200")] ) .record(0.123); }); @@ -873,7 +873,7 @@ fn composite_counter(c: &mut Criterion) { b.iter(|| { counter!( parent: PARENT_COUNTER, - labels: label_values!(names: COMP_ALL_3, "GET", "/health", "200"), + labels: [label_values!(names: COMP_ALL_3, "GET", "/health", "200")], ) .increment(1); }); @@ -883,9 +883,11 @@ fn composite_counter(c: &mut Criterion) { b.iter(|| { counter!( 
parent: PARENT_COUNTER, - labels: label_values!(names: COMP_METHOD, "GET"), - label_values!(names: COMP_ENDPOINT, "/health"), - label_values!(names: COMP_STATUS, "200"), + labels: [ + label_values!(names: COMP_METHOD, "GET"), + label_values!(names: COMP_ENDPOINT, "/health"), + label_values!(names: COMP_STATUS, "200"), + ], ) .increment(1); }); @@ -895,8 +897,10 @@ fn composite_counter(c: &mut Criterion) { b.iter(|| { counter!( parent: PARENT_COUNTER, - labels: label_values!(names: COMP_METHOD, "GET"), - label_values!(names: COMP_ENDPOINT, "/health"), + labels: [ + label_values!(names: COMP_METHOD, "GET"), + label_values!(names: COMP_ENDPOINT, "/health"), + ], ) .increment(1); }); @@ -915,7 +919,7 @@ fn composite_gauge(c: &mut Criterion) { b.iter(|| { gauge!( parent: PARENT_GAUGE, - labels: label_values!(names: COMP_ALL_3, "GET", "/health", "200"), + labels: [label_values!(names: COMP_ALL_3, "GET", "/health", "200")], ) .set(42.0); }); @@ -925,9 +929,11 @@ fn composite_gauge(c: &mut Criterion) { b.iter(|| { gauge!( parent: PARENT_GAUGE, - labels: label_values!(names: COMP_METHOD, "GET"), - label_values!(names: COMP_ENDPOINT, "/health"), - label_values!(names: COMP_STATUS, "200"), + labels: [ + label_values!(names: COMP_METHOD, "GET"), + label_values!(names: COMP_ENDPOINT, "/health"), + label_values!(names: COMP_STATUS, "200"), + ], ) .set(42.0); }); @@ -937,8 +943,10 @@ fn composite_gauge(c: &mut Criterion) { b.iter(|| { gauge!( parent: PARENT_GAUGE, - labels: label_values!(names: COMP_METHOD, "GET"), - label_values!(names: COMP_ENDPOINT, "/health"), + labels: [ + label_values!(names: COMP_METHOD, "GET"), + label_values!(names: COMP_ENDPOINT, "/health"), + ], ) .set(42.0); }); @@ -957,7 +965,7 @@ fn composite_histogram(c: &mut Criterion) { b.iter(|| { histogram!( parent: PARENT_HISTOGRAM, - labels: label_values!(names: COMP_ALL_3, "GET", "/health", "200"), + labels: [label_values!(names: COMP_ALL_3, "GET", "/health", "200")], ) .record(0.123); }); @@ -967,9 +975,11 
@@ fn composite_histogram(c: &mut Criterion) { b.iter(|| { histogram!( parent: PARENT_HISTOGRAM, - labels: label_values!(names: COMP_METHOD, "GET"), - label_values!(names: COMP_ENDPOINT, "/health"), - label_values!(names: COMP_STATUS, "200"), + labels: [ + label_values!(names: COMP_METHOD, "GET"), + label_values!(names: COMP_ENDPOINT, "/health"), + label_values!(names: COMP_STATUS, "200"), + ], ) .record(0.123); }); @@ -979,8 +989,10 @@ fn composite_histogram(c: &mut Criterion) { b.iter(|| { histogram!( parent: PARENT_HISTOGRAM, - labels: label_values!(names: COMP_METHOD, "GET"), - label_values!(names: COMP_ENDPOINT, "/health"), + labels: [ + label_values!(names: COMP_METHOD, "GET"), + label_values!(names: COMP_ENDPOINT, "/health"), + ], ) .record(0.123); }); diff --git a/quickwit/quickwit-metrics/examples/http_service.rs b/quickwit/quickwit-metrics/examples/http_service.rs index aeeecfdad83..2056d6804d7 100644 --- a/quickwit/quickwit-metrics/examples/http_service.rs +++ b/quickwit/quickwit-metrics/examples/http_service.rs @@ -87,14 +87,14 @@ const ROUTE_LABELS: LabelNames<2> = label_names!("method", "path"); fn record_request(method: &'static str, path: &'static str, duration: f64, size: f64) { let route = label_values!(names: ROUTE_LABELS, method, path); - histogram!(parent: HTTP_REQUEST_DURATION, labels: route).record(duration); - histogram!(parent: HTTP_RESPONSE_SIZE, labels: route).record(size); - counter!(parent: HTTP_REQUESTS_TOTAL, labels: route).increment(1); + histogram!(parent: HTTP_REQUEST_DURATION, labels: [route]).record(duration); + histogram!(parent: HTTP_RESPONSE_SIZE, labels: [route]).record(size); + counter!(parent: HTTP_REQUESTS_TOTAL, labels: [route]).increment(1); } fn record_dynamic_request(method: String, path: String, duration: f64) { let route = label_values!(names: ROUTE_LABELS, method, path); - histogram!(parent: HTTP_REQUEST_DURATION, labels: route).record(duration); + histogram!(parent: HTTP_REQUEST_DURATION, labels: 
[route]).record(duration); } fn track_connection(region: &'static str) -> GaugeGuard { diff --git a/quickwit/quickwit-metrics/src/counter.rs b/quickwit/quickwit-metrics/src/counter.rs index 3a491ff6d26..3a3bfd4443d 100644 --- a/quickwit/quickwit-metrics/src/counter.rs +++ b/quickwit/quickwit-metrics/src/counter.rs @@ -269,7 +269,7 @@ macro_rules! counter { // __bind_labels! tt-muncher — zero allocation on the hot path. ( parent: $parent:expr, - labels: $($labels:expr),+ $(,)? + labels: [$($labels:expr),+ $(,)?] $(,)? ) => {{ $crate::__bind_labels!( metric_type: $crate::Counter, diff --git a/quickwit/quickwit-metrics/src/gauge.rs b/quickwit/quickwit-metrics/src/gauge.rs index 1db5e412560..e6bb587014f 100644 --- a/quickwit/quickwit-metrics/src/gauge.rs +++ b/quickwit/quickwit-metrics/src/gauge.rs @@ -327,7 +327,7 @@ macro_rules! gauge { // __bind_labels! tt-muncher — zero allocation on the hot path. ( parent: $parent:expr, - labels: $($labels:expr),+ $(,)? + labels: [$($labels:expr),+ $(,)?] $(,)? ) => {{ $crate::__bind_labels!( metric_type: $crate::Gauge, diff --git a/quickwit/quickwit-metrics/src/histogram.rs b/quickwit/quickwit-metrics/src/histogram.rs index 8b0a0e829c9..22da9b5af29 100644 --- a/quickwit/quickwit-metrics/src/histogram.rs +++ b/quickwit/quickwit-metrics/src/histogram.rs @@ -308,7 +308,7 @@ macro_rules! histogram { // __bind_labels! tt-muncher — zero allocation on the hot path. ( parent: $parent:expr, - labels: $($labels:expr),+ $(,)? + labels: [$($labels:expr),+ $(,)?] $(,)? ) => {{ $crate::__bind_labels!( metric_type: $crate::Histogram, diff --git a/quickwit/quickwit-metrics/src/lib.rs b/quickwit/quickwit-metrics/src/lib.rs index 02ef67614fd..f47e035cd4f 100644 --- a/quickwit/quickwit-metrics/src/lib.rs +++ b/quickwit/quickwit-metrics/src/lib.rs @@ -92,14 +92,14 @@ //! //! fn on_request(method: &'static str, path: &'static str, duration: f64) { //! let route = label_values!(names: ROUTE, method, path); -//! 
histogram!(parent: REQUEST_DURATION, labels: route).record(duration); -//! counter!(parent: HTTP_REQUESTS, labels: route).increment(1); +//! histogram!(parent: REQUEST_DURATION, labels: [route]).record(duration); +//! counter!(parent: HTTP_REQUESTS, labels: [route]).increment(1); //! } //! //! // Mixed types work too — Into is called per-element: //! fn on_dynamic_request(method: &'static str, path: String, duration: f64) { //! let route = label_values!(names: ROUTE, method, path); -//! histogram!(parent: REQUEST_DURATION, labels: route).record(duration); +//! histogram!(parent: REQUEST_DURATION, labels: [route]).record(duration); //! } //! ``` //! diff --git a/quickwit/quickwit-metrics/tests/counter.rs b/quickwit/quickwit-metrics/tests/counter.rs index 096b563a342..3f04da2f482 100644 --- a/quickwit/quickwit-metrics/tests/counter.rs +++ b/quickwit/quickwit-metrics/tests/counter.rs @@ -218,8 +218,7 @@ fn label_composition_two_labels() { const STATUS: quickwit_metrics::LabelNames<1> = label_names!("status"); let child = counter!( parent: parent, - labels: label_values!(names: REGION, "us-east"), - label_values!(names: STATUS, "ok"), + labels: [label_values!(names: REGION, "us-east"), label_values!(names: STATUS, "ok")], ); child.increment(3); }); @@ -249,9 +248,7 @@ fn label_composition_three_labels() { ); let child = counter!( parent: parent, - labels: labels!("env" => "staging"), - labels!("region" => "eu"), - labels!("az" => "eu-1a"), + labels: [labels!("env" => "staging"), labels!("region" => "eu"), labels!("az" => "eu-1a")], ); child.increment(7); }); @@ -284,12 +281,11 @@ fn label_composition_same_hash_as_single() { let via_compose = counter!( parent: parent, - labels: label_values!(names: REGION, "us"), - label_values!(names: STATUS, "ok"), + labels: [label_values!(names: REGION, "us"), label_values!(names: STATUS, "ok")], ); let via_single = counter!( parent: parent, - labels: labels!("region" => "us", "status" => "ok"), + labels: [labels!("region" => "us", 
"status" => "ok")], ); via_compose.increment(1); via_single.increment(2); diff --git a/quickwit/quickwit-metrics/tests/gauge.rs b/quickwit/quickwit-metrics/tests/gauge.rs index fb203cfd7b3..ad891ac1961 100644 --- a/quickwit/quickwit-metrics/tests/gauge.rs +++ b/quickwit/quickwit-metrics/tests/gauge.rs @@ -176,8 +176,7 @@ fn label_composition_two_labels() { const STATUS: quickwit_metrics::LabelNames<1> = label_names!("status"); let child = gauge!( parent: parent, - labels: label_values!(names: REGION, "us-east"), - label_values!(names: STATUS, "ok"), + labels: [label_values!(names: REGION, "us-east"), label_values!(names: STATUS, "ok")], ); child.set(42.0); }); @@ -210,12 +209,11 @@ fn label_composition_same_hash_as_single() { let via_compose = gauge!( parent: parent, - labels: label_values!(names: REGION, "us"), - label_values!(names: STATUS, "ok"), + labels: [label_values!(names: REGION, "us"), label_values!(names: STATUS, "ok")], ); let via_single = gauge!( parent: parent, - labels: labels!("region" => "us", "status" => "ok"), + labels: [labels!("region" => "us", "status" => "ok")], ); via_compose.set(5.0); via_single.increment(3.0); diff --git a/quickwit/quickwit-metrics/tests/histogram.rs b/quickwit/quickwit-metrics/tests/histogram.rs index 3f4e000f110..787a257c592 100644 --- a/quickwit/quickwit-metrics/tests/histogram.rs +++ b/quickwit/quickwit-metrics/tests/histogram.rs @@ -111,8 +111,7 @@ fn label_composition_two_labels() { const STATUS: quickwit_metrics::LabelNames<1> = label_names!("status"); let child = histogram!( parent: parent, - labels: label_values!(names: REGION, "us-east"), - label_values!(names: STATUS, "ok"), + labels: [label_values!(names: REGION, "us-east"), label_values!(names: STATUS, "ok")], ); child.record(2.5); }); @@ -149,9 +148,7 @@ fn label_composition_three_labels() { ); let child = histogram!( parent: parent, - labels: labels!("env" => "staging"), - labels!("region" => "eu"), - labels!("az" => "eu-1a"), + labels: [labels!("env" => 
"staging"), labels!("region" => "eu"), labels!("az" => "eu-1a")], ); child.record(0.1); }); diff --git a/quickwit/quickwit-opentelemetry/src/otlp/logs.rs b/quickwit/quickwit-opentelemetry/src/otlp/logs.rs index e59d13e7ec0..bce58777720 100644 --- a/quickwit/quickwit-opentelemetry/src/otlp/logs.rs +++ b/quickwit/quickwit-opentelemetry/src/otlp/logs.rs @@ -246,10 +246,10 @@ impl OtlpGrpcLogsService { let labels = label_values!(names: OTLP_GRPC_LABELS, "logs", index_id, "grpc", "protobuf"); counter!( parent: INGESTED_LOG_RECORDS_TOTAL, - labels: labels, + labels: [labels], ) .increment(num_log_records); - counter!(parent: INGESTED_BYTES_TOTAL, labels: labels).increment(num_bytes); + counter!(parent: INGESTED_BYTES_TOTAL, labels: [labels]).increment(num_bytes); let response = ExportLogsServiceResponse { // `rejected_log_records=0` and `error_message=""` is consided a "full" success. @@ -326,7 +326,7 @@ impl OtlpGrpcLogsService { ); counter!( parent: REQUESTS_TOTAL, - labels: labels, + labels: [labels], ) .increment(1); let (export_res, is_error) = match self.export_inner(request, index_id.clone()).await { @@ -334,7 +334,7 @@ impl OtlpGrpcLogsService { err @ Err(_) => { counter!( parent: REQUEST_ERRORS_TOTAL, - labels: labels, + labels: [labels], ) .increment(1); (err, "true") @@ -343,10 +343,10 @@ impl OtlpGrpcLogsService { let elapsed = start.elapsed().as_secs_f64(); histogram!( parent: REQUEST_DURATION_SECONDS, - labels: label_values!( + labels: [label_values!( names: OTLP_GRPC_ERROR_LABELS, "logs", index_id, "grpc", "protobuf", is_error - ), + )], ) .record(elapsed); diff --git a/quickwit/quickwit-opentelemetry/src/otlp/otel_metrics.rs b/quickwit/quickwit-opentelemetry/src/otlp/otel_metrics.rs index c272011da01..d83425b6492 100644 --- a/quickwit/quickwit-opentelemetry/src/otlp/otel_metrics.rs +++ b/quickwit/quickwit-opentelemetry/src/otlp/otel_metrics.rs @@ -243,10 +243,10 @@ impl OtlpGrpcMetricsService { label_values!(names: OTLP_GRPC_LABELS, "metrics", index_id, 
"grpc", "protobuf"); counter!( parent: INGESTED_DATA_POINTS_TOTAL, - labels: labels, + labels: [labels], ) .increment(num_data_points - num_parse_errors); - counter!(parent: INGESTED_BYTES_TOTAL, labels: labels).increment(num_bytes); + counter!(parent: INGESTED_BYTES_TOTAL, labels: [labels]).increment(num_bytes); let response = ExportMetricsServiceResponse { partial_success: Some(ExportMetricsPartialSuccess { @@ -341,7 +341,7 @@ impl OtlpGrpcMetricsService { ); counter!( parent: REQUESTS_TOTAL, - labels: labels, + labels: [labels], ) .increment(1); @@ -350,7 +350,7 @@ impl OtlpGrpcMetricsService { err @ Err(_) => { counter!( parent: REQUEST_ERRORS_TOTAL, - labels: labels, + labels: [labels], ) .increment(1); (err, "true") @@ -360,10 +360,10 @@ impl OtlpGrpcMetricsService { let elapsed = start.elapsed().as_secs_f64(); histogram!( parent: REQUEST_DURATION_SECONDS, - labels: label_values!( + labels: [label_values!( names: OTLP_GRPC_ERROR_LABELS, "metrics", index_id, "grpc", "protobuf", is_error - ), + )], ) .record(elapsed); diff --git a/quickwit/quickwit-opentelemetry/src/otlp/traces.rs b/quickwit/quickwit-opentelemetry/src/otlp/traces.rs index 01309105538..7b5c2214482 100644 --- a/quickwit/quickwit-opentelemetry/src/otlp/traces.rs +++ b/quickwit/quickwit-opentelemetry/src/otlp/traces.rs @@ -708,11 +708,11 @@ impl OtlpGrpcTracesService { let labels = label_values!(names: OTLP_GRPC_LABELS, "trace", index_id, "grpc", "protobuf"); counter!( parent: INGESTED_SPANS_TOTAL, - labels: labels, + labels: [labels], ) .increment(num_spans); counter!(parent: INGESTED_BYTES_TOTAL, - labels: labels + labels: [labels], ) .increment(num_bytes); @@ -791,7 +791,7 @@ impl OtlpGrpcTracesService { ); counter!( parent: REQUESTS_TOTAL, - labels: labels, + labels: [labels], ) .increment(1); let (export_res, is_error) = match self.export_inner(request, index_id.clone()).await { @@ -799,7 +799,7 @@ impl OtlpGrpcTracesService { err @ Err(_) => { counter!( parent: REQUEST_ERRORS_TOTAL, - labels: 
labels, + labels: [labels], ) .increment(1); (err, "true") @@ -808,10 +808,10 @@ impl OtlpGrpcTracesService { let elapsed = start.elapsed().as_secs_f64(); histogram!( parent: REQUEST_DURATION_SECONDS, - labels: label_values!( + labels: [label_values!( names: OTLP_GRPC_ERROR_LABELS, "trace", index_id, "grpc", "protobuf", is_error - ), + )], ) .record(elapsed); diff --git a/quickwit/quickwit-parquet-engine/src/ingest/processor.rs b/quickwit/quickwit-parquet-engine/src/ingest/processor.rs index 316d8be1dc6..9bcc94b8955 100644 --- a/quickwit/quickwit-parquet-engine/src/ingest/processor.rs +++ b/quickwit/quickwit-parquet-engine/src/ingest/processor.rs @@ -18,7 +18,7 @@ use std::io::Cursor; use arrow::ipc::reader::StreamReader; use arrow::record_batch::RecordBatch; -use quickwit_metrics::counter; +use quickwit_metrics::{counter, labels}; use tracing::{debug, instrument, warn}; use crate::metrics::{ERRORS_TOTAL, INGEST_BYTES_TOTAL}; @@ -63,10 +63,12 @@ impl ParquetIngestProcessor { /// Returns error if IPC is malformed or schema doesn't match. 
#[instrument(skip(self, ipc_bytes), fields(bytes_len = ipc_bytes.len()))] pub fn process_ipc(&self, ipc_bytes: &[u8]) -> Result { + let labels_kind = labels!("kind" => "points"); + let labels_operation = labels!("operation" => "ingest"); // Record bytes ingested counter!( parent: INGEST_BYTES_TOTAL, - "kind" => "points", + labels: [labels_kind], ) .increment(ipc_bytes.len() as u64); @@ -75,8 +77,7 @@ impl ParquetIngestProcessor { Err(e) => { counter!( parent: ERRORS_TOTAL, - "operation" => "ingest", - "kind" => "points", + labels: [labels_kind, labels_operation], ) .increment(1); return Err(e); @@ -86,8 +87,7 @@ impl ParquetIngestProcessor { if let Err(e) = self.validate_schema(&batch) { counter!( parent: ERRORS_TOTAL, - "operation" => "ingest", - "kind" => "points", + labels: [labels_kind, labels_operation], ) .increment(1); return Err(e); diff --git a/quickwit/quickwit-parquet-engine/src/ingest/sketch_processor.rs b/quickwit/quickwit-parquet-engine/src/ingest/sketch_processor.rs index 9a45a7c2d6a..874d2f6f1d0 100644 --- a/quickwit/quickwit-parquet-engine/src/ingest/sketch_processor.rs +++ b/quickwit/quickwit-parquet-engine/src/ingest/sketch_processor.rs @@ -16,7 +16,7 @@ use arrow::array::AsArray; use arrow::record_batch::RecordBatch; -use quickwit_metrics::counter; +use quickwit_metrics::{counter, labels}; use tracing::{debug, instrument, warn}; use super::processor::IngestError; @@ -42,42 +42,30 @@ impl SketchParquetIngestProcessor { /// sketch arrays are inconsistent. 
#[instrument(skip(self, ipc_bytes), fields(bytes_len = ipc_bytes.len()))] pub fn process_ipc(&self, ipc_bytes: &[u8]) -> Result { - counter!( - parent: INGEST_BYTES_TOTAL, - "kind" => "sketches", - ) - .increment(ipc_bytes.len() as u64); + let labels_kind = labels!("kind" => "sketches"); + let labels_operation = labels!("operation" => "ingest"); + + counter!(parent: INGEST_BYTES_TOTAL, labels: [labels_kind]) + .increment(ipc_bytes.len() as u64); let batch = match super::processor::ipc_to_record_batch(ipc_bytes) { Ok(batch) => batch, Err(err) => { - counter!( - parent: ERRORS_TOTAL, - "operation" => "ingest", - "kind" => "sketches", - ) - .increment(1); + counter!(parent: ERRORS_TOTAL, labels: [labels_kind, labels_operation]) + .increment(1); return Err(err); } }; if let Err(err) = self.validate_schema(&batch) { - counter!( - parent: ERRORS_TOTAL, - "operation" => "ingest", - "kind" => "sketches", - ) - .increment(1); + counter!(parent: ERRORS_TOTAL, labels: [labels_kind, labels_operation]) + .increment(1); return Err(err); } if let Err(err) = self.validate_sketch_arrays(&batch) { - counter!( - parent: ERRORS_TOTAL, - "operation" => "ingest", - "kind" => "sketches", - ) - .increment(1); + counter!(parent: ERRORS_TOTAL, labels: [labels_kind, labels_operation]) + .increment(1); return Err(err); } diff --git a/quickwit/quickwit-search/src/metrics_trackers.rs b/quickwit/quickwit-search/src/metrics_trackers.rs index a431d781979..a46b33dd90c 100644 --- a/quickwit/quickwit-search/src/metrics_trackers.rs +++ b/quickwit/quickwit-search/src/metrics_trackers.rs @@ -77,17 +77,17 @@ impl PinnedDrop for RootSearchMetricsFuture { let labels = label_values!(names: STATUS_LABELS, status); counter!( parent: ROOT_SEARCH_REQUESTS_TOTAL, - labels: labels, + labels: [labels], ) .increment(1); histogram!( parent: ROOT_SEARCH_REQUEST_DURATION_SECONDS, - labels: labels, + labels: [labels], ) .record(self.start.elapsed().as_secs_f64()); histogram!( parent: ROOT_SEARCH_TARGETED_SPLITS, - 
labels: labels, + labels: [labels], ) .record(num_targeted_splits as f64); } @@ -129,17 +129,17 @@ where F: Future> let labels = label_values!(names: STATUS_LABELS, status); counter!( parent: LEAF_SEARCH_REQUESTS_TOTAL, - labels: labels, + labels: [labels], ) .increment(1); histogram!( parent: LEAF_SEARCH_REQUEST_DURATION_SECONDS, - labels: labels, + labels: [labels], ) .record(self.start.elapsed().as_secs_f64()); histogram!( parent: LEAF_SEARCH_TARGETED_SPLITS, - labels: labels, + labels: [labels], ) .record(self.targeted_splits as f64); } From ebb759118b7bffbc5a912fffaee8acce6dcd02eb Mon Sep 17 00:00:00 2001 From: Luca Cominardi Date: Tue, 5 May 2026 14:51:17 +0200 Subject: [PATCH 42/54] Change label_values! syntax from `names:` keyword to `=>` operator label_values!(ROUTE => method, path) instead of the previous: label_values!(names: ROUTE, method, path) The => visually connects the LabelNames template to its values, aligning with the existing "key" => value pattern in inline labels. 
Co-authored-by: Cursor --- .../src/model/shard_table.rs | 4 +- .../src/actors/indexing_pipeline.rs | 10 ++-- .../src/actors/merge_pipeline.rs | 6 +-- .../metrics_pipeline/parquet_uploader.rs | 2 +- .../quickwit-indexing/src/actors/uploader.rs | 6 +-- .../quickwit-ingest/src/ingest_api_service.rs | 2 +- .../quickwit-ingest/src/ingest_v2/ingester.rs | 18 +++---- quickwit/quickwit-jaeger/src/lib.rs | 10 ++-- quickwit/quickwit-jaeger/src/v1.rs | 4 +- quickwit/quickwit-jaeger/src/v2.rs | 8 +-- .../src/actors/garbage_collector.rs | 12 ++--- .../benches/quickwit_metrics.rs | 52 +++++++++---------- .../quickwit-metrics/examples/http_service.rs | 4 +- quickwit/quickwit-metrics/src/labels.rs | 18 +++---- quickwit/quickwit-metrics/src/lib.rs | 4 +- quickwit/quickwit-metrics/tests/counter.rs | 4 +- quickwit/quickwit-metrics/tests/gauge.rs | 4 +- quickwit/quickwit-metrics/tests/histogram.rs | 2 +- .../quickwit-opentelemetry/src/otlp/logs.rs | 6 +-- .../src/otlp/otel_metrics.rs | 6 +-- .../quickwit-opentelemetry/src/otlp/traces.rs | 6 +-- .../src/ingest/sketch_processor.rs | 6 +-- .../quickwit-search/src/metrics_trackers.rs | 4 +- 23 files changed, 98 insertions(+), 100 deletions(-) diff --git a/quickwit/quickwit-control-plane/src/model/shard_table.rs b/quickwit/quickwit-control-plane/src/model/shard_table.rs index bfb3c12440c..10af4845b80 100644 --- a/quickwit/quickwit-control-plane/src/model/shard_table.rs +++ b/quickwit/quickwit-control-plane/src/model/shard_table.rs @@ -464,7 +464,7 @@ impl ShardTable { // can update the metrics for this specific index. 
if index_label == index_id { let shard_stats = table_entry.shards_stats(); - let labels = label_values!(names: INDEX_ID_LABELS, index_label.to_string()); + let labels = label_values!(INDEX_ID_LABELS => index_label.to_string()); gauge!( parent: OPEN_SHARDS, labels: [labels], @@ -488,7 +488,7 @@ impl ShardTable { num_closed_shards += 1; } } - let labels = label_values!(names: INDEX_ID_LABELS, index_label.to_string()); + let labels = label_values!(INDEX_ID_LABELS => index_label.to_string()); gauge!( parent: OPEN_SHARDS, labels: [labels], diff --git a/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs b/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs index d6f84d83d2f..46143f00a82 100644 --- a/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs +++ b/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs @@ -315,7 +315,7 @@ impl IndexingPipeline { .set_kill_switch(self.kill_switch.clone()) .set_backpressure_micros_counter(counter!( parent: BACKPRESSURE_MICROS, - labels: [label_values!(names: ACTOR_NAME, "publisher")], + labels: [label_values!(ACTOR_NAME => "publisher")], )) .spawn(publisher); @@ -324,7 +324,7 @@ impl IndexingPipeline { .spawn_actor() .set_backpressure_micros_counter(counter!( parent: BACKPRESSURE_MICROS, - labels: [label_values!(names: ACTOR_NAME, "sequencer")], + labels: [label_values!(ACTOR_NAME => "sequencer")], )) .set_kill_switch(self.kill_switch.clone()) .spawn(sequencer); @@ -344,7 +344,7 @@ impl IndexingPipeline { .spawn_actor() .set_backpressure_micros_counter(counter!( parent: BACKPRESSURE_MICROS, - labels: [label_values!(names: ACTOR_NAME, "uploader")], + labels: [label_values!(ACTOR_NAME => "uploader")], )) .set_kill_switch(self.kill_switch.clone()) .spawn(uploader); @@ -378,7 +378,7 @@ impl IndexingPipeline { .spawn_actor() .set_backpressure_micros_counter(counter!( parent: BACKPRESSURE_MICROS, - labels: [label_values!(names: ACTOR_NAME, "indexer")], + labels: [label_values!(ACTOR_NAME => "indexer")], )) 
.set_kill_switch(self.kill_switch.clone()) .spawn(indexer); @@ -395,7 +395,7 @@ impl IndexingPipeline { .spawn_actor() .set_backpressure_micros_counter(counter!( parent: BACKPRESSURE_MICROS, - labels: [label_values!(names: ACTOR_NAME, "doc_processor")], + labels: [label_values!(ACTOR_NAME => "doc_processor")], )) .set_kill_switch(self.kill_switch.clone()) .spawn(doc_processor); diff --git a/quickwit/quickwit-indexing/src/actors/merge_pipeline.rs b/quickwit/quickwit-indexing/src/actors/merge_pipeline.rs index b59d8e5d642..7b57a5af751 100644 --- a/quickwit/quickwit-indexing/src/actors/merge_pipeline.rs +++ b/quickwit/quickwit-indexing/src/actors/merge_pipeline.rs @@ -276,7 +276,7 @@ impl MergePipeline { .set_kill_switch(self.kill_switch.clone()) .set_backpressure_micros_counter(counter!( parent: BACKPRESSURE_MICROS, - labels: [label_values!(names: ACTOR_NAME, "merge_publisher")], + labels: [label_values!(ACTOR_NAME => "merge_publisher")], )) .spawn(merge_publisher); @@ -325,7 +325,7 @@ impl MergePipeline { .set_kill_switch(self.kill_switch.clone()) .set_backpressure_micros_counter(counter!( parent: BACKPRESSURE_MICROS, - labels: [label_values!(names: ACTOR_NAME, "merge_executor")], + labels: [label_values!(ACTOR_NAME => "merge_executor")], )) .spawn(merge_executor); @@ -340,7 +340,7 @@ impl MergePipeline { .set_kill_switch(self.kill_switch.clone()) .set_backpressure_micros_counter(counter!( parent: BACKPRESSURE_MICROS, - labels: [label_values!(names: ACTOR_NAME, "merge_split_downloader")], + labels: [label_values!(ACTOR_NAME => "merge_split_downloader")], )) .spawn(merge_split_downloader); diff --git a/quickwit/quickwit-indexing/src/actors/metrics_pipeline/parquet_uploader.rs b/quickwit/quickwit-indexing/src/actors/metrics_pipeline/parquet_uploader.rs index fc01c39e29c..c3c4cd31a87 100644 --- a/quickwit/quickwit-indexing/src/actors/metrics_pipeline/parquet_uploader.rs +++ b/quickwit/quickwit-indexing/src/actors/metrics_pipeline/parquet_uploader.rs @@ -125,7 +125,7 @@ 
impl ParquetUploader { .get_or_init(|| Semaphore::const_new(self.max_concurrent_uploads)); let gauge = gauge!( parent: AVAILABLE_CONCURRENT_UPLOAD_PERMITS, - labels: [label_values!(names: COMPONENT, "metrics")], + labels: [label_values!(COMPONENT => "metrics")], ); gauge.set(concurrent_upload_permits.available_permits() as f64); concurrent_upload_permits diff --git a/quickwit/quickwit-indexing/src/actors/uploader.rs b/quickwit/quickwit-indexing/src/actors/uploader.rs index 4cc3a8f3399..7dbf185283b 100644 --- a/quickwit/quickwit-indexing/src/actors/uploader.rs +++ b/quickwit/quickwit-indexing/src/actors/uploader.rs @@ -206,21 +206,21 @@ impl Uploader { &CONCURRENT_UPLOAD_PERMITS_INDEX, gauge!( parent: AVAILABLE_CONCURRENT_UPLOAD_PERMITS, - labels: [label_values!(names: COMPONENT, "indexer")], + labels: [label_values!(COMPONENT => "indexer")], ), ), UploaderType::MergeUploader => ( &CONCURRENT_UPLOAD_PERMITS_MERGE, gauge!( parent: AVAILABLE_CONCURRENT_UPLOAD_PERMITS, - labels: [label_values!(names: COMPONENT, "merger")], + labels: [label_values!(COMPONENT => "merger")], ), ), UploaderType::DeleteUploader => ( &CONCURRENT_UPLOAD_PERMITS_MERGE, gauge!( parent: AVAILABLE_CONCURRENT_UPLOAD_PERMITS, - labels: [label_values!(names: COMPONENT, "merger")], + labels: [label_values!(COMPONENT => "merger")], ), ), }; diff --git a/quickwit/quickwit-ingest/src/ingest_api_service.rs b/quickwit/quickwit-ingest/src/ingest_api_service.rs index 5123e958c43..8c4141bb4aa 100644 --- a/quickwit/quickwit-ingest/src/ingest_api_service.rs +++ b/quickwit/quickwit-ingest/src/ingest_api_service.rs @@ -202,7 +202,7 @@ impl IngestApiService { } num_docs += batch_num_docs; - let labels = label_values!(names: VALIDITY, "valid"); + let labels = label_values!(VALIDITY => "valid"); counter!( parent: DOCS_BYTES_TOTAL, labels: [labels], diff --git a/quickwit/quickwit-ingest/src/ingest_v2/ingester.rs b/quickwit/quickwit-ingest/src/ingest_v2/ingester.rs index b6fd8662a0a..57a03a2ac6c 100644 --- 
a/quickwit/quickwit-ingest/src/ingest_v2/ingester.rs +++ b/quickwit/quickwit-ingest/src/ingest_v2/ingester.rs @@ -335,7 +335,7 @@ impl Ingester { ); counter!( parent: RESET_SHARDS_OPERATIONS_TOTAL, - labels: [label_values!(names: STATUS, "success")], + labels: [label_values!(STATUS => "success")], ) .increment(1); @@ -347,7 +347,7 @@ impl Ingester { counter!( parent: RESET_SHARDS_OPERATIONS_TOTAL, - labels: [label_values!(names: STATUS, "error")], + labels: [label_values!(STATUS => "error")], ) .increment(1); } @@ -356,7 +356,7 @@ impl Ingester { counter!( parent: RESET_SHARDS_OPERATIONS_TOTAL, - labels: [label_values!(names: STATUS, "timeout")], + labels: [label_values!(STATUS => "timeout")], ) .increment(1); } @@ -573,12 +573,12 @@ impl Ingester { if valid_doc_batch.is_empty() { counter!( parent: DOCS_TOTAL, - labels: [label_values!(names: VALIDITY, "invalid")], + labels: [label_values!(VALIDITY => "invalid")], ) .increment(parse_failures.len() as u64); counter!( parent: DOCS_BYTES_TOTAL, - labels: [label_values!(names: VALIDITY, "invalid")], + labels: [label_values!(VALIDITY => "invalid")], ) .increment(original_batch_num_bytes); let persist_success = PersistSuccess { @@ -596,23 +596,23 @@ impl Ingester { counter!( parent: DOCS_TOTAL, - labels: [label_values!(names: VALIDITY, "valid")], + labels: [label_values!(VALIDITY => "valid")], ) .increment(valid_doc_batch.num_docs() as u64); counter!( parent: DOCS_BYTES_TOTAL, - labels: [label_values!(names: VALIDITY, "valid")], + labels: [label_values!(VALIDITY => "valid")], ) .increment(valid_doc_batch.num_bytes() as u64); if !parse_failures.is_empty() { counter!( parent: DOCS_TOTAL, - labels: [label_values!(names: VALIDITY, "invalid")], + labels: [label_values!(VALIDITY => "invalid")], ) .increment(parse_failures.len() as u64); counter!( parent: DOCS_BYTES_TOTAL, - labels: [label_values!(names: VALIDITY, "invalid")], + labels: [label_values!(VALIDITY => "invalid")], ) .increment(original_batch_num_bytes - 
valid_doc_batch.num_bytes() as u64); } diff --git a/quickwit/quickwit-jaeger/src/lib.rs b/quickwit/quickwit-jaeger/src/lib.rs index bd2511f52ae..fd271585d8e 100644 --- a/quickwit/quickwit-jaeger/src/lib.rs +++ b/quickwit/quickwit-jaeger/src/lib.rs @@ -423,7 +423,7 @@ impl JaegerService { counter!( parent: FETCHED_TRACES_TOTAL, labels: [label_values!( - names: OPERATION_INDEX_LABELS, + OPERATION_INDEX_LABELS => operation_name, OTEL_TRACES_INDEX_ID )], ) @@ -433,7 +433,7 @@ impl JaegerService { histogram!( parent: REQUEST_DURATION_SECONDS, labels: [label_values!( - names: OPERATION_INDEX_ERROR_LABELS, + OPERATION_INDEX_ERROR_LABELS => operation_name, OTEL_TRACES_INDEX_ID, "false" )], ) @@ -447,7 +447,7 @@ pub(crate) fn record_error(operation_name: &'static str, request_start: Instant) counter!( parent: REQUEST_ERRORS_TOTAL, labels: [label_values!( - names: OPERATION_INDEX_LABELS, + OPERATION_INDEX_LABELS => operation_name, OTEL_TRACES_INDEX_ID )], ) @@ -457,7 +457,7 @@ pub(crate) fn record_error(operation_name: &'static str, request_start: Instant) histogram!( parent: REQUEST_DURATION_SECONDS, labels: [label_values!( - names: OPERATION_INDEX_ERROR_LABELS, + OPERATION_INDEX_ERROR_LABELS => operation_name, OTEL_TRACES_INDEX_ID, "true" )], ) @@ -466,7 +466,7 @@ pub(crate) fn record_error(operation_name: &'static str, request_start: Instant) pub(crate) fn record_send(operation_name: &'static str, num_spans: usize, num_bytes: usize) { let labels = label_values!( - names: OPERATION_INDEX_LABELS, + OPERATION_INDEX_LABELS => operation_name, OTEL_TRACES_INDEX_ID ); counter!(parent: FETCHED_SPANS_TOTAL, labels: [labels]).increment(num_spans as u64); diff --git a/quickwit/quickwit-jaeger/src/v1.rs b/quickwit/quickwit-jaeger/src/v1.rs index c5040a3919d..74ae3d4e0f4 100644 --- a/quickwit/quickwit-jaeger/src/v1.rs +++ b/quickwit/quickwit-jaeger/src/v1.rs @@ -39,7 +39,7 @@ macro_rules! 
metrics { let start = std::time::Instant::now(); let operation = stringify!($operation); let index = $index; - let labels = label_values!(names: OPERATION_INDEX_LABELS, operation, index); + let labels = label_values!(OPERATION_INDEX_LABELS => operation, index); counter!( parent: REQUESTS_TOTAL, labels: [labels], @@ -61,7 +61,7 @@ macro_rules! metrics { let elapsed = start.elapsed().as_secs_f64(); histogram!( parent: REQUEST_DURATION_SECONDS, - labels: [label_values!(names: OPERATION_INDEX_ERROR_LABELS, operation, index, is_error)], + labels: [label_values!(OPERATION_INDEX_ERROR_LABELS => operation, index, is_error)], ) .record(elapsed); diff --git a/quickwit/quickwit-jaeger/src/v2.rs b/quickwit/quickwit-jaeger/src/v2.rs index 4a0984398dc..c71da2725c8 100644 --- a/quickwit/quickwit-jaeger/src/v2.rs +++ b/quickwit/quickwit-jaeger/src/v2.rs @@ -65,7 +65,7 @@ macro_rules! metrics { let start = std::time::Instant::now(); let operation = stringify!($operation); let index = $index; - let labels = label_values!(names: OPERATION_INDEX_LABELS, operation, index); + let labels = label_values!(OPERATION_INDEX_LABELS => operation, index); counter!( parent: REQUESTS_TOTAL, labels: [labels], @@ -87,7 +87,7 @@ macro_rules! 
metrics { let elapsed = start.elapsed().as_secs_f64(); histogram!( parent: REQUEST_DURATION_SECONDS, - labels: [label_values!(names: OPERATION_INDEX_ERROR_LABELS, operation, index, is_error)], + labels: [label_values!(OPERATION_INDEX_ERROR_LABELS => operation, index, is_error)], ) .record(elapsed); @@ -446,7 +446,7 @@ async fn stream_otel_spans_impl( counter!( parent: FETCHED_TRACES_TOTAL, labels: [label_values!( - names: OPERATION_INDEX_LABELS, + OPERATION_INDEX_LABELS => operation_name, OTEL_TRACES_INDEX_ID )], ) @@ -456,7 +456,7 @@ async fn stream_otel_spans_impl( histogram!( parent: REQUEST_DURATION_SECONDS, labels: [label_values!( - names: OPERATION_INDEX_ERROR_LABELS, + OPERATION_INDEX_ERROR_LABELS => operation_name, OTEL_TRACES_INDEX_ID, "false" )], ) diff --git a/quickwit/quickwit-janitor/src/actors/garbage_collector.rs b/quickwit/quickwit-janitor/src/actors/garbage_collector.rs index 1a05e447ca1..660f38b3a54 100644 --- a/quickwit/quickwit-janitor/src/actors/garbage_collector.rs +++ b/quickwit/quickwit-janitor/src/actors/garbage_collector.rs @@ -195,7 +195,7 @@ impl GarbageCollector { let labels_split = label_names!("split_type"); if !tantivy_storages.is_empty() { - let labels_split = label_values!(names: labels_split, "tantivy"); + let labels_split = label_values!(labels_split => "tantivy"); let tantivy_start = Instant::now(); let gc_res = run_garbage_collect( @@ -221,7 +221,7 @@ impl GarbageCollector { self.counters.num_successful_gc_run += 1; counter!( parent: GC_RUNS, - labels: [labels_split, label_values!(names: labels_result, "success")], + labels: [labels_split, label_values!(labels_result => "success")], ) .increment(1); GcRunResult { @@ -244,7 +244,7 @@ impl GarbageCollector { self.counters.num_failed_gc_run += 1; counter!( parent: GC_RUNS, - labels: [labels_split, label_values!(names: labels_result, "error")], + labels: [labels_split, label_values!(labels_result => "error")], ) .increment(1); error!(error=?error, "failed to run garbage 
collection"); @@ -256,7 +256,7 @@ impl GarbageCollector { // Run Parquet GC if !parquet_storages.is_empty() { - let labels_split = label_values!(names: labels_split, "parquet"); + let labels_split = label_values!(labels_split => "parquet"); let parquet_start = Instant::now(); let gc_res = run_parquet_garbage_collect( @@ -282,7 +282,7 @@ impl GarbageCollector { self.counters.num_successful_gc_run += 1; counter!( parent: GC_RUNS, - labels: [labels_split, label_values!(names: labels_result, "success")], + labels: [labels_split, label_values!(labels_result => "success")], ) .increment(1); GcRunResult { @@ -301,7 +301,7 @@ impl GarbageCollector { self.counters.num_failed_gc_run += 1; counter!( parent: GC_RUNS, - labels: [labels_split, label_values!(names: labels_result, "error")], + labels: [labels_split, label_values!(labels_result => "error")], ) .increment(1); error!(error=?error, "failed to run parquet garbage collection"); diff --git a/quickwit/quickwit-metrics/benches/quickwit_metrics.rs b/quickwit/quickwit-metrics/benches/quickwit_metrics.rs index 55746585b8e..ea089f12bf4 100644 --- a/quickwit/quickwit-metrics/benches/quickwit_metrics.rs +++ b/quickwit/quickwit-metrics/benches/quickwit_metrics.rs @@ -751,7 +751,7 @@ fn labels_counter(c: &mut Criterion) { b.iter(|| { counter!( parent: PARENT_COUNTER, - labels: [label_values!(names: LABELS_1, "GET")] + labels: [label_values!(LABELS_1 => "GET")] ) .increment(1); }); @@ -761,7 +761,7 @@ fn labels_counter(c: &mut Criterion) { b.iter(|| { counter!( parent: PARENT_COUNTER, - labels: [label_values!(names: LABELS_3, "GET", "/health", "200")] + labels: [label_values!(LABELS_3 => "GET", "/health", "200")] ) .increment(1); }); @@ -771,7 +771,7 @@ fn labels_counter(c: &mut Criterion) { b.iter(|| { counter!( parent: PARENT_COUNTER, - labels: [label_values!(names: LABELS_1, "GET".to_string())] + labels: [label_values!(LABELS_1 => "GET".to_string())] ) .increment(1); }); @@ -787,7 +787,7 @@ fn labels_counter(c: &mut Criterion) { 
idx += 1; counter!( parent: PARENT_COUNTER, - labels: [label_values!(names: LABELS_1, m)] + labels: [label_values!(LABELS_1 => m)] ) .increment(1); }); @@ -806,7 +806,7 @@ fn labels_gauge(c: &mut Criterion) { b.iter(|| { gauge!( parent: PARENT_GAUGE, - labels: [label_values!(names: LABELS_1, "GET")] + labels: [label_values!(LABELS_1 => "GET")] ) .set(42.0); }); @@ -816,7 +816,7 @@ fn labels_gauge(c: &mut Criterion) { b.iter(|| { gauge!( parent: PARENT_GAUGE, - labels: [label_values!(names: LABELS_3, "GET", "/health", "200")] + labels: [label_values!(LABELS_3 => "GET", "/health", "200")] ) .set(42.0); }); @@ -835,7 +835,7 @@ fn labels_histogram(c: &mut Criterion) { b.iter(|| { histogram!( parent: PARENT_HISTOGRAM, - labels: [label_values!(names: LABELS_1, "GET")] + labels: [label_values!(LABELS_1 => "GET")] ) .record(0.123); }); @@ -845,7 +845,7 @@ fn labels_histogram(c: &mut Criterion) { b.iter(|| { histogram!( parent: PARENT_HISTOGRAM, - labels: [label_values!(names: LABELS_3, "GET", "/health", "200")] + labels: [label_values!(LABELS_3 => "GET", "/health", "200")] ) .record(0.123); }); @@ -873,7 +873,7 @@ fn composite_counter(c: &mut Criterion) { b.iter(|| { counter!( parent: PARENT_COUNTER, - labels: [label_values!(names: COMP_ALL_3, "GET", "/health", "200")], + labels: [label_values!(COMP_ALL_3 => "GET", "/health", "200")], ) .increment(1); }); @@ -884,9 +884,9 @@ fn composite_counter(c: &mut Criterion) { counter!( parent: PARENT_COUNTER, labels: [ - label_values!(names: COMP_METHOD, "GET"), - label_values!(names: COMP_ENDPOINT, "/health"), - label_values!(names: COMP_STATUS, "200"), + label_values!(COMP_METHOD => "GET"), + label_values!(COMP_ENDPOINT => "/health"), + label_values!(COMP_STATUS => "200"), ], ) .increment(1); @@ -898,8 +898,8 @@ fn composite_counter(c: &mut Criterion) { counter!( parent: PARENT_COUNTER, labels: [ - label_values!(names: COMP_METHOD, "GET"), - label_values!(names: COMP_ENDPOINT, "/health"), + label_values!(COMP_METHOD => "GET"), + 
label_values!(COMP_ENDPOINT => "/health"), ], ) .increment(1); @@ -919,7 +919,7 @@ fn composite_gauge(c: &mut Criterion) { b.iter(|| { gauge!( parent: PARENT_GAUGE, - labels: [label_values!(names: COMP_ALL_3, "GET", "/health", "200")], + labels: [label_values!(COMP_ALL_3 => "GET", "/health", "200")], ) .set(42.0); }); @@ -930,9 +930,9 @@ fn composite_gauge(c: &mut Criterion) { gauge!( parent: PARENT_GAUGE, labels: [ - label_values!(names: COMP_METHOD, "GET"), - label_values!(names: COMP_ENDPOINT, "/health"), - label_values!(names: COMP_STATUS, "200"), + label_values!(COMP_METHOD => "GET"), + label_values!(COMP_ENDPOINT => "/health"), + label_values!(COMP_STATUS => "200"), ], ) .set(42.0); @@ -944,8 +944,8 @@ fn composite_gauge(c: &mut Criterion) { gauge!( parent: PARENT_GAUGE, labels: [ - label_values!(names: COMP_METHOD, "GET"), - label_values!(names: COMP_ENDPOINT, "/health"), + label_values!(COMP_METHOD => "GET"), + label_values!(COMP_ENDPOINT => "/health"), ], ) .set(42.0); @@ -965,7 +965,7 @@ fn composite_histogram(c: &mut Criterion) { b.iter(|| { histogram!( parent: PARENT_HISTOGRAM, - labels: [label_values!(names: COMP_ALL_3, "GET", "/health", "200")], + labels: [label_values!(COMP_ALL_3 => "GET", "/health", "200")], ) .record(0.123); }); @@ -976,9 +976,9 @@ fn composite_histogram(c: &mut Criterion) { histogram!( parent: PARENT_HISTOGRAM, labels: [ - label_values!(names: COMP_METHOD, "GET"), - label_values!(names: COMP_ENDPOINT, "/health"), - label_values!(names: COMP_STATUS, "200"), + label_values!(COMP_METHOD => "GET"), + label_values!(COMP_ENDPOINT => "/health"), + label_values!(COMP_STATUS => "200"), ], ) .record(0.123); @@ -990,8 +990,8 @@ fn composite_histogram(c: &mut Criterion) { histogram!( parent: PARENT_HISTOGRAM, labels: [ - label_values!(names: COMP_METHOD, "GET"), - label_values!(names: COMP_ENDPOINT, "/health"), + label_values!(COMP_METHOD => "GET"), + label_values!(COMP_ENDPOINT => "/health"), ], ) .record(0.123); diff --git 
a/quickwit/quickwit-metrics/examples/http_service.rs b/quickwit/quickwit-metrics/examples/http_service.rs index 2056d6804d7..d52b7072605 100644 --- a/quickwit/quickwit-metrics/examples/http_service.rs +++ b/quickwit/quickwit-metrics/examples/http_service.rs @@ -86,14 +86,14 @@ static HTTP_ACTIVE_CONNECTIONS_BY_REGION: LazyLock = LazyLock::new(|| { const ROUTE_LABELS: LabelNames<2> = label_names!("method", "path"); fn record_request(method: &'static str, path: &'static str, duration: f64, size: f64) { - let route = label_values!(names: ROUTE_LABELS, method, path); + let route = label_values!(ROUTE_LABELS => method, path); histogram!(parent: HTTP_REQUEST_DURATION, labels: [route]).record(duration); histogram!(parent: HTTP_RESPONSE_SIZE, labels: [route]).record(size); counter!(parent: HTTP_REQUESTS_TOTAL, labels: [route]).increment(1); } fn record_dynamic_request(method: String, path: String, duration: f64) { - let route = label_values!(names: ROUTE_LABELS, method, path); + let route = label_values!(ROUTE_LABELS => method, path); histogram!(parent: HTTP_REQUEST_DURATION, labels: [route]).record(duration); } diff --git a/quickwit/quickwit-metrics/src/labels.rs b/quickwit/quickwit-metrics/src/labels.rs index b52004af8d9..1018a79fa2a 100644 --- a/quickwit/quickwit-metrics/src/labels.rs +++ b/quickwit/quickwit-metrics/src/labels.rs @@ -53,18 +53,18 @@ macro_rules! 
label_names { /// const GC_KEYS: LabelNames<2> = label_names!("status", "split_type"); /// /// // All-static — zero allocation: -/// let lv = label_values!(names: GC_KEYS, "success", "tantivy"); +/// let lv = label_values!(GC_KEYS => "success", "tantivy"); /// /// // Mixed types — &'static str and String — just work: -/// let lv = label_values!(names: GC_KEYS, "success", split_type.to_string()); +/// let lv = label_values!(GC_KEYS => "success", split_type.to_string()); /// /// // Reuse the same Labels across multiple metrics: -/// counter!(parent: GC_COUNTER, labels: lv).increment(1); -/// gauge!(parent: GC_GAUGE, labels: lv).set(42.0); +/// counter!(parent: GC_COUNTER, labels: [lv]).increment(1); +/// gauge!(parent: GC_GAUGE, labels: [lv]).set(42.0); /// ``` #[macro_export] macro_rules! label_values { - (names: $labels:expr, $($val:expr),+ $(,)?) => { + ($labels:expr => $($val:expr),+ $(,)?) => { $labels.__with_values([$(Into::<$crate::__metrics::SharedString>::into($val)),+]) }; } @@ -102,14 +102,14 @@ macro_rules! 
labels { /// const SPLIT_KEYS: LabelNames<2> = label_names!("source", "level"); /// /// // All the same type: -/// let lv = label_values!(names: SPLIT_KEYS, "prod", "info"); +/// let lv = label_values!(SPLIT_KEYS => "prod", "info"); /// /// // Mixed types: -/// let lv = label_values!(names: SPLIT_KEYS, source_uid, level.to_string()); +/// let lv = label_values!(SPLIT_KEYS => source_uid, level.to_string()); /// /// // Reuse the same Labels across metrics: -/// let c = counter!(parent: BASE_COUNTER, labels: lv); -/// let g = gauge!(parent: BASE_GAUGE, labels: lv); +/// let c = counter!(parent: BASE_COUNTER, labels: [lv]); +/// let g = gauge!(parent: BASE_GAUGE, labels: [lv]); /// ``` pub struct LabelNames { names: [&'static str; N], diff --git a/quickwit/quickwit-metrics/src/lib.rs b/quickwit/quickwit-metrics/src/lib.rs index f47e035cd4f..08bb1568ad8 100644 --- a/quickwit/quickwit-metrics/src/lib.rs +++ b/quickwit/quickwit-metrics/src/lib.rs @@ -91,14 +91,14 @@ //! const ROUTE: LabelNames<2> = label_names!("method", "path"); //! //! fn on_request(method: &'static str, path: &'static str, duration: f64) { -//! let route = label_values!(names: ROUTE, method, path); +//! let route = label_values!(ROUTE => method, path); //! histogram!(parent: REQUEST_DURATION, labels: [route]).record(duration); //! counter!(parent: HTTP_REQUESTS, labels: [route]).increment(1); //! } //! //! // Mixed types work too — Into is called per-element: //! fn on_dynamic_request(method: &'static str, path: String, duration: f64) { -//! let route = label_values!(names: ROUTE, method, path); +//! let route = label_values!(ROUTE => method, path); //! histogram!(parent: REQUEST_DURATION, labels: [route]).record(duration); //! } //! 
``` diff --git a/quickwit/quickwit-metrics/tests/counter.rs b/quickwit/quickwit-metrics/tests/counter.rs index 3f04da2f482..3974b820ee4 100644 --- a/quickwit/quickwit-metrics/tests/counter.rs +++ b/quickwit/quickwit-metrics/tests/counter.rs @@ -218,7 +218,7 @@ fn label_composition_two_labels() { const STATUS: quickwit_metrics::LabelNames<1> = label_names!("status"); let child = counter!( parent: parent, - labels: [label_values!(names: REGION, "us-east"), label_values!(names: STATUS, "ok")], + labels: [label_values!(REGION => "us-east"), label_values!(STATUS => "ok")], ); child.increment(3); }); @@ -281,7 +281,7 @@ fn label_composition_same_hash_as_single() { let via_compose = counter!( parent: parent, - labels: [label_values!(names: REGION, "us"), label_values!(names: STATUS, "ok")], + labels: [label_values!(REGION => "us"), label_values!(STATUS => "ok")], ); let via_single = counter!( parent: parent, diff --git a/quickwit/quickwit-metrics/tests/gauge.rs b/quickwit/quickwit-metrics/tests/gauge.rs index ad891ac1961..ed46783c39b 100644 --- a/quickwit/quickwit-metrics/tests/gauge.rs +++ b/quickwit/quickwit-metrics/tests/gauge.rs @@ -176,7 +176,7 @@ fn label_composition_two_labels() { const STATUS: quickwit_metrics::LabelNames<1> = label_names!("status"); let child = gauge!( parent: parent, - labels: [label_values!(names: REGION, "us-east"), label_values!(names: STATUS, "ok")], + labels: [label_values!(REGION => "us-east"), label_values!(STATUS => "ok")], ); child.set(42.0); }); @@ -209,7 +209,7 @@ fn label_composition_same_hash_as_single() { let via_compose = gauge!( parent: parent, - labels: [label_values!(names: REGION, "us"), label_values!(names: STATUS, "ok")], + labels: [label_values!(REGION => "us"), label_values!(STATUS => "ok")], ); let via_single = gauge!( parent: parent, diff --git a/quickwit/quickwit-metrics/tests/histogram.rs b/quickwit/quickwit-metrics/tests/histogram.rs index 787a257c592..546d362de18 100644 --- 
a/quickwit/quickwit-metrics/tests/histogram.rs +++ b/quickwit/quickwit-metrics/tests/histogram.rs @@ -111,7 +111,7 @@ fn label_composition_two_labels() { const STATUS: quickwit_metrics::LabelNames<1> = label_names!("status"); let child = histogram!( parent: parent, - labels: [label_values!(names: REGION, "us-east"), label_values!(names: STATUS, "ok")], + labels: [label_values!(REGION => "us-east"), label_values!(STATUS => "ok")], ); child.record(2.5); }); diff --git a/quickwit/quickwit-opentelemetry/src/otlp/logs.rs b/quickwit/quickwit-opentelemetry/src/otlp/logs.rs index bce58777720..1912b165bf0 100644 --- a/quickwit/quickwit-opentelemetry/src/otlp/logs.rs +++ b/quickwit/quickwit-opentelemetry/src/otlp/logs.rs @@ -243,7 +243,7 @@ impl OtlpGrpcLogsService { let num_bytes = doc_batch.num_bytes() as u64; self.store_logs(index_id.clone(), doc_batch).await?; - let labels = label_values!(names: OTLP_GRPC_LABELS, "logs", index_id, "grpc", "protobuf"); + let labels = label_values!(OTLP_GRPC_LABELS => "logs", index_id, "grpc", "protobuf"); counter!( parent: INGESTED_LOG_RECORDS_TOTAL, labels: [labels], @@ -321,7 +321,7 @@ impl OtlpGrpcLogsService { let start = std::time::Instant::now(); let labels = label_values!( - names: OTLP_GRPC_LABELS, + OTLP_GRPC_LABELS => "logs", index_id.clone(), "grpc", "protobuf" ); counter!( @@ -344,7 +344,7 @@ impl OtlpGrpcLogsService { histogram!( parent: REQUEST_DURATION_SECONDS, labels: [label_values!( - names: OTLP_GRPC_ERROR_LABELS, + OTLP_GRPC_ERROR_LABELS => "logs", index_id, "grpc", "protobuf", is_error )], ) diff --git a/quickwit/quickwit-opentelemetry/src/otlp/otel_metrics.rs b/quickwit/quickwit-opentelemetry/src/otlp/otel_metrics.rs index d83425b6492..c1431b59831 100644 --- a/quickwit/quickwit-opentelemetry/src/otlp/otel_metrics.rs +++ b/quickwit/quickwit-opentelemetry/src/otlp/otel_metrics.rs @@ -240,7 +240,7 @@ impl OtlpGrpcMetricsService { self.store_metrics(index_id.clone(), doc_batch).await?; let labels = - label_values!(names: 
OTLP_GRPC_LABELS, "metrics", index_id, "grpc", "protobuf"); + label_values!(OTLP_GRPC_LABELS => "metrics", index_id, "grpc", "protobuf"); counter!( parent: INGESTED_DATA_POINTS_TOTAL, labels: [labels], @@ -336,7 +336,7 @@ impl OtlpGrpcMetricsService { let start = std::time::Instant::now(); let labels = label_values!( - names: OTLP_GRPC_LABELS, + OTLP_GRPC_LABELS => "metrics", index_id.clone(), "grpc", "protobuf" ); counter!( @@ -361,7 +361,7 @@ impl OtlpGrpcMetricsService { histogram!( parent: REQUEST_DURATION_SECONDS, labels: [label_values!( - names: OTLP_GRPC_ERROR_LABELS, + OTLP_GRPC_ERROR_LABELS => "metrics", index_id, "grpc", "protobuf", is_error )], ) diff --git a/quickwit/quickwit-opentelemetry/src/otlp/traces.rs b/quickwit/quickwit-opentelemetry/src/otlp/traces.rs index 7b5c2214482..e34ee2a9c81 100644 --- a/quickwit/quickwit-opentelemetry/src/otlp/traces.rs +++ b/quickwit/quickwit-opentelemetry/src/otlp/traces.rs @@ -705,7 +705,7 @@ impl OtlpGrpcTracesService { let num_bytes = doc_batch.num_bytes() as u64; self.store_spans(index_id.clone(), doc_batch).await?; - let labels = label_values!(names: OTLP_GRPC_LABELS, "trace", index_id, "grpc", "protobuf"); + let labels = label_values!(OTLP_GRPC_LABELS => "trace", index_id, "grpc", "protobuf"); counter!( parent: INGESTED_SPANS_TOTAL, labels: [labels], @@ -786,7 +786,7 @@ impl OtlpGrpcTracesService { let start = std::time::Instant::now(); let labels = label_values!( - names: OTLP_GRPC_LABELS, + OTLP_GRPC_LABELS => "trace", index_id.clone(), "grpc", "protobuf" ); counter!( @@ -809,7 +809,7 @@ impl OtlpGrpcTracesService { histogram!( parent: REQUEST_DURATION_SECONDS, labels: [label_values!( - names: OTLP_GRPC_ERROR_LABELS, + OTLP_GRPC_ERROR_LABELS => "trace", index_id, "grpc", "protobuf", is_error )], ) diff --git a/quickwit/quickwit-parquet-engine/src/ingest/sketch_processor.rs b/quickwit/quickwit-parquet-engine/src/ingest/sketch_processor.rs index 874d2f6f1d0..062a3f33134 100644 --- 
a/quickwit/quickwit-parquet-engine/src/ingest/sketch_processor.rs +++ b/quickwit/quickwit-parquet-engine/src/ingest/sketch_processor.rs @@ -58,14 +58,12 @@ impl SketchParquetIngestProcessor { }; if let Err(err) = self.validate_schema(&batch) { - counter!(parent: ERRORS_TOTAL, labels: [labels_kind, labels_operation]) - .increment(1); + counter!(parent: ERRORS_TOTAL, labels: [labels_kind, labels_operation]).increment(1); return Err(err); } if let Err(err) = self.validate_sketch_arrays(&batch) { - counter!(parent: ERRORS_TOTAL, labels: [labels_kind, labels_operation]) - .increment(1); + counter!(parent: ERRORS_TOTAL, labels: [labels_kind, labels_operation]).increment(1); return Err(err); } diff --git a/quickwit/quickwit-search/src/metrics_trackers.rs b/quickwit/quickwit-search/src/metrics_trackers.rs index a46b33dd90c..3fc0d7f9519 100644 --- a/quickwit/quickwit-search/src/metrics_trackers.rs +++ b/quickwit/quickwit-search/src/metrics_trackers.rs @@ -74,7 +74,7 @@ impl PinnedDrop for RootSearchMetricsFuture { ) => (*num_targeted_splits, "cancelled"), }; - let labels = label_values!(names: STATUS_LABELS, status); + let labels = label_values!(STATUS_LABELS => status); counter!( parent: ROOT_SEARCH_REQUESTS_TOTAL, labels: [labels], @@ -126,7 +126,7 @@ where F: Future> { fn drop(self: Pin<&mut Self>) { let status = self.status.unwrap_or("cancelled"); - let labels = label_values!(names: STATUS_LABELS, status); + let labels = label_values!(STATUS_LABELS => status); counter!( parent: LEAF_SEARCH_REQUESTS_TOTAL, labels: [labels], From a0a3f04233d00f6a6dceba42c54a79c557215246 Mon Sep 17 00:00:00 2001 From: Shuhei Kitagawa Date: Tue, 5 May 2026 14:55:24 +0200 Subject: [PATCH 43/54] Use scoped counters for local metrics --- quickwit/quickwit-common/src/io.rs | 21 ++++- quickwit/quickwit-common/src/metrics.rs | 82 +++++++++-------- .../src/otlp/otel_metrics.rs | 3 +- quickwit/quickwit-search/src/leaf.rs | 6 +- quickwit/quickwit-search/src/metrics.rs | 89 ++++++------------- 5 
files changed, 93 insertions(+), 108 deletions(-) diff --git a/quickwit/quickwit-common/src/io.rs b/quickwit/quickwit-common/src/io.rs index 76e5e4271b9..b02bf6b1053 100644 --- a/quickwit/quickwit-common/src/io.rs +++ b/quickwit/quickwit-common/src/io.rs @@ -37,7 +37,7 @@ use pin_project::pin_project; use quickwit_metrics::{Counter, counter}; use tokio::io::AsyncWrite; -use crate::metrics::MaybeRegisteredCounter; +use crate::metrics::ScopedCounter; use crate::{KillSwitch, Progress, ProtectedZoneGuard}; // Max 1MB at a time. @@ -79,7 +79,7 @@ pub fn limiter(throughput: ByteSize) -> Limiter { #[derive(Clone, Default)] pub struct IoControls { throughput_limiter_opt: Option, - bytes_counter: MaybeRegisteredCounter, + bytes_counter: ScopedCounter, progress: Progress, kill_switch: KillSwitch, } @@ -107,7 +107,7 @@ impl IoControls { } pub fn set_component(mut self, component: &'static str) -> Self { - self.bytes_counter = MaybeRegisteredCounter::registered(counter!( + self.bytes_counter = ScopedCounter::Global(counter!( parent: WRITE_BYTES, "component" => component, )); @@ -127,7 +127,7 @@ impl IoControls { } pub fn set_bytes_counter(mut self, bytes_counter: Counter) -> Self { - self.bytes_counter = MaybeRegisteredCounter::registered(bytes_counter); + self.bytes_counter = ScopedCounter::Global(bytes_counter); self } @@ -359,6 +359,19 @@ mod tests { assert_eq!(io_controls.num_bytes(), 2_000_000u64); } + #[tokio::test] + async fn test_controlled_writer_registered_counter_async() { + let io_controls = + IoControls::default().set_component("test_controlled_writer_registered_counter_async"); + let mut controlled_write = io_controls.clone().wrap_write(sink()); + let buf = vec![44u8; 1_000]; + + controlled_write.write_all(&buf).await.unwrap(); + controlled_write.flush().await.unwrap(); + + assert_eq!(io_controls.num_bytes(), 1_000u64); + } + #[test] fn test_controlled_writer_limited_sync() { let io_controls = IoControls::default().set_throughput_limit(ByteSize::mb(2)); diff 
--git a/quickwit/quickwit-common/src/metrics.rs b/quickwit/quickwit-common/src/metrics.rs index 37c71947f4a..a97f3781534 100644 --- a/quickwit/quickwit-common/src/metrics.rs +++ b/quickwit/quickwit-common/src/metrics.rs @@ -24,49 +24,45 @@ use quickwit_metrics::{Counter, Gauge, gauge}; static PROMETHEUS_HANDLE: OnceLock = OnceLock::new(); -#[derive(Clone)] -pub struct MaybeRegisteredCounter { - inner: MaybeRegisteredCounterInner, +#[derive(Clone, Default)] +pub struct LocalCounter { + inner: Arc, } -#[derive(Clone)] -enum MaybeRegisteredCounterInner { - Local(Arc), - Registered(Counter), -} +impl LocalCounter { + pub fn increment(&self, value: u64) { + self.inner.fetch_add(value, Ordering::Relaxed); + } -impl Default for MaybeRegisteredCounter { - fn default() -> Self { - Self::local() + pub fn get(&self) -> u64 { + self.inner.load(Ordering::Relaxed) } } -impl MaybeRegisteredCounter { - pub fn local() -> Self { - Self { - inner: MaybeRegisteredCounterInner::Local(Arc::new(AtomicU64::new(0))), - } - } +#[derive(Clone)] +pub enum ScopedCounter { + Local(LocalCounter), + Global(Counter), +} - pub fn registered(counter: Counter) -> Self { - Self { - inner: MaybeRegisteredCounterInner::Registered(counter), - } +impl Default for ScopedCounter { + fn default() -> Self { + Self::Local(LocalCounter::default()) } +} +impl ScopedCounter { pub fn increment(&self, value: u64) { - match &self.inner { - MaybeRegisteredCounterInner::Local(counter) => { - counter.fetch_add(value, Ordering::Relaxed); - } - MaybeRegisteredCounterInner::Registered(counter) => counter.increment(value), + match self { + Self::Local(counter) => counter.increment(value), + Self::Global(counter) => counter.increment(value), } } pub fn get(&self) -> u64 { - match &self.inner { - MaybeRegisteredCounterInner::Local(counter) => counter.load(Ordering::Relaxed), - MaybeRegisteredCounterInner::Registered(counter) => counter.get(), + match self { + Self::Local(counter) => counter.get(), + Self::Global(counter) => 
counter.get(), } } } @@ -227,8 +223,20 @@ mod tests { use super::*; #[test] - fn maybe_registered_counter_counts_locally() { - let counter = MaybeRegisteredCounter::local(); + fn local_counter_counts_locally() { + let counter = LocalCounter::default(); + let counter_clone = counter.clone(); + + counter.increment(3); + counter_clone.increment(4); + + assert_eq!(counter.get(), 7); + assert_eq!(counter_clone.get(), 7); + } + + #[test] + fn scoped_counter_counts_locally() { + let counter = ScopedCounter::default(); let counter_clone = counter.clone(); counter.increment(3); @@ -239,18 +247,18 @@ mod tests { } #[test] - fn maybe_registered_counter_wraps_registered_counter() { - let registered_counter = counter!( - name: "maybe_registered_counter_test", - description: "Maybe registered counter test.", + fn scoped_counter_wraps_global_counter() { + let global_counter = counter!( + name: "scoped_counter_test", + description: "Scoped counter test.", subsystem: "", ); - let counter = MaybeRegisteredCounter::registered(registered_counter.clone()); + let counter = ScopedCounter::Global(global_counter.clone()); counter.increment(5); assert_eq!(counter.get(), 5); - assert_eq!(registered_counter.get(), 5); + assert_eq!(global_counter.get(), 5); } #[test] diff --git a/quickwit/quickwit-opentelemetry/src/otlp/otel_metrics.rs b/quickwit/quickwit-opentelemetry/src/otlp/otel_metrics.rs index c1431b59831..f159dcbf75c 100644 --- a/quickwit/quickwit-opentelemetry/src/otlp/otel_metrics.rs +++ b/quickwit/quickwit-opentelemetry/src/otlp/otel_metrics.rs @@ -239,8 +239,7 @@ impl OtlpGrpcMetricsService { let num_bytes = doc_batch.num_bytes() as u64; self.store_metrics(index_id.clone(), doc_batch).await?; - let labels = - label_values!(OTLP_GRPC_LABELS => "metrics", index_id, "grpc", "protobuf"); + let labels = label_values!(OTLP_GRPC_LABELS => "metrics", index_id, "grpc", "protobuf"); counter!( parent: INGESTED_DATA_POINTS_TOTAL, labels: [labels], diff --git 
a/quickwit/quickwit-search/src/leaf.rs b/quickwit/quickwit-search/src/leaf.rs index b05828944ea..278848178e8 100644 --- a/quickwit/quickwit-search/src/leaf.rs +++ b/quickwit/quickwit-search/src/leaf.rs @@ -1628,7 +1628,7 @@ pub async fn single_doc_mapping_leaf_search( make_merge_collector(&request, searcher_context.get_aggregation_limits())?; let mut incremental_merge_collector = IncrementalCollector::new(merge_collector); - let split_outcome_counters = Arc::new(SplitSearchOutcomeCounters::new_unregistered()); + let split_outcome_counters = Arc::new(SplitSearchOutcomeCounters::default()); // Sort out the splits that are already in the partial result cache. let uncached_splits: Vec<(SplitIdAndFooterOffsets, SearchRequest)> = @@ -1807,7 +1807,7 @@ enum SplitSearchState { } impl SplitSearchState { - pub fn increment(self, counters: &SplitSearchOutcomeCounters) { + fn increment(self, counters: &SplitSearchOutcomeCounters) { match self { SplitSearchState::Start => counters.cancel_before_warmup.increment(1), SplitSearchState::CacheHit => counters.cache_hit.increment(1), @@ -1838,7 +1838,7 @@ impl SplitSearchStateGuard { pub fn new(local_split_search_outcome_counters: Arc) -> Self { SplitSearchStateGuard { state: SplitSearchState::Start, - local_split_search_outcome_counters: local_split_search_outcome_counters.clone(), + local_split_search_outcome_counters, } } diff --git a/quickwit/quickwit-search/src/metrics.rs b/quickwit/quickwit-search/src/metrics.rs index 047a537059b..b8d9c5fbcb6 100644 --- a/quickwit/quickwit-search/src/metrics.rs +++ b/quickwit/quickwit-search/src/metrics.rs @@ -18,7 +18,7 @@ use std::fmt; use std::sync::LazyLock; use bytesize::ByteSize; -use quickwit_common::metrics::{MaybeRegisteredCounter, exponential_buckets, linear_buckets}; +use quickwit_common::metrics::{ScopedCounter, exponential_buckets, linear_buckets}; use quickwit_metrics::{ Counter, Gauge, Histogram, LabelNames, counter, gauge, histogram, label_names, }; @@ -27,7 +27,7 @@ pub(crate) 
const STATUS_LABELS: LabelNames<1> = label_names!("status"); fn print_if_not_null( field_name: &'static str, - counter: &MaybeRegisteredCounter, + counter: &ScopedCounter, f: &mut fmt::Formatter, ) -> fmt::Result { let val = counter.get(); @@ -37,15 +37,16 @@ fn print_if_not_null( Ok(()) } +#[derive(Default)] pub struct SplitSearchOutcomeCounters { - pub cancel_before_warmup: MaybeRegisteredCounter, - pub cache_hit: MaybeRegisteredCounter, - pub pruned_before_warmup: MaybeRegisteredCounter, - pub cancel_warmup: MaybeRegisteredCounter, - pub pruned_after_warmup: MaybeRegisteredCounter, - pub cancel_cpu_queue: MaybeRegisteredCounter, - pub cancel_cpu: MaybeRegisteredCounter, - pub success: MaybeRegisteredCounter, + pub cancel_before_warmup: ScopedCounter, + pub cache_hit: ScopedCounter, + pub pruned_before_warmup: ScopedCounter, + pub cancel_warmup: ScopedCounter, + pub pruned_after_warmup: ScopedCounter, + pub cancel_cpu_queue: ScopedCounter, + pub cancel_cpu: ScopedCounter, + pub success: ScopedCounter, } impl fmt::Display for SplitSearchOutcomeCounters { @@ -64,58 +65,22 @@ impl fmt::Display for SplitSearchOutcomeCounters { impl SplitSearchOutcomeCounters { /// Create a new SplitSearchOutcomeCounters instance, registered in prometheus. - pub fn new_registered() -> Self { - Self::new_registered_from_counter(&SPLIT_SEARCH_OUTCOME) - } - - /// Create a new SplitSearchOutcomeCounters instance that is not reported. 
- pub fn new_unregistered() -> Self { - SplitSearchOutcomeCounters { - cancel_before_warmup: MaybeRegisteredCounter::local(), - cache_hit: MaybeRegisteredCounter::local(), - pruned_before_warmup: MaybeRegisteredCounter::local(), - cancel_warmup: MaybeRegisteredCounter::local(), - pruned_after_warmup: MaybeRegisteredCounter::local(), - cancel_cpu_queue: MaybeRegisteredCounter::local(), - cancel_cpu: MaybeRegisteredCounter::local(), - success: MaybeRegisteredCounter::local(), - } - } - - fn new_registered_from_counter(search_split_outcome: &Counter) -> Self { + pub fn new_global() -> Self { + let counter = |category: &'static str| { + ScopedCounter::Global(counter!( + parent: &SPLIT_SEARCH_OUTCOME, + "category" => category, + )) + }; SplitSearchOutcomeCounters { - cancel_before_warmup: MaybeRegisteredCounter::registered(counter!( - parent: search_split_outcome, - "category" => "cancel_before_warmup", - )), - cache_hit: MaybeRegisteredCounter::registered(counter!( - parent: search_split_outcome, - "category" => "cache_hit", - )), - pruned_before_warmup: MaybeRegisteredCounter::registered(counter!( - parent: search_split_outcome, - "category" => "pruned_before_warmup", - )), - cancel_warmup: MaybeRegisteredCounter::registered(counter!( - parent: search_split_outcome, - "category" => "cancel_warmup", - )), - pruned_after_warmup: MaybeRegisteredCounter::registered(counter!( - parent: search_split_outcome, - "category" => "pruned_after_warmup", - )), - cancel_cpu_queue: MaybeRegisteredCounter::registered(counter!( - parent: search_split_outcome, - "category" => "cancel_cpu_queue", - )), - cancel_cpu: MaybeRegisteredCounter::registered(counter!( - parent: search_split_outcome, - "category" => "cancel_cpu", - )), - success: MaybeRegisteredCounter::registered(counter!( - parent: search_split_outcome, - "category" => "success", - )), + cancel_before_warmup: counter("cancel_before_warmup"), + cache_hit: counter("cache_hit"), + pruned_before_warmup: 
counter("pruned_before_warmup"), + cancel_warmup: counter("cancel_warmup"), + pruned_after_warmup: counter("pruned_after_warmup"), + cancel_cpu_queue: counter("cancel_cpu_queue"), + cancel_cpu: counter("cancel_cpu"), + success: counter("success"), } } } @@ -161,7 +126,7 @@ static SPLIT_SEARCH_OUTCOME: LazyLock = LazyLock::new(|| { }); pub(crate) static SPLIT_SEARCH_OUTCOME_TOTAL: LazyLock = - LazyLock::new(SplitSearchOutcomeCounters::new_registered); + LazyLock::new(SplitSearchOutcomeCounters::new_global); static LEAF_SEARCH_SINGLE_SPLIT_TASKS_BASE: LazyLock = LazyLock::new(|| { gauge!( From d599bd61ad7d8bcaa3002b8bc37b04fedbfbb76b Mon Sep 17 00:00:00 2001 From: Shuhei Kitagawa Date: Tue, 5 May 2026 15:03:41 +0200 Subject: [PATCH 44/54] Remove redundant io counter test --- quickwit/quickwit-common/src/io.rs | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/quickwit/quickwit-common/src/io.rs b/quickwit/quickwit-common/src/io.rs index b02bf6b1053..c7be6698b6d 100644 --- a/quickwit/quickwit-common/src/io.rs +++ b/quickwit/quickwit-common/src/io.rs @@ -359,19 +359,6 @@ mod tests { assert_eq!(io_controls.num_bytes(), 2_000_000u64); } - #[tokio::test] - async fn test_controlled_writer_registered_counter_async() { - let io_controls = - IoControls::default().set_component("test_controlled_writer_registered_counter_async"); - let mut controlled_write = io_controls.clone().wrap_write(sink()); - let buf = vec![44u8; 1_000]; - - controlled_write.write_all(&buf).await.unwrap(); - controlled_write.flush().await.unwrap(); - - assert_eq!(io_controls.num_bytes(), 1_000u64); - } - #[test] fn test_controlled_writer_limited_sync() { let io_controls = IoControls::default().set_throughput_limit(ByteSize::mb(2)); From 12529e62501eac1505a0522813c231c3b284303b Mon Sep 17 00:00:00 2001 From: Luca Cominardi Date: Tue, 5 May 2026 15:15:41 +0200 Subject: [PATCH 45/54] Compact metric macro call sites across the codebase Collapse unnecessarily multi-line label_names!, labels!, 
label_values!, counter!, gauge!, and histogram! invocations onto single or fewer lines where they fit within ~100 characters. Co-authored-by: Cursor --- quickwit/quickwit-common/src/runtimes.rs | 3 +- .../src/model/shard_table.rs | 26 +-- .../src/actors/doc_processor.rs | 10 +- .../src/actors/indexing_pipeline.rs | 30 +-- .../src/actors/merge_pipeline.rs | 15 +- .../metrics_pipeline/parquet_uploader.rs | 5 +- .../quickwit-indexing/src/actors/uploader.rs | 15 +- .../quickwit-ingest/src/ingest_api_service.rs | 7 +- .../quickwit-ingest/src/ingest_v2/ingester.rs | 27 +-- quickwit/quickwit-ingest/src/lib.rs | 9 +- quickwit/quickwit-jaeger/src/lib.rs | 62 +++---- quickwit/quickwit-jaeger/src/v1.rs | 26 +-- quickwit/quickwit-jaeger/src/v2.rs | 48 ++--- .../src/actors/garbage_collector.rs | 26 +-- .../quickwit-lambda-client/src/invoker.rs | 12 +- .../benches/quickwit_metrics.rs | 172 ++++++------------ .../quickwit-metrics/examples/http_service.rs | 21 +-- quickwit/quickwit-metrics/tests/counter.rs | 22 +-- quickwit/quickwit-metrics/tests/gauge.rs | 16 +- quickwit/quickwit-metrics/tests/histogram.rs | 17 +- .../quickwit-opentelemetry/src/otlp/logs.rs | 35 +--- .../src/otlp/otel_metrics.rs | 37 +--- .../quickwit-opentelemetry/src/otlp/traces.rs | 40 +--- .../src/ingest/processor.rs | 21 +-- .../quickwit-search/src/metrics_trackers.rs | 40 +--- 25 files changed, 224 insertions(+), 518 deletions(-) diff --git a/quickwit/quickwit-common/src/runtimes.rs b/quickwit/quickwit-common/src/runtimes.rs index 18a59b9b6c9..04c3a6cef8f 100644 --- a/quickwit/quickwit-common/src/runtimes.rs +++ b/quickwit/quickwit-common/src/runtimes.rs @@ -219,8 +219,7 @@ impl RuntimeMetricsRecorder { scheduled_tasks: gauge!(parent: TOKIO_SCHEDULED_TASKS, labels: [labels]), worker_busy_duration_milliseconds_total: counter!( parent: TOKIO_WORKER_BUSY_DURATION_MILLISECONDS_TOTAL, - labels: [labels], - ), + labels: [labels]), worker_busy_ratio: gauge!(parent: TOKIO_WORKER_BUSY_RATIO, labels: [labels]), 
worker_threads: gauge!(parent: TOKIO_WORKER_THREADS, labels: [labels]), } diff --git a/quickwit/quickwit-control-plane/src/model/shard_table.rs b/quickwit/quickwit-control-plane/src/model/shard_table.rs index 10af4845b80..ba95d629735 100644 --- a/quickwit/quickwit-control-plane/src/model/shard_table.rs +++ b/quickwit/quickwit-control-plane/src/model/shard_table.rs @@ -465,16 +465,10 @@ impl ShardTable { if index_label == index_id { let shard_stats = table_entry.shards_stats(); let labels = label_values!(INDEX_ID_LABELS => index_label.to_string()); - gauge!( - parent: OPEN_SHARDS, - labels: [labels], - ) - .set(shard_stats.num_open_shards as f64); - gauge!( - parent: CLOSED_SHARDS, - labels: [labels], - ) - .set(shard_stats.num_closed_shards as f64); + gauge!(parent: OPEN_SHARDS, labels: [labels]) + .set(shard_stats.num_open_shards as f64); + gauge!(parent: CLOSED_SHARDS, labels: [labels]) + .set(shard_stats.num_closed_shards as f64); return; } // Per-index metrics are disabled, so we update the metrics for all sources. 
@@ -489,16 +483,8 @@ impl ShardTable { } } let labels = label_values!(INDEX_ID_LABELS => index_label.to_string()); - gauge!( - parent: OPEN_SHARDS, - labels: [labels], - ) - .set(num_open_shards as f64); - gauge!( - parent: CLOSED_SHARDS, - labels: [labels], - ) - .set(num_closed_shards as f64); + gauge!(parent: OPEN_SHARDS, labels: [labels]).set(num_open_shards as f64); + gauge!(parent: CLOSED_SHARDS, labels: [labels]).set(num_closed_shards as f64); } pub fn update_shards( diff --git a/quickwit/quickwit-indexing/src/actors/doc_processor.rs b/quickwit/quickwit-indexing/src/actors/doc_processor.rs index b1d293f4195..cab1b6aab43 100644 --- a/quickwit/quickwit-indexing/src/actors/doc_processor.rs +++ b/quickwit/quickwit-indexing/src/actors/doc_processor.rs @@ -292,14 +292,8 @@ impl DocProcessorCounter { ); DocProcessorCounter { num_docs: Default::default(), - num_docs_metric: counter!( - parent: PROCESSED_DOCS_TOTAL, - labels: [labels], - ), - num_bytes_metric: counter!( - parent: PROCESSED_BYTES, - labels: [labels], - ), + num_docs_metric: counter!(parent: PROCESSED_DOCS_TOTAL, labels: [labels]), + num_bytes_metric: counter!(parent: PROCESSED_BYTES, labels: [labels]), } } diff --git a/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs b/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs index 46143f00a82..686bd942495 100644 --- a/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs +++ b/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs @@ -124,10 +124,7 @@ impl Actor for IndexingPipeline { impl IndexingPipeline { pub fn new(params: IndexingPipelineParams) -> Self { - let indexing_pipelines_gauge = gauge!( - parent: INDEXING_PIPELINES, - "index" => params.pipeline_id.index_uid.index_id.clone(), - ); + let indexing_pipelines_gauge = gauge!(parent: INDEXING_PIPELINES, "index" => params.pipeline_id.index_uid.index_id.clone()); let indexing_pipelines_gauge_guard = GaugeGuard::new(&indexing_pipelines_gauge, 1.0); let params_fingerprint = 
params.params_fingerprint; IndexingPipeline { @@ -313,19 +310,13 @@ impl IndexingPipeline { let (publisher_mailbox, publisher_handle) = ctx .spawn_actor() .set_kill_switch(self.kill_switch.clone()) - .set_backpressure_micros_counter(counter!( - parent: BACKPRESSURE_MICROS, - labels: [label_values!(ACTOR_NAME => "publisher")], - )) + .set_backpressure_micros_counter(counter!(parent: BACKPRESSURE_MICROS, labels: [label_values!(ACTOR_NAME => "publisher")])) .spawn(publisher); let sequencer = Sequencer::new(publisher_mailbox); let (sequencer_mailbox, sequencer_handle) = ctx .spawn_actor() - .set_backpressure_micros_counter(counter!( - parent: BACKPRESSURE_MICROS, - labels: [label_values!(ACTOR_NAME => "sequencer")], - )) + .set_backpressure_micros_counter(counter!(parent: BACKPRESSURE_MICROS, labels: [label_values!(ACTOR_NAME => "sequencer")])) .set_kill_switch(self.kill_switch.clone()) .spawn(sequencer); @@ -342,10 +333,7 @@ impl IndexingPipeline { ); let (uploader_mailbox, uploader_handle) = ctx .spawn_actor() - .set_backpressure_micros_counter(counter!( - parent: BACKPRESSURE_MICROS, - labels: [label_values!(ACTOR_NAME => "uploader")], - )) + .set_backpressure_micros_counter(counter!(parent: BACKPRESSURE_MICROS, labels: [label_values!(ACTOR_NAME => "uploader")])) .set_kill_switch(self.kill_switch.clone()) .spawn(uploader); @@ -376,10 +364,7 @@ impl IndexingPipeline { ); let (indexer_mailbox, indexer_handle) = ctx .spawn_actor() - .set_backpressure_micros_counter(counter!( - parent: BACKPRESSURE_MICROS, - labels: [label_values!(ACTOR_NAME => "indexer")], - )) + .set_backpressure_micros_counter(counter!(parent: BACKPRESSURE_MICROS, labels: [label_values!(ACTOR_NAME => "indexer")])) .set_kill_switch(self.kill_switch.clone()) .spawn(indexer); @@ -393,10 +378,7 @@ impl IndexingPipeline { )?; let (doc_processor_mailbox, doc_processor_handle) = ctx .spawn_actor() - .set_backpressure_micros_counter(counter!( - parent: BACKPRESSURE_MICROS, - labels: [label_values!(ACTOR_NAME 
=> "doc_processor")], - )) + .set_backpressure_micros_counter(counter!(parent: BACKPRESSURE_MICROS, labels: [label_values!(ACTOR_NAME => "doc_processor")])) .set_kill_switch(self.kill_switch.clone()) .spawn(doc_processor); let source_runtime = SourceRuntime { diff --git a/quickwit/quickwit-indexing/src/actors/merge_pipeline.rs b/quickwit/quickwit-indexing/src/actors/merge_pipeline.rs index 7b57a5af751..656200717c2 100644 --- a/quickwit/quickwit-indexing/src/actors/merge_pipeline.rs +++ b/quickwit/quickwit-indexing/src/actors/merge_pipeline.rs @@ -274,10 +274,7 @@ impl MergePipeline { let (merge_publisher_mailbox, merge_publisher_handle) = ctx .spawn_actor() .set_kill_switch(self.kill_switch.clone()) - .set_backpressure_micros_counter(counter!( - parent: BACKPRESSURE_MICROS, - labels: [label_values!(ACTOR_NAME => "merge_publisher")], - )) + .set_backpressure_micros_counter(counter!(parent: BACKPRESSURE_MICROS, labels: [label_values!(ACTOR_NAME => "merge_publisher")])) .spawn(merge_publisher); // Merge uploader @@ -323,10 +320,7 @@ impl MergePipeline { let (merge_executor_mailbox, merge_executor_handle) = ctx .spawn_actor() .set_kill_switch(self.kill_switch.clone()) - .set_backpressure_micros_counter(counter!( - parent: BACKPRESSURE_MICROS, - labels: [label_values!(ACTOR_NAME => "merge_executor")], - )) + .set_backpressure_micros_counter(counter!(parent: BACKPRESSURE_MICROS, labels: [label_values!(ACTOR_NAME => "merge_executor")])) .spawn(merge_executor); let merge_split_downloader = MergeSplitDownloader { @@ -338,10 +332,7 @@ impl MergePipeline { let (merge_split_downloader_mailbox, merge_split_downloader_handle) = ctx .spawn_actor() .set_kill_switch(self.kill_switch.clone()) - .set_backpressure_micros_counter(counter!( - parent: BACKPRESSURE_MICROS, - labels: [label_values!(ACTOR_NAME => "merge_split_downloader")], - )) + .set_backpressure_micros_counter(counter!(parent: BACKPRESSURE_MICROS, labels: [label_values!(ACTOR_NAME => "merge_split_downloader")])) 
.spawn(merge_split_downloader); // Merge planner diff --git a/quickwit/quickwit-indexing/src/actors/metrics_pipeline/parquet_uploader.rs b/quickwit/quickwit-indexing/src/actors/metrics_pipeline/parquet_uploader.rs index c3c4cd31a87..84648c8ed2d 100644 --- a/quickwit/quickwit-indexing/src/actors/metrics_pipeline/parquet_uploader.rs +++ b/quickwit/quickwit-indexing/src/actors/metrics_pipeline/parquet_uploader.rs @@ -123,10 +123,7 @@ impl ParquetUploader { let _guard = ctx.protect_zone(); let concurrent_upload_permits = CONCURRENT_UPLOAD_PERMITS_METRICS .get_or_init(|| Semaphore::const_new(self.max_concurrent_uploads)); - let gauge = gauge!( - parent: AVAILABLE_CONCURRENT_UPLOAD_PERMITS, - labels: [label_values!(COMPONENT => "metrics")], - ); + let gauge = gauge!(parent: AVAILABLE_CONCURRENT_UPLOAD_PERMITS, labels: [label_values!(COMPONENT => "metrics")]); gauge.set(concurrent_upload_permits.available_permits() as f64); concurrent_upload_permits .acquire() diff --git a/quickwit/quickwit-indexing/src/actors/uploader.rs b/quickwit/quickwit-indexing/src/actors/uploader.rs index 7dbf185283b..49a6124fd95 100644 --- a/quickwit/quickwit-indexing/src/actors/uploader.rs +++ b/quickwit/quickwit-indexing/src/actors/uploader.rs @@ -204,24 +204,15 @@ impl Uploader { match self.uploader_type { UploaderType::IndexUploader => ( &CONCURRENT_UPLOAD_PERMITS_INDEX, - gauge!( - parent: AVAILABLE_CONCURRENT_UPLOAD_PERMITS, - labels: [label_values!(COMPONENT => "indexer")], - ), + gauge!(parent: AVAILABLE_CONCURRENT_UPLOAD_PERMITS, labels: [label_values!(COMPONENT => "indexer")]), ), UploaderType::MergeUploader => ( &CONCURRENT_UPLOAD_PERMITS_MERGE, - gauge!( - parent: AVAILABLE_CONCURRENT_UPLOAD_PERMITS, - labels: [label_values!(COMPONENT => "merger")], - ), + gauge!(parent: AVAILABLE_CONCURRENT_UPLOAD_PERMITS, labels: [label_values!(COMPONENT => "merger")]), ), UploaderType::DeleteUploader => ( &CONCURRENT_UPLOAD_PERMITS_MERGE, - gauge!( - parent: AVAILABLE_CONCURRENT_UPLOAD_PERMITS, - 
labels: [label_values!(COMPONENT => "merger")], - ), + gauge!(parent: AVAILABLE_CONCURRENT_UPLOAD_PERMITS, labels: [label_values!(COMPONENT => "merger")]), ), }; let concurrent_upload_permits = concurrent_upload_permits_once_cell diff --git a/quickwit/quickwit-ingest/src/ingest_api_service.rs b/quickwit/quickwit-ingest/src/ingest_api_service.rs index 8c4141bb4aa..4a6de0dad63 100644 --- a/quickwit/quickwit-ingest/src/ingest_api_service.rs +++ b/quickwit/quickwit-ingest/src/ingest_api_service.rs @@ -203,11 +203,8 @@ impl IngestApiService { num_docs += batch_num_docs; let labels = label_values!(VALIDITY => "valid"); - counter!( - parent: DOCS_BYTES_TOTAL, - labels: [labels], - ) - .increment(batch_num_bytes as u64); + counter!(parent: DOCS_BYTES_TOTAL, labels: [labels]) + .increment(batch_num_bytes as u64); counter!(parent: DOCS_TOTAL, labels: [labels]).increment(batch_num_docs as u64); } // TODO we could fsync here and disable autosync to have better i/o perfs. diff --git a/quickwit/quickwit-ingest/src/ingest_v2/ingester.rs b/quickwit/quickwit-ingest/src/ingest_v2/ingester.rs index 57a03a2ac6c..07798d103ae 100644 --- a/quickwit/quickwit-ingest/src/ingest_v2/ingester.rs +++ b/quickwit/quickwit-ingest/src/ingest_v2/ingester.rs @@ -336,8 +336,7 @@ impl Ingester { counter!( parent: RESET_SHARDS_OPERATIONS_TOTAL, labels: [label_values!(STATUS => "success")], - ) - .increment(1); + ).increment(1); let wal_usage = state_guard.mrecordlog.resource_usage(); report_wal_usage(wal_usage); @@ -348,8 +347,7 @@ impl Ingester { counter!( parent: RESET_SHARDS_OPERATIONS_TOTAL, labels: [label_values!(STATUS => "error")], - ) - .increment(1); + ).increment(1); } Err(_) => { warn!("advise reset shards request timed out"); @@ -357,8 +355,7 @@ impl Ingester { counter!( parent: RESET_SHARDS_OPERATIONS_TOTAL, labels: [label_values!(STATUS => "timeout")], - ) - .increment(1); + ).increment(1); } }; // We still hold the permit while sleeping so we effectively rate limit the reset shards @@ 
-574,13 +571,11 @@ impl Ingester { counter!( parent: DOCS_TOTAL, labels: [label_values!(VALIDITY => "invalid")], - ) - .increment(parse_failures.len() as u64); + ).increment(parse_failures.len() as u64); counter!( parent: DOCS_BYTES_TOTAL, labels: [label_values!(VALIDITY => "invalid")], - ) - .increment(original_batch_num_bytes); + ).increment(original_batch_num_bytes); let persist_success = PersistSuccess { subrequest_id: subrequest.subrequest_id, index_uid: subrequest.index_uid, @@ -597,24 +592,20 @@ impl Ingester { counter!( parent: DOCS_TOTAL, labels: [label_values!(VALIDITY => "valid")], - ) - .increment(valid_doc_batch.num_docs() as u64); + ).increment(valid_doc_batch.num_docs() as u64); counter!( parent: DOCS_BYTES_TOTAL, labels: [label_values!(VALIDITY => "valid")], - ) - .increment(valid_doc_batch.num_bytes() as u64); + ).increment(valid_doc_batch.num_bytes() as u64); if !parse_failures.is_empty() { counter!( parent: DOCS_TOTAL, labels: [label_values!(VALIDITY => "invalid")], - ) - .increment(parse_failures.len() as u64); + ).increment(parse_failures.len() as u64); counter!( parent: DOCS_BYTES_TOTAL, labels: [label_values!(VALIDITY => "invalid")], - ) - .increment(original_batch_num_bytes - valid_doc_batch.num_bytes() as u64); + ).increment(original_batch_num_bytes - valid_doc_batch.num_bytes() as u64); } let valid_batch_num_bytes = valid_doc_batch.num_bytes() as u64; shard.rate_meter.update(valid_batch_num_bytes); diff --git a/quickwit/quickwit-ingest/src/lib.rs b/quickwit/quickwit-ingest/src/lib.rs index bde0bd8db0c..bfc8f9aff88 100644 --- a/quickwit/quickwit-ingest/src/lib.rs +++ b/quickwit/quickwit-ingest/src/lib.rs @@ -113,8 +113,7 @@ macro_rules! with_lock_metrics { quickwit_metrics::gauge!( parent: $crate::ingest_v2::metrics::WAL_ACQUIRE_LOCK_REQUESTS_IN_FLIGHT, labels: [labels], - ) - .increment(1.0); + ).increment(1.0); let now = std::time::Instant::now(); let guard = $future; @@ -129,13 +128,11 @@ macro_rules! 
with_lock_metrics { quickwit_metrics::gauge!( parent: $crate::ingest_v2::metrics::WAL_ACQUIRE_LOCK_REQUESTS_IN_FLIGHT, labels: [labels], - ) - .decrement(1.0); + ).decrement(1.0); quickwit_metrics::histogram!( parent: $crate::ingest_v2::metrics::WAL_ACQUIRE_LOCK_REQUEST_DURATION_SECS, labels: [labels], - ) - .record(elapsed.as_secs_f64()); + ).record(elapsed.as_secs_f64()); guard } diff --git a/quickwit/quickwit-jaeger/src/lib.rs b/quickwit/quickwit-jaeger/src/lib.rs index fd271585d8e..de770e29e5d 100644 --- a/quickwit/quickwit-jaeger/src/lib.rs +++ b/quickwit/quickwit-jaeger/src/lib.rs @@ -420,55 +420,37 @@ impl JaegerService { current_span.record("num_spans", num_spans_total); current_span.record("num_bytes", num_bytes_total); - counter!( - parent: FETCHED_TRACES_TOTAL, - labels: [label_values!( - OPERATION_INDEX_LABELS => - operation_name, OTEL_TRACES_INDEX_ID - )], - ) - .increment(num_traces); + let labels = label_values!( + OPERATION_INDEX_LABELS => operation_name, OTEL_TRACES_INDEX_ID + ); + counter!(parent: FETCHED_TRACES_TOTAL, labels: [labels]) + .increment(num_traces); let elapsed = request_start.elapsed().as_secs_f64(); - histogram!( - parent: REQUEST_DURATION_SECONDS, - labels: [label_values!( - OPERATION_INDEX_ERROR_LABELS => - operation_name, OTEL_TRACES_INDEX_ID, "false" - )], - ) - .record(elapsed); + let err_labels = label_values!( + OPERATION_INDEX_ERROR_LABELS => + operation_name, OTEL_TRACES_INDEX_ID, "false" + ); + histogram!(parent: REQUEST_DURATION_SECONDS, labels: [err_labels]) + .record(elapsed); }); Ok(ReceiverStream::new(rx)) } } pub(crate) fn record_error(operation_name: &'static str, request_start: Instant) { - counter!( - parent: REQUEST_ERRORS_TOTAL, - labels: [label_values!( - OPERATION_INDEX_LABELS => - operation_name, OTEL_TRACES_INDEX_ID - )], - ) - .increment(1); + let labels = label_values!(OPERATION_INDEX_LABELS => operation_name, OTEL_TRACES_INDEX_ID); + counter!(parent: REQUEST_ERRORS_TOTAL, labels: [labels]).increment(1); 
let elapsed = request_start.elapsed().as_secs_f64(); - histogram!( - parent: REQUEST_DURATION_SECONDS, - labels: [label_values!( - OPERATION_INDEX_ERROR_LABELS => - operation_name, OTEL_TRACES_INDEX_ID, "true" - )], - ) - .record(elapsed); + let err_labels = label_values!( + OPERATION_INDEX_ERROR_LABELS => operation_name, OTEL_TRACES_INDEX_ID, "true" + ); + histogram!(parent: REQUEST_DURATION_SECONDS, labels: [err_labels]).record(elapsed); } pub(crate) fn record_send(operation_name: &'static str, num_spans: usize, num_bytes: usize) { - let labels = label_values!( - OPERATION_INDEX_LABELS => - operation_name, OTEL_TRACES_INDEX_ID - ); + let labels = label_values!(OPERATION_INDEX_LABELS => operation_name, OTEL_TRACES_INDEX_ID); counter!(parent: FETCHED_SPANS_TOTAL, labels: [labels]).increment(num_spans as u64); counter!(parent: TRANSFERRED_BYTES_TOTAL, labels: [labels]).increment(num_bytes as u64); } @@ -1093,7 +1075,9 @@ fn collect_trace_ids( #[allow(clippy::result_large_err)] fn json_deserialize<'a, T>(json: &'a str, label: &'static str) -> Result -where T: Deserialize<'a> { +where + T: Deserialize<'a>, +{ match serde_json::from_str(json) { Ok(deserialized) => Ok(deserialized), Err(error) => { @@ -1107,7 +1091,9 @@ where T: Deserialize<'a> { #[allow(clippy::result_large_err)] fn postcard_deserialize<'a, T>(json: &'a [u8], label: &'static str) -> Result -where T: Deserialize<'a> { +where + T: Deserialize<'a>, +{ match postcard::from_bytes(json) { Ok(deserialized) => Ok(deserialized), Err(error) => { diff --git a/quickwit/quickwit-jaeger/src/v1.rs b/quickwit/quickwit-jaeger/src/v1.rs index 74ae3d4e0f4..b0f33d1893d 100644 --- a/quickwit/quickwit-jaeger/src/v1.rs +++ b/quickwit/quickwit-jaeger/src/v1.rs @@ -40,30 +40,20 @@ macro_rules! 
metrics { let operation = stringify!($operation); let index = $index; let labels = label_values!(OPERATION_INDEX_LABELS => operation, index); - counter!( - parent: REQUESTS_TOTAL, - labels: [labels], - ) - .increment(1); + counter!(parent: REQUESTS_TOTAL, labels: [labels]).increment(1); let (res, is_error) = match $expr { - ok @ Ok(_) => { - (ok, "false") - }, + ok @ Ok(_) => (ok, "false"), err @ Err(_) => { - counter!( - parent: REQUEST_ERRORS_TOTAL, - labels: [labels], - ) - .increment(1); + counter!(parent: REQUEST_ERRORS_TOTAL, labels: [labels]).increment(1); (err, "true") }, }; let elapsed = start.elapsed().as_secs_f64(); - histogram!( - parent: REQUEST_DURATION_SECONDS, - labels: [label_values!(OPERATION_INDEX_ERROR_LABELS => operation, index, is_error)], - ) - .record(elapsed); + let err_labels = label_values!( + OPERATION_INDEX_ERROR_LABELS => operation, index, is_error + ); + histogram!(parent: REQUEST_DURATION_SECONDS, labels: [err_labels]) + .record(elapsed); return res.map(Response::new); }; diff --git a/quickwit/quickwit-jaeger/src/v2.rs b/quickwit/quickwit-jaeger/src/v2.rs index c71da2725c8..826f82828a1 100644 --- a/quickwit/quickwit-jaeger/src/v2.rs +++ b/quickwit/quickwit-jaeger/src/v2.rs @@ -66,30 +66,20 @@ macro_rules! 
metrics { let operation = stringify!($operation); let index = $index; let labels = label_values!(OPERATION_INDEX_LABELS => operation, index); - counter!( - parent: REQUESTS_TOTAL, - labels: [labels], - ) - .increment(1); + counter!(parent: REQUESTS_TOTAL, labels: [labels]).increment(1); let (res, is_error) = match $expr { - ok @ Ok(_) => { - (ok, "false") - }, + ok @ Ok(_) => (ok, "false"), err @ Err(_) => { - counter!( - parent: REQUEST_ERRORS_TOTAL, - labels: [labels], - ) - .increment(1); + counter!(parent: REQUEST_ERRORS_TOTAL, labels: [labels]).increment(1); (err, "true") }, }; let elapsed = start.elapsed().as_secs_f64(); - histogram!( - parent: REQUEST_DURATION_SECONDS, - labels: [label_values!(OPERATION_INDEX_ERROR_LABELS => operation, index, is_error)], - ) - .record(elapsed); + let err_labels = label_values!( + OPERATION_INDEX_ERROR_LABELS => operation, index, is_error + ); + histogram!(parent: REQUEST_DURATION_SECONDS, labels: [err_labels]) + .record(elapsed); return res.map(Response::new); }; @@ -443,24 +433,14 @@ async fn stream_otel_spans_impl( record_send(operation_name, num_spans, num_bytes); - counter!( - parent: FETCHED_TRACES_TOTAL, - labels: [label_values!( - OPERATION_INDEX_LABELS => - operation_name, OTEL_TRACES_INDEX_ID - )], - ) - .increment(trace_ids.len() as u64); + let labels = label_values!(OPERATION_INDEX_LABELS => operation_name, OTEL_TRACES_INDEX_ID); + counter!(parent: FETCHED_TRACES_TOTAL, labels: [labels]).increment(trace_ids.len() as u64); let elapsed = request_start.elapsed().as_secs_f64(); - histogram!( - parent: REQUEST_DURATION_SECONDS, - labels: [label_values!( - OPERATION_INDEX_ERROR_LABELS => - operation_name, OTEL_TRACES_INDEX_ID, "false" - )], - ) - .record(elapsed); + let err_labels = label_values!( + OPERATION_INDEX_ERROR_LABELS => operation_name, OTEL_TRACES_INDEX_ID, "false" + ); + histogram!(parent: REQUEST_DURATION_SECONDS, labels: [err_labels]).record(elapsed); Ok(qw_spans) } diff --git 
a/quickwit/quickwit-janitor/src/actors/garbage_collector.rs b/quickwit/quickwit-janitor/src/actors/garbage_collector.rs index 660f38b3a54..a043a93154f 100644 --- a/quickwit/quickwit-janitor/src/actors/garbage_collector.rs +++ b/quickwit/quickwit-janitor/src/actors/garbage_collector.rs @@ -210,11 +210,8 @@ impl GarbageCollector { .await; let tantivy_run_duration = tantivy_start.elapsed().as_secs(); - counter!( - parent: GC_SECONDS_TOTAL, - labels: [labels_split], - ) - .increment(tantivy_run_duration); + counter!(parent: GC_SECONDS_TOTAL, labels: [labels_split]) + .increment(tantivy_run_duration); let result = match gc_res { Ok(removal_info) => { @@ -222,8 +219,7 @@ impl GarbageCollector { counter!( parent: GC_RUNS, labels: [labels_split, label_values!(labels_result => "success")], - ) - .increment(1); + ).increment(1); GcRunResult { num_deleted_splits: removal_info.removed_split_entries.len(), num_deleted_bytes: removal_info @@ -245,8 +241,7 @@ impl GarbageCollector { counter!( parent: GC_RUNS, labels: [labels_split, label_values!(labels_result => "error")], - ) - .increment(1); + ).increment(1); error!(error=?error, "failed to run garbage collection"); GcRunResult::failed() } @@ -271,11 +266,8 @@ impl GarbageCollector { .await; let parquet_run_duration = parquet_start.elapsed().as_secs(); - counter!( - parent: GC_SECONDS_TOTAL, - labels: [labels_split], - ) - .increment(parquet_run_duration); + counter!(parent: GC_SECONDS_TOTAL, labels: [labels_split]) + .increment(parquet_run_duration); let result = match gc_res { Ok(removal_info) => { @@ -283,8 +275,7 @@ impl GarbageCollector { counter!( parent: GC_RUNS, labels: [labels_split, label_values!(labels_result => "success")], - ) - .increment(1); + ).increment(1); GcRunResult { num_deleted_splits: removal_info.removed_split_count(), num_deleted_bytes: removal_info.removed_bytes() as usize, @@ -302,8 +293,7 @@ impl GarbageCollector { counter!( parent: GC_RUNS, labels: [labels_split, label_values!(labels_result => 
"error")], - ) - .increment(1); + ).increment(1); error!(error=?error, "failed to run parquet garbage collection"); GcRunResult::failed() } diff --git a/quickwit/quickwit-lambda-client/src/invoker.rs b/quickwit/quickwit-lambda-client/src/invoker.rs index f2395b7e5e6..63f7c45aa74 100644 --- a/quickwit/quickwit-lambda-client/src/invoker.rs +++ b/quickwit/quickwit-lambda-client/src/invoker.rs @@ -176,16 +176,8 @@ impl LambdaLeafSearchInvoker for AwsLambdaInvoker { let elapsed = start.elapsed().as_secs_f64(); let status = if result.is_ok() { "success" } else { "error" }; let labels = labels!("status" => status); - counter!( - parent: LEAF_SEARCH_REQUESTS_TOTAL, - labels: [labels], - ) - .increment(1); - histogram!( - parent: LEAF_SEARCH_DURATION_SECONDS, - labels: [labels], - ) - .record(elapsed); + counter!(parent: LEAF_SEARCH_REQUESTS_TOTAL, labels: [labels]).increment(1); + histogram!(parent: LEAF_SEARCH_DURATION_SECONDS, labels: [labels]).record(elapsed); result } } diff --git a/quickwit/quickwit-metrics/benches/quickwit_metrics.rs b/quickwit/quickwit-metrics/benches/quickwit_metrics.rs index ea089f12bf4..09bfd27f4ee 100644 --- a/quickwit/quickwit-metrics/benches/quickwit_metrics.rs +++ b/quickwit/quickwit-metrics/benches/quickwit_metrics.rs @@ -113,8 +113,7 @@ fn on_the_fly_counter(c: &mut Criterion) { name: "otf_counter", description: "bench counter", subsystem: "bench" - ) - .increment(1); + ).increment(1); }); }); @@ -125,8 +124,7 @@ fn on_the_fly_counter(c: &mut Criterion) { description: "bench counter", subsystem: "bench", "service" => "api" - ) - .increment(1); + ).increment(1); }); }); @@ -139,8 +137,7 @@ fn on_the_fly_counter(c: &mut Criterion) { "service" => "api", "method" => "GET", "endpoint" => "/health" - ) - .increment(1); + ).increment(1); }); }); @@ -155,8 +152,7 @@ fn on_the_fly_counter(c: &mut Criterion) { "endpoint" => "/health", "status" => "200", "region" => "us-east-1" - ) - .increment(1); + ).increment(1); }); }); @@ -174,8 +170,7 @@ fn 
on_the_fly_gauge(c: &mut Criterion) { name: "otf_gauge", description: "bench gauge", subsystem: "bench" - ) - .set(42.0); + ).set(42.0); }); }); @@ -186,8 +181,7 @@ fn on_the_fly_gauge(c: &mut Criterion) { description: "bench gauge", subsystem: "bench", "service" => "api" - ) - .set(42.0); + ).set(42.0); }); }); @@ -200,8 +194,7 @@ fn on_the_fly_gauge(c: &mut Criterion) { "service" => "api", "method" => "GET", "endpoint" => "/health" - ) - .set(42.0); + ).set(42.0); }); }); @@ -216,8 +209,7 @@ fn on_the_fly_gauge(c: &mut Criterion) { "endpoint" => "/health", "status" => "200", "region" => "us-east-1" - ) - .set(42.0); + ).set(42.0); }); }); @@ -236,8 +228,7 @@ fn on_the_fly_histogram(c: &mut Criterion) { description: "bench histogram", subsystem: "bench", buckets: vec![0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0] - ) - .record(0.123); + ).record(0.123); }); }); @@ -249,8 +240,7 @@ fn on_the_fly_histogram(c: &mut Criterion) { subsystem: "bench", buckets: vec![0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0], "service" => "api" - ) - .record(0.123); + ).record(0.123); }); }); @@ -264,8 +254,7 @@ fn on_the_fly_histogram(c: &mut Criterion) { "service" => "api", "method" => "GET", "endpoint" => "/health" - ) - .record(0.123); + ).record(0.123); }); }); @@ -281,8 +270,7 @@ fn on_the_fly_histogram(c: &mut Criterion) { "endpoint" => "/health", "status" => "200", "region" => "us-east-1" - ) - .record(0.123); + ).record(0.123); }); }); @@ -522,8 +510,7 @@ fn parent_counter(c: &mut Criterion) { "method" => "GET", "endpoint" => "/health", "status" => "200" - ) - .increment(1); + ).increment(1); }); }); @@ -549,8 +536,7 @@ fn parent_gauge(c: &mut Criterion) { "method" => "GET", "endpoint" => "/health", "status" => "200" - ) - .set(42.0); + ).set(42.0); }); }); @@ -576,8 +562,7 @@ fn parent_histogram(c: &mut Criterion) { "method" => "GET", "endpoint" => "/health", "status" => "200" - ) - .record(0.123); + ).record(0.123); }); }); @@ -749,11 +734,8 @@ fn labels_counter(c: &mut 
Criterion) { group.bench_function("static/1", |b| { b.iter(|| { - counter!( - parent: PARENT_COUNTER, - labels: [label_values!(LABELS_1 => "GET")] - ) - .increment(1); + counter!(parent: PARENT_COUNTER, labels: [label_values!(LABELS_1 => "GET")]) + .increment(1); }); }); @@ -762,8 +744,7 @@ fn labels_counter(c: &mut Criterion) { counter!( parent: PARENT_COUNTER, labels: [label_values!(LABELS_3 => "GET", "/health", "200")] - ) - .increment(1); + ).increment(1); }); }); @@ -772,8 +753,7 @@ fn labels_counter(c: &mut Criterion) { counter!( parent: PARENT_COUNTER, labels: [label_values!(LABELS_1 => "GET".to_string())] - ) - .increment(1); + ).increment(1); }); }); @@ -785,11 +765,8 @@ fn labels_counter(c: &mut Criterion) { b.iter(|| { let m = methods[idx % methods.len()]; idx += 1; - counter!( - parent: PARENT_COUNTER, - labels: [label_values!(LABELS_1 => m)] - ) - .increment(1); + counter!(parent: PARENT_COUNTER, labels: [label_values!(LABELS_1 => m)]) + .increment(1); }); }); @@ -804,11 +781,8 @@ fn labels_gauge(c: &mut Criterion) { group.bench_function("static/1", |b| { b.iter(|| { - gauge!( - parent: PARENT_GAUGE, - labels: [label_values!(LABELS_1 => "GET")] - ) - .set(42.0); + gauge!(parent: PARENT_GAUGE, labels: [label_values!(LABELS_1 => "GET")]) + .set(42.0); }); }); @@ -817,8 +791,7 @@ fn labels_gauge(c: &mut Criterion) { gauge!( parent: PARENT_GAUGE, labels: [label_values!(LABELS_3 => "GET", "/health", "200")] - ) - .set(42.0); + ).set(42.0); }); }); @@ -833,11 +806,8 @@ fn labels_histogram(c: &mut Criterion) { group.bench_function("static/1", |b| { b.iter(|| { - histogram!( - parent: PARENT_HISTOGRAM, - labels: [label_values!(LABELS_1 => "GET")] - ) - .record(0.123); + histogram!(parent: PARENT_HISTOGRAM, labels: [label_values!(LABELS_1 => "GET")]) + .record(0.123); }); }); @@ -846,8 +816,7 @@ fn labels_histogram(c: &mut Criterion) { histogram!( parent: PARENT_HISTOGRAM, labels: [label_values!(LABELS_3 => "GET", "/health", "200")] - ) - .record(0.123); + 
).record(0.123); }); }); @@ -874,35 +843,26 @@ fn composite_counter(c: &mut Criterion) { counter!( parent: PARENT_COUNTER, labels: [label_values!(COMP_ALL_3 => "GET", "/health", "200")], - ) - .increment(1); + ).increment(1); }); }); group.bench_function("compose_1x3", |b| { b.iter(|| { - counter!( - parent: PARENT_COUNTER, - labels: [ - label_values!(COMP_METHOD => "GET"), - label_values!(COMP_ENDPOINT => "/health"), - label_values!(COMP_STATUS => "200"), - ], - ) - .increment(1); + counter!(parent: PARENT_COUNTER, labels: [ + label_values!(COMP_METHOD => "GET"), + label_values!(COMP_ENDPOINT => "/health"), + label_values!(COMP_STATUS => "200"), + ]).increment(1); }); }); group.bench_function("compose_1x2", |b| { b.iter(|| { - counter!( - parent: PARENT_COUNTER, - labels: [ - label_values!(COMP_METHOD => "GET"), - label_values!(COMP_ENDPOINT => "/health"), - ], - ) - .increment(1); + counter!(parent: PARENT_COUNTER, labels: [ + label_values!(COMP_METHOD => "GET"), + label_values!(COMP_ENDPOINT => "/health"), + ]).increment(1); }); }); @@ -920,35 +880,26 @@ fn composite_gauge(c: &mut Criterion) { gauge!( parent: PARENT_GAUGE, labels: [label_values!(COMP_ALL_3 => "GET", "/health", "200")], - ) - .set(42.0); + ).set(42.0); }); }); group.bench_function("compose_1x3", |b| { b.iter(|| { - gauge!( - parent: PARENT_GAUGE, - labels: [ - label_values!(COMP_METHOD => "GET"), - label_values!(COMP_ENDPOINT => "/health"), - label_values!(COMP_STATUS => "200"), - ], - ) - .set(42.0); + gauge!(parent: PARENT_GAUGE, labels: [ + label_values!(COMP_METHOD => "GET"), + label_values!(COMP_ENDPOINT => "/health"), + label_values!(COMP_STATUS => "200"), + ]).set(42.0); }); }); group.bench_function("compose_1x2", |b| { b.iter(|| { - gauge!( - parent: PARENT_GAUGE, - labels: [ - label_values!(COMP_METHOD => "GET"), - label_values!(COMP_ENDPOINT => "/health"), - ], - ) - .set(42.0); + gauge!(parent: PARENT_GAUGE, labels: [ + label_values!(COMP_METHOD => "GET"), + label_values!(COMP_ENDPOINT 
=> "/health"), + ]).set(42.0); }); }); @@ -966,35 +917,26 @@ fn composite_histogram(c: &mut Criterion) { histogram!( parent: PARENT_HISTOGRAM, labels: [label_values!(COMP_ALL_3 => "GET", "/health", "200")], - ) - .record(0.123); + ).record(0.123); }); }); group.bench_function("compose_1x3", |b| { b.iter(|| { - histogram!( - parent: PARENT_HISTOGRAM, - labels: [ - label_values!(COMP_METHOD => "GET"), - label_values!(COMP_ENDPOINT => "/health"), - label_values!(COMP_STATUS => "200"), - ], - ) - .record(0.123); + histogram!(parent: PARENT_HISTOGRAM, labels: [ + label_values!(COMP_METHOD => "GET"), + label_values!(COMP_ENDPOINT => "/health"), + label_values!(COMP_STATUS => "200"), + ]).record(0.123); }); }); group.bench_function("compose_1x2", |b| { b.iter(|| { - histogram!( - parent: PARENT_HISTOGRAM, - labels: [ - label_values!(COMP_METHOD => "GET"), - label_values!(COMP_ENDPOINT => "/health"), - ], - ) - .record(0.123); + histogram!(parent: PARENT_HISTOGRAM, labels: [ + label_values!(COMP_METHOD => "GET"), + label_values!(COMP_ENDPOINT => "/health"), + ]).record(0.123); }); }); diff --git a/quickwit/quickwit-metrics/examples/http_service.rs b/quickwit/quickwit-metrics/examples/http_service.rs index d52b7072605..aea0f8c5305 100644 --- a/quickwit/quickwit-metrics/examples/http_service.rs +++ b/quickwit/quickwit-metrics/examples/http_service.rs @@ -126,12 +126,8 @@ fn install_prometheus_recorder() { fn handle_request(method: &'static str, path: &'static str, region: &'static str) { let duration_ms: f64 = (path.len() as f64) * 0.013; - histogram!( - parent: HTTP_REQUEST_DURATION, - "method" => method, - "path" => path, - ) - .record(duration_ms); + histogram!(parent: HTTP_REQUEST_DURATION, "method" => method, "path" => path) + .record(duration_ms); let response_size = (path.len() * 100) as f64; HTTP_RESPONSE_SIZE.record(response_size); @@ -143,17 +139,10 @@ fn handle_request(method: &'static str, path: &'static str, region: &'static str } else { "200" }; - counter!( - 
parent: HTTP_REQUESTS_BY_METHOD, - "path" => path, - "status" => status, - ) - .increment(1); + counter!(parent: HTTP_REQUESTS_BY_METHOD, "path" => path, "status" => status) + .increment(1); - let conn_gauge = gauge!( - parent: HTTP_ACTIVE_CONNECTIONS_BY_REGION, - "method" => method, - ); + let conn_gauge = gauge!(parent: HTTP_ACTIVE_CONNECTIONS_BY_REGION, "method" => method); { let _guard = GaugeGuard::new(&conn_gauge, 1.0); } diff --git a/quickwit/quickwit-metrics/tests/counter.rs b/quickwit/quickwit-metrics/tests/counter.rs index 3974b820ee4..ae0ba4db3cd 100644 --- a/quickwit/quickwit-metrics/tests/counter.rs +++ b/quickwit/quickwit-metrics/tests/counter.rs @@ -216,10 +216,9 @@ fn label_composition_two_labels() { ); const REGION: quickwit_metrics::LabelNames<1> = label_names!("region"); const STATUS: quickwit_metrics::LabelNames<1> = label_names!("status"); - let child = counter!( - parent: parent, - labels: [label_values!(REGION => "us-east"), label_values!(STATUS => "ok")], - ); + let child = counter!(parent: parent, labels: [ + label_values!(REGION => "us-east"), label_values!(STATUS => "ok"), + ]); child.increment(3); }); @@ -246,10 +245,9 @@ fn label_composition_three_labels() { description: "three-label composition", subsystem: "test", ); - let child = counter!( - parent: parent, - labels: [labels!("env" => "staging"), labels!("region" => "eu"), labels!("az" => "eu-1a")], - ); + let child = counter!(parent: parent, labels: [ + labels!("env" => "staging"), labels!("region" => "eu"), labels!("az" => "eu-1a"), + ]); child.increment(7); }); @@ -279,10 +277,10 @@ fn label_composition_same_hash_as_single() { const REGION: quickwit_metrics::LabelNames<1> = label_names!("region"); const STATUS: quickwit_metrics::LabelNames<1> = label_names!("status"); - let via_compose = counter!( - parent: parent, - labels: [label_values!(REGION => "us"), label_values!(STATUS => "ok")], - ); + let via_compose = counter!(parent: parent, labels: [ + label_values!(REGION => "us"), + 
label_values!(STATUS => "ok"), + ]); let via_single = counter!( parent: parent, labels: [labels!("region" => "us", "status" => "ok")], diff --git a/quickwit/quickwit-metrics/tests/gauge.rs b/quickwit/quickwit-metrics/tests/gauge.rs index ed46783c39b..18db9c0906e 100644 --- a/quickwit/quickwit-metrics/tests/gauge.rs +++ b/quickwit/quickwit-metrics/tests/gauge.rs @@ -174,10 +174,10 @@ fn label_composition_two_labels() { ); const REGION: quickwit_metrics::LabelNames<1> = label_names!("region"); const STATUS: quickwit_metrics::LabelNames<1> = label_names!("status"); - let child = gauge!( - parent: parent, - labels: [label_values!(REGION => "us-east"), label_values!(STATUS => "ok")], - ); + let child = gauge!(parent: parent, labels: [ + label_values!(REGION => "us-east"), + label_values!(STATUS => "ok"), + ]); child.set(42.0); }); @@ -207,10 +207,10 @@ fn label_composition_same_hash_as_single() { const REGION: quickwit_metrics::LabelNames<1> = label_names!("region"); const STATUS: quickwit_metrics::LabelNames<1> = label_names!("status"); - let via_compose = gauge!( - parent: parent, - labels: [label_values!(REGION => "us"), label_values!(STATUS => "ok")], - ); + let via_compose = gauge!(parent: parent, labels: [ + label_values!(REGION => "us"), + label_values!(STATUS => "ok"), + ]); let via_single = gauge!( parent: parent, labels: [labels!("region" => "us", "status" => "ok")], diff --git a/quickwit/quickwit-metrics/tests/histogram.rs b/quickwit/quickwit-metrics/tests/histogram.rs index 546d362de18..8fb63ff5486 100644 --- a/quickwit/quickwit-metrics/tests/histogram.rs +++ b/quickwit/quickwit-metrics/tests/histogram.rs @@ -109,10 +109,10 @@ fn label_composition_two_labels() { ); const REGION: quickwit_metrics::LabelNames<1> = label_names!("region"); const STATUS: quickwit_metrics::LabelNames<1> = label_names!("status"); - let child = histogram!( - parent: parent, - labels: [label_values!(REGION => "us-east"), label_values!(STATUS => "ok")], - ); + let child = 
histogram!(parent: parent, labels: [ + label_values!(REGION => "us-east"), + label_values!(STATUS => "ok"), + ]); child.record(2.5); }); @@ -146,10 +146,11 @@ fn label_composition_three_labels() { subsystem: "test", buckets: vec![1.0], ); - let child = histogram!( - parent: parent, - labels: [labels!("env" => "staging"), labels!("region" => "eu"), labels!("az" => "eu-1a")], - ); + let child = histogram!(parent: parent, labels: [ + labels!("env" => "staging"), + labels!("region" => "eu"), + labels!("az" => "eu-1a"), + ]); child.record(0.1); }); diff --git a/quickwit/quickwit-opentelemetry/src/otlp/logs.rs b/quickwit/quickwit-opentelemetry/src/otlp/logs.rs index 1912b165bf0..60c42acac08 100644 --- a/quickwit/quickwit-opentelemetry/src/otlp/logs.rs +++ b/quickwit/quickwit-opentelemetry/src/otlp/logs.rs @@ -244,11 +244,7 @@ impl OtlpGrpcLogsService { self.store_logs(index_id.clone(), doc_batch).await?; let labels = label_values!(OTLP_GRPC_LABELS => "logs", index_id, "grpc", "protobuf"); - counter!( - parent: INGESTED_LOG_RECORDS_TOTAL, - labels: [labels], - ) - .increment(num_log_records); + counter!(parent: INGESTED_LOG_RECORDS_TOTAL, labels: [labels]).increment(num_log_records); counter!(parent: INGESTED_BYTES_TOTAL, labels: [labels]).increment(num_bytes); let response = ExportLogsServiceResponse { @@ -320,35 +316,20 @@ impl OtlpGrpcLogsService { ) -> Result { let start = std::time::Instant::now(); - let labels = label_values!( - OTLP_GRPC_LABELS => - "logs", index_id.clone(), "grpc", "protobuf" - ); - counter!( - parent: REQUESTS_TOTAL, - labels: [labels], - ) - .increment(1); + let labels = + label_values!(OTLP_GRPC_LABELS => "logs", index_id.clone(), "grpc", "protobuf"); + counter!(parent: REQUESTS_TOTAL, labels: [labels]).increment(1); let (export_res, is_error) = match self.export_inner(request, index_id.clone()).await { ok @ Ok(_) => (ok, "false"), err @ Err(_) => { - counter!( - parent: REQUEST_ERRORS_TOTAL, - labels: [labels], - ) - .increment(1); + 
counter!(parent: REQUEST_ERRORS_TOTAL, labels: [labels]).increment(1); (err, "true") } }; let elapsed = start.elapsed().as_secs_f64(); - histogram!( - parent: REQUEST_DURATION_SECONDS, - labels: [label_values!( - OTLP_GRPC_ERROR_LABELS => - "logs", index_id, "grpc", "protobuf", is_error - )], - ) - .record(elapsed); + let error_labels = + label_values!(OTLP_GRPC_ERROR_LABELS => "logs", index_id, "grpc", "protobuf", is_error); + histogram!(parent: REQUEST_DURATION_SECONDS, labels: [error_labels]).record(elapsed); export_res } diff --git a/quickwit/quickwit-opentelemetry/src/otlp/otel_metrics.rs b/quickwit/quickwit-opentelemetry/src/otlp/otel_metrics.rs index f159dcbf75c..0c0c77eb979 100644 --- a/quickwit/quickwit-opentelemetry/src/otlp/otel_metrics.rs +++ b/quickwit/quickwit-opentelemetry/src/otlp/otel_metrics.rs @@ -240,11 +240,8 @@ impl OtlpGrpcMetricsService { self.store_metrics(index_id.clone(), doc_batch).await?; let labels = label_values!(OTLP_GRPC_LABELS => "metrics", index_id, "grpc", "protobuf"); - counter!( - parent: INGESTED_DATA_POINTS_TOTAL, - labels: [labels], - ) - .increment(num_data_points - num_parse_errors); + counter!(parent: INGESTED_DATA_POINTS_TOTAL, labels: [labels]) + .increment(num_data_points - num_parse_errors); counter!(parent: INGESTED_BYTES_TOTAL, labels: [labels]).increment(num_bytes); let response = ExportMetricsServiceResponse { @@ -334,37 +331,23 @@ impl OtlpGrpcMetricsService { ) -> Result { let start = std::time::Instant::now(); - let labels = label_values!( - OTLP_GRPC_LABELS => - "metrics", index_id.clone(), "grpc", "protobuf" - ); - counter!( - parent: REQUESTS_TOTAL, - labels: [labels], - ) - .increment(1); + let labels = + label_values!(OTLP_GRPC_LABELS => "metrics", index_id.clone(), "grpc", "protobuf"); + counter!(parent: REQUESTS_TOTAL, labels: [labels]).increment(1); let (export_res, is_error) = match self.export_inner(request, index_id.clone()).await { ok @ Ok(_) => (ok, "false"), err @ Err(_) => { - counter!( - parent: 
REQUEST_ERRORS_TOTAL, - labels: [labels], - ) - .increment(1); + counter!(parent: REQUEST_ERRORS_TOTAL, labels: [labels]).increment(1); (err, "true") } }; let elapsed = start.elapsed().as_secs_f64(); - histogram!( - parent: REQUEST_DURATION_SECONDS, - labels: [label_values!( - OTLP_GRPC_ERROR_LABELS => - "metrics", index_id, "grpc", "protobuf", is_error - )], - ) - .record(elapsed); + let error_labels = label_values!( + OTLP_GRPC_ERROR_LABELS => "metrics", index_id, "grpc", "protobuf", is_error + ); + histogram!(parent: REQUEST_DURATION_SECONDS, labels: [error_labels]).record(elapsed); export_res } diff --git a/quickwit/quickwit-opentelemetry/src/otlp/traces.rs b/quickwit/quickwit-opentelemetry/src/otlp/traces.rs index e34ee2a9c81..26c200f05b1 100644 --- a/quickwit/quickwit-opentelemetry/src/otlp/traces.rs +++ b/quickwit/quickwit-opentelemetry/src/otlp/traces.rs @@ -706,15 +706,8 @@ impl OtlpGrpcTracesService { self.store_spans(index_id.clone(), doc_batch).await?; let labels = label_values!(OTLP_GRPC_LABELS => "trace", index_id, "grpc", "protobuf"); - counter!( - parent: INGESTED_SPANS_TOTAL, - labels: [labels], - ) - .increment(num_spans); - counter!(parent: INGESTED_BYTES_TOTAL, - labels: [labels], - ) - .increment(num_bytes); + counter!(parent: INGESTED_SPANS_TOTAL, labels: [labels]).increment(num_spans); + counter!(parent: INGESTED_BYTES_TOTAL, labels: [labels]).increment(num_bytes); let response = ExportTraceServiceResponse { // `rejected_spans=0` and `error_message=""` is considered a "full" success. 
@@ -785,35 +778,20 @@ impl OtlpGrpcTracesService { ) -> Result { let start = std::time::Instant::now(); - let labels = label_values!( - OTLP_GRPC_LABELS => - "trace", index_id.clone(), "grpc", "protobuf" - ); - counter!( - parent: REQUESTS_TOTAL, - labels: [labels], - ) - .increment(1); + let labels = + label_values!(OTLP_GRPC_LABELS => "trace", index_id.clone(), "grpc", "protobuf"); + counter!(parent: REQUESTS_TOTAL, labels: [labels]).increment(1); let (export_res, is_error) = match self.export_inner(request, index_id.clone()).await { ok @ Ok(_) => (ok, "false"), err @ Err(_) => { - counter!( - parent: REQUEST_ERRORS_TOTAL, - labels: [labels], - ) - .increment(1); + counter!(parent: REQUEST_ERRORS_TOTAL, labels: [labels]).increment(1); (err, "true") } }; let elapsed = start.elapsed().as_secs_f64(); - histogram!( - parent: REQUEST_DURATION_SECONDS, - labels: [label_values!( - OTLP_GRPC_ERROR_LABELS => - "trace", index_id, "grpc", "protobuf", is_error - )], - ) - .record(elapsed); + let error_labels = + label_values!(OTLP_GRPC_ERROR_LABELS => "trace", index_id, "grpc", "protobuf", is_error); + histogram!(parent: REQUEST_DURATION_SECONDS, labels: [error_labels]).record(elapsed); export_res } diff --git a/quickwit/quickwit-parquet-engine/src/ingest/processor.rs b/quickwit/quickwit-parquet-engine/src/ingest/processor.rs index 9bcc94b8955..6b6b62688e6 100644 --- a/quickwit/quickwit-parquet-engine/src/ingest/processor.rs +++ b/quickwit/quickwit-parquet-engine/src/ingest/processor.rs @@ -66,30 +66,21 @@ impl ParquetIngestProcessor { let labels_kind = labels!("kind" => "points"); let labels_operation = labels!("operation" => "ingest"); // Record bytes ingested - counter!( - parent: INGEST_BYTES_TOTAL, - labels: [labels_kind], - ) - .increment(ipc_bytes.len() as u64); + counter!(parent: INGEST_BYTES_TOTAL, labels: [labels_kind]) + .increment(ipc_bytes.len() as u64); let batch = match ipc_to_record_batch(ipc_bytes) { Ok(batch) => batch, Err(e) => { - counter!( - parent: 
ERRORS_TOTAL, - labels: [labels_kind, labels_operation], - ) - .increment(1); + counter!(parent: ERRORS_TOTAL, labels: [labels_kind, labels_operation]) + .increment(1); return Err(e); } }; if let Err(e) = self.validate_schema(&batch) { - counter!( - parent: ERRORS_TOTAL, - labels: [labels_kind, labels_operation], - ) - .increment(1); + counter!(parent: ERRORS_TOTAL, labels: [labels_kind, labels_operation]) + .increment(1); return Err(e); } diff --git a/quickwit/quickwit-search/src/metrics_trackers.rs b/quickwit/quickwit-search/src/metrics_trackers.rs index 3fc0d7f9519..0fe172b0ac5 100644 --- a/quickwit/quickwit-search/src/metrics_trackers.rs +++ b/quickwit/quickwit-search/src/metrics_trackers.rs @@ -75,21 +75,11 @@ impl PinnedDrop for RootSearchMetricsFuture { }; let labels = label_values!(STATUS_LABELS => status); - counter!( - parent: ROOT_SEARCH_REQUESTS_TOTAL, - labels: [labels], - ) - .increment(1); - histogram!( - parent: ROOT_SEARCH_REQUEST_DURATION_SECONDS, - labels: [labels], - ) - .record(self.start.elapsed().as_secs_f64()); - histogram!( - parent: ROOT_SEARCH_TARGETED_SPLITS, - labels: [labels], - ) - .record(num_targeted_splits as f64); + counter!(parent: ROOT_SEARCH_REQUESTS_TOTAL, labels: [labels]).increment(1); + histogram!(parent: ROOT_SEARCH_REQUEST_DURATION_SECONDS, labels: [labels]) + .record(self.start.elapsed().as_secs_f64()); + histogram!(parent: ROOT_SEARCH_TARGETED_SPLITS, labels: [labels]) + .record(num_targeted_splits as f64); } } @@ -127,21 +117,11 @@ where F: Future> fn drop(self: Pin<&mut Self>) { let status = self.status.unwrap_or("cancelled"); let labels = label_values!(STATUS_LABELS => status); - counter!( - parent: LEAF_SEARCH_REQUESTS_TOTAL, - labels: [labels], - ) - .increment(1); - histogram!( - parent: LEAF_SEARCH_REQUEST_DURATION_SECONDS, - labels: [labels], - ) - .record(self.start.elapsed().as_secs_f64()); - histogram!( - parent: LEAF_SEARCH_TARGETED_SPLITS, - labels: [labels], - ) - .record(self.targeted_splits as f64); + 
counter!(parent: LEAF_SEARCH_REQUESTS_TOTAL, labels: [labels]).increment(1); + histogram!(parent: LEAF_SEARCH_REQUEST_DURATION_SECONDS, labels: [labels]) + .record(self.start.elapsed().as_secs_f64()); + histogram!(parent: LEAF_SEARCH_TARGETED_SPLITS, labels: [labels]) + .record(self.targeted_splits as f64); } } From abfcda8524e7e13df4f0a688b4a3f2f6efb182f7 Mon Sep 17 00:00:00 2001 From: Luca Cominardi Date: Tue, 5 May 2026 15:24:09 +0200 Subject: [PATCH 46/54] Replace __bind_labels! tt-muncher with direct repetition The recursive macro is no longer needed now that the labels: arm uses [$($labels:expr),+]. Hash, count, and iterator are folded inline via simple $(...)+ repetition, removing ~60 lines of macro machinery. Co-authored-by: Cursor --- quickwit/quickwit-metrics/src/counter.rs | 18 +++---- quickwit/quickwit-metrics/src/gauge.rs | 18 +++---- quickwit/quickwit-metrics/src/histogram.rs | 18 +++---- quickwit/quickwit-metrics/src/inner.rs | 59 ---------------------- 4 files changed, 27 insertions(+), 86 deletions(-) diff --git a/quickwit/quickwit-metrics/src/counter.rs b/quickwit/quickwit-metrics/src/counter.rs index 3a3bfd4443d..2bea9bf24c3 100644 --- a/quickwit/quickwit-metrics/src/counter.rs +++ b/quickwit/quickwit-metrics/src/counter.rs @@ -265,21 +265,21 @@ macro_rules! counter { }; // Parent extension via one or more pre-built Labels bundles. - // Composes hash, count, and label iterators across all labels via the - // __bind_labels! tt-muncher — zero allocation on the hot path. ( parent: $parent:expr, labels: [$($labels:expr),+ $(,)?] $(,)? 
- ) => {{ - $crate::__bind_labels!( + ) => { + $crate::__metric_extension!( metric_type: $crate::Counter, register_fn: $crate::__counter_get_or_register, parent: $parent, metric_info: $parent.__info(), - hash: $parent.get_hash(), - count: 0usize, - iter: std::iter::empty::<$crate::__metrics::Label>(), - $(next: $labels,)+ + hash: $crate::__key_hash( + $parent.get_hash(), + std::iter::empty()$(.chain($labels.iter()))+, + ), + label_count: 0usize $(+ $labels.len())+, + labels_iter: std::iter::empty()$(.chain($labels.__to_labels()))+ ) - }}; + }; } diff --git a/quickwit/quickwit-metrics/src/gauge.rs b/quickwit/quickwit-metrics/src/gauge.rs index e6bb587014f..03a3e8bc19b 100644 --- a/quickwit/quickwit-metrics/src/gauge.rs +++ b/quickwit/quickwit-metrics/src/gauge.rs @@ -323,21 +323,21 @@ macro_rules! gauge { }; // Parent extension via one or more pre-built Labels bundles. - // Composes hash, count, and label iterators across all labels via the - // __bind_labels! tt-muncher — zero allocation on the hot path. ( parent: $parent:expr, labels: [$($labels:expr),+ $(,)?] $(,)? - ) => {{ - $crate::__bind_labels!( + ) => { + $crate::__metric_extension!( metric_type: $crate::Gauge, register_fn: $crate::__gauge_get_or_register, parent: $parent, metric_info: $parent.__info(), - hash: $parent.get_hash(), - count: 0usize, - iter: std::iter::empty::<$crate::__metrics::Label>(), - $(next: $labels,)+ + hash: $crate::__key_hash( + $parent.get_hash(), + std::iter::empty()$(.chain($labels.iter()))+, + ), + label_count: 0usize $(+ $labels.len())+, + labels_iter: std::iter::empty()$(.chain($labels.__to_labels()))+ ) - }}; + }; } diff --git a/quickwit/quickwit-metrics/src/histogram.rs b/quickwit/quickwit-metrics/src/histogram.rs index 22da9b5af29..ea08b5a62b3 100644 --- a/quickwit/quickwit-metrics/src/histogram.rs +++ b/quickwit/quickwit-metrics/src/histogram.rs @@ -304,21 +304,21 @@ macro_rules! histogram { }; // Parent extension via one or more pre-built Labels bundles. 
- // Composes hash, count, and label iterators across all labels via the - // __bind_labels! tt-muncher — zero allocation on the hot path. ( parent: $parent:expr, labels: [$($labels:expr),+ $(,)?] $(,)? - ) => {{ - $crate::__bind_labels!( + ) => { + $crate::__metric_extension!( metric_type: $crate::Histogram, register_fn: $crate::__histogram_get_or_register, parent: $parent, metric_info: $parent.__info().info, - hash: $parent.get_hash(), - count: 0usize, - iter: std::iter::empty::<$crate::__metrics::Label>(), - $(next: $labels,)+ + hash: $crate::__key_hash( + $parent.get_hash(), + std::iter::empty()$(.chain($labels.iter()))+, + ), + label_count: 0usize $(+ $labels.len())+, + labels_iter: std::iter::empty()$(.chain($labels.__to_labels()))+ ) - }}; + }; } diff --git a/quickwit/quickwit-metrics/src/inner.rs b/quickwit/quickwit-metrics/src/inner.rs index 39f894171cb..06b1f124760 100644 --- a/quickwit/quickwit-metrics/src/inner.rs +++ b/quickwit/quickwit-metrics/src/inner.rs @@ -251,65 +251,6 @@ macro_rules! __metric_extension { }}; } -/// Recursive tt-muncher that binds each `Labels` expression exactly once, -/// then folds hash, count, and iterator chain across all of them before -/// delegating to [`__metric_extension!`]. -/// -/// Each recursion step creates a nested scope so that earlier bindings remain -/// live when the base case finally emits the extension. Zero allocation on -/// the hot path. -#[doc(hidden)] -#[macro_export] -macro_rules! __bind_labels { - // Base case: no more labels to peel. Emit __metric_extension!. 
- ( - metric_type: $metric_type:ty, - register_fn: $register_fn:path, - parent: $parent:expr, - metric_info: $metric_info:expr, - hash: $hash:expr, - count: $count:expr, - iter: $iter:expr, - ) => { - $crate::__metric_extension!( - metric_type: $metric_type, - register_fn: $register_fn, - parent: $parent, - metric_info: $metric_info, - hash: $hash, - label_count: $count, - labels_iter: $iter - ) - }; - - // Recursive case: bind the next labels expr, fold into hash/count/iter, - // then recurse with remaining labels. - ( - metric_type: $metric_type:ty, - register_fn: $register_fn:path, - parent: $parent:expr, - metric_info: $metric_info:expr, - hash: $hash:expr, - count: $count:expr, - iter: $iter:expr, - next: $next:expr $(, next: $rest:expr)* $(,)? - ) => {{ - let __ref = &$next; - let hash = $crate::__key_hash($hash, __ref.iter()); - let count = $count + __ref.len(); - $crate::__bind_labels!( - metric_type: $metric_type, - register_fn: $register_fn, - parent: $parent, - metric_info: $metric_info, - hash: hash, - count: count, - iter: $iter.chain(__ref.__to_labels()), - $(next: $rest,)* - ) - }}; -} - // ─── Tests ─── #[cfg(test)] From 513c6f7d03e49795b7a5ce10a16cd2dda746ce07 Mon Sep 17 00:00:00 2001 From: Luca Cominardi Date: Tue, 5 May 2026 16:49:50 +0200 Subject: [PATCH 47/54] Use Arc identity for metric equality and add Counter/Gauge::local() Switch PartialEq/Hash impls on Counter, Gauge, and Histogram from cache-key hash comparison to Arc::as_ptr() identity. This eliminates any collision risk and is semantically correct since the global DashMap guarantees one Arc per unique name+labels combination. Add Counter::local() and Gauge::local() for detached noop accumulators with independent shadow atomics. Rename get_hash() to __hash() and mark it #[doc(hidden)]. 
Co-authored-by: Cursor --- quickwit/quickwit-metrics/benches/cache.rs | 2 +- quickwit/quickwit-metrics/src/counter.rs | 49 ++++++++++++++------- quickwit/quickwit-metrics/src/gauge.rs | 51 +++++++++++++++------- quickwit/quickwit-metrics/src/histogram.rs | 30 ++++++------- quickwit/quickwit-metrics/tests/counter.rs | 37 +++++++++++++++- quickwit/quickwit-metrics/tests/gauge.rs | 44 ++++++++++++++++++- 6 files changed, 165 insertions(+), 48 deletions(-) diff --git a/quickwit/quickwit-metrics/benches/cache.rs b/quickwit/quickwit-metrics/benches/cache.rs index bf989457a37..fd8bc0c9d93 100644 --- a/quickwit/quickwit-metrics/benches/cache.rs +++ b/quickwit/quickwit-metrics/benches/cache.rs @@ -100,7 +100,7 @@ fn key_hash_bench(c: &mut Criterion) { install_recorder(); let _ = &*DYN_PARENT_COUNTER; - let parent_hash = DYN_PARENT_COUNTER.get_hash(); + let parent_hash = DYN_PARENT_COUNTER.__hash(); let mut group = c.benchmark_group("cache/key_hash"); diff --git a/quickwit/quickwit-metrics/src/counter.rs b/quickwit/quickwit-metrics/src/counter.rs index 2bea9bf24c3..a2513d2f676 100644 --- a/quickwit/quickwit-metrics/src/counter.rs +++ b/quickwit/quickwit-metrics/src/counter.rs @@ -115,24 +115,24 @@ impl std::fmt::Debug for Counter { } } -/// Uses the pre-computed cache-key hash so counters can be stored in -/// `HashMap`/`HashSet` without re-hashing the key contents. -impl std::hash::Hash for Counter { - fn hash(&self, state: &mut H) { - self.get_hash().hash(state); - } -} - -/// Two counters are equal when they share the same cache-key hash, -/// i.e. they were declared with identical name + labels. +/// Two counters are equal when they point to the same `Arc` allocation, +/// i.e. they were produced by cloning the same handle. The global +/// `DashMap` guarantees that all call sites with identical name + labels +/// share one `Arc`, so identity equality implies semantic equality. 
impl PartialEq for Counter { fn eq(&self, other: &Self) -> bool { - self.get_hash() == other.get_hash() + Arc::as_ptr(&self.0) == Arc::as_ptr(&other.0) } } impl Eq for Counter {} +impl std::hash::Hash for Counter { + fn hash(&self, state: &mut H) { + Arc::as_ptr(&self.0).hash(state); + } +} + impl Counter { // NOTE: never call it directly, use the macro instead because it ensures the hash is // pre-computed correctly. @@ -151,8 +151,8 @@ impl Counter { self.0.info } - /// Returns the pre-computed cache-key hash for this counter. - pub fn get_hash(&self) -> u64 { + #[doc(hidden)] + pub fn __hash(&self) -> u64 { self.0.hash } @@ -179,6 +179,25 @@ impl Counter { pub fn get(&self) -> u64 { self.0.shadow.load(Ordering::Relaxed) } + + /// Returns a detached counter with a noop recorder and its own shadow atomic. + pub fn local() -> Counter { + // Static key from empty parts — clone is free (Cow::Borrowed). + static KEY: metrics::Key = metrics::Key::from_static_parts("", &[]); + static METADATA: metrics::Metadata<'static> = + metrics::Metadata::new("local", metrics::Level::DEBUG, None); + static INFO: MetricInfo = MetricInfo { + key_name: "", + description: "local counter", + kind: crate::MetricKind::Counter, + metadata: &METADATA, + static_labels: &[], + }; + // Hash is irrelevant: equality is based on Arc identity (see + // PartialEq impl), and local counters never enter the DashMap. + let inner = CounterInner::new(0, &INFO, KEY.clone(), metrics::Counter::noop()); + Counter(Arc::new(inner)) + } } /// Bridges `Counter` into the `metrics` recorder trait so it can be @@ -258,7 +277,7 @@ macro_rules! counter { parent: $parent, metric_info: $parent.__info(), // Seed with parent hash, fold in each (name, value) pair. 
- hash: $crate::__key_hash!($parent.get_hash(), $(($label, $value)),+), + hash: $crate::__key_hash!($parent.__hash(), $(($label, $value)),+), label_count: $crate::__count!($($label)*), labels_iter: [$($crate::__metrics::Label::new($label, $value)),+].into_iter() ) @@ -275,7 +294,7 @@ macro_rules! counter { parent: $parent, metric_info: $parent.__info(), hash: $crate::__key_hash( - $parent.get_hash(), + $parent.__hash(), std::iter::empty()$(.chain($labels.iter()))+, ), label_count: 0usize $(+ $labels.len())+, diff --git a/quickwit/quickwit-metrics/src/gauge.rs b/quickwit/quickwit-metrics/src/gauge.rs index 03a3e8bc19b..45d65a6ae04 100644 --- a/quickwit/quickwit-metrics/src/gauge.rs +++ b/quickwit/quickwit-metrics/src/gauge.rs @@ -110,24 +110,24 @@ impl std::fmt::Debug for Gauge { } } -/// Uses the pre-computed cache-key hash so gauges can be stored in -/// `HashMap`/`HashSet` without re-hashing the key contents. -impl std::hash::Hash for Gauge { - fn hash(&self, state: &mut H) { - self.get_hash().hash(state); - } -} - -/// Two gauges are equal when they share the same cache-key hash, -/// i.e. they were declared with identical name + labels. +/// Two gauges are equal when they point to the same `Arc` allocation, +/// i.e. they were produced by cloning the same handle. The global +/// `DashMap` guarantees that all call sites with identical name + labels +/// share one `Arc`, so identity equality implies semantic equality. impl PartialEq for Gauge { fn eq(&self, other: &Self) -> bool { - self.get_hash() == other.get_hash() + Arc::as_ptr(&self.0) == Arc::as_ptr(&other.0) } } impl Eq for Gauge {} +impl std::hash::Hash for Gauge { + fn hash(&self, state: &mut H) { + Arc::as_ptr(&self.0).hash(state); + } +} + impl Gauge { // NOTE: never call it directly, use the macro instead because it ensures the hash is // pre-computed correctly. @@ -146,8 +146,8 @@ impl Gauge { self.0.info } - /// Returns the pre-computed cache-key hash for this gauge. 
- pub fn get_hash(&self) -> u64 { + #[doc(hidden)] + pub fn __hash(&self) -> u64 { self.0.hash } @@ -197,6 +197,27 @@ impl GaugeFn for Gauge { } } +impl Gauge { + /// Returns a detached gauge with a noop recorder and its own shadow atomic. + pub fn local() -> Gauge { + // Static key from empty parts — clone is free (Cow::Borrowed). + static KEY: metrics::Key = metrics::Key::from_static_parts("", &[]); + static METADATA: metrics::Metadata<'static> = + metrics::Metadata::new("local", metrics::Level::DEBUG, None); + static INFO: MetricInfo = MetricInfo { + key_name: "", + description: "local gauge", + kind: crate::MetricKind::Gauge, + metadata: &METADATA, + static_labels: &[], + }; + // Hash is irrelevant: equality is based on Arc identity (see + // PartialEq impl), and local gauges never enter the DashMap. + let inner = GaugeInner::new(0, &INFO, KEY.clone(), metrics::Gauge::noop()); + Gauge(Arc::new(inner)) + } +} + /// RAII guard that tracks increments to a [`Gauge`] and decrements the /// tracked amount when dropped. /// @@ -316,7 +337,7 @@ macro_rules! gauge { parent: $parent, metric_info: $parent.__info(), // Seed with parent hash, fold in each (name, value) pair. - hash: $crate::__key_hash!($parent.get_hash(), $(($label, $value)),+), + hash: $crate::__key_hash!($parent.__hash(), $(($label, $value)),+), label_count: $crate::__count!($($label)*), labels_iter: [$($crate::__metrics::Label::new($label, $value)),+].into_iter() ) @@ -333,7 +354,7 @@ macro_rules! 
gauge { parent: $parent, metric_info: $parent.__info(), hash: $crate::__key_hash( - $parent.get_hash(), + $parent.__hash(), std::iter::empty()$(.chain($labels.iter()))+, ), label_count: 0usize $(+ $labels.len())+, diff --git a/quickwit/quickwit-metrics/src/histogram.rs b/quickwit/quickwit-metrics/src/histogram.rs index ea08b5a62b3..949c8f117ec 100644 --- a/quickwit/quickwit-metrics/src/histogram.rs +++ b/quickwit/quickwit-metrics/src/histogram.rs @@ -124,24 +124,24 @@ impl std::fmt::Debug for Histogram { } } -/// Uses the pre-computed cache-key hash so histograms can be stored in -/// `HashMap`/`HashSet` without re-hashing the key contents. -impl std::hash::Hash for Histogram { - fn hash(&self, state: &mut H) { - self.get_hash().hash(state); - } -} - -/// Two histograms are equal when they share the same cache-key hash, -/// i.e. they were declared with identical name + labels. +/// Two histograms are equal when they point to the same `Arc` allocation, +/// i.e. they were produced by cloning the same handle. The global +/// `DashMap` guarantees that all call sites with identical name + labels +/// share one `Arc`, so identity equality implies semantic equality. impl PartialEq for Histogram { fn eq(&self, other: &Self) -> bool { - self.get_hash() == other.get_hash() + Arc::as_ptr(&self.0) == Arc::as_ptr(&other.0) } } impl Eq for Histogram {} +impl std::hash::Hash for Histogram { + fn hash(&self, state: &mut H) { + Arc::as_ptr(&self.0).hash(state); + } +} + impl Histogram { // NOTE: never call it directly, use the macro instead because it ensures the hash is // pre-computed correctly. @@ -160,8 +160,8 @@ impl Histogram { self.0.info } - /// Returns the pre-computed cache-key hash for this histogram. - pub fn get_hash(&self) -> u64 { + #[doc(hidden)] + pub fn __hash(&self) -> u64 { self.0.hash } @@ -297,7 +297,7 @@ macro_rules! histogram { // Unwrap HistogramConfig -> MetricInfo for the extension. 
metric_info: $parent.__info().info, // Seed with parent hash, fold in each (name, value) pair. - hash: $crate::__key_hash!($parent.get_hash(), $(($label, $value)),+), + hash: $crate::__key_hash!($parent.__hash(), $(($label, $value)),+), label_count: $crate::__count!($($label)*), labels_iter: [$($crate::__metrics::Label::new($label, $value)),+].into_iter() ) @@ -314,7 +314,7 @@ macro_rules! histogram { parent: $parent, metric_info: $parent.__info().info, hash: $crate::__key_hash( - $parent.get_hash(), + $parent.__hash(), std::iter::empty()$(.chain($labels.iter()))+, ), label_count: 0usize $(+ $labels.len())+, diff --git a/quickwit/quickwit-metrics/tests/counter.rs b/quickwit/quickwit-metrics/tests/counter.rs index ae0ba4db3cd..68637db47e5 100644 --- a/quickwit/quickwit-metrics/tests/counter.rs +++ b/quickwit/quickwit-metrics/tests/counter.rs @@ -17,7 +17,7 @@ mod common; use common::with_recorder; use metrics::with_local_recorder; use metrics_util::debugging::{DebugValue, DebuggingRecorder}; -use quickwit_metrics::{counter, label_names, label_values, labels}; +use quickwit_metrics::{Counter, counter, label_names, label_values, labels}; #[test] fn base_increments() { @@ -311,3 +311,38 @@ fn observable_parent_distinct_labels_separate_shadow() { assert_eq!(child_b.get(), 7); }); } + +#[test] +fn local_increments_shadow_only() { + let c = Counter::local(); + assert_eq!(c.get(), 0); + c.increment(10); + c.increment(3); + assert_eq!(c.get(), 13); +} + +#[test] +fn local_counters_are_independent() { + let a = Counter::local(); + let b = Counter::local(); + a.increment(5); + b.increment(9); + assert_eq!(a.get(), 5); + assert_eq!(b.get(), 9); +} + +#[test] +fn local_counters_are_never_equal() { + let a = Counter::local(); + let b = Counter::local(); + assert_ne!(a, b); +} + +#[test] +fn local_counter_clone_is_equal() { + let a = Counter::local(); + let b = a.clone(); + assert_eq!(a, b); + a.increment(1); + assert_eq!(b.get(), 1); +} diff --git 
a/quickwit/quickwit-metrics/tests/gauge.rs b/quickwit/quickwit-metrics/tests/gauge.rs index 18db9c0906e..6cadb90046c 100644 --- a/quickwit/quickwit-metrics/tests/gauge.rs +++ b/quickwit/quickwit-metrics/tests/gauge.rs @@ -17,7 +17,7 @@ mod common; use common::with_recorder; use metrics::with_local_recorder; use metrics_util::debugging::{DebugValue, DebuggingRecorder}; -use quickwit_metrics::{GaugeGuard, gauge, label_names, label_values, labels}; +use quickwit_metrics::{Gauge, GaugeGuard, gauge, label_names, label_values, labels}; #[test] fn set() { @@ -306,3 +306,45 @@ fn observable_parent_children_share_shadow() { assert_eq!(child_b.get(), 8.0); }); } + +#[test] +fn local_set_and_get() { + let g = Gauge::local(); + assert_eq!(g.get(), 0.0); + g.set(42.0); + assert_eq!(g.get(), 42.0); +} + +#[test] +fn local_increment_decrement() { + let g = Gauge::local(); + g.increment(10.0); + g.decrement(3.0); + assert_eq!(g.get(), 7.0); +} + +#[test] +fn local_gauges_are_independent() { + let a = Gauge::local(); + let b = Gauge::local(); + a.set(5.0); + b.set(9.0); + assert_eq!(a.get(), 5.0); + assert_eq!(b.get(), 9.0); +} + +#[test] +fn local_gauges_are_never_equal() { + let a = Gauge::local(); + let b = Gauge::local(); + assert_ne!(a, b); +} + +#[test] +fn local_gauge_clone_is_equal() { + let a = Gauge::local(); + let b = a.clone(); + assert_eq!(a, b); + a.set(7.0); + assert_eq!(b.get(), 7.0); +} From 37bf6369c26eb97555f35d54b67bfedf8f1b27be Mon Sep 17 00:00:00 2001 From: Luca Cominardi Date: Tue, 5 May 2026 17:25:49 +0200 Subject: [PATCH 48/54] Add const-compatible labels! arm and use LabelNames in storage metrics Add a literal arm to labels! that uses SharedString::const_str() for const-compatible label construction. Use LabelNames constants (OUTCOME, ACTION, COMPONENT_NAME, COMPONENT_CAPACITY_POLICY) in quickwit-storage to eliminate repeated label key strings. 
Co-authored-by: Cursor --- quickwit/quickwit-common/src/metrics.rs | 2 +- quickwit/quickwit-common/src/tower/metrics.rs | 15 +- quickwit/quickwit-metrics/src/labels.rs | 12 +- quickwit/quickwit-serve/src/metrics.rs | 10 +- quickwit/quickwit-storage/src/metrics.rs | 161 +++++++----------- 5 files changed, 81 insertions(+), 119 deletions(-) diff --git a/quickwit/quickwit-common/src/metrics.rs b/quickwit/quickwit-common/src/metrics.rs index a97f3781534..ea2ad6b847c 100644 --- a/quickwit/quickwit-common/src/metrics.rs +++ b/quickwit/quickwit-common/src/metrics.rs @@ -251,7 +251,7 @@ mod tests { let global_counter = counter!( name: "scoped_counter_test", description: "Scoped counter test.", - subsystem: "", + subsystem: "test", ); let counter = ScopedCounter::Global(global_counter.clone()); diff --git a/quickwit/quickwit-common/src/tower/metrics.rs b/quickwit/quickwit-common/src/tower/metrics.rs index a7ec95c029e..d8cb4d13599 100644 --- a/quickwit/quickwit-common/src/tower/metrics.rs +++ b/quickwit/quickwit-common/src/tower/metrics.rs @@ -30,25 +30,25 @@ pub trait RpcName { static GRPC_REQUESTS_TOTAL: LazyLock = LazyLock::new(|| { counter!( - name: "grpc_requests_total", + name: "requests_total", description: "Total number of gRPC requests processed.", - subsystem: "", + subsystem: "grpc", ) }); static GRPC_REQUESTS_IN_FLIGHT: LazyLock = LazyLock::new(|| { gauge!( - name: "grpc_requests_in_flight", + name: "requests_in_flight", description: "Number of gRPC requests in-flight.", - subsystem: "", + subsystem: "grpc", ) }); static GRPC_REQUEST_DURATION_SECONDS: LazyLock = LazyLock::new(|| { histogram!( - name: "grpc_request_duration_seconds", + name: "request_duration_seconds", description: "Duration of request in seconds.", - subsystem: "", + subsystem: "grpc", buckets: exponential_buckets(0.001, 2.0, 12).unwrap(), ) }); @@ -165,7 +165,8 @@ impl PinnedDrop for ResponseFuture { } impl Future for ResponseFuture -where F: Future> +where + F: Future>, { type Output = Result; 
diff --git a/quickwit/quickwit-metrics/src/labels.rs b/quickwit/quickwit-metrics/src/labels.rs index 1018a79fa2a..3d6cf160f37 100644 --- a/quickwit/quickwit-metrics/src/labels.rs +++ b/quickwit/quickwit-metrics/src/labels.rs @@ -33,7 +33,7 @@ use crate::__key_hash; /// ``` #[macro_export] macro_rules! label_names { - ($($name:expr),+ $(,)?) => { + ($($name:literal),+ $(,)?) => { $crate::LabelNames::__new([$($name),+]) }; } @@ -81,7 +81,15 @@ macro_rules! label_values { /// ``` #[macro_export] macro_rules! labels { - ($($key:expr => $val:expr),+ $(,)?) => { + // All-literal arm: const-compatible via SharedString::const_str. + ($($key:literal => $val:literal),+ $(,)?) => { + $crate::Labels::__from_parts( + [$($key),+], + [$($crate::__metrics::SharedString::const_str($val)),+], + ) + }; + // General arm: accepts any expression convertible to SharedString. + ($($key:literal => $val:expr),+ $(,)?) => { $crate::Labels::__from_parts( [$($key),+], [$($crate::__metrics::SharedString::from($val)),+], diff --git a/quickwit/quickwit-serve/src/metrics.rs b/quickwit/quickwit-serve/src/metrics.rs index 79e8aac6861..49e1d4823d0 100644 --- a/quickwit/quickwit-serve/src/metrics.rs +++ b/quickwit/quickwit-serve/src/metrics.rs @@ -21,7 +21,7 @@ pub(crate) static HTTP_REQUESTS_TOTAL: LazyLock = LazyLock::new(|| { counter!( name: "http_requests_total", description: "Total number of HTTP requests processed.", - subsystem: "", + subsystem: "serve", ) }); @@ -29,7 +29,7 @@ pub(crate) static REQUEST_DURATION_SECS: LazyLock = LazyLock::new(|| histogram!( name: "request_duration_secs", description: "Response time in seconds", - subsystem: "", + subsystem: "serve", // last bucket is 163.84s buckets: exponential_buckets(0.02, 2.0, 14).unwrap(), ) @@ -39,7 +39,7 @@ pub(crate) static ONGOING_REQUESTS: LazyLock = LazyLock::new(|| { gauge!( name: "ongoing_requests", description: "Number of ongoing requests.", - subsystem: "", + subsystem: "serve", ) }); @@ -47,7 +47,7 @@ pub(crate) static 
PENDING_REQUESTS: LazyLock = LazyLock::new(|| { gauge!( name: "pending_requests", description: "Number of pending requests.", - subsystem: "", + subsystem: "serve", ) }); @@ -55,6 +55,6 @@ pub(crate) static CIRCUIT_BREAK_TOTAL: LazyLock = LazyLock::new(|| { counter!( name: "circuit_break_total", description: "Circuit breaker counter", - subsystem: "grpc", + subsystem: "serve", ) }); diff --git a/quickwit/quickwit-storage/src/metrics.rs b/quickwit/quickwit-storage/src/metrics.rs index 5526c4fd66b..555550bce98 100644 --- a/quickwit/quickwit-storage/src/metrics.rs +++ b/quickwit/quickwit-storage/src/metrics.rs @@ -19,7 +19,16 @@ use std::collections::HashMap; use std::sync::{LazyLock, RwLock}; use quickwit_config::CacheConfig; -use quickwit_metrics::{Counter, Gauge, GaugeGuard, Histogram, counter, gauge, histogram}; +use quickwit_metrics::{ + Counter, Gauge, GaugeGuard, Histogram, LabelNames, Labels, counter, gauge, histogram, + label_names, label_values, labels, +}; + +const ACTION_DELETE_OBJECT: Labels<1> = labels!("action" => "delete_object"); +const OUTCOME: LabelNames<1> = label_names!("outcome"); +const COMPONENT_NAME: LabelNames<1> = label_names!("component_name"); +const COMPONENT_CAPACITY_POLICY: LabelNames<3> = + label_names!("component_name", "capacity", "policy"); static GET_SLICE_TIMEOUT_OUTCOME_TOTAL: LazyLock = LazyLock::new(|| { counter!( @@ -29,34 +38,21 @@ static GET_SLICE_TIMEOUT_OUTCOME_TOTAL: LazyLock = LazyLock::new(|| { ) }); -pub static GET_SLICE_TIMEOUT_SUCCESS_AFTER_0_TIMEOUT: LazyLock = LazyLock::new(|| { - counter!( - parent: GET_SLICE_TIMEOUT_OUTCOME_TOTAL, - "outcome" => "success_after_0_timeout", - ) -}); +pub static GET_SLICE_TIMEOUT_SUCCESS_AFTER_0_TIMEOUT: LazyLock = LazyLock::new( + || counter!(parent: GET_SLICE_TIMEOUT_OUTCOME_TOTAL, labels: [label_values!(OUTCOME => "success_after_0_timeout")]), +); -pub static GET_SLICE_TIMEOUT_SUCCESS_AFTER_1_TIMEOUT: LazyLock = LazyLock::new(|| { - counter!( - parent: 
GET_SLICE_TIMEOUT_OUTCOME_TOTAL, - "outcome" => "success_after_1_timeout", - ) -}); +pub static GET_SLICE_TIMEOUT_SUCCESS_AFTER_1_TIMEOUT: LazyLock = LazyLock::new( + || counter!(parent: GET_SLICE_TIMEOUT_OUTCOME_TOTAL, labels: [label_values!(OUTCOME => "success_after_1_timeout")]), +); -pub static GET_SLICE_TIMEOUT_SUCCESS_AFTER_2_PLUS_TIMEOUT: LazyLock = - LazyLock::new(|| { - counter!( - parent: GET_SLICE_TIMEOUT_OUTCOME_TOTAL, - "outcome" => "success_after_2+_timeout", - ) - }); +pub static GET_SLICE_TIMEOUT_SUCCESS_AFTER_2_PLUS_TIMEOUT: LazyLock = LazyLock::new( + || counter!(parent: GET_SLICE_TIMEOUT_OUTCOME_TOTAL, labels: [label_values!(OUTCOME => "success_after_2+_timeout")]), +); -pub static GET_SLICE_TIMEOUT_ALL_TIMEOUTS: LazyLock = LazyLock::new(|| { - counter!( - parent: GET_SLICE_TIMEOUT_OUTCOME_TOTAL, - "outcome" => "all_timeouts", - ) -}); +pub static GET_SLICE_TIMEOUT_ALL_TIMEOUTS: LazyLock = LazyLock::new( + || counter!(parent: GET_SLICE_TIMEOUT_OUTCOME_TOTAL, labels: [label_values!(OUTCOME => "all_timeouts")]), +); static OBJECT_STORAGE_REQUESTS_TOTAL: LazyLock = LazyLock::new(|| { counter!( @@ -75,33 +71,21 @@ static OBJECT_STORAGE_REQUEST_DURATION: LazyLock = LazyLock::new(|| { ) }); -pub static OBJECT_STORAGE_DELETE_REQUESTS_TOTAL: LazyLock = LazyLock::new(|| { - counter!( - parent: OBJECT_STORAGE_REQUESTS_TOTAL, - "action" => "delete_object", - ) -}); +pub static OBJECT_STORAGE_DELETE_REQUESTS_TOTAL: LazyLock = LazyLock::new( + || counter!(parent: OBJECT_STORAGE_REQUESTS_TOTAL, labels: [ACTION_DELETE_OBJECT]), +); -pub static OBJECT_STORAGE_BULK_DELETE_REQUESTS_TOTAL: LazyLock = LazyLock::new(|| { - counter!( - parent: OBJECT_STORAGE_REQUESTS_TOTAL, - "action" => "delete_objects", - ) -}); +pub static OBJECT_STORAGE_BULK_DELETE_REQUESTS_TOTAL: LazyLock = LazyLock::new( + || counter!(parent: OBJECT_STORAGE_REQUESTS_TOTAL, labels: [ACTION_DELETE_OBJECT]), +); -pub static OBJECT_STORAGE_DELETE_REQUEST_DURATION: LazyLock = LazyLock::new(|| { - 
histogram!( - parent: OBJECT_STORAGE_REQUEST_DURATION, - "action" => "delete_object", - ) -}); +pub static OBJECT_STORAGE_DELETE_REQUEST_DURATION: LazyLock = LazyLock::new( + || histogram!(parent: OBJECT_STORAGE_REQUEST_DURATION, labels: [ACTION_DELETE_OBJECT]), +); -pub static OBJECT_STORAGE_BULK_DELETE_REQUEST_DURATION: LazyLock = LazyLock::new(|| { - histogram!( - parent: OBJECT_STORAGE_REQUEST_DURATION, - "action" => "delete_objects", - ) -}); +pub static OBJECT_STORAGE_BULK_DELETE_REQUEST_DURATION: LazyLock = LazyLock::new( + || histogram!(parent: OBJECT_STORAGE_REQUEST_DURATION, labels: [ACTION_DELETE_OBJECT]), +); pub static OBJECT_STORAGE_GET_TOTAL: LazyLock = LazyLock::new(|| { counter!( @@ -147,7 +131,7 @@ pub static OBJECT_STORAGE_PUT_PARTS: LazyLock = LazyLock::new(|| { counter!( name: "object_storage_puts_parts", description: "Number of object parts uploaded.", - subsystem: "", + subsystem: "storage", ) }); @@ -188,16 +172,17 @@ pub struct SingleCacheMetrics { impl CacheMetrics { pub fn for_component(component_name: &str) -> Self { let component_name = component_name.to_string(); + let labels = label_values!(COMPONENT_NAME => component_name.clone()); CacheMetrics { - component_name: component_name.clone(), + component_name, cache_metrics: SingleCacheMetrics { - in_cache_count: gauge!(parent: CACHE_IN_CACHE_COUNT, "component_name" => component_name.clone()), - in_cache_num_bytes: gauge!(parent: CACHE_IN_CACHE_NUM_BYTES, "component_name" => component_name.clone()), - hits_num_items: counter!(parent: CACHE_HITS_TOTAL, "component_name" => component_name.clone()), - hits_num_bytes: counter!(parent: CACHE_HITS_BYTES, "component_name" => component_name.clone()), - misses_num_items: counter!(parent: CACHE_MISSES_TOTAL, "component_name" => component_name.clone()), - evict_num_items: counter!(parent: CACHE_EVICT_TOTAL, "component_name" => component_name.clone()), - evict_num_bytes: counter!(parent: CACHE_EVICT_BYTES, "component_name" => component_name), + 
in_cache_count: gauge!(parent: CACHE_IN_CACHE_COUNT, labels: [labels]), + in_cache_num_bytes: gauge!(parent: CACHE_IN_CACHE_NUM_BYTES, labels: [labels]), + hits_num_items: counter!(parent: CACHE_HITS_TOTAL, labels: [labels]), + hits_num_bytes: counter!(parent: CACHE_HITS_BYTES, labels: [labels]), + misses_num_items: counter!(parent: CACHE_MISSES_TOTAL, labels: [labels]), + evict_num_items: counter!(parent: CACHE_EVICT_TOTAL, labels: [labels]), + evict_num_bytes: counter!(parent: CACHE_EVICT_BYTES, labels: [labels]), }, virtual_caches_metrics: RwLock::default(), } @@ -209,52 +194,20 @@ impl CacheMetrics { return virtual_cache_metrics.clone(); } - let capacity = config.capacity().as_u64().to_string(); - let policy = config.policy().to_string(); - let component_name = self.component_name.clone(); + let labels = label_values!( + COMPONENT_CAPACITY_POLICY => + self.component_name.clone(), + config.capacity().as_u64().to_string(), + config.policy().to_string(), + ); let new_virtual_cache_metrics = SingleCacheMetrics { - in_cache_count: gauge!( - parent: VIRTUAL_CACHE_IN_CACHE_COUNT, - "component_name" => component_name.clone(), - "capacity" => capacity.clone(), - "policy" => policy.clone(), - ), - in_cache_num_bytes: gauge!( - parent: VIRTUAL_CACHE_IN_CACHE_NUM_BYTES, - "component_name" => component_name.clone(), - "capacity" => capacity.clone(), - "policy" => policy.clone(), - ), - hits_num_items: counter!( - parent: VIRTUAL_CACHE_HITS_TOTAL, - "component_name" => component_name.clone(), - "capacity" => capacity.clone(), - "policy" => policy.clone(), - ), - hits_num_bytes: counter!( - parent: VIRTUAL_CACHE_HITS_BYTES, - "component_name" => component_name.clone(), - "capacity" => capacity.clone(), - "policy" => policy.clone(), - ), - misses_num_items: counter!( - parent: VIRTUAL_CACHE_MISSES_TOTAL, - "component_name" => component_name.clone(), - "capacity" => capacity.clone(), - "policy" => policy.clone(), - ), - evict_num_items: counter!( - parent: 
VIRTUAL_CACHE_EVICT_TOTAL, - "component_name" => component_name.clone(), - "capacity" => capacity.clone(), - "policy" => policy.clone(), - ), - evict_num_bytes: counter!( - parent: VIRTUAL_CACHE_EVICT_BYTES, - "component_name" => component_name, - "capacity" => capacity, - "policy" => policy, - ), + in_cache_count: gauge!(parent: VIRTUAL_CACHE_IN_CACHE_COUNT, labels: [labels]), + in_cache_num_bytes: gauge!(parent: VIRTUAL_CACHE_IN_CACHE_NUM_BYTES, labels: [labels]), + hits_num_items: counter!(parent: VIRTUAL_CACHE_HITS_TOTAL, labels: [labels]), + hits_num_bytes: counter!(parent: VIRTUAL_CACHE_HITS_BYTES, labels: [labels]), + misses_num_items: counter!(parent: VIRTUAL_CACHE_MISSES_TOTAL, labels: [labels]), + evict_num_items: counter!(parent: VIRTUAL_CACHE_EVICT_TOTAL, labels: [labels]), + evict_num_bytes: counter!(parent: VIRTUAL_CACHE_EVICT_BYTES, labels: [labels]), }; self.virtual_caches_metrics From 9a58858586eadff4dd10ac6da19d0006a4d3ea20 Mon Sep 17 00:00:00 2001 From: Luca Cominardi Date: Tue, 5 May 2026 17:46:09 +0200 Subject: [PATCH 49/54] Expose quickwit-storage metrics module publicly, remove re-exports Make `metrics` a `pub mod` instead of re-exporting individual symbols from the crate root. Internal consumers now import directly from `crate::metrics::`, external consumers from `quickwit_storage::metrics::`. 
Co-authored-by: Cursor --- .../src/caching_directory.rs | 2 +- quickwit/quickwit-search/src/leaf.rs | 2 +- quickwit/quickwit-search/src/leaf_cache.rs | 4 +- .../quickwit-search/src/list_fields_cache.rs | 2 +- quickwit/quickwit-search/src/list_terms.rs | 2 +- quickwit/quickwit-search/src/service.rs | 2 +- quickwit/quickwit-serve/src/rest.rs | 19 ++++----- .../src/cache/quickwit_cache.rs | 3 +- .../src/file_descriptor_cache.rs | 2 +- quickwit/quickwit-storage/src/lib.rs | 15 +------ quickwit/quickwit-storage/src/metrics.rs | 40 +++++++++---------- .../src/object_storage/azure_blob_storage.rs | 14 +++---- .../src/object_storage/error.rs | 3 +- .../object_storage/s3_compatible_storage.rs | 24 +++++------ .../src/opendal_storage/base.rs | 16 ++++---- .../quickwit-storage/src/split_cache/mod.rs | 2 +- .../src/timeout_and_retry_storage.rs | 8 ++-- 17 files changed, 75 insertions(+), 85 deletions(-) diff --git a/quickwit/quickwit-directories/src/caching_directory.rs b/quickwit/quickwit-directories/src/caching_directory.rs index 4da1981eed2..c40ac3ec760 100644 --- a/quickwit/quickwit-directories/src/caching_directory.rs +++ b/quickwit/quickwit-directories/src/caching_directory.rs @@ -38,7 +38,7 @@ impl CachingDirectory { /// removing any item from the cache. 
pub fn new_unbounded(underlying: Arc) -> CachingDirectory { let byte_range_cache = - ByteRangeCache::with_infinite_capacity(&quickwit_storage::SHORTLIVED_CACHE); + ByteRangeCache::with_infinite_capacity(&quickwit_storage::metrics::SHORTLIVED_CACHE); CachingDirectory::new(underlying, byte_range_cache) } diff --git a/quickwit/quickwit-search/src/leaf.rs b/quickwit/quickwit-search/src/leaf.rs index 278848178e8..88551de2b7d 100644 --- a/quickwit/quickwit-search/src/leaf.rs +++ b/quickwit/quickwit-search/src/leaf.rs @@ -533,7 +533,7 @@ async fn leaf_search_single_split( let split_id = split.split_id.to_string(); let byte_range_cache = - ByteRangeCache::with_infinite_capacity(&quickwit_storage::SHORTLIVED_CACHE); + ByteRangeCache::with_infinite_capacity(&quickwit_storage::metrics::SHORTLIVED_CACHE); let (index, hot_directory) = open_index_with_caches( &ctx.searcher_context, storage, diff --git a/quickwit/quickwit-search/src/leaf_cache.rs b/quickwit/quickwit-search/src/leaf_cache.rs index 3b492c1342a..2407d2031c5 100644 --- a/quickwit/quickwit-search/src/leaf_cache.rs +++ b/quickwit/quickwit-search/src/leaf_cache.rs @@ -48,7 +48,7 @@ impl LeafSearchCache { LeafSearchCache { content: MemorySizedCache::from_config( config, - &quickwit_storage::PARTIAL_REQUEST_CACHE, + &quickwit_storage::metrics::PARTIAL_REQUEST_CACHE, ), } } @@ -195,7 +195,7 @@ pub struct PredicateCacheImpl { impl PredicateCacheImpl { pub fn new(config: &CacheConfig) -> Self { PredicateCacheImpl { - content: MemorySizedCache::from_config(config, &quickwit_storage::PREDICATE_CACHE), + content: MemorySizedCache::from_config(config, &quickwit_storage::metrics::PREDICATE_CACHE), } } } diff --git a/quickwit/quickwit-search/src/list_fields_cache.rs b/quickwit/quickwit-search/src/list_fields_cache.rs index 6066d6cb9f6..3077fa35189 100644 --- a/quickwit/quickwit-search/src/list_fields_cache.rs +++ b/quickwit/quickwit-search/src/list_fields_cache.rs @@ -31,7 +31,7 @@ impl ListFieldsCache { ListFieldsCache { content: 
MemorySizedCache::from_config( config, - &quickwit_storage::PARTIAL_REQUEST_CACHE, + &quickwit_storage::metrics::PARTIAL_REQUEST_CACHE, ), } } diff --git a/quickwit/quickwit-search/src/list_terms.rs b/quickwit/quickwit-search/src/list_terms.rs index bc53c4b6279..8b6724bd900 100644 --- a/quickwit/quickwit-search/src/list_terms.rs +++ b/quickwit/quickwit-search/src/list_terms.rs @@ -216,7 +216,7 @@ async fn leaf_list_terms_single_split( storage: Arc, split: SplitIdAndFooterOffsets, ) -> crate::Result { - let cache = ByteRangeCache::with_infinite_capacity(&quickwit_storage::SHORTLIVED_CACHE); + let cache = ByteRangeCache::with_infinite_capacity(&quickwit_storage::metrics::SHORTLIVED_CACHE); let (index, _) = open_index_with_caches(searcher_context, storage, &split, None, Some(cache)).await?; let split_schema = index.schema(); diff --git a/quickwit/quickwit-search/src/service.rs b/quickwit/quickwit-search/src/service.rs index 37cc3ecd483..f9a95dcda10 100644 --- a/quickwit/quickwit-search/src/service.rs +++ b/quickwit/quickwit-search/src/service.rs @@ -459,7 +459,7 @@ impl SearcherContext { ) -> Self { let global_split_footer_cache = MemorySizedCache::from_config( &searcher_config.split_footer_cache, - &quickwit_storage::SPLIT_FOOTER_CACHE, + &quickwit_storage::metrics::SPLIT_FOOTER_CACHE, ); let leaf_search_split_semaphore = SearchPermitProvider::new( searcher_config.max_num_concurrent_split_searches, diff --git a/quickwit/quickwit-serve/src/rest.rs b/quickwit/quickwit-serve/src/rest.rs index 84eb103dc60..af941ab4bc2 100644 --- a/quickwit/quickwit-serve/src/rest.rs +++ b/quickwit/quickwit-serve/src/rest.rs @@ -21,7 +21,7 @@ use hyper_util::server::conn::auto::Builder; use hyper_util::service::TowerToHyperService; use quickwit_common::tower::BoxFutureInfaillible; use quickwit_config::{disable_ingest_v1, enable_ingest_v2}; -use quickwit_metrics::{counter, histogram}; +use quickwit_metrics::{counter, histogram, labels}; use quickwit_search::SearchService; use 
tokio::io::{AsyncRead, AsyncWrite}; use tokio::net::{TcpListener, TcpStream}; @@ -106,7 +106,9 @@ impl CompressionPredicate { impl Predicate for CompressionPredicate { fn should_compress(&self, response: &http::Response) -> bool - where B: http_body::Body { + where + B: http_body::Body, + { if let Some(size_above) = self.size_above_opt { size_above.should_compress(response) } else { @@ -138,19 +140,18 @@ pub(crate) async fn start_rest_server( ) -> anyhow::Result<()> { let request_counter = warp::log::custom(|info: Info| { let elapsed = info.elapsed(); - let status = info.status(); - let method = info.method().as_str().to_string(); - let status_code = status.as_str().to_string(); + let labels = labels!( + "method" => info.method().as_str().to_string(), + "status_code" => info.status().as_str().to_string() + ); histogram!( parent: REQUEST_DURATION_SECS, - "method" => method.clone(), - "status_code" => status_code.clone(), + labels: [labels], ) .record(elapsed.as_secs_f64()); counter!( parent: HTTP_REQUESTS_TOTAL, - "method" => method, - "status_code" => status_code, + labels: [labels], ) .increment(1); }); diff --git a/quickwit/quickwit-storage/src/cache/quickwit_cache.rs b/quickwit/quickwit-storage/src/cache/quickwit_cache.rs index 37144164edd..c89cb1c3db1 100644 --- a/quickwit/quickwit-storage/src/cache/quickwit_cache.rs +++ b/quickwit/quickwit-storage/src/cache/quickwit_cache.rs @@ -21,7 +21,8 @@ use quickwit_config::CacheConfig; use crate::cache::{MemorySizedCache, StorageCache}; use crate::metrics::CacheMetrics; -use crate::{FAST_FIELD_CACHE, OwnedBytes}; +use crate::metrics::FAST_FIELD_CACHE; +use crate::OwnedBytes; const FULL_SLICE: Range = 0..usize::MAX; diff --git a/quickwit/quickwit-storage/src/file_descriptor_cache.rs b/quickwit/quickwit-storage/src/file_descriptor_cache.rs index 175e6f1f220..b873a47375a 100644 --- a/quickwit/quickwit-storage/src/file_descriptor_cache.rs +++ b/quickwit/quickwit-storage/src/file_descriptor_cache.rs @@ -23,7 +23,7 @@ use 
tantivy::directory::OwnedBytes; use tokio::sync::{OwnedSemaphorePermit, Semaphore}; use ulid::Ulid; -use crate::FD_CACHE_METRICS; +use crate::metrics::FD_CACHE_METRICS; use crate::metrics::SingleCacheMetrics; pub struct FileDescriptorCache { diff --git a/quickwit/quickwit-storage/src/lib.rs b/quickwit/quickwit-storage/src/lib.rs index 261217395b9..0e1b1e376d4 100644 --- a/quickwit/quickwit-storage/src/lib.rs +++ b/quickwit/quickwit-storage/src/lib.rs @@ -28,24 +28,11 @@ mod cache; mod debouncer; mod file_descriptor_cache; -mod metrics; +pub mod metrics; mod storage; mod timeout_and_retry_storage; pub use debouncer::AsyncDebouncer; pub(crate) use debouncer::DebouncedStorage; - -pub use self::metrics::{ - FAST_FIELD_CACHE, FD_CACHE_METRICS, GET_SLICE_TIMEOUT_ALL_TIMEOUTS, - GET_SLICE_TIMEOUT_SUCCESS_AFTER_0_TIMEOUT, GET_SLICE_TIMEOUT_SUCCESS_AFTER_1_TIMEOUT, - GET_SLICE_TIMEOUT_SUCCESS_AFTER_2_PLUS_TIMEOUT, OBJECT_STORAGE_BULK_DELETE_REQUEST_DURATION, - OBJECT_STORAGE_BULK_DELETE_REQUESTS_TOTAL, OBJECT_STORAGE_DELETE_REQUEST_DURATION, - OBJECT_STORAGE_DELETE_REQUESTS_TOTAL, OBJECT_STORAGE_DOWNLOAD_NUM_BYTES, - OBJECT_STORAGE_GET_ERRORS_TOTAL, OBJECT_STORAGE_GET_SLICE_IN_FLIGHT_COUNT, - OBJECT_STORAGE_GET_SLICE_IN_FLIGHT_NUM_BYTES, OBJECT_STORAGE_GET_TOTAL, - OBJECT_STORAGE_PUT_PARTS, OBJECT_STORAGE_PUT_TOTAL, OBJECT_STORAGE_UPLOAD_NUM_BYTES, - PARTIAL_REQUEST_CACHE, PREDICATE_CACHE, SEARCHER_SPLIT_CACHE, SHORTLIVED_CACHE, - SPLIT_FOOTER_CACHE, -}; pub use self::payload::PutPayload; pub use self::storage::Storage; diff --git a/quickwit/quickwit-storage/src/metrics.rs b/quickwit/quickwit-storage/src/metrics.rs index 555550bce98..1668209c641 100644 --- a/quickwit/quickwit-storage/src/metrics.rs +++ b/quickwit/quickwit-storage/src/metrics.rs @@ -38,19 +38,19 @@ static GET_SLICE_TIMEOUT_OUTCOME_TOTAL: LazyLock = LazyLock::new(|| { ) }); -pub static GET_SLICE_TIMEOUT_SUCCESS_AFTER_0_TIMEOUT: LazyLock = LazyLock::new( +pub(crate) static 
GET_SLICE_TIMEOUT_SUCCESS_AFTER_0_TIMEOUT: LazyLock = LazyLock::new( || counter!(parent: GET_SLICE_TIMEOUT_OUTCOME_TOTAL, labels: [label_values!(OUTCOME => "success_after_0_timeout")]), ); -pub static GET_SLICE_TIMEOUT_SUCCESS_AFTER_1_TIMEOUT: LazyLock = LazyLock::new( +pub(crate) static GET_SLICE_TIMEOUT_SUCCESS_AFTER_1_TIMEOUT: LazyLock = LazyLock::new( || counter!(parent: GET_SLICE_TIMEOUT_OUTCOME_TOTAL, labels: [label_values!(OUTCOME => "success_after_1_timeout")]), ); -pub static GET_SLICE_TIMEOUT_SUCCESS_AFTER_2_PLUS_TIMEOUT: LazyLock = LazyLock::new( +pub(crate) static GET_SLICE_TIMEOUT_SUCCESS_AFTER_2_PLUS_TIMEOUT: LazyLock = LazyLock::new( || counter!(parent: GET_SLICE_TIMEOUT_OUTCOME_TOTAL, labels: [label_values!(OUTCOME => "success_after_2+_timeout")]), ); -pub static GET_SLICE_TIMEOUT_ALL_TIMEOUTS: LazyLock = LazyLock::new( +pub(crate) static GET_SLICE_TIMEOUT_ALL_TIMEOUTS: LazyLock = LazyLock::new( || counter!(parent: GET_SLICE_TIMEOUT_OUTCOME_TOTAL, labels: [label_values!(OUTCOME => "all_timeouts")]), ); @@ -71,23 +71,23 @@ static OBJECT_STORAGE_REQUEST_DURATION: LazyLock = LazyLock::new(|| { ) }); -pub static OBJECT_STORAGE_DELETE_REQUESTS_TOTAL: LazyLock = LazyLock::new( +pub(crate) static OBJECT_STORAGE_DELETE_REQUESTS_TOTAL: LazyLock = LazyLock::new( || counter!(parent: OBJECT_STORAGE_REQUESTS_TOTAL, labels: [ACTION_DELETE_OBJECT]), ); -pub static OBJECT_STORAGE_BULK_DELETE_REQUESTS_TOTAL: LazyLock = LazyLock::new( +pub(crate) static OBJECT_STORAGE_BULK_DELETE_REQUESTS_TOTAL: LazyLock = LazyLock::new( || counter!(parent: OBJECT_STORAGE_REQUESTS_TOTAL, labels: [ACTION_DELETE_OBJECT]), ); -pub static OBJECT_STORAGE_DELETE_REQUEST_DURATION: LazyLock = LazyLock::new( +pub(crate) static OBJECT_STORAGE_DELETE_REQUEST_DURATION: LazyLock = LazyLock::new( || histogram!(parent: OBJECT_STORAGE_REQUEST_DURATION, labels: [ACTION_DELETE_OBJECT]), ); -pub static OBJECT_STORAGE_BULK_DELETE_REQUEST_DURATION: LazyLock = LazyLock::new( +pub(crate) static 
OBJECT_STORAGE_BULK_DELETE_REQUEST_DURATION: LazyLock = LazyLock::new( || histogram!(parent: OBJECT_STORAGE_REQUEST_DURATION, labels: [ACTION_DELETE_OBJECT]), ); -pub static OBJECT_STORAGE_GET_TOTAL: LazyLock = LazyLock::new(|| { +pub(crate) static OBJECT_STORAGE_GET_TOTAL: LazyLock = LazyLock::new(|| { counter!( name: "object_storage_gets_total", description: "Number of objects fetched. Might be lower than get_slice_timeout_outcome if queries are debounced.", @@ -95,7 +95,7 @@ pub static OBJECT_STORAGE_GET_TOTAL: LazyLock = LazyLock::new(|| { ) }); -pub static OBJECT_STORAGE_GET_ERRORS_TOTAL: LazyLock = LazyLock::new(|| { +pub(crate) static OBJECT_STORAGE_GET_ERRORS_TOTAL: LazyLock = LazyLock::new(|| { counter!( name: "object_storage_get_errors_total", description: "Number of GetObject errors.", @@ -103,7 +103,7 @@ pub static OBJECT_STORAGE_GET_ERRORS_TOTAL: LazyLock = LazyLock::new(|| ) }); -pub static OBJECT_STORAGE_GET_SLICE_IN_FLIGHT_COUNT: LazyLock = LazyLock::new(|| { +pub(crate) static OBJECT_STORAGE_GET_SLICE_IN_FLIGHT_COUNT: LazyLock = LazyLock::new(|| { gauge!( name: "object_storage_get_slice_in_flight_count", description: "Number of GetObject for which the memory was allocated but the download is still in progress.", @@ -111,7 +111,7 @@ pub static OBJECT_STORAGE_GET_SLICE_IN_FLIGHT_COUNT: LazyLock = LazyLock: ) }); -pub static OBJECT_STORAGE_GET_SLICE_IN_FLIGHT_NUM_BYTES: LazyLock = LazyLock::new(|| { +pub(crate) static OBJECT_STORAGE_GET_SLICE_IN_FLIGHT_NUM_BYTES: LazyLock = LazyLock::new(|| { gauge!( name: "object_storage_get_slice_in_flight_num_bytes", description: "Memory allocated for GetObject requests that are still in progress.", @@ -119,7 +119,7 @@ pub static OBJECT_STORAGE_GET_SLICE_IN_FLIGHT_NUM_BYTES: LazyLock = LazyL ) }); -pub static OBJECT_STORAGE_PUT_TOTAL: LazyLock = LazyLock::new(|| { +pub(crate) static OBJECT_STORAGE_PUT_TOTAL: LazyLock = LazyLock::new(|| { counter!( name: "object_storage_puts_total", description: "Number of objects 
uploaded. May differ from object_storage_requests_parts due to multipart upload.", @@ -127,7 +127,7 @@ pub static OBJECT_STORAGE_PUT_TOTAL: LazyLock = LazyLock::new(|| { ) }); -pub static OBJECT_STORAGE_PUT_PARTS: LazyLock = LazyLock::new(|| { +pub(crate) static OBJECT_STORAGE_PUT_PARTS: LazyLock = LazyLock::new(|| { counter!( name: "object_storage_puts_parts", description: "Number of object parts uploaded.", @@ -135,7 +135,7 @@ pub static OBJECT_STORAGE_PUT_PARTS: LazyLock = LazyLock::new(|| { ) }); -pub static OBJECT_STORAGE_DOWNLOAD_NUM_BYTES: LazyLock = LazyLock::new(|| { +pub(crate) static OBJECT_STORAGE_DOWNLOAD_NUM_BYTES: LazyLock = LazyLock::new(|| { counter!( name: "object_storage_download_num_bytes", description: "Amount of data downloaded from an object storage.", @@ -143,7 +143,7 @@ pub static OBJECT_STORAGE_DOWNLOAD_NUM_BYTES: LazyLock = LazyLock::new( ) }); -pub static OBJECT_STORAGE_UPLOAD_NUM_BYTES: LazyLock = LazyLock::new(|| { +pub(crate) static OBJECT_STORAGE_UPLOAD_NUM_BYTES: LazyLock = LazyLock::new(|| { counter!( name: "object_storage_upload_num_bytes", description: "Amount of data uploaded to an object storage.", @@ -331,10 +331,10 @@ static VIRTUAL_CACHE_EVICT_BYTES: LazyLock = LazyLock::new(|| { ) }); -pub static FAST_FIELD_CACHE: LazyLock = +pub(crate) static FAST_FIELD_CACHE: LazyLock = LazyLock::new(|| CacheMetrics::for_component("fastfields")); -pub static FD_CACHE_METRICS: LazyLock = +pub(crate) static FD_CACHE_METRICS: LazyLock = LazyLock::new(|| CacheMetrics::for_component("fd")); pub static PARTIAL_REQUEST_CACHE: LazyLock = @@ -343,7 +343,7 @@ pub static PARTIAL_REQUEST_CACHE: LazyLock = pub static PREDICATE_CACHE: LazyLock = LazyLock::new(|| CacheMetrics::for_component("predicate")); -pub static SEARCHER_SPLIT_CACHE: LazyLock = +pub(crate) static SEARCHER_SPLIT_CACHE: LazyLock = LazyLock::new(|| CacheMetrics::for_component("searcher_split")); pub static SHORTLIVED_CACHE: LazyLock = @@ -356,7 +356,7 @@ pub static SPLIT_FOOTER_CACHE: 
LazyLock = pub static CACHE_METRICS_FOR_TESTS: LazyLock = LazyLock::new(|| CacheMetrics::for_component("fortest")); -pub fn object_storage_get_slice_in_flight_guards( +pub(crate) fn object_storage_get_slice_in_flight_guards( get_request_size: usize, ) -> (GaugeGuard, GaugeGuard) { let bytes_guard = GaugeGuard::new( diff --git a/quickwit/quickwit-storage/src/object_storage/azure_blob_storage.rs b/quickwit/quickwit-storage/src/object_storage/azure_blob_storage.rs index 1ca91d9bdcf..639f4292f4a 100644 --- a/quickwit/quickwit-storage/src/object_storage/azure_blob_storage.rs +++ b/quickwit/quickwit-storage/src/object_storage/azure_blob_storage.rs @@ -240,8 +240,8 @@ impl AzureBlobStorage { name: &'a str, payload: Box, ) -> StorageResult<()> { - crate::OBJECT_STORAGE_PUT_PARTS.increment(1); - crate::OBJECT_STORAGE_UPLOAD_NUM_BYTES.increment(payload.len()); + crate::metrics::OBJECT_STORAGE_PUT_PARTS.increment(1); + crate::metrics::OBJECT_STORAGE_UPLOAD_NUM_BYTES.increment(payload.len()); retry(&self.retry_params, || async { let data = Bytes::from(payload.read_all().await?.to_vec()); let hash = azure_storage_blobs::prelude::Hash::from(md5::compute(&data[..]).0); @@ -274,8 +274,8 @@ impl AzureBlobStorage { .map(|(num, range)| { let moved_blob_client = blob_client.clone(); let moved_payload = payload.clone(); - crate::OBJECT_STORAGE_PUT_PARTS.increment(1); - crate::OBJECT_STORAGE_UPLOAD_NUM_BYTES.increment(range.end - range.start); + crate::metrics::OBJECT_STORAGE_PUT_PARTS.increment(1); + crate::metrics::OBJECT_STORAGE_UPLOAD_NUM_BYTES.increment(range.end - range.start); async move { retry(&self.retry_params, || async { // zero pad block ids to make them sortable as strings @@ -345,7 +345,7 @@ impl Storage for AzureBlobStorage { path: &Path, payload: Box, ) -> crate::StorageResult<()> { - crate::OBJECT_STORAGE_PUT_TOTAL.increment(1); + crate::metrics::OBJECT_STORAGE_PUT_TOTAL.increment(1); let name = self.blob_name(path); let total_len = payload.len(); let part_num_bytes = 
self.multipart_policy.part_num_bytes(total_len); @@ -372,7 +372,7 @@ impl Storage for AzureBlobStorage { .compat(); let mut body_stream_reader = BufReader::new(chunk_response_body_stream); let num_bytes_copied = tokio::io::copy_buf(&mut body_stream_reader, output).await?; - crate::OBJECT_STORAGE_DOWNLOAD_NUM_BYTES.increment(num_bytes_copied); + crate::metrics::OBJECT_STORAGE_DOWNLOAD_NUM_BYTES.increment(num_bytes_copied); } output.flush().await?; Ok(()) @@ -571,7 +571,7 @@ async fn download_all( segments.push(bytes); } } - crate::OBJECT_STORAGE_DOWNLOAD_NUM_BYTES.increment(total_num_bytes as u64); + crate::metrics::OBJECT_STORAGE_DOWNLOAD_NUM_BYTES.increment(total_num_bytes as u64); Ok(coalesce_segments(segments, total_num_bytes)) } diff --git a/quickwit/quickwit-storage/src/object_storage/error.rs b/quickwit/quickwit-storage/src/object_storage/error.rs index 9c80cbef34a..9b7720b51ec 100644 --- a/quickwit/quickwit-storage/src/object_storage/error.rs +++ b/quickwit/quickwit-storage/src/object_storage/error.rs @@ -24,7 +24,8 @@ use aws_sdk_s3::operation::put_object::PutObjectError; use aws_sdk_s3::operation::upload_part::UploadPartError; use quickwit_metrics::counter; -use crate::{OBJECT_STORAGE_GET_ERRORS_TOTAL, StorageError, StorageErrorKind}; +use crate::metrics::OBJECT_STORAGE_GET_ERRORS_TOTAL; +use crate::{StorageError, StorageErrorKind}; impl From> for StorageError where E: std::error::Error + ToStorageErrorKind + Send + Sync + 'static diff --git a/quickwit/quickwit-storage/src/object_storage/s3_compatible_storage.rs b/quickwit/quickwit-storage/src/object_storage/s3_compatible_storage.rs index d8528a27922..4ac7e3ba3cb 100644 --- a/quickwit/quickwit-storage/src/object_storage/s3_compatible_storage.rs +++ b/quickwit/quickwit-storage/src/object_storage/s3_compatible_storage.rs @@ -301,8 +301,8 @@ impl S3CompatibleObjectStorage { .await .map_err(|io_error| Retry::Permanent(StorageError::from(io_error)))?; - crate::OBJECT_STORAGE_PUT_PARTS.increment(1); - 
crate::OBJECT_STORAGE_UPLOAD_NUM_BYTES.increment(len); + crate::metrics::OBJECT_STORAGE_PUT_PARTS.increment(1); + crate::metrics::OBJECT_STORAGE_UPLOAD_NUM_BYTES.increment(len); self.s3_client .put_object() @@ -434,8 +434,8 @@ impl S3CompatibleObjectStorage { .map_err(Retry::Permanent)?; let md5 = BASE64_STANDARD.encode(part.md5.0); - crate::OBJECT_STORAGE_PUT_PARTS.increment(1); - crate::OBJECT_STORAGE_UPLOAD_NUM_BYTES.increment(part.len()); + crate::metrics::OBJECT_STORAGE_PUT_PARTS.increment(1); + crate::metrics::OBJECT_STORAGE_UPLOAD_NUM_BYTES.increment(part.len()); let upload_part_output = self .s3_client @@ -555,7 +555,7 @@ impl S3CompatibleObjectStorage { let key = self.key(path); let range_str = range_opt.map(|range| format!("bytes={}-{}", range.start, range.end - 1)); - crate::OBJECT_STORAGE_GET_TOTAL.increment(1); + crate::metrics::OBJECT_STORAGE_GET_TOTAL.increment(1); let get_object_output = self .s3_client @@ -648,9 +648,9 @@ impl S3CompatibleObjectStorage { for (path_chunk, delete) in &mut delete_requests_it { let delete_objects_res: StorageResult = aws_retry(&self.retry_params, || async { - crate::OBJECT_STORAGE_BULK_DELETE_REQUESTS_TOTAL.increment(1); + crate::metrics::OBJECT_STORAGE_BULK_DELETE_REQUESTS_TOTAL.increment(1); let _timer = - HistogramTimer::new(&crate::OBJECT_STORAGE_BULK_DELETE_REQUEST_DURATION); + HistogramTimer::new(&crate::metrics::OBJECT_STORAGE_BULK_DELETE_REQUEST_DURATION); self.s3_client .delete_objects() .bucket(self.bucket.clone()) @@ -726,7 +726,7 @@ async fn download_all(byte_stream: ByteStream) -> StorageResult { // `AggregatedBytes::into_bytes` returns the underlying `Bytes` without copying when the body // was received as a single segment, and concatenates into a fresh `Bytes` otherwise. 
let bytes = aggregated.into_bytes(); - crate::OBJECT_STORAGE_DOWNLOAD_NUM_BYTES.increment(bytes.len() as u64); + crate::metrics::OBJECT_STORAGE_DOWNLOAD_NUM_BYTES.increment(bytes.len() as u64); Ok(bytes) } @@ -766,7 +766,7 @@ impl Storage for S3CompatibleObjectStorage { path: &Path, payload: Box<dyn PutPayload>, ) -> crate::StorageResult<()> { - crate::OBJECT_STORAGE_PUT_TOTAL.increment(1); + crate::metrics::OBJECT_STORAGE_PUT_TOTAL.increment(1); let _permit = REQUEST_SEMAPHORE.acquire().await; let key = self.key(path); let total_len = payload.len(); @@ -786,7 +786,7 @@ impl Storage for S3CompatibleObjectStorage { aws_retry(&self.retry_params, || self.get_object(path, None)).await?; let mut body_read = BufReader::new(get_object_output.body.into_async_read()); let num_bytes_copied = tokio::io::copy_buf(&mut body_read, output).await?; - crate::OBJECT_STORAGE_DOWNLOAD_NUM_BYTES.increment(num_bytes_copied); + crate::metrics::OBJECT_STORAGE_DOWNLOAD_NUM_BYTES.increment(num_bytes_copied); output.flush().await?; Ok(()) } @@ -796,8 +796,8 @@ impl Storage for S3CompatibleObjectStorage { let bucket = self.bucket.clone(); let key = self.key(path); let delete_res = aws_retry(&self.retry_params, || async { - crate::OBJECT_STORAGE_DELETE_REQUESTS_TOTAL.increment(1); - let _timer = HistogramTimer::new(&crate::OBJECT_STORAGE_DELETE_REQUEST_DURATION); + crate::metrics::OBJECT_STORAGE_DELETE_REQUESTS_TOTAL.increment(1); + let _timer = HistogramTimer::new(&crate::metrics::OBJECT_STORAGE_DELETE_REQUEST_DURATION); self.s3_client .delete_object() .bucket(&bucket) diff --git a/quickwit/quickwit-storage/src/opendal_storage/base.rs b/quickwit/quickwit-storage/src/opendal_storage/base.rs index 88aec761265..71fb70add3f 100644 --- a/quickwit/quickwit-storage/src/opendal_storage/base.rs +++ b/quickwit/quickwit-storage/src/opendal_storage/base.rs @@ -81,7 +81,7 @@ impl Storage for OpendalStorage { } async fn put(&self, path: &Path, payload: Box<dyn PutPayload>) -> StorageResult<()> { - 
crate::OBJECT_STORAGE_PUT_TOTAL.increment(1); + crate::metrics::OBJECT_STORAGE_PUT_TOTAL.increment(1); let path = path.as_os_str().to_string_lossy(); let mut payload_reader = payload.byte_stream().await?.into_async_read(); @@ -94,7 +94,7 @@ impl Storage for OpendalStorage { .compat_write(); tokio::io::copy(&mut payload_reader, &mut storage_writer).await?; storage_writer.get_mut().close().await?; - crate::OBJECT_STORAGE_UPLOAD_NUM_BYTES.increment(payload.len()); + crate::metrics::OBJECT_STORAGE_UPLOAD_NUM_BYTES.increment(payload.len()); Ok(()) } @@ -108,7 +108,7 @@ impl Storage for OpendalStorage { .await? .compat(); let num_bytes_copied = tokio::io::copy(&mut storage_reader, output).await?; - crate::OBJECT_STORAGE_DOWNLOAD_NUM_BYTES.increment(num_bytes_copied); + crate::metrics::OBJECT_STORAGE_DOWNLOAD_NUM_BYTES.increment(num_bytes_copied); output.flush().await?; Ok(()) } @@ -120,7 +120,7 @@ impl Storage for OpendalStorage { // Unlike other object store implementations, in flight requests are // recorded before issuing the query to the object store. let _inflight_guards = object_storage_get_slice_in_flight_guards(size); - crate::OBJECT_STORAGE_GET_TOTAL.increment(1); + crate::metrics::OBJECT_STORAGE_GET_TOTAL.increment(1); // `Buffer::to_bytes` is zero-copy when the underlying buffer is contiguous, and coalesces // into a single `Bytes` otherwise — avoiding the extra `Vec` round-trip `to_vec` would // perform. 
@@ -153,8 +153,8 @@ impl Storage for OpendalStorage { async fn delete(&self, path: &Path) -> StorageResult<()> { let path = path.as_os_str().to_string_lossy(); - crate::OBJECT_STORAGE_DELETE_REQUESTS_TOTAL.increment(1); - let _timer = HistogramTimer::new(&crate::OBJECT_STORAGE_DELETE_REQUEST_DURATION); + crate::metrics::OBJECT_STORAGE_DELETE_REQUESTS_TOTAL.increment(1); + let _timer = HistogramTimer::new(&crate::metrics::OBJECT_STORAGE_DELETE_REQUEST_DURATION); self.op.delete(&path).await?; Ok(()) } @@ -168,9 +168,9 @@ impl Storage for OpendalStorage { if storage_info.name().starts_with("sample-bucket") && storage_info.scheme() == "gcs" { let mut bulk_error = BulkDeleteError::default(); for (index, path) in paths.iter().enumerate() { - crate::OBJECT_STORAGE_BULK_DELETE_REQUESTS_TOTAL.increment(1); + crate::metrics::OBJECT_STORAGE_BULK_DELETE_REQUESTS_TOTAL.increment(1); let _timer = - HistogramTimer::new(&crate::OBJECT_STORAGE_BULK_DELETE_REQUEST_DURATION); + HistogramTimer::new(&crate::metrics::OBJECT_STORAGE_BULK_DELETE_REQUEST_DURATION); let result = self.op.delete(&path.as_os_str().to_string_lossy()).await; if let Err(err) = result { let storage_error_kind = err.kind(); diff --git a/quickwit/quickwit-storage/src/split_cache/mod.rs b/quickwit/quickwit-storage/src/split_cache/mod.rs index 2eca3c1c2a7..ed11b1d7217 100644 --- a/quickwit/quickwit-storage/src/split_cache/mod.rs +++ b/quickwit/quickwit-storage/src/split_cache/mod.rs @@ -217,7 +217,7 @@ impl SplitCacheBackingStorage { } fn record_hit_metrics(&self, result_opt: Option<&OwnedBytes>) { - let split_metrics = &crate::SEARCHER_SPLIT_CACHE.cache_metrics; + let split_metrics = &crate::metrics::SEARCHER_SPLIT_CACHE.cache_metrics; if let Some(result) = result_opt { split_metrics.hits_num_items.increment(1); split_metrics.hits_num_bytes.increment(result.len() as u64); diff --git a/quickwit/quickwit-storage/src/timeout_and_retry_storage.rs b/quickwit/quickwit-storage/src/timeout_and_retry_storage.rs index 
1841c0b0e0e..99c319644c0 100644 --- a/quickwit/quickwit-storage/src/timeout_and_retry_storage.rs +++ b/quickwit/quickwit-storage/src/timeout_and_retry_storage.rs @@ -98,9 +98,9 @@ impl Storage for TimeoutAndRetryStorage { match tokio::time::timeout(timeout_duration, get_slice_fut).await { Ok(result) => { match attempt_id { - 0 => crate::GET_SLICE_TIMEOUT_SUCCESS_AFTER_0_TIMEOUT.increment(1), - 1 => crate::GET_SLICE_TIMEOUT_SUCCESS_AFTER_1_TIMEOUT.increment(1), - _ => crate::GET_SLICE_TIMEOUT_SUCCESS_AFTER_2_PLUS_TIMEOUT.increment(1), + 0 => crate::metrics::GET_SLICE_TIMEOUT_SUCCESS_AFTER_0_TIMEOUT.increment(1), + 1 => crate::metrics::GET_SLICE_TIMEOUT_SUCCESS_AFTER_1_TIMEOUT.increment(1), + _ => crate::metrics::GET_SLICE_TIMEOUT_SUCCESS_AFTER_2_PLUS_TIMEOUT.increment(1), } return result; } @@ -111,7 +111,7 @@ impl Storage for TimeoutAndRetryStorage { } } rate_limited_warn!(limit_per_min=60, num_bytes=num_bytes, path=%path.display(), "all get_slice attempts timeouted"); - crate::GET_SLICE_TIMEOUT_ALL_TIMEOUTS.increment(1); + crate::metrics::GET_SLICE_TIMEOUT_ALL_TIMEOUTS.increment(1); return Err( StorageErrorKind::Timeout.with_error(anyhow::anyhow!("internal timeout on get_slice")) ); From 5b015bed7f81b02fab00593f996c339290bff935 Mon Sep 17 00:00:00 2001 From: Luca Cominardi Date: Tue, 5 May 2026 18:02:11 +0200 Subject: [PATCH 50/54] Add rustdoc to storage metrics and tighten visibility Remove #![allow(missing_docs)], add proper rustdoc to CacheMetrics, SingleCacheMetrics, their methods, and the four public cache statics. Tighten field visibility to pub(crate) where only internal access is needed. 
Co-authored-by: Cursor --- quickwit/quickwit-storage/src/metrics.rs | 65 ++++++++++++++++-------- 1 file changed, 45 insertions(+), 20 deletions(-) diff --git a/quickwit/quickwit-storage/src/metrics.rs b/quickwit/quickwit-storage/src/metrics.rs index 1668209c641..78aecaa7ccf 100644 --- a/quickwit/quickwit-storage/src/metrics.rs +++ b/quickwit/quickwit-storage/src/metrics.rs @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -// See https://prometheus.io/docs/practices/naming/ -#![allow(missing_docs)] +//! Storage and cache metrics following +//! [Prometheus naming conventions](https://prometheus.io/docs/practices/naming/). use std::collections::HashMap; use std::sync::{LazyLock, RwLock}; @@ -111,13 +111,14 @@ pub(crate) static OBJECT_STORAGE_GET_SLICE_IN_FLIGHT_COUNT: LazyLock = La ) }); -pub(crate) static OBJECT_STORAGE_GET_SLICE_IN_FLIGHT_NUM_BYTES: LazyLock = LazyLock::new(|| { - gauge!( - name: "object_storage_get_slice_in_flight_num_bytes", - description: "Memory allocated for GetObject requests that are still in progress.", - subsystem: "storage", - ) -}); +pub(crate) static OBJECT_STORAGE_GET_SLICE_IN_FLIGHT_NUM_BYTES: LazyLock = + LazyLock::new(|| { + gauge!( + name: "object_storage_get_slice_in_flight_num_bytes", + description: "Memory allocated for GetObject requests that are still in progress.", + subsystem: "storage", + ) + }); pub(crate) static OBJECT_STORAGE_PUT_TOTAL: LazyLock = LazyLock::new(|| { counter!( @@ -151,25 +152,39 @@ pub(crate) static OBJECT_STORAGE_UPLOAD_NUM_BYTES: LazyLock = LazyLock: ) }); -/// Counters associated to a cache. +/// Metrics for a named cache component (e.g. "shortlived", "splitfooter"). +/// +/// Each `CacheMetrics` instance holds a set of counters and gauges scoped to a +/// `component_name` label. It also supports virtual sub-caches keyed by +/// [`CacheConfig`], which add `capacity` and `policy` labels. 
pub struct CacheMetrics { - pub component_name: String, - pub cache_metrics: SingleCacheMetrics, + component_name: String, + pub(crate) cache_metrics: SingleCacheMetrics, virtual_caches_metrics: RwLock<HashMap<CacheConfig, SingleCacheMetrics>>, } +/// Per-cache counters and gauges tracking items in cache, hits, misses, and +/// evictions. #[derive(Clone)] pub struct SingleCacheMetrics { - pub in_cache_count: Gauge, - pub in_cache_num_bytes: Gauge, - pub hits_num_items: Counter, - pub hits_num_bytes: Counter, - pub misses_num_items: Counter, - pub evict_num_items: Counter, - pub evict_num_bytes: Counter, + /// Current number of items stored in the cache. + pub(crate) in_cache_count: Gauge, + /// Current number of bytes stored in the cache. + pub(crate) in_cache_num_bytes: Gauge, + /// Total number of cache hits (items). + pub(crate) hits_num_items: Counter, + /// Total number of cache hit bytes. + pub(crate) hits_num_bytes: Counter, + /// Total number of cache misses (items). + pub(crate) misses_num_items: Counter, + /// Total number of evicted items. + pub(crate) evict_num_items: Counter, + /// Total number of evicted bytes. + pub(crate) evict_num_bytes: Counter, } impl CacheMetrics { + /// Creates a new `CacheMetrics` for the given component name. pub fn for_component(component_name: &str) -> Self { let component_name = component_name.to_string(); let labels = label_values!(COMPONENT_NAME => component_name.clone()); @@ -188,7 +203,11 @@ impl CacheMetrics { } } - pub fn virtual_cache(&self, config: &CacheConfig) -> SingleCacheMetrics { + /// Returns metrics for a virtual sub-cache identified by `config`. + /// + /// Virtual caches share the same parent component but add `capacity` and + /// `policy` labels. Instances are cached and reused across calls. 
+ pub(crate) fn virtual_cache(&self, config: &CacheConfig) -> SingleCacheMetrics { if let Some(virtual_cache_metrics) = self.virtual_caches_metrics.read().unwrap().get(config) { return virtual_cache_metrics.clone(); @@ -337,22 +356,28 @@ pub(crate) static FAST_FIELD_CACHE: LazyLock = pub(crate) static FD_CACHE_METRICS: LazyLock = LazyLock::new(|| CacheMetrics::for_component("fd")); +/// Cache metrics for partial-request byte ranges (used by leaf search caches). pub static PARTIAL_REQUEST_CACHE: LazyLock = LazyLock::new(|| CacheMetrics::for_component("partial_request")); +/// Cache metrics for predicate-evaluated content (used by leaf search caches). pub static PREDICATE_CACHE: LazyLock = LazyLock::new(|| CacheMetrics::for_component("predicate")); pub(crate) static SEARCHER_SPLIT_CACHE: LazyLock = LazyLock::new(|| CacheMetrics::for_component("searcher_split")); +/// Cache metrics for short-lived byte range caches (used during leaf search +/// and caching directory warmup). pub static SHORTLIVED_CACHE: LazyLock = LazyLock::new(|| CacheMetrics::for_component("shortlived")); +/// Cache metrics for split footer caches (used to cache split metadata). pub static SPLIT_FOOTER_CACHE: LazyLock = LazyLock::new(|| CacheMetrics::for_component("splitfooter")); #[cfg(test)] +/// Cache metrics for tests. 
pub static CACHE_METRICS_FOR_TESTS: LazyLock = LazyLock::new(|| CacheMetrics::for_component("fortest")); From 046be15cf8bf76340a997a725bc424463f89d213 Mon Sep 17 00:00:00 2001 From: Luca Cominardi Date: Tue, 5 May 2026 18:16:55 +0200 Subject: [PATCH 51/54] chore: cargo fmt --- quickwit/quickwit-common/src/tower/metrics.rs | 3 +- .../src/model/shard_table.rs | 3 +- .../quickwit-indexing/src/actors/uploader.rs | 31 +++---- .../quickwit-ingest/src/ingest_api_service.rs | 3 +- .../quickwit-ingest/src/ingest_v2/ingester.rs | 27 ++++-- quickwit/quickwit-jaeger/src/lib.rs | 14 +-- .../src/actors/garbage_collector.rs | 12 ++- .../benches/quickwit_metrics.rs | 90 ++++++++++++------- .../quickwit-metrics/examples/http_service.rs | 3 +- .../quickwit-opentelemetry/src/otlp/traces.rs | 3 +- .../src/ingest/processor.rs | 3 +- quickwit/quickwit-search/src/leaf_cache.rs | 5 +- quickwit/quickwit-search/src/list_terms.rs | 3 +- quickwit/quickwit-serve/src/rest.rs | 4 +- .../src/cache/quickwit_cache.rs | 5 +- .../src/file_descriptor_cache.rs | 3 +- quickwit/quickwit-storage/src/lib.rs | 1 + .../object_storage/s3_compatible_storage.rs | 8 +- .../src/opendal_storage/base.rs | 5 +- .../src/timeout_and_retry_storage.rs | 3 +- 20 files changed, 131 insertions(+), 98 deletions(-) diff --git a/quickwit/quickwit-common/src/tower/metrics.rs b/quickwit/quickwit-common/src/tower/metrics.rs index d8cb4d13599..756d7c30213 100644 --- a/quickwit/quickwit-common/src/tower/metrics.rs +++ b/quickwit/quickwit-common/src/tower/metrics.rs @@ -165,8 +165,7 @@ impl PinnedDrop for ResponseFuture { } impl Future for ResponseFuture -where - F: Future>, +where F: Future> { type Output = Result; diff --git a/quickwit/quickwit-control-plane/src/model/shard_table.rs b/quickwit/quickwit-control-plane/src/model/shard_table.rs index ba95d629735..e2909712844 100644 --- a/quickwit/quickwit-control-plane/src/model/shard_table.rs +++ b/quickwit/quickwit-control-plane/src/model/shard_table.rs @@ -465,8 +465,7 @@ impl 
ShardTable { if index_label == index_id { let shard_stats = table_entry.shards_stats(); let labels = label_values!(INDEX_ID_LABELS => index_label.to_string()); - gauge!(parent: OPEN_SHARDS, labels: [labels]) - .set(shard_stats.num_open_shards as f64); + gauge!(parent: OPEN_SHARDS, labels: [labels]).set(shard_stats.num_open_shards as f64); gauge!(parent: CLOSED_SHARDS, labels: [labels]) .set(shard_stats.num_closed_shards as f64); return; diff --git a/quickwit/quickwit-indexing/src/actors/uploader.rs b/quickwit/quickwit-indexing/src/actors/uploader.rs index 49a6124fd95..b432622e739 100644 --- a/quickwit/quickwit-indexing/src/actors/uploader.rs +++ b/quickwit/quickwit-indexing/src/actors/uploader.rs @@ -200,21 +200,22 @@ impl Uploader { ctx: &ActorContext, ) -> anyhow::Result> { let _guard = ctx.protect_zone(); - let (concurrent_upload_permits_once_cell, concurrent_upload_permits_gauge) = - match self.uploader_type { - UploaderType::IndexUploader => ( - &CONCURRENT_UPLOAD_PERMITS_INDEX, - gauge!(parent: AVAILABLE_CONCURRENT_UPLOAD_PERMITS, labels: [label_values!(COMPONENT => "indexer")]), - ), - UploaderType::MergeUploader => ( - &CONCURRENT_UPLOAD_PERMITS_MERGE, - gauge!(parent: AVAILABLE_CONCURRENT_UPLOAD_PERMITS, labels: [label_values!(COMPONENT => "merger")]), - ), - UploaderType::DeleteUploader => ( - &CONCURRENT_UPLOAD_PERMITS_MERGE, - gauge!(parent: AVAILABLE_CONCURRENT_UPLOAD_PERMITS, labels: [label_values!(COMPONENT => "merger")]), - ), - }; + let (concurrent_upload_permits_once_cell, concurrent_upload_permits_gauge) = match self + .uploader_type + { + UploaderType::IndexUploader => ( + &CONCURRENT_UPLOAD_PERMITS_INDEX, + gauge!(parent: AVAILABLE_CONCURRENT_UPLOAD_PERMITS, labels: [label_values!(COMPONENT => "indexer")]), + ), + UploaderType::MergeUploader => ( + &CONCURRENT_UPLOAD_PERMITS_MERGE, + gauge!(parent: AVAILABLE_CONCURRENT_UPLOAD_PERMITS, labels: [label_values!(COMPONENT => "merger")]), + ), + UploaderType::DeleteUploader => ( + 
&CONCURRENT_UPLOAD_PERMITS_MERGE, + gauge!(parent: AVAILABLE_CONCURRENT_UPLOAD_PERMITS, labels: [label_values!(COMPONENT => "merger")]), + ), + }; let concurrent_upload_permits = concurrent_upload_permits_once_cell .get_or_init(|| Semaphore::const_new(self.max_concurrent_split_uploads)); concurrent_upload_permits_gauge.set(concurrent_upload_permits.available_permits() as f64); diff --git a/quickwit/quickwit-ingest/src/ingest_api_service.rs b/quickwit/quickwit-ingest/src/ingest_api_service.rs index 4a6de0dad63..ba625417a6a 100644 --- a/quickwit/quickwit-ingest/src/ingest_api_service.rs +++ b/quickwit/quickwit-ingest/src/ingest_api_service.rs @@ -203,8 +203,7 @@ impl IngestApiService { num_docs += batch_num_docs; let labels = label_values!(VALIDITY => "valid"); - counter!(parent: DOCS_BYTES_TOTAL, labels: [labels]) - .increment(batch_num_bytes as u64); + counter!(parent: DOCS_BYTES_TOTAL, labels: [labels]).increment(batch_num_bytes as u64); counter!(parent: DOCS_TOTAL, labels: [labels]).increment(batch_num_docs as u64); } // TODO we could fsync here and disable autosync to have better i/o perfs. 
diff --git a/quickwit/quickwit-ingest/src/ingest_v2/ingester.rs b/quickwit/quickwit-ingest/src/ingest_v2/ingester.rs index 07798d103ae..57a03a2ac6c 100644 --- a/quickwit/quickwit-ingest/src/ingest_v2/ingester.rs +++ b/quickwit/quickwit-ingest/src/ingest_v2/ingester.rs @@ -336,7 +336,8 @@ impl Ingester { counter!( parent: RESET_SHARDS_OPERATIONS_TOTAL, labels: [label_values!(STATUS => "success")], - ).increment(1); + ) + .increment(1); let wal_usage = state_guard.mrecordlog.resource_usage(); report_wal_usage(wal_usage); @@ -347,7 +348,8 @@ impl Ingester { counter!( parent: RESET_SHARDS_OPERATIONS_TOTAL, labels: [label_values!(STATUS => "error")], - ).increment(1); + ) + .increment(1); } Err(_) => { warn!("advise reset shards request timed out"); @@ -355,7 +357,8 @@ impl Ingester { counter!( parent: RESET_SHARDS_OPERATIONS_TOTAL, labels: [label_values!(STATUS => "timeout")], - ).increment(1); + ) + .increment(1); } }; // We still hold the permit while sleeping so we effectively rate limit the reset shards @@ -571,11 +574,13 @@ impl Ingester { counter!( parent: DOCS_TOTAL, labels: [label_values!(VALIDITY => "invalid")], - ).increment(parse_failures.len() as u64); + ) + .increment(parse_failures.len() as u64); counter!( parent: DOCS_BYTES_TOTAL, labels: [label_values!(VALIDITY => "invalid")], - ).increment(original_batch_num_bytes); + ) + .increment(original_batch_num_bytes); let persist_success = PersistSuccess { subrequest_id: subrequest.subrequest_id, index_uid: subrequest.index_uid, @@ -592,20 +597,24 @@ impl Ingester { counter!( parent: DOCS_TOTAL, labels: [label_values!(VALIDITY => "valid")], - ).increment(valid_doc_batch.num_docs() as u64); + ) + .increment(valid_doc_batch.num_docs() as u64); counter!( parent: DOCS_BYTES_TOTAL, labels: [label_values!(VALIDITY => "valid")], - ).increment(valid_doc_batch.num_bytes() as u64); + ) + .increment(valid_doc_batch.num_bytes() as u64); if !parse_failures.is_empty() { counter!( parent: DOCS_TOTAL, labels: 
[label_values!(VALIDITY => "invalid")], - ).increment(parse_failures.len() as u64); + ) + .increment(parse_failures.len() as u64); counter!( parent: DOCS_BYTES_TOTAL, labels: [label_values!(VALIDITY => "invalid")], - ).increment(original_batch_num_bytes - valid_doc_batch.num_bytes() as u64); + ) + .increment(original_batch_num_bytes - valid_doc_batch.num_bytes() as u64); } let valid_batch_num_bytes = valid_doc_batch.num_bytes() as u64; shard.rate_meter.update(valid_batch_num_bytes); diff --git a/quickwit/quickwit-jaeger/src/lib.rs b/quickwit/quickwit-jaeger/src/lib.rs index de770e29e5d..5371fb1fd4f 100644 --- a/quickwit/quickwit-jaeger/src/lib.rs +++ b/quickwit/quickwit-jaeger/src/lib.rs @@ -423,16 +423,14 @@ impl JaegerService { let labels = label_values!( OPERATION_INDEX_LABELS => operation_name, OTEL_TRACES_INDEX_ID ); - counter!(parent: FETCHED_TRACES_TOTAL, labels: [labels]) - .increment(num_traces); + counter!(parent: FETCHED_TRACES_TOTAL, labels: [labels]).increment(num_traces); let elapsed = request_start.elapsed().as_secs_f64(); let err_labels = label_values!( OPERATION_INDEX_ERROR_LABELS => operation_name, OTEL_TRACES_INDEX_ID, "false" ); - histogram!(parent: REQUEST_DURATION_SECONDS, labels: [err_labels]) - .record(elapsed); + histogram!(parent: REQUEST_DURATION_SECONDS, labels: [err_labels]).record(elapsed); }); Ok(ReceiverStream::new(rx)) } @@ -1075,9 +1073,7 @@ fn collect_trace_ids( #[allow(clippy::result_large_err)] fn json_deserialize<'a, T>(json: &'a str, label: &'static str) -> Result -where - T: Deserialize<'a>, -{ +where T: Deserialize<'a> { match serde_json::from_str(json) { Ok(deserialized) => Ok(deserialized), Err(error) => { @@ -1091,9 +1087,7 @@ where #[allow(clippy::result_large_err)] fn postcard_deserialize<'a, T>(json: &'a [u8], label: &'static str) -> Result -where - T: Deserialize<'a>, -{ +where T: Deserialize<'a> { match postcard::from_bytes(json) { Ok(deserialized) => Ok(deserialized), Err(error) => { diff --git 
a/quickwit/quickwit-janitor/src/actors/garbage_collector.rs b/quickwit/quickwit-janitor/src/actors/garbage_collector.rs index a043a93154f..fd40d7e5423 100644 --- a/quickwit/quickwit-janitor/src/actors/garbage_collector.rs +++ b/quickwit/quickwit-janitor/src/actors/garbage_collector.rs @@ -219,7 +219,8 @@ impl GarbageCollector { counter!( parent: GC_RUNS, labels: [labels_split, label_values!(labels_result => "success")], - ).increment(1); + ) + .increment(1); GcRunResult { num_deleted_splits: removal_info.removed_split_entries.len(), num_deleted_bytes: removal_info @@ -241,7 +242,8 @@ impl GarbageCollector { counter!( parent: GC_RUNS, labels: [labels_split, label_values!(labels_result => "error")], - ).increment(1); + ) + .increment(1); error!(error=?error, "failed to run garbage collection"); GcRunResult::failed() } @@ -275,7 +277,8 @@ impl GarbageCollector { counter!( parent: GC_RUNS, labels: [labels_split, label_values!(labels_result => "success")], - ).increment(1); + ) + .increment(1); GcRunResult { num_deleted_splits: removal_info.removed_split_count(), num_deleted_bytes: removal_info.removed_bytes() as usize, @@ -293,7 +296,8 @@ impl GarbageCollector { counter!( parent: GC_RUNS, labels: [labels_split, label_values!(labels_result => "error")], - ).increment(1); + ) + .increment(1); error!(error=?error, "failed to run parquet garbage collection"); GcRunResult::failed() } diff --git a/quickwit/quickwit-metrics/benches/quickwit_metrics.rs b/quickwit/quickwit-metrics/benches/quickwit_metrics.rs index 09bfd27f4ee..22c747ebb63 100644 --- a/quickwit/quickwit-metrics/benches/quickwit_metrics.rs +++ b/quickwit/quickwit-metrics/benches/quickwit_metrics.rs @@ -113,7 +113,8 @@ fn on_the_fly_counter(c: &mut Criterion) { name: "otf_counter", description: "bench counter", subsystem: "bench" - ).increment(1); + ) + .increment(1); }); }); @@ -124,7 +125,8 @@ fn on_the_fly_counter(c: &mut Criterion) { description: "bench counter", subsystem: "bench", "service" => "api" - 
).increment(1); + ) + .increment(1); }); }); @@ -137,7 +139,8 @@ fn on_the_fly_counter(c: &mut Criterion) { "service" => "api", "method" => "GET", "endpoint" => "/health" - ).increment(1); + ) + .increment(1); }); }); @@ -152,7 +155,8 @@ fn on_the_fly_counter(c: &mut Criterion) { "endpoint" => "/health", "status" => "200", "region" => "us-east-1" - ).increment(1); + ) + .increment(1); }); }); @@ -170,7 +174,8 @@ fn on_the_fly_gauge(c: &mut Criterion) { name: "otf_gauge", description: "bench gauge", subsystem: "bench" - ).set(42.0); + ) + .set(42.0); }); }); @@ -181,7 +186,8 @@ fn on_the_fly_gauge(c: &mut Criterion) { description: "bench gauge", subsystem: "bench", "service" => "api" - ).set(42.0); + ) + .set(42.0); }); }); @@ -194,7 +200,8 @@ fn on_the_fly_gauge(c: &mut Criterion) { "service" => "api", "method" => "GET", "endpoint" => "/health" - ).set(42.0); + ) + .set(42.0); }); }); @@ -209,7 +216,8 @@ fn on_the_fly_gauge(c: &mut Criterion) { "endpoint" => "/health", "status" => "200", "region" => "us-east-1" - ).set(42.0); + ) + .set(42.0); }); }); @@ -228,7 +236,8 @@ fn on_the_fly_histogram(c: &mut Criterion) { description: "bench histogram", subsystem: "bench", buckets: vec![0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0] - ).record(0.123); + ) + .record(0.123); }); }); @@ -240,7 +249,8 @@ fn on_the_fly_histogram(c: &mut Criterion) { subsystem: "bench", buckets: vec![0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0], "service" => "api" - ).record(0.123); + ) + .record(0.123); }); }); @@ -254,7 +264,8 @@ fn on_the_fly_histogram(c: &mut Criterion) { "service" => "api", "method" => "GET", "endpoint" => "/health" - ).record(0.123); + ) + .record(0.123); }); }); @@ -270,7 +281,8 @@ fn on_the_fly_histogram(c: &mut Criterion) { "endpoint" => "/health", "status" => "200", "region" => "us-east-1" - ).record(0.123); + ) + .record(0.123); }); }); @@ -510,7 +522,8 @@ fn parent_counter(c: &mut Criterion) { "method" => "GET", "endpoint" => "/health", "status" => "200" - 
).increment(1); + ) + .increment(1); }); }); @@ -536,7 +549,8 @@ fn parent_gauge(c: &mut Criterion) { "method" => "GET", "endpoint" => "/health", "status" => "200" - ).set(42.0); + ) + .set(42.0); }); }); @@ -562,7 +576,8 @@ fn parent_histogram(c: &mut Criterion) { "method" => "GET", "endpoint" => "/health", "status" => "200" - ).record(0.123); + ) + .record(0.123); }); }); @@ -744,7 +759,8 @@ fn labels_counter(c: &mut Criterion) { counter!( parent: PARENT_COUNTER, labels: [label_values!(LABELS_3 => "GET", "/health", "200")] - ).increment(1); + ) + .increment(1); }); }); @@ -753,7 +769,8 @@ fn labels_counter(c: &mut Criterion) { counter!( parent: PARENT_COUNTER, labels: [label_values!(LABELS_1 => "GET".to_string())] - ).increment(1); + ) + .increment(1); }); }); @@ -765,8 +782,7 @@ fn labels_counter(c: &mut Criterion) { b.iter(|| { let m = methods[idx % methods.len()]; idx += 1; - counter!(parent: PARENT_COUNTER, labels: [label_values!(LABELS_1 => m)]) - .increment(1); + counter!(parent: PARENT_COUNTER, labels: [label_values!(LABELS_1 => m)]).increment(1); }); }); @@ -781,8 +797,7 @@ fn labels_gauge(c: &mut Criterion) { group.bench_function("static/1", |b| { b.iter(|| { - gauge!(parent: PARENT_GAUGE, labels: [label_values!(LABELS_1 => "GET")]) - .set(42.0); + gauge!(parent: PARENT_GAUGE, labels: [label_values!(LABELS_1 => "GET")]).set(42.0); }); }); @@ -791,7 +806,8 @@ fn labels_gauge(c: &mut Criterion) { gauge!( parent: PARENT_GAUGE, labels: [label_values!(LABELS_3 => "GET", "/health", "200")] - ).set(42.0); + ) + .set(42.0); }); }); @@ -816,7 +832,8 @@ fn labels_histogram(c: &mut Criterion) { histogram!( parent: PARENT_HISTOGRAM, labels: [label_values!(LABELS_3 => "GET", "/health", "200")] - ).record(0.123); + ) + .record(0.123); }); }); @@ -843,7 +860,8 @@ fn composite_counter(c: &mut Criterion) { counter!( parent: PARENT_COUNTER, labels: [label_values!(COMP_ALL_3 => "GET", "/health", "200")], - ).increment(1); + ) + .increment(1); }); }); @@ -853,7 +871,8 @@ fn 
composite_counter(c: &mut Criterion) { label_values!(COMP_METHOD => "GET"), label_values!(COMP_ENDPOINT => "/health"), label_values!(COMP_STATUS => "200"), - ]).increment(1); + ]) + .increment(1); }); }); @@ -862,7 +881,8 @@ fn composite_counter(c: &mut Criterion) { counter!(parent: PARENT_COUNTER, labels: [ label_values!(COMP_METHOD => "GET"), label_values!(COMP_ENDPOINT => "/health"), - ]).increment(1); + ]) + .increment(1); }); }); @@ -880,7 +900,8 @@ fn composite_gauge(c: &mut Criterion) { gauge!( parent: PARENT_GAUGE, labels: [label_values!(COMP_ALL_3 => "GET", "/health", "200")], - ).set(42.0); + ) + .set(42.0); }); }); @@ -890,7 +911,8 @@ fn composite_gauge(c: &mut Criterion) { label_values!(COMP_METHOD => "GET"), label_values!(COMP_ENDPOINT => "/health"), label_values!(COMP_STATUS => "200"), - ]).set(42.0); + ]) + .set(42.0); }); }); @@ -899,7 +921,8 @@ fn composite_gauge(c: &mut Criterion) { gauge!(parent: PARENT_GAUGE, labels: [ label_values!(COMP_METHOD => "GET"), label_values!(COMP_ENDPOINT => "/health"), - ]).set(42.0); + ]) + .set(42.0); }); }); @@ -917,7 +940,8 @@ fn composite_histogram(c: &mut Criterion) { histogram!( parent: PARENT_HISTOGRAM, labels: [label_values!(COMP_ALL_3 => "GET", "/health", "200")], - ).record(0.123); + ) + .record(0.123); }); }); @@ -927,7 +951,8 @@ fn composite_histogram(c: &mut Criterion) { label_values!(COMP_METHOD => "GET"), label_values!(COMP_ENDPOINT => "/health"), label_values!(COMP_STATUS => "200"), - ]).record(0.123); + ]) + .record(0.123); }); }); @@ -936,7 +961,8 @@ fn composite_histogram(c: &mut Criterion) { histogram!(parent: PARENT_HISTOGRAM, labels: [ label_values!(COMP_METHOD => "GET"), label_values!(COMP_ENDPOINT => "/health"), - ]).record(0.123); + ]) + .record(0.123); }); }); diff --git a/quickwit/quickwit-metrics/examples/http_service.rs b/quickwit/quickwit-metrics/examples/http_service.rs index aea0f8c5305..2b4196c4b28 100644 --- a/quickwit/quickwit-metrics/examples/http_service.rs +++ 
b/quickwit/quickwit-metrics/examples/http_service.rs @@ -139,8 +139,7 @@ fn handle_request(method: &'static str, path: &'static str, region: &'static str } else { "200" }; - counter!(parent: HTTP_REQUESTS_BY_METHOD, "path" => path, "status" => status) - .increment(1); + counter!(parent: HTTP_REQUESTS_BY_METHOD, "path" => path, "status" => status).increment(1); let conn_gauge = gauge!(parent: HTTP_ACTIVE_CONNECTIONS_BY_REGION, "method" => method); { diff --git a/quickwit/quickwit-opentelemetry/src/otlp/traces.rs b/quickwit/quickwit-opentelemetry/src/otlp/traces.rs index 26c200f05b1..56a3988dec2 100644 --- a/quickwit/quickwit-opentelemetry/src/otlp/traces.rs +++ b/quickwit/quickwit-opentelemetry/src/otlp/traces.rs @@ -789,8 +789,7 @@ impl OtlpGrpcTracesService { } }; let elapsed = start.elapsed().as_secs_f64(); - let error_labels = - label_values!(OTLP_GRPC_ERROR_LABELS => "trace", index_id, "grpc", "protobuf", is_error); + let error_labels = label_values!(OTLP_GRPC_ERROR_LABELS => "trace", index_id, "grpc", "protobuf", is_error); histogram!(parent: REQUEST_DURATION_SECONDS, labels: [error_labels]).record(elapsed); export_res diff --git a/quickwit/quickwit-parquet-engine/src/ingest/processor.rs b/quickwit/quickwit-parquet-engine/src/ingest/processor.rs index 6b6b62688e6..6963639e491 100644 --- a/quickwit/quickwit-parquet-engine/src/ingest/processor.rs +++ b/quickwit/quickwit-parquet-engine/src/ingest/processor.rs @@ -79,8 +79,7 @@ impl ParquetIngestProcessor { }; if let Err(e) = self.validate_schema(&batch) { - counter!(parent: ERRORS_TOTAL, labels: [labels_kind, labels_operation]) - .increment(1); + counter!(parent: ERRORS_TOTAL, labels: [labels_kind, labels_operation]).increment(1); return Err(e); } diff --git a/quickwit/quickwit-search/src/leaf_cache.rs b/quickwit/quickwit-search/src/leaf_cache.rs index 2407d2031c5..2d64bde7aed 100644 --- a/quickwit/quickwit-search/src/leaf_cache.rs +++ b/quickwit/quickwit-search/src/leaf_cache.rs @@ -195,7 +195,10 @@ pub struct 
PredicateCacheImpl { impl PredicateCacheImpl { pub fn new(config: &CacheConfig) -> Self { PredicateCacheImpl { - content: MemorySizedCache::from_config(config, &quickwit_storage::metrics::PREDICATE_CACHE), + content: MemorySizedCache::from_config( + config, + &quickwit_storage::metrics::PREDICATE_CACHE, + ), } } } diff --git a/quickwit/quickwit-search/src/list_terms.rs b/quickwit/quickwit-search/src/list_terms.rs index 8b6724bd900..ffc1b12545c 100644 --- a/quickwit/quickwit-search/src/list_terms.rs +++ b/quickwit/quickwit-search/src/list_terms.rs @@ -216,7 +216,8 @@ async fn leaf_list_terms_single_split( storage: Arc, split: SplitIdAndFooterOffsets, ) -> crate::Result { - let cache = ByteRangeCache::with_infinite_capacity(&quickwit_storage::metrics::SHORTLIVED_CACHE); + let cache = + ByteRangeCache::with_infinite_capacity(&quickwit_storage::metrics::SHORTLIVED_CACHE); let (index, _) = open_index_with_caches(searcher_context, storage, &split, None, Some(cache)).await?; let split_schema = index.schema(); diff --git a/quickwit/quickwit-serve/src/rest.rs b/quickwit/quickwit-serve/src/rest.rs index af941ab4bc2..e6b08bbda9a 100644 --- a/quickwit/quickwit-serve/src/rest.rs +++ b/quickwit/quickwit-serve/src/rest.rs @@ -106,9 +106,7 @@ impl CompressionPredicate { impl Predicate for CompressionPredicate { fn should_compress(&self, response: &http::Response) -> bool - where - B: http_body::Body, - { + where B: http_body::Body { if let Some(size_above) = self.size_above_opt { size_above.should_compress(response) } else { diff --git a/quickwit/quickwit-storage/src/cache/quickwit_cache.rs b/quickwit/quickwit-storage/src/cache/quickwit_cache.rs index c89cb1c3db1..f0da1266164 100644 --- a/quickwit/quickwit-storage/src/cache/quickwit_cache.rs +++ b/quickwit/quickwit-storage/src/cache/quickwit_cache.rs @@ -19,10 +19,9 @@ use std::sync::Arc; use async_trait::async_trait; use quickwit_config::CacheConfig; -use crate::cache::{MemorySizedCache, StorageCache}; -use 
crate::metrics::CacheMetrics; -use crate::metrics::FAST_FIELD_CACHE; use crate::OwnedBytes; +use crate::cache::{MemorySizedCache, StorageCache}; +use crate::metrics::{CacheMetrics, FAST_FIELD_CACHE}; const FULL_SLICE: Range = 0..usize::MAX; diff --git a/quickwit/quickwit-storage/src/file_descriptor_cache.rs b/quickwit/quickwit-storage/src/file_descriptor_cache.rs index b873a47375a..414bf27e039 100644 --- a/quickwit/quickwit-storage/src/file_descriptor_cache.rs +++ b/quickwit/quickwit-storage/src/file_descriptor_cache.rs @@ -23,8 +23,7 @@ use tantivy::directory::OwnedBytes; use tokio::sync::{OwnedSemaphorePermit, Semaphore}; use ulid::Ulid; -use crate::metrics::FD_CACHE_METRICS; -use crate::metrics::SingleCacheMetrics; +use crate::metrics::{FD_CACHE_METRICS, SingleCacheMetrics}; pub struct FileDescriptorCache { fd_cache: Mutex>, diff --git a/quickwit/quickwit-storage/src/lib.rs b/quickwit/quickwit-storage/src/lib.rs index 0e1b1e376d4..fcb67828b1b 100644 --- a/quickwit/quickwit-storage/src/lib.rs +++ b/quickwit/quickwit-storage/src/lib.rs @@ -33,6 +33,7 @@ mod storage; mod timeout_and_retry_storage; pub use debouncer::AsyncDebouncer; pub(crate) use debouncer::DebouncedStorage; + pub use self::payload::PutPayload; pub use self::storage::Storage; diff --git a/quickwit/quickwit-storage/src/object_storage/s3_compatible_storage.rs b/quickwit/quickwit-storage/src/object_storage/s3_compatible_storage.rs index 4ac7e3ba3cb..54bc456859c 100644 --- a/quickwit/quickwit-storage/src/object_storage/s3_compatible_storage.rs +++ b/quickwit/quickwit-storage/src/object_storage/s3_compatible_storage.rs @@ -649,8 +649,9 @@ impl S3CompatibleObjectStorage { let delete_objects_res: StorageResult = aws_retry(&self.retry_params, || async { crate::metrics::OBJECT_STORAGE_BULK_DELETE_REQUESTS_TOTAL.increment(1); - let _timer = - HistogramTimer::new(&crate::metrics::OBJECT_STORAGE_BULK_DELETE_REQUEST_DURATION); + let _timer = HistogramTimer::new( + 
&crate::metrics::OBJECT_STORAGE_BULK_DELETE_REQUEST_DURATION, + ); self.s3_client .delete_objects() .bucket(self.bucket.clone()) @@ -797,7 +798,8 @@ impl Storage for S3CompatibleObjectStorage { let key = self.key(path); let delete_res = aws_retry(&self.retry_params, || async { crate::metrics::OBJECT_STORAGE_DELETE_REQUESTS_TOTAL.increment(1); - let _timer = HistogramTimer::new(&crate::metrics::OBJECT_STORAGE_DELETE_REQUEST_DURATION); + let _timer = + HistogramTimer::new(&crate::metrics::OBJECT_STORAGE_DELETE_REQUEST_DURATION); self.s3_client .delete_object() .bucket(&bucket) diff --git a/quickwit/quickwit-storage/src/opendal_storage/base.rs b/quickwit/quickwit-storage/src/opendal_storage/base.rs index 71fb70add3f..26c6291c304 100644 --- a/quickwit/quickwit-storage/src/opendal_storage/base.rs +++ b/quickwit/quickwit-storage/src/opendal_storage/base.rs @@ -169,8 +169,9 @@ impl Storage for OpendalStorage { let mut bulk_error = BulkDeleteError::default(); for (index, path) in paths.iter().enumerate() { crate::metrics::OBJECT_STORAGE_BULK_DELETE_REQUESTS_TOTAL.increment(1); - let _timer = - HistogramTimer::new(&crate::metrics::OBJECT_STORAGE_BULK_DELETE_REQUEST_DURATION); + let _timer = HistogramTimer::new( + &crate::metrics::OBJECT_STORAGE_BULK_DELETE_REQUEST_DURATION, + ); let result = self.op.delete(&path.as_os_str().to_string_lossy()).await; if let Err(err) = result { let storage_error_kind = err.kind(); diff --git a/quickwit/quickwit-storage/src/timeout_and_retry_storage.rs b/quickwit/quickwit-storage/src/timeout_and_retry_storage.rs index 99c319644c0..d812caac9e1 100644 --- a/quickwit/quickwit-storage/src/timeout_and_retry_storage.rs +++ b/quickwit/quickwit-storage/src/timeout_and_retry_storage.rs @@ -100,7 +100,8 @@ impl Storage for TimeoutAndRetryStorage { match attempt_id { 0 => crate::metrics::GET_SLICE_TIMEOUT_SUCCESS_AFTER_0_TIMEOUT.increment(1), 1 => crate::metrics::GET_SLICE_TIMEOUT_SUCCESS_AFTER_1_TIMEOUT.increment(1), - _ => 
crate::metrics::GET_SLICE_TIMEOUT_SUCCESS_AFTER_2_PLUS_TIMEOUT.increment(1), + _ => crate::metrics::GET_SLICE_TIMEOUT_SUCCESS_AFTER_2_PLUS_TIMEOUT + .increment(1), } return result; } From 397759f82bc7d7ee0b265e4cfdafd6a5228c4f30 Mon Sep 17 00:00:00 2001 From: Shuhei Kitagawa Date: Tue, 5 May 2026 18:17:06 +0200 Subject: [PATCH 52/54] Use Counter::local for local counters --- quickwit/quickwit-common/src/io.rs | 22 +++++-- quickwit/quickwit-common/src/metrics.rs | 88 +------------------------ quickwit/quickwit-search/src/metrics.rs | 40 +++++++---- 3 files changed, 45 insertions(+), 105 deletions(-) diff --git a/quickwit/quickwit-common/src/io.rs b/quickwit/quickwit-common/src/io.rs index c7be6698b6d..6748612f710 100644 --- a/quickwit/quickwit-common/src/io.rs +++ b/quickwit/quickwit-common/src/io.rs @@ -37,7 +37,6 @@ use pin_project::pin_project; use quickwit_metrics::{Counter, counter}; use tokio::io::AsyncWrite; -use crate::metrics::ScopedCounter; use crate::{KillSwitch, Progress, ProtectedZoneGuard}; // Max 1MB at a time. 
@@ -76,14 +75,25 @@ pub fn limiter(throughput: ByteSize) -> Limiter { .build() } -#[derive(Clone, Default)] +#[derive(Clone)] pub struct IoControls { throughput_limiter_opt: Option, - bytes_counter: ScopedCounter, + bytes_counter: Counter, progress: Progress, kill_switch: KillSwitch, } +impl Default for IoControls { + fn default() -> Self { + Self { + throughput_limiter_opt: None, + bytes_counter: Counter::local(), + progress: Progress::default(), + kill_switch: KillSwitch::default(), + } + } +} + impl IoControls { #[must_use] pub fn progress(&self) -> &Progress { @@ -107,10 +117,10 @@ impl IoControls { } pub fn set_component(mut self, component: &'static str) -> Self { - self.bytes_counter = ScopedCounter::Global(counter!( + self.bytes_counter = counter!( parent: WRITE_BYTES, "component" => component, - )); + ); self } @@ -127,7 +137,7 @@ impl IoControls { } pub fn set_bytes_counter(mut self, bytes_counter: Counter) -> Self { - self.bytes_counter = ScopedCounter::Global(bytes_counter); + self.bytes_counter = bytes_counter; self } diff --git a/quickwit/quickwit-common/src/metrics.rs b/quickwit/quickwit-common/src/metrics.rs index ea2ad6b847c..26db5af6941 100644 --- a/quickwit/quickwit-common/src/metrics.rs +++ b/quickwit/quickwit-common/src/metrics.rs @@ -13,60 +13,16 @@ // limitations under the License. 
use std::collections::BTreeMap; -use std::sync::atomic::{AtomicU64, Ordering}; -use std::sync::{Arc, LazyLock, OnceLock}; +use std::sync::{LazyLock, OnceLock}; #[cfg(not(test))] use std::time::Duration; use metrics_exporter_prometheus::PrometheusHandle; pub use prometheus::{exponential_buckets, linear_buckets}; -use quickwit_metrics::{Counter, Gauge, gauge}; +use quickwit_metrics::{Gauge, gauge}; static PROMETHEUS_HANDLE: OnceLock = OnceLock::new(); -#[derive(Clone, Default)] -pub struct LocalCounter { - inner: Arc, -} - -impl LocalCounter { - pub fn increment(&self, value: u64) { - self.inner.fetch_add(value, Ordering::Relaxed); - } - - pub fn get(&self) -> u64 { - self.inner.load(Ordering::Relaxed) - } -} - -#[derive(Clone)] -pub enum ScopedCounter { - Local(LocalCounter), - Global(Counter), -} - -impl Default for ScopedCounter { - fn default() -> Self { - Self::Local(LocalCounter::default()) - } -} - -impl ScopedCounter { - pub fn increment(&self, value: u64) { - match self { - Self::Local(counter) => counter.increment(value), - Self::Global(counter) => counter.increment(value), - } - } - - pub fn get(&self) -> u64 { - match self { - Self::Local(counter) => counter.get(), - Self::Global(counter) => counter.get(), - } - } -} - pub fn set_prometheus_handle(handle: PrometheusHandle) -> Result<(), String> { #[cfg(not(test))] let upkeep_handle = handle.clone(); @@ -218,49 +174,9 @@ mod tests { use metrics::with_local_recorder; use metrics_exporter_prometheus::PrometheusBuilder; use metrics_util::debugging::{DebugValue, DebuggingRecorder}; - use quickwit_metrics::counter; use super::*; - #[test] - fn local_counter_counts_locally() { - let counter = LocalCounter::default(); - let counter_clone = counter.clone(); - - counter.increment(3); - counter_clone.increment(4); - - assert_eq!(counter.get(), 7); - assert_eq!(counter_clone.get(), 7); - } - - #[test] - fn scoped_counter_counts_locally() { - let counter = ScopedCounter::default(); - let counter_clone = 
counter.clone(); - - counter.increment(3); - counter_clone.increment(4); - - assert_eq!(counter.get(), 7); - assert_eq!(counter_clone.get(), 7); - } - - #[test] - fn scoped_counter_wraps_global_counter() { - let global_counter = counter!( - name: "scoped_counter_test", - description: "Scoped counter test.", - subsystem: "test", - ); - let counter = ScopedCounter::Global(global_counter.clone()); - - counter.increment(5); - - assert_eq!(counter.get(), 5); - assert_eq!(global_counter.get(), 5); - } - #[test] fn metrics_text_payload_renders_prometheus_handle() { let recorder = PrometheusBuilder::new().build_recorder(); diff --git a/quickwit/quickwit-search/src/metrics.rs b/quickwit/quickwit-search/src/metrics.rs index b8d9c5fbcb6..77b776468c8 100644 --- a/quickwit/quickwit-search/src/metrics.rs +++ b/quickwit/quickwit-search/src/metrics.rs @@ -18,7 +18,7 @@ use std::fmt; use std::sync::LazyLock; use bytesize::ByteSize; -use quickwit_common::metrics::{ScopedCounter, exponential_buckets, linear_buckets}; +use quickwit_common::metrics::{exponential_buckets, linear_buckets}; use quickwit_metrics::{ Counter, Gauge, Histogram, LabelNames, counter, gauge, histogram, label_names, }; @@ -27,7 +27,7 @@ pub(crate) const STATUS_LABELS: LabelNames<1> = label_names!("status"); fn print_if_not_null( field_name: &'static str, - counter: &ScopedCounter, + counter: &Counter, f: &mut fmt::Formatter, ) -> fmt::Result { let val = counter.get(); @@ -37,16 +37,30 @@ fn print_if_not_null( Ok(()) } -#[derive(Default)] pub struct SplitSearchOutcomeCounters { - pub cancel_before_warmup: ScopedCounter, - pub cache_hit: ScopedCounter, - pub pruned_before_warmup: ScopedCounter, - pub cancel_warmup: ScopedCounter, - pub pruned_after_warmup: ScopedCounter, - pub cancel_cpu_queue: ScopedCounter, - pub cancel_cpu: ScopedCounter, - pub success: ScopedCounter, + pub cancel_before_warmup: Counter, + pub cache_hit: Counter, + pub pruned_before_warmup: Counter, + pub cancel_warmup: Counter, + pub 
pruned_after_warmup: Counter, + pub cancel_cpu_queue: Counter, + pub cancel_cpu: Counter, + pub success: Counter, +} + +impl Default for SplitSearchOutcomeCounters { + fn default() -> Self { + SplitSearchOutcomeCounters { + cancel_before_warmup: Counter::local(), + cache_hit: Counter::local(), + pruned_before_warmup: Counter::local(), + cancel_warmup: Counter::local(), + pruned_after_warmup: Counter::local(), + cancel_cpu_queue: Counter::local(), + cancel_cpu: Counter::local(), + success: Counter::local(), + } + } } impl fmt::Display for SplitSearchOutcomeCounters { @@ -67,10 +81,10 @@ impl SplitSearchOutcomeCounters { /// Create a new SplitSearchOutcomeCounters instance, registered in prometheus. pub fn new_global() -> Self { let counter = |category: &'static str| { - ScopedCounter::Global(counter!( + counter!( parent: &SPLIT_SEARCH_OUTCOME, "category" => category, - )) + ) }; SplitSearchOutcomeCounters { cancel_before_warmup: counter("cancel_before_warmup"), From 821125163fb06e549038940c5a97faf2b4919677 Mon Sep 17 00:00:00 2001 From: Shuhei Kitagawa Date: Tue, 5 May 2026 18:57:01 +0200 Subject: [PATCH 53/54] Fix build info metric registration --- quickwit/quickwit-cli/src/logger.rs | 9 ++--- quickwit/quickwit-cli/src/main.rs | 22 +--------- quickwit/quickwit-cli/src/metrics.rs | 24 ++++++++++- quickwit/quickwit-common/src/metrics.rs | 53 ++++--------------------- 4 files changed, 36 insertions(+), 72 deletions(-) diff --git a/quickwit/quickwit-cli/src/logger.rs b/quickwit/quickwit-cli/src/logger.rs index af187ee0a02..fa4346089db 100644 --- a/quickwit/quickwit-cli/src/logger.rs +++ b/quickwit/quickwit-cli/src/logger.rs @@ -285,11 +285,8 @@ fn startup_env_filter(level: Level) -> anyhow::Result { type ReloadLayer = tracing_subscriber::reload::Layer; -pub fn init_telemetry( - level: Level, - ansi_colors: bool, - build_info: &BuildInfo, -) -> anyhow::Result { +pub fn init_telemetry(level: Level, ansi_colors: bool) -> anyhow::Result { + let build_info = 
BuildInfo::get(); let otlp_config = load_otlp_exporter_config(); let meter_provider = init_metrics_provider(build_info, &otlp_config)?; @@ -447,6 +444,8 @@ fn init_metrics_provider( metrics::set_global_recorder(recorder) .map_err(|_| anyhow::anyhow!("failed to install global metrics recorder"))?; quickwit_metrics::describe_metrics(); + + crate::metrics::register_metrics(build_info); Ok(meter_provider) } diff --git a/quickwit/quickwit-cli/src/main.rs b/quickwit/quickwit-cli/src/main.rs index c34a145a761..cbb22318905 100644 --- a/quickwit/quickwit-cli/src/main.rs +++ b/quickwit/quickwit-cli/src/main.rs @@ -14,8 +14,6 @@ #![recursion_limit = "256"] -use std::collections::BTreeMap; - use anyhow::Context; use colored::Colorize; use quickwit_cli::checklist::RED_COLOR; @@ -62,8 +60,7 @@ fn main() -> anyhow::Result<()> { rt.block_on(async move { install_default_crypto_ring_provider(); - let telemetry_handle = - init_telemetry(command.default_log_level(), ansi_colors, BuildInfo::get())?; + let telemetry_handle = init_telemetry(command.default_log_level(), ansi_colors)?; let runtime_handle = tokio::runtime::Handle::current(); scrape_tokio_runtime_metrics(&runtime_handle, "main"); @@ -90,24 +87,7 @@ fn parse_cli_command() -> (CliCommand, bool) { (command, ansi_colors) } -fn register_build_info_metric() { - use itertools::Itertools; - let build_info = BuildInfo::get(); - let mut build_kvs = BTreeMap::default(); - build_kvs.insert("build_date", build_info.build_date.to_string()); - build_kvs.insert("commit_hash", build_info.commit_short_hash.to_string()); - build_kvs.insert("version", build_info.version.to_string()); - if !build_info.commit_tags.is_empty() { - let tags_str = build_info.commit_tags.iter().join(","); - build_kvs.insert("commit_tags", tags_str); - } - build_kvs.insert("target", build_info.build_target.to_string()); - quickwit_common::metrics::register_info("build_info", "Quickwit's build info", build_kvs); -} - async fn main_impl(command: CliCommand, 
telemetry_handle: TelemetryHandle) -> anyhow::Result<()> { - register_build_info_metric(); - #[cfg(feature = "jemalloc")] start_jemalloc_metrics_loop(); diff --git a/quickwit/quickwit-cli/src/metrics.rs b/quickwit/quickwit-cli/src/metrics.rs index ae5d9647d9d..38abb266643 100644 --- a/quickwit/quickwit-cli/src/metrics.rs +++ b/quickwit/quickwit-cli/src/metrics.rs @@ -15,7 +15,29 @@ use std::sync::LazyLock; use quickwit_common::metrics::exponential_buckets; -use quickwit_metrics::{Histogram, histogram}; +use quickwit_metrics::{Gauge, Histogram, gauge, histogram, labels}; +use quickwit_serve::BuildInfo; + +static BUILD_INFO: LazyLock = LazyLock::new(|| { + gauge!( + name: "build_info", + description: "Quickwit's build info", + subsystem: "", + ) +}); +pub(crate) fn register_metrics(build_info: &BuildInfo) { + use itertools::Itertools; + + let commit_tags = build_info.commit_tags.iter().join(","); + let labels = labels!( + "build_date" => build_info.build_date, + "commit_hash" => build_info.commit_short_hash, + "version" => build_info.version.clone(), + "commit_tags" => commit_tags, + "target" => build_info.build_target, + ); + gauge!(parent: BUILD_INFO, labels: [labels]).set(1.0); +} pub(crate) static THREAD_UNPARK_DURATION_MICROSECONDS: LazyLock = LazyLock::new(|| { histogram!( diff --git a/quickwit/quickwit-common/src/metrics.rs b/quickwit/quickwit-common/src/metrics.rs index 26db5af6941..4dc3d4391a8 100644 --- a/quickwit/quickwit-common/src/metrics.rs +++ b/quickwit/quickwit-common/src/metrics.rs @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use std::collections::BTreeMap; use std::sync::{LazyLock, OnceLock}; #[cfg(not(test))] use std::time::Duration; @@ -58,20 +57,6 @@ fn spawn_prometheus_upkeep(handle: PrometheusHandle) -> Result<(), String> { .map_err(|error| format!("failed to spawn Prometheus metrics upkeep thread: {error}")) } -pub fn register_info(name: &'static str, help: &'static str, kvs: BTreeMap<&'static str, String>) { - let key_name = format!("quickwit_{name}"); - let labels = kvs - .into_iter() - .map(|(label, value)| metrics::Label::new(label, value)) - .collect::>(); - let key = metrics::Key::from_parts(key_name.clone(), labels); - let metadata = metrics::Metadata::new("", metrics::Level::INFO, Some(module_path!())); - metrics::with_recorder(|recorder| { - recorder.describe_counter(metrics::KeyName::from(key_name), None, help.into()); - recorder.register_counter(&key, &metadata).increment(1); - }); -} - pub fn index_label(index_id: &str) -> &str { static PER_INDEX_METRICS_ENABLED: LazyLock = LazyLock::new(|| !crate::get_bool_from_env("QW_DISABLE_PER_INDEX_METRICS", false)); @@ -173,7 +158,7 @@ fn in_flight_data_gauge(component: &'static str) -> Gauge { mod tests { use metrics::with_local_recorder; use metrics_exporter_prometheus::PrometheusBuilder; - use metrics_util::debugging::{DebugValue, DebuggingRecorder}; + use quickwit_metrics::labels; use super::*; @@ -183,40 +168,18 @@ mod tests { set_prometheus_handle(recorder.handle()).expect("Prometheus handle should be set once"); with_local_recorder(&recorder, || { - register_info( - "prometheus_payload_info", - "prometheus payload info", - BTreeMap::new(), + let info_metric = gauge!( + name: "prometheus_payload_info", + description: "prometheus payload info", + subsystem: "", ); + quickwit_metrics::describe_metrics(); + gauge!(parent: info_metric, labels: [labels!("version" => "test")]).set(1.0); }); let payload = metrics_text_payload().expect("Prometheus payload should render"); assert!(payload.contains("# HELP 
quickwit_prometheus_payload_info")); - assert!(payload.contains("quickwit_prometheus_payload_info 1")); - } - - #[test] - fn register_info_records_labeled_counter() { - let recorder = DebuggingRecorder::new(); - let snapshotter = recorder.snapshotter(); - with_local_recorder(&recorder, || { - let labels = BTreeMap::from([("version", "test".to_string())]); - register_info("build_info_test", "build info test", labels); - }); - - let snapshot = snapshotter.snapshot().into_vec(); - let (_, _, description, value) = snapshot - .into_iter() - .find(|(composite_key, _, _, _)| { - let (_, key) = composite_key.clone().into_parts(); - key.name() == "quickwit_build_info_test" - && key - .labels() - .any(|label| label.key() == "version" && label.value() == "test") - }) - .expect("build info metric should be recorded"); - assert_eq!(description.as_deref(), Some("build info test")); - assert_eq!(value, DebugValue::Counter(1)); + assert!(payload.contains(r#"quickwit_prometheus_payload_info{version="test"} 1"#)); } #[test] From c776ecfc9fa387fe0cbf4d86b9a78cf8a45df544 Mon Sep 17 00:00:00 2001 From: Shuhei Kitagawa Date: Tue, 5 May 2026 19:10:42 +0200 Subject: [PATCH 54/54] Register build info metric as counter --- quickwit/quickwit-cli/src/metrics.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/quickwit/quickwit-cli/src/metrics.rs b/quickwit/quickwit-cli/src/metrics.rs index 38abb266643..d624cd7075d 100644 --- a/quickwit/quickwit-cli/src/metrics.rs +++ b/quickwit/quickwit-cli/src/metrics.rs @@ -15,11 +15,11 @@ use std::sync::LazyLock; use quickwit_common::metrics::exponential_buckets; -use quickwit_metrics::{Gauge, Histogram, gauge, histogram, labels}; +use quickwit_metrics::{Counter, Histogram, counter, histogram, labels}; use quickwit_serve::BuildInfo; -static BUILD_INFO: LazyLock = LazyLock::new(|| { - gauge!( +static BUILD_INFO: LazyLock = LazyLock::new(|| { + counter!( name: "build_info", description: "Quickwit's build info", subsystem: "", @@ 
-36,7 +36,7 @@ pub(crate) fn register_metrics(build_info: &BuildInfo) { "commit_tags" => commit_tags, "target" => build_info.build_target, ); - gauge!(parent: BUILD_INFO, labels: [labels]).set(1.0); + counter!(parent: BUILD_INFO, labels: [labels]).increment(1); } pub(crate) static THREAD_UNPARK_DURATION_MICROSECONDS: LazyLock = LazyLock::new(|| {