From 3122c4923dceb33ccb737d5484919efd92811258 Mon Sep 17 00:00:00 2001 From: Tasha Date: Mon, 11 May 2026 22:43:40 +0200 Subject: [PATCH 01/12] feat(app): declarative storage bundle, hybrid Lakebase/Delta backend, label filtering, error/warning metrics Storage: move schemas, wheels volume, and Lakebase instance + logical database into databricks.yml with lifecycle.prevent_destroy; replace bootstrap_storage.sh with 'make app-bind' for adopting existing resources in workspaces created by the prior flow. Backend: introduce PgExecutor + Postgres migration runner for OLTP tables (rules, settings, RBAC, comments, schedules, scheduler bookkeeping); keep analytical tables (validation runs, profiling, quarantine, metrics) on Delta. Metrics: persist DQX observer's error_row_count / warning_row_count / input_row_count via dq_validation_runs.error_rows / warning_rows and fix Spark Connect Observation.get mutability bug that was overwriting total_rows with limit-pushed values. UI: rename 'Invalid' -> 'Errors', add 'Warnings' column to quarantine detail, replace 'Has invalid' filter with 'Has failures', surface label badges next to table names in rule selection and schedule editor, add label filtering to Execute Rules and Schedule settings. Migrations: fix FIELD_ALREADY_EXISTS idempotency in MigrationRunner so v3/v4/v5 ADD COLUMN migrations no-op on fresh deploys whose v1 baseline already includes warning_rows / warnings / error_rows. Misc: retention test coverage, custom metrics service, post-deploy grants script reuse, docs rewrite (DEPLOYMENT.md, README.md, CLAUDE.md, installation.mdx) for the new declarative storage model. 
--- Makefile | 28 +- app/.build-constraints.txt | 18 +- app/CLAUDE.md | 77 ++- app/DEPLOYMENT.md | 186 ++++-- app/DEVELOPMENT.md | 24 +- app/README.md | 59 +- app/databricks.yml | 199 ++++-- app/pyproject.toml | 5 + app/scripts/_align_wheel_version.py | 160 +++++ app/scripts/bind_resources.sh | 112 ++++ app/scripts/post_deploy_grants.sh | 4 +- .../databricks_labs_dqx_app/backend/CLAUDE.md | 107 +++- .../databricks_labs_dqx_app/backend/app.py | 73 ++- .../databricks_labs_dqx_app/backend/config.py | 51 +- .../backend/dependencies.py | 95 ++- .../backend/migrations/__init__.py | 605 ++++++++++++------ .../backend/migrations/postgres.py | 294 +++++++++ .../databricks_labs_dqx_app/backend/models.py | 11 + .../backend/pg_executor.py | 429 +++++++++++++ .../backend/routes/v1/config.py | 137 +++- .../backend/routes/v1/dryrun.py | 6 + .../backend/routes/v1/quarantine.py | 34 +- .../backend/run_status_manager.py | 4 +- .../backend/services/app_settings_service.py | 140 ++-- .../backend/services/comments_service.py | 12 +- .../backend/services/job_service.py | 25 +- .../backend/services/role_service.py | 58 +- .../backend/services/rules_catalog_service.py | 251 ++++++-- .../services/schedule_config_service.py | 72 ++- .../backend/services/scheduler_service.py | 394 ++++++++++-- .../backend/sql_executor.py | 121 ++++ .../ui/components/DryRunResults.tsx | 146 ++++- .../ui/lib/api-custom.ts | 83 +++ app/src/databricks_labs_dqx_app/ui/lib/api.ts | 414 ++++++++++++ .../ui/routes/_sidebar/config.tsx | 224 ++++++- .../ui/routes/_sidebar/runs-history.tsx | 86 ++- .../ui/routes/_sidebar/runs.tsx | 211 +++++- app/tasks/src/dqx_task_runner/runner.py | 154 +++-- app/tests/test_custom_metrics.py | 24 +- app/tests/test_retention.py | 247 +++++++ app/tests/test_rules_catalog_service.py | 7 +- app/uv.lock | 88 ++- app/yarn.lock | 296 ++++----- docs/dqx/docs/dev/contributing.mdx | 5 +- docs/dqx/docs/installation.mdx | 69 +- 45 files changed, 4939 insertions(+), 906 deletions(-) create mode 
100644 app/scripts/_align_wheel_version.py create mode 100755 app/scripts/bind_resources.sh create mode 100644 app/src/databricks_labs_dqx_app/backend/migrations/postgres.py create mode 100644 app/src/databricks_labs_dqx_app/backend/pg_executor.py create mode 100644 app/tests/test_retention.py diff --git a/Makefile b/Makefile index a4009383a..9b48afb50 100644 --- a/Makefile +++ b/Makefile @@ -123,7 +123,31 @@ app-grant-permissions: @test -n "$(PROFILE)" || (echo "Usage: make app-grant-permissions PROFILE= [TARGET=]"; exit 1) app/scripts/post_deploy_grants.sh -p $(PROFILE) $(if $(TARGET),-t $(TARGET)) -# Full deploy: build, bundle deploy, grant permissions, and start the app. +# Adopt pre-existing storage resources into bundle management. +# +# Use ONCE per target on workspaces where the schemas / volume / +# Lakebase instance / Lakebase logical database already exist (e.g. +# from a previous bootstrap-script deploy, or from manual creation). +# Without binding, ``databricks bundle deploy`` would try to CREATE +# them and fail with "already exists" / "Instance name is not unique". +# +# Bind is idempotent at the CLI level — re-binding the same resource +# is a no-op. Skip this target on fresh workspaces; ``bundle deploy`` +# creates the resources directly. +# +# Usage: make app-bind PROFILE=my-profile TARGET=dev +app-bind: + @test -n "$(PROFILE)" || (echo "Usage: make app-bind PROFILE= TARGET="; exit 1) + @test -n "$(TARGET)" || (echo "Usage: make app-bind PROFILE= TARGET="; exit 1) + app/scripts/bind_resources.sh -p $(PROFILE) -t $(TARGET) + +# Full deploy: build, bundle deploy (creates storage on fresh +# workspaces, updates managed resources otherwise), grant permissions +# to the app SP, and start the app. Run ``make app-bind`` once before +# the FIRST deploy on a workspace where the storage was previously +# provisioned out-of-band — otherwise the bundle will try to CREATE +# the existing resources and fail. 
+# # Usage: make app-deploy PROFILE=my-profile TARGET=dev app-deploy: app-build @test -n "$(PROFILE)" || (echo "Usage: make app-deploy PROFILE= TARGET="; exit 1) @@ -166,4 +190,4 @@ lock-dependencies: perl -pi -e 's|registry = "https://[^"]*"|registry = "https://pypi.org/simple"|g' uv.lock .DEFAULT: all -.PHONY: all clean dev lint fmt test integration e2e perf anomaly coverage combine-coverage docs-build docs-serve-dev docs-install docs-serve docs-clean app-install app-build app-start-dev app-stop-dev app-check app-test app-grant-permissions app-deploy fork-sync build lock-dependencies lock-app-dependencies +.PHONY: all clean dev lint fmt test integration e2e perf anomaly coverage combine-coverage docs-build docs-serve-dev docs-install docs-serve docs-clean app-install app-build app-start-dev app-stop-dev app-check app-test app-grant-permissions app-bind app-deploy fork-sync build lock-dependencies lock-app-dependencies diff --git a/app/.build-constraints.txt b/app/.build-constraints.txt index c1518967e..022b0933a 100644 --- a/app/.build-constraints.txt +++ b/app/.build-constraints.txt @@ -101,15 +101,15 @@ markupsafe==3.0.3 \ --hash=sha256:f9e130248f4462aaa8e2552d547f36ddadbeaa573879158d721bbd33dfe4743a \ --hash=sha256:fed51ac40f757d41b7c48425901843666a6677e3e8eb0abcff09e4ba6e664f50 # via jinja2 -packaging==26.1 \ - --hash=sha256:5d9c0669c6285e491e0ced2eee587eaf67b670d94a19e94e3984a481aba6802f \ - --hash=sha256:f042152b681c4bfac5cae2742a55e103d27ab2ec0f3d88037136b6bfe7c9c5de +packaging==26.2 \ + --hash=sha256:5fc45236b9446107ff2415ce77c807cee2862cb6fac22b8a73826d0693b0980e \ + --hash=sha256:ff452ff5a3e828ce110190feff1178bb1f2ea2281fa2075aadb987c2fb221661 # via # dunamai # hatchling -pathspec==1.0.4 \ - --hash=sha256:0210e2ae8a21a9137c0d470578cb0e595af87edaa6ebf12ff176f14a02e0e645 \ - --hash=sha256:fb6ae2fd4e7c921a165808a552060e722767cfa526f99ca5156ed2ce45a5c723 +pathspec==1.1.1 \ + --hash=sha256:17db5ecd524104a120e173814c90367a96a98d07c45b2e10c2f3919fff91bf5a \ + 
--hash=sha256:a00ce642f577bf7f473932318056212bc4f8bfdf53128c78bbd5af0b9b20b189 # via hatchling pluggy==1.6.0 \ --hash=sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3 \ @@ -119,9 +119,9 @@ tomlkit==0.14.0 \ --hash=sha256:592064ed85b40fa213469f81ac584f67a4f2992509a7c3ea2d632208623a3680 \ --hash=sha256:cf00efca415dbd57575befb1f6634c4f42d2d87dbba376128adb42c121b87064 # via uv-dynamic-versioning -trove-classifiers==2026.1.14.14 \ - --hash=sha256:00492545a1402b09d4858605ba190ea33243d361e2b01c9c296ce06b5c3325f3 \ - --hash=sha256:1f9553927f18d0513d8e5ff80ab8980b8202ce37ecae0e3274ed2ef11880e74d +trove-classifiers==2026.4.28.13 \ + --hash=sha256:8f4b1eb4e16296b57d612965444f87a83861cc989a0451ac97fe4265ddef03b8 \ + --hash=sha256:c85bb8a53c3de7330d1699b844ed9fb809a602a09ac15dc79ad6d1a509be0676 # via hatchling uv-dynamic-versioning==0.14.0 \ --hash=sha256:574fbc07e87ace45c01d55967ad3b864871257b98ff5b8ac87c261227ac8db5b \ diff --git a/app/CLAUDE.md b/app/CLAUDE.md index ed734dcf2..f0eec0006 100644 --- a/app/CLAUDE.md +++ b/app/CLAUDE.md @@ -36,36 +36,69 @@ RBAC is enforced — routes use `require_role(*roles)` from `backend/dependencie ## Internal Storage -App uses a dedicated catalog selected at install time, with two schemas (managed by `MigrationRunner` in `backend/migrations/`): +App uses a **hybrid backend** — analytical/append tables in Delta, OLTP +tables in Lakebase Postgres. Both backends are managed by their own +migration runner in `backend/migrations/`. Schemas, volume, Lakebase +instance, and Lakebase logical Postgres database are all declared as +bundle resources in `databricks.yml` with `lifecycle.prevent_destroy: +true`, so `databricks bundle destroy` cannot drop them — see "Bundle +conventions" below. 
``` {user_catalog} - ├── dqx_app ← main schema (SP-managed) - │ ├── dq_app_settings ← key/value app configuration - │ ├── dq_quality_rules ← active/approved rules - │ ├── dq_quality_rules_history ← rule change audit log - │ ├── dq_role_mappings ← role → workspace group mappings (RBAC) - │ ├── dq_comments ← comment threads on rules/runs - │ ├── dq_profiling_results ← profiler run results (suggestions in generated_rules_json) - │ ├── dq_validation_runs ← dryrun + scheduled run history - │ ├── dq_quarantine_records ← invalid rows captured by runs - │ ├── dq_metrics ← per-run quality metrics for trend tracking - │ ├── dq_schedule_configs ← per-schedule config (cron/interval, target rules) - │ ├── dq_schedule_configs_history ← schedule config change audit log - │ ├── dq_schedule_runs ← scheduler last/next run state (survives restarts) - │ └── dq_migrations ← migration version tracker - └── dqx_app_tmp ← temp views created via OBO for profiler/dryrun jobs + ├── dqx_studio ← main schema (SP-managed) + │ ├── dq_profiling_results (Delta) profiler run results + │ ├── dq_validation_runs (Delta) dryrun + scheduled run history + │ ├── dq_quarantine_records (Delta) invalid rows captured by runs + │ ├── dq_metrics (Delta) per-run quality metrics for trend tracking + │ ├── dq_app_settings (OLTP*) key/value app configuration + │ ├── dq_quality_rules (OLTP*) active/approved rules + │ ├── dq_quality_rules_history (OLTP*) rule change audit log + │ ├── dq_role_mappings (OLTP*) role → workspace group mappings (RBAC) + │ ├── dq_comments (OLTP*) comment threads on rules/runs + │ ├── dq_schedule_configs (OLTP*) per-schedule config (cron/interval, target rules) + │ ├── dq_schedule_configs_history (OLTP*) schedule config change audit log + │ ├── dq_schedule_runs (OLTP*) scheduler last/next run state (survives restarts) + │ └── dq_migrations (Delta) Delta migration version tracker + ├── dqx_studio_tmp ← temp views created via OBO for profiler/dryrun jobs + └── dqx_studio.wheels (volume) ← DQX 
+ task-runner wheels uploaded at app startup + +Lakebase database (when enabled, default = `dqx-studio-lakebase`): + └── dqx_studio (database) + └── public (schema, configurable via DQX_LAKEBASE_SCHEMA) + ├── dq_app_settings, dq_role_mappings, dq_quality_rules, + │ dq_quality_rules_history, dq_comments, dq_schedule_configs, + │ dq_schedule_configs_history, dq_schedule_runs + └── dq_migrations (Postgres migration version tracker) ``` -A separate UC volume (`{catalog}.dqx_app.wheels` by default) holds the DQX + task-runner wheels uploaded at app startup. +`(OLTP*)` = lives in **Lakebase Postgres** when +`lakebase_instance_name` is set, otherwise **Delta** (the +`v2: Delta OLTP fallback` migration). ## Key Decisions -- **No config.yaml** — all settings stored in Delta tables -- **Dedicated catalog** — user selects at install, `dqx_app` schema created by the app -- **Rule promotion** — export rules then deploy separately to prod; or save directly to prod checks table -- **Internal storage** — Delta table (preferred); Lakebase also supported as option at install time -- **Target environments** — Dev, UAT/QA (prod-like data); app is not intended for production rule execution +- **No config.yaml** — all settings stored in Delta or Lakebase tables. +- **Dedicated catalog** — user selects at install; `dqx_studio` and `dqx_studio_tmp` schemas are declared as bundle resources and created by `databricks bundle deploy`. +- **Hybrid storage** — high-volume append tables in Delta; transactional/low-latency tables in Lakebase Postgres. +- **Rule promotion** — export rules then deploy separately to prod; or save directly to prod checks table. +- **Target environments** — Dev, UAT/QA (prod-like data); app is not intended for production rule execution. 
+ +## Bundle conventions + +All stateful resources are declared in `databricks.yml`: + +- `resources.schemas.main_schema` — `dqx_studio` schema +- `resources.schemas.tmp_schema` — `dqx_studio_tmp` schema +- `resources.volumes.wheels` — wheels volume +- `resources.database_instances.lakebase` — Lakebase Postgres instance (autoscaling) +- `resources.database_catalogs.lakebase_db` — logical Postgres database (via `create_database_if_not_exists: true`) and a surrounding UC catalog (informational; the app connects directly via psycopg) + +Every one of them carries `lifecycle.prevent_destroy: true` (Databricks CLI 0.268+), which blocks `databricks bundle destroy` and any deploy that would force-replace the resource. To intentionally tear something down: drop the flag, `databricks bundle deployment unbind -t `, then destroy. + +For workspaces where these resources already exist (e.g. created out-of-band before this layout existed), run `make app-bind PROFILE=... TARGET=...` once per target to adopt them — otherwise `databricks bundle deploy` errors out with "already exists" / "Instance name is not unique". + +Privileges on UC objects for the auto-created app SP are still reapplied with `scripts/post_deploy_grants.sh` after each deploy, because the app SP's UUID isn't known at bundle-write time. ## Architecture diff --git a/app/DEPLOYMENT.md b/app/DEPLOYMENT.md index 5dc182529..c54c3356e 100644 --- a/app/DEPLOYMENT.md +++ b/app/DEPLOYMENT.md @@ -4,21 +4,48 @@ Production deployment uses [Declarative Automation Bundles](https://docs.databri ## Prerequisites -Before starting, make sure you have all of the following — several steps require elevated permissions, so confirm access before you begin. +Before you start, confirm you have **all** of the items below. The single most common deployment failure is missing one permission — and the error you see is almost always downstream of the missing grant, not on the grant itself. 
-**Tooling** -- **Databricks CLI** installed and authenticated against your workspace -- **`jq`** (used by the post-deploy grants script) -- **`make`** (used by the one-command deploy target) +### Tooling -**Access** -- **Workspace admin** — required to create service principals, grant catalog permissions, and enable workspace-level features +- **Databricks CLI** v0.268+ installed and authenticated against your workspace (`databricks auth login -p `). v0.268 is the minimum that supports `lifecycle.prevent_destroy` on bundle resources. +- **`jq`** (used by the post-deploy grants script and the resource-bind helper) +- **`make`** (drives the one-command deploy target) -**Workspace configuration** -- An **existing Unity Catalog catalog** where the app's schemas and volumes will be created — the bundle does not create the catalog itself -- **Databricks Apps** feature enabled on the workspace -- **User token passthrough** enabled for Databricks Apps (see [Step 2](#step-2-enable-user-token-passthrough)) -- **Serverless compute** enabled on the workspace (the task-runner job runs on serverless) +### Required permissions + +The deploying user (you) needs the permissions below. They are **all** consumed by `make app-deploy`; deployment will halt the first time it hits a missing one. We've listed which step in the flow each permission unblocks so you can debug surgically if a grant gets missed. 
+ +| # | Permission | Granted on | Used by | What fails without it | +|---|---|---|---|---| +| 1 | **Workspace access** entitlement | You, in the workspace | All CLI calls | `databricks` CLI can't reach the workspace | +| 2 | **Databricks SQL access** entitlement | You, in the workspace | `bundle deploy` (creates the X-Small SQL warehouse) | `Error: not authorized to create SQL Endpoint` | +| 3 | **Allow cluster create** entitlement | You, in the workspace | `bundle deploy` (warehouse + job clusters) | Warehouse / job creation rejected | +| 4 | **Databricks Apps: Can Manage** workspace permission | You, in the workspace | `bundle deploy` of the App resource | App creation rejected | +| 5 | **Databricks Database (Lakebase): Manager** entitlement | You, in the workspace | `bundle deploy` of the `database_instances` and `database_catalogs` resources | `Error: User does not have permission to create database instances` | +| 6 | **USE CATALOG** + **CREATE SCHEMA** on `` | Your user or an admin group you're in | `bundle deploy` of the `schemas` and `volumes` resources | `Error: User does not have CREATE_SCHEMA on catalog ''` | +| 7 | **MANAGE** on `` (or be the catalog owner) | Your user or an admin group you're in | `post_deploy_grants.sh` (issues `GRANT USE CATALOG / ALL PRIVILEGES … TO ` and `… TO account users`) | `Error: User does not have privilege MANAGE on catalog ''` | +| 8 | **Service Principal: User** role on the task-runner SP | Your user, on the SP you'll use as `dqx_service_principal_application_id` | `bundle deploy` of the `jobs.dqx_task_runner` resource (sets `run_as.service_principal_name`) | `Error: User is not authorized to use this service principal` | +| 9 | **Service Principal: Manager** role on the task-runner SP, *or* a pre-shared OAuth client secret | Your user, on the same SP | Only needed if you want to **mint a fresh OAuth secret yourself** for the task-runner (e.g. 
via `databricks service-principal-secrets-proxy create `) | `Error: User is not authorized to perform this operation` when minting a new secret | +| 10 | **Account admin** (one-time, post-deploy) | Account level | Updating the app's OAuth custom-app integration to include the `all-apis` scope (see [Expand OAuth Scopes](#optional-expand-oauth-scopes)) | Some app features (job submission, advanced SCIM lookups) return 403 | + +**Two convenience patterns** that reduce the per-user grants in rows 6 and 7: + +- **Make the catalog ownership easy:** ask an admin to add you to an existing UC-admin group that already holds `MANAGE` (or `ALL PRIVILEGES`) on ``. This unlocks rows 6 and 7 in one membership change instead of two per-object grants. +- **Workspace admin shortcut:** if you become workspace admin, rows 1–5 + 8–9 collapse automatically. Rows 6 and 7 (UC) and 10 (account admin) still need to be granted explicitly — workspace admin does **not** confer Unity Catalog or account-level rights. + +### Workspace features that must be enabled + +These are configured at the workspace or account level — not by you, not by the bundle. Confirm with your admin before the first deploy: + +- **Databricks Apps** is enabled on the workspace +- **User token passthrough** (a.k.a. user authorization / OBO) is enabled for Databricks Apps — see [Step 2](#step-2-enable-user-token-passthrough). Without this the app can't make OBO calls and Unity Catalog browsing fails. +- **Serverless compute** is enabled on the workspace — the task-runner job runs exclusively on serverless +- **Lakebase Postgres** is enabled on the workspace (default OLTP backend). The Lakebase instance, logical Postgres database, and surrounding UC catalog are declared as bundle resources and provisioned by `databricks bundle deploy`. 
They carry `lifecycle.prevent_destroy: true` so a `bundle destroy` cannot drop them and wipe OLTP state — see [Stateful storage and destroy protection](#stateful-storage-and-destroy-protection). + +### The catalog must already exist + +The bundle **does not create the catalog itself** — that's deliberate. Catalogs are typically owned by a governance team and creating them requires `CREATE CATALOG` on the metastore. Pick an existing catalog you (or an admin group you're in) have rights on, and set `catalog_name` in [Step 4](#step-4-configure-databricksyml). The bundle creates the schemas (`dqx_studio`, `dqx_studio_tmp`) and the wheels volume *inside* that catalog. The Lakebase-backed UC catalog (`dqx_studio_lakebase` by default) is created at the metastore level by the `database_catalogs` resource — you need `CREATE CATALOG` on the metastore for the first deploy if it doesn't already exist. ## Step 1: Create a Service Principal @@ -28,7 +55,7 @@ The bundle requires a service principal to run the task-runner job. This is sepa 1. Go to **Settings → Identity and Access → Service Principals** 2. Click **Add service principal → Create new** 3. Give it a name (e.g., `dqx-task-runner-sp`) -4. Note the **Application ID** — you'll use it in Step 3 as `dqx_service_principal_application_id` +4. Note the **Application ID** — you'll use it in [Step 4](#step-4-configure-databricksyml) as `dqx_service_principal_application_id` 5. **Grant yourself (or the identity you'll deploy the bundle with) the `User` role on this new SP.** Open the SP you just created, go to the **Permissions** tab, click **Add permissions**, search for your user (or deploy-time principal), and assign the role **`User`** (equivalent to `servicePrincipal.user` in the SCIM API). This lets your deploying identity configure jobs with `run_as: service_principal_name` pointing at this SP. Without it, `databricks bundle deploy` will fail with a permission error when it tries to set up the task-runner job. 
@@ -44,7 +71,34 @@ The app uses On-Behalf-Of (OBO) tokens to access Unity Catalog resources with th Contact your workspace admin or enable it via the workspace settings if not already active. -## Step 3: Configure `databricks.yml` +## Step 3: Stateful storage and destroy protection + +DQX Studio's stateful resources — the two schemas (`dqx_studio`, `dqx_studio_tmp`), the wheels volume, the Lakebase instance, and the Lakebase logical Postgres database — are all declared as bundle resources in `app/databricks.yml`. Each one carries `lifecycle.prevent_destroy: true` (Databricks CLI 0.268+), which **blocks `databricks bundle destroy` from dropping the resource** and wiping the data. Run this command to verify: + +```bash +grep -A1 'lifecycle:' app/databricks.yml +``` + +You'll see one `prevent_destroy: true` for each of: `schemas.main_schema`, `schemas.tmp_schema`, `volumes.wheels`, `database_instances.lakebase`, `database_catalogs.lakebase_db`. + +What this means in practice: + +- **Fresh workspace** — `databricks bundle deploy` creates everything in the right order (schemas → volume → Lakebase instance → Lakebase logical DB → SQL warehouse → job → app). No extra bootstrap step. +- **Existing workspace** where these resources were created out-of-band (e.g. from a previous version of this app that used a bootstrap script) — you must **bind** them into bundle management once per target. See [Migrating an existing workspace](#migrating-an-existing-workspace). +- **Schema drift** — if you change `catalog_name`, `schema_name`, or `lakebase_instance_name` in a way that would force the bundle to delete and recreate the resource, `prevent_destroy` blocks the destroy step and the deploy fails fast (good — the alternative is silent data loss). Treat those names as immutable. 
+- **Intentional teardown** — to drop a protected resource, remove `lifecycle.prevent_destroy: true` from `databricks.yml`, run `databricks bundle deployment unbind -t ` to detach it from bundle state, then destroy it manually. + +### Migrating an existing workspace + +If the workspace was previously deployed with the old bootstrap-script flow (or if the resources were created manually), `databricks bundle deploy` will fail with "already exists" / "Instance name is not unique" on the first run because it's trying to CREATE resources that already exist. Fix this in one command: + +```bash +make app-bind PROFILE= TARGET= +``` + +`make app-bind` invokes `app/scripts/bind_resources.sh`, which calls `databricks bundle deployment bind` for each stateful resource (one bind per target). After bind, `bundle deploy` sees the resource as already-managed and does diff-and-update instead of CREATE. Bind is a one-time operation per target — once bound, subsequent deploys don't need it. Re-running `make app-bind` on a fully-bound target is a safe no-op. + +## Step 4: Configure `databricks.yml` Update a deploy target. The minimum required is a `catalog_name` and `dqx_service_principal_application_id`; everything else has a sensible default and can be overridden per target. In `app/databricks.yml`: @@ -54,15 +108,8 @@ targets: workspace: profile: variables: - # Required catalog_name: dqx_service_principal_application_id: - - # Optional — uncomment and override defaults per target as needed - # admin_group: - # app_name: - # sql_warehouse_name: - # schema_name: presets: trigger_pause_status: PAUSED ``` @@ -78,25 +125,36 @@ All target-level variables, their defaults, and what they control: | `admin_group` | `admins` | No | Workspace group whose members get the in-app `ADMIN` role unconditionally (bootstrap admin path). The default `admins` is the built-in workspace admins group — every workspace admin becomes a DQX admin automatically. Override with a dedicated group (e.g. 
`dqx-admins-prod`) for narrower bootstrap access. Additional roles are assigned at runtime via the in-app Role Management UI. | | `app_name` | `dqx-studio` | No | Deployed Databricks App name. Override per target (e.g. `dqx-studio-dev`, `dqx-studio-prod`) when deploying multiple targets to the same workspace, or for personal sandboxes. | | `sql_warehouse_name` | `dqx-studio-sql-warehouse` | No | Deployed SQL warehouse name (the bundle creates an X-Small serverless warehouse for app queries). Override per target to avoid duplicates in shared workspaces. | -| `schema_name` | `dqx_app` | No | Main schema inside the catalog — holds rules, run history, role mappings, and other app state. Override only if you need a non-default schema layout. | +| `schema_name` | `dqx_studio` | No | Main schema — holds run history, profiling, metrics, quarantine, and OLTP fallback tables. Declared as `resources.schemas.main_schema` in the bundle with `lifecycle.prevent_destroy: true`. | +| `tmp_schema_name` | `dqx_studio_tmp` | No | Per-user temp-view schema. Declared as `resources.schemas.tmp_schema` with `lifecycle.prevent_destroy: true`. | +| `wheels_volume_name` | `wheels` | No | UC volume under `.` for the DQX + task-runner wheels. Declared as `resources.volumes.wheels` with `lifecycle.prevent_destroy: true`. | +| `lakebase_instance_name` | `dqx-studio-lakebase` | No | Lakebase Postgres instance for OLTP state. Declared as `resources.database_instances.lakebase` with `lifecycle.prevent_destroy: true`. Autoscaling by default per [Lakebase Autoscaling](https://docs.databricks.com/aws/en/oltp/upgrade-to-autoscaling). | +| `lakebase_database_name` | `dqx_studio` | No | Logical Postgres database inside the Lakebase instance. Created by `resources.database_catalogs.lakebase_db` (`create_database_if_not_exists: true`). | +| `lakebase_uc_catalog_name` | `dqx_studio_lakebase` | No | UC catalog created by the `database_catalogs` resource. 
The app connects to Postgres directly via psycopg, so this UC catalog is informational only — it lets you ad-hoc query the Postgres tables via UC SQL. | +| `lakebase_capacity` | `CU_1` | No | Lakebase compute capacity. Valid values: `CU_1`, `CU_2`, `CU_4`, `CU_8`. To resize an existing instance, change this value and redeploy. Bump up if Lakebase queries queue in the app logs. | -> **Note on duplicate names in Databricks:** SQL warehouses, jobs, and apps within the same workspace are tracked by ID, not by name, so technically duplicates are allowed. But operators browse the Jobs / Apps / Warehouses UI by name, so distinct names per target are strongly recommended when you deploy more than one target to the same workspace. +> **Note on duplicate names in Databricks:** SQL warehouses, jobs, and apps within the same workspace are tracked by ID, not by name, so technically duplicates are allowed. Lakebase database instances are the exception: they are identified by name, so the name must be unique within the workspace (a duplicate fails with "Instance name is not unique"). Operators browse the Jobs / Apps / Warehouses / Databases UI by name, so distinct names per target are strongly recommended when you deploy more than one target to the same workspace. -## Step 4: One-Command Deploy (recommended) +## Step 5: One-Command Deploy (recommended) Build, deploy, grant permissions, and start the app in a single command: ```bash +# Existing workspace where the storage was previously bootstrapped +# out-of-band (e.g. with the older bootstrap script): bind once. +make app-bind PROFILE= TARGET= + +# Every deploy (fresh or otherwise): make app-deploy PROFILE= TARGET= ``` -This runs the following steps automatically: -1. `make app-build` — builds the frontend and wheels -2. `databricks bundle deploy` — creates the warehouse, schemas, volume, job, and app -3. `app/scripts/post_deploy_grants.sh` — discovers both SPs and executes all `GRANT` statements -4. 
`databricks bundle run` — starts the app +`make app-deploy` runs the following steps automatically: +1. `make app-build` — builds the frontend and wheels. +2. `databricks bundle deploy` — provisions or updates the schemas, wheels volume, Lakebase instance, Lakebase logical Postgres database, SQL warehouse, task-runner job, and Databricks App in dependency order. Stateful resources carry `lifecycle.prevent_destroy: true` so a future destroy can't drop them — see [Step 3](#step-3-stateful-storage-and-destroy-protection). +3. `app/scripts/post_deploy_grants.sh` — discovers both service principals and executes the `GRANT` statements on the catalog, schemas, and volume (the auto-created app SP's UUID isn't known at bundle-write time, which is why grants live in a post-deploy script). Lakebase grants are handled by the bundle's `database` resource binding. +4. `databricks bundle run` — starts the app. -> **First start**: The app runs database migrations and uploads DQX wheels to the UC volume. If the task-runner job runs before the app has started at least once, it will fail to find its wheels. Wait for `"Uploaded databricks_labs_dqx-..."` in the logs before triggering runs. +> **First start**: The app runs both Delta and Lakebase database migrations on startup, and uploads DQX wheels to the UC volume. If the task-runner job runs before the app has started at least once, it will fail to find its wheels. Wait for `"Uploaded databricks_labs_dqx-..."` in the logs before triggering runs. If Lakebase is enabled, also wait for `"Lakebase OLTP routing enabled"` before opening the UI — the app falls back to UC-only mode if Lakebase init fails (logged as `"Lakebase initialisation failed — falling back to Delta for OLTP tables"`). 
### Step-by-step alternative @@ -106,11 +164,15 @@ If you prefer to run each step individually: # Build make app-build -# Deploy the bundle +# (One-time, only on a workspace whose storage was created out-of-band) +make app-bind PROFILE= TARGET= + +# Deploy the bundle (creates / updates all resources, including +# schemas, volume, Lakebase instance and logical DB) cd app && databricks bundle deploy -p -t -# Grant permissions (auto-discovers SP IDs and catalog from bundle config) -make app-grant-permissions PROFILE= +# Grant permissions to the app SP (auto-discovered after deploy) +make app-grant-permissions PROFILE= TARGET= # Start the app cd app && databricks bundle run dqx-studio -p -t @@ -126,20 +188,22 @@ The grant script discovers both SPs automatically. If you need to run the SQL ma -- App service principal GRANT USE CATALOG ON CATALOG TO ``; -GRANT ALL PRIVILEGES ON SCHEMA .dqx_app TO ``; -GRANT ALL PRIVILEGES ON SCHEMA .dqx_app_tmp TO ``; -GRANT ALL PRIVILEGES ON VOLUME .dqx_app.wheels TO ``; +GRANT ALL PRIVILEGES ON SCHEMA .dqx_studio TO ``; +GRANT ALL PRIVILEGES ON SCHEMA .dqx_studio_tmp TO ``; +GRANT ALL PRIVILEGES ON VOLUME .dqx_studio.wheels TO ``; -- Job service principal (task runner) GRANT USE CATALOG ON CATALOG TO ``; -GRANT ALL PRIVILEGES ON SCHEMA .dqx_app TO ``; -GRANT ALL PRIVILEGES ON SCHEMA .dqx_app_tmp TO ``; -GRANT ALL PRIVILEGES ON VOLUME .dqx_app.wheels TO ``; +GRANT ALL PRIVILEGES ON SCHEMA .dqx_studio TO ``; +GRANT ALL PRIVILEGES ON SCHEMA .dqx_studio_tmp TO ``; +GRANT ALL PRIVILEGES ON VOLUME .dqx_studio.wheels TO ``; -- End users need USE CATALOG to create temporary views for dry runs GRANT USE CATALOG ON CATALOG TO `account users`; ``` +> **Lakebase grants are handled differently.** When Lakebase is enabled, the bundle binds the database to the app via a `database` resource block (`permission: CAN_CONNECT_AND_CREATE`). DABs translates that into the equivalent Postgres role grants automatically — there is no separate SQL to run. 
The first time the app connects, `PgMigrationRunner` creates its own schema and tables inside the Lakebase database. + To grant app access to end users, go to **Apps → `` → Permissions** and assign `Can Use`. Replace `` with the value of `app_name` configured for your target (default `dqx-studio`). Access the app at: @@ -147,6 +211,21 @@ Access the app at: https:///apps/ ``` +## Lakebase backend + +DQX Studio stores its **OLTP state** — rules catalog, app settings, RBAC, comments, schedule configs, and scheduler bookkeeping — in a Lakebase Postgres instance for sub-millisecond reads and to avoid SQL warehouse cold starts. **Append-mostly observability tables** (`dq_validation_runs`, `dq_profiling_results`, `dq_metrics`, `dq_quarantine_records`) live in Delta because they're written by the Spark task runner and queried by AI/BI dashboards. + +| Backend | Tables | Why | +|---|---|---| +| Delta Lake | `dq_validation_runs`, `dq_profiling_results`, `dq_quarantine_records`, `dq_metrics` | High-volume append; Spark task runner writes them; columnar reads. | +| Lakebase Postgres | `dq_app_settings`, `dq_role_mappings`, `dq_quality_rules`, `dq_quality_rules_history`, `dq_comments`, `dq_schedule_configs`, `dq_schedule_configs_history`, `dq_schedule_runs` | OLTP — sub-ms reads from FastAPI handlers, row-level upserts, primary keys. | + +The Lakebase instance, logical Postgres database, and surrounding UC catalog are declared as bundle resources (`database_instances.lakebase`, `database_catalogs.lakebase_db`) and provisioned by `databricks bundle deploy`. All three carry `lifecycle.prevent_destroy: true` — see [Step 3](#step-3-stateful-storage-and-destroy-protection). + +### Lakebase token rotation + +Lakebase OAuth tokens expire after one hour. The app's `PgExecutor` runs a background daemon thread that refreshes the password every `DQX_LAKEBASE_TOKEN_REFRESH_MINUTES` minutes (default 50). 
Existing connections age out via `psycopg_pool.ConnectionPool.max_lifetime` so a long-running app can stay up indefinitely without reconnecting. + ## (Optional) Expand OAuth Scopes > **Most deployments don't need this step.** The OAuth scopes configured automatically by DABs (`sql`, `catalog.catalogs:read`, `catalog.schemas:read`, `catalog.tables:read`, `serving.serving-endpoints`) plus the identity scopes Databricks Apps grants implicitly are sufficient for all DQX Studio features on a standard workspace. @@ -246,3 +325,32 @@ The task-runner job installs wheels from the UC volume. If the volume is empty t databricks apps logs -p # Look for: "Uploaded databricks_labs_dqx--py3-none-any.whl" ``` + +**App says `"schema dqx_studio does not exist"` (or similar) on first start:** +The schemas didn't deploy, or the bundle is pointing at a different catalog than the app. Confirm with `databricks bundle validate -p -t ` that `catalog_name` and `schema_name` resolve to the expected values, then redeploy: +```bash +make app-deploy PROFILE= TARGET= +``` + +**App logs `"Lakebase initialisation failed — falling back to Delta for OLTP tables"`:** +The app has degraded to UC-only mode. Confirm the Lakebase instance exists and is `AVAILABLE`: +```bash +databricks database list-database-instances -p +``` +If the instance is missing, re-run `databricks bundle deploy`. If it's there but the app SP doesn't have `CAN_CONNECT_AND_CREATE`, check the bundle's `database` resource block under `resources.apps.dqx-studio.resources` and redeploy. + +**`databricks bundle deploy` fails with `"already exists"` / `"Instance name is not unique"` on the first deploy of a target:** +The schemas, volume, or Lakebase instance were created out-of-band before this version of the bundle (e.g. by the older bootstrap script). 
Run the bind step once per target to adopt them into bundle management: +```bash +make app-bind PROFILE= TARGET= +make app-deploy PROFILE= TARGET= +``` +See [Migrating an existing workspace](#migrating-an-existing-workspace). + +If the conflict is specifically `"Instance name is not unique"` for the Lakebase instance and the instance does NOT appear in `databricks database list-database-instances`, it's likely in the ~7-day soft-delete retention window (the name stays reserved). Edit your target in `databricks.yml` and override `lakebase_instance_name: `, then deploy. + +**`databricks bundle destroy` fails with `"cannot destroy resource: prevent_destroy is set"`:** +This is the safety guard doing its job — see [Step 3](#step-3-stateful-storage-and-destroy-protection). To intentionally tear down a stateful resource, remove `lifecycle.prevent_destroy: true` from the relevant block in `databricks.yml`, run `databricks bundle deployment unbind -t ` to detach it from bundle state, then destroy it manually with `databricks schemas delete` / `databricks volumes delete` / `databricks database delete-database-instance`. + +**Lakebase queries time out / app logs show pool exhaustion:** +Bump `lakebase_capacity` from `CU_1` to `CU_2` (or higher) in `databricks.yml` and redeploy. You can also raise `DQX_LAKEBASE_POOL_MAX_SIZE` (default 10) on the app's environment if many concurrent requests are hitting the OLTP path. 
diff --git a/app/DEVELOPMENT.md b/app/DEVELOPMENT.md index 46ad7531f..e69d04be2 100644 --- a/app/DEVELOPMENT.md +++ b/app/DEVELOPMENT.md @@ -40,13 +40,29 @@ Create a file at `app/.env` (git-ignored) with the variables below, filling in y DATABRICKS_CONFIG_PROFILE= # matches a profile in ~/.databrickscfg DATABRICKS_WAREHOUSE_ID= DQX_CATALOG=dqx # Unity Catalog catalog name -DQX_SCHEMA=dqx_app # schema inside the catalog +DQX_SCHEMA=dqx_studio # schema inside the catalog DQX_JOB_ID= # required for profiler/dry-run -DQX_WHEELS_VOLUME=/Volumes/dqx/dqx_app/wheels # UC volume path; auto-set by DABs in production +DQX_WHEELS_VOLUME=/Volumes/dqx/dqx_studio/wheels # UC volume path; auto-set by DABs in production DQX_ADMIN_GROUP=admins # workspace group granted bootstrap Admin access + +# Lakebase (optional — leave DQX_LAKEBASE_INSTANCE_NAME empty to run OLTP tables on Delta locally) +DQX_LAKEBASE_INSTANCE_NAME= # e.g. dqx-studio-lakebase; empty = Delta-only mode +DQX_LAKEBASE_DATABASE_NAME=dqx_studio # database within the Lakebase instance +DQX_LAKEBASE_SCHEMA=public # Postgres schema (default: public) +DQX_LAKEBASE_POOL_MIN_SIZE=1 # psycopg connection pool floor +DQX_LAKEBASE_POOL_MAX_SIZE=10 # psycopg connection pool ceiling +DQX_LAKEBASE_TOKEN_REFRESH_MINUTES=50 # OAuth token refresh cadence (token expires at 60) ``` -`DQX_JOB_ID` and `DQX_WHEELS_VOLUME` are injected automatically when deployed via DABs. For local dev, set them manually if you want profiler and dry-run to work. +`DQX_JOB_ID`, `DQX_WHEELS_VOLUME`, `DQX_LAKEBASE_INSTANCE_NAME`, and `DQX_LAKEBASE_DATABASE_NAME` are injected automatically when deployed via DABs. For local dev, set them manually only if you want to exercise the corresponding feature locally: + +| Want to test... | Set... 
| +|---|---| +| Profiler / dry-run | `DQX_JOB_ID` (and the wheel volume must exist) | +| Lakebase OLTP path | `DQX_LAKEBASE_INSTANCE_NAME` (empty = falls back to Delta — fine for most local dev) | +| Wheel sync | `DQX_WHEELS_VOLUME` | + +> **Lakebase locally:** The same OAuth token-refresh logic that runs in production also runs locally. The app authenticates as your CLI user (via `databricks-sdk` default auth chain), so your CLI principal must have `CAN_CONNECT_AND_CREATE` on the Lakebase database. Easiest path: run the bundle once against your dev workspace so the bundle's `database` resource binds the permissions, then point `DQX_LAKEBASE_INSTANCE_NAME` at the deployed instance. ## 2. Install Dependencies @@ -139,7 +155,7 @@ The profiler creates a temporary view using your OBO token and submits a Databri If the wheel upload fails locally with a `403`, grant your user write access: ```bash -databricks volumes grant .dqx_app.wheels WRITE_VOLUME --user -p +databricks volumes grant .dqx_studio.wheels WRITE_VOLUME --user -p ``` ## Troubleshooting diff --git a/app/README.md b/app/README.md index 2e19d7270..6023dad85 100644 --- a/app/README.md +++ b/app/README.md @@ -29,9 +29,9 @@ Operations that must respect the logged-in user's permissions use the `X-Forward Operations the app owns and manages run as the app's own service principal: - **Job submission** for profiler and dry-run tasks -- **Rules catalog CRUD** (reading and writing the rules Delta table) -- **Schema migrations** (creating and evolving Delta tables) -- **App settings** (reading and writing settings from the Delta table) +- **Rules catalog CRUD** (reading and writing rules — Lakebase Postgres by default, Delta in fallback mode) +- **Schema migrations** (creating and evolving both Delta and Lakebase tables) +- **App settings** (reading and writing settings — Lakebase Postgres by default) - **Wheel upload** — on startup the app uploads DQX wheels to the UC volume and patches the task-runner job environment 
This ensures: @@ -73,32 +73,45 @@ On every cold start the FastAPI lifespan (`backend/app.py`) hashes the locally b - **`/api/v1/*`** — FastAPI handles all API requests - **`/*`** — FastAPI serves the compiled React SPA; TanStack Router handles client-side navigation -### Internal Storage +### Internal Storage (Hybrid Backend) -The app uses a dedicated catalog (selected at install time) with two schemas plus a wheels volume: +The app uses a **hybrid storage architecture**: high-volume append/analytical tables stay on Delta in Unity Catalog, while OLTP tables (rules catalog, app settings, RBAC, comments, schedule configs) live in **Lakebase Postgres** for sub-millisecond reads (see [DEPLOYMENT.md → Lakebase backend](DEPLOYMENT.md#lakebase-backend)). + +All stateful resources — schemas, wheels volume, Lakebase instance, and Lakebase logical Postgres database — are declared as bundle resources in `databricks.yml` with `lifecycle.prevent_destroy: true`. The bundle creates them on first deploy; `databricks bundle destroy` is blocked from dropping them. For workspaces where these resources were created out-of-band, run `make app-bind` once per target to adopt them into bundle management before the first deploy (see [DEPLOYMENT.md → Migrating an existing workspace](DEPLOYMENT.md#migrating-an-existing-workspace)). 
``` -{catalog} - ├── dqx_app ← main schema (SP-managed via MigrationRunner) - │ ├── dq_app_settings ← key/value app configuration - │ ├── dq_quality_rules ← active/approved rules - │ ├── dq_quality_rules_history ← rule change audit log - │ ├── dq_role_mappings ← role → workspace group mappings (RBAC) - │ ├── dq_comments ← comment threads on rules/runs - │ ├── dq_profiling_results ← profiler runs (suggestions in generated_rules_json) - │ ├── dq_validation_runs ← dryrun + scheduled run lifecycle (1 row/run) - │ ├── dq_quarantine_records ← invalid rows captured by runs - │ ├── dq_metrics ← long-format observability events (N rows/run, - │ │ matches DQX OBSERVATION_TABLE_SCHEMA so - │ │ AI/BI dashboards target the spec directly) - │ ├── dq_schedule_configs ← per-schedule config (cron/interval, target rules) - │ ├── dq_schedule_configs_history ← schedule change audit log - │ ├── dq_schedule_runs ← scheduler last/next run state - │ └── dq_migrations ← migration version tracker - ├── dqx_app_tmp ← temp views created via OBO for profiler/dryrun jobs +{catalog} (Unity Catalog) + ├── dqx_studio ← main schema (created by the bundle; tables managed by MigrationRunner) + │ ├── dq_profiling_results (Delta, always) profiler runs (suggestions in generated_rules_json) + │ ├── dq_validation_runs (Delta, always) dryrun + scheduled run lifecycle (1 row/run) + │ ├── dq_quarantine_records (Delta, always) invalid rows captured by runs + │ ├── dq_metrics (Delta, always) long-format observability events + │ │ (matches DQX OBSERVATION_TABLE_SCHEMA so AI/BI + │ │ dashboards target the spec directly) + │ ├── dq_app_settings (OLTP*) key/value app configuration + │ ├── dq_quality_rules (OLTP*) active/approved rules + │ ├── dq_quality_rules_history (OLTP*) rule change audit log + │ ├── dq_role_mappings (OLTP*) role → workspace group mappings (RBAC) + │ ├── dq_comments (OLTP*) comment threads on rules/runs + │ ├── dq_schedule_configs (OLTP*) per-schedule config (cron/interval, target rules) + │ 
├── dq_schedule_configs_history (OLTP*) schedule change audit log + │ ├── dq_schedule_runs (OLTP*) scheduler last/next run state + │ └── dq_migrations ← Delta migration version tracker + ├── dqx_studio_tmp ← temp views created via OBO for profiler/dryrun jobs └── wheels (UC volume) ← DQX + task-runner wheels uploaded at app startup + +Lakebase (Postgres) — when enabled (default): + dqx-studio-lakebase (database_instance) + └── dqx_studio (database) + └── public (schema) ← provisioned by PgMigrationRunner on first start + ├── dq_app_settings, dq_role_mappings, dq_quality_rules, + ├── dq_quality_rules_history, dq_comments, dq_schedule_configs, + ├── dq_schedule_configs_history, dq_schedule_runs + └── dq_migrations ← Lakebase migration version tracker ``` +`(OLTP*)` = lives in **Lakebase Postgres** when `lakebase_instance_name` is set in `databricks.yml`, otherwise in **Delta** (the `v2: Delta OLTP fallback` migration). The split is invisible to service code: `SqlExecutor` (Delta) and `PgExecutor` (Lakebase) share an identical public surface — `execute`, `query`, `query_dicts`, `upsert`, plus the dialect helpers `q(identifier)`, `json_literal_expr(json_str)`, and `ts_text(col)` that emit dialect-correct SQL fragments. + ### Role-Based Access Control Roles (`ADMIN`, `RULE_APPROVER`, `RULE_AUTHOR`, `VIEWER`, plus the orthogonal `RUNNER`) are defined in `backend/common/authorization.py` and resolved from Databricks workspace-group membership in `dq_role_mappings` (plus the bootstrap `DQX_ADMIN_GROUP`). Routes enforce roles via `require_role(*roles)` from `backend/dependencies.py`. 
diff --git a/app/databricks.yml b/app/databricks.yml index 6b36c387e..6c075a76f 100644 --- a/app/databricks.yml +++ b/app/databricks.yml @@ -8,10 +8,10 @@ variables: default: "dqx" schema_name: description: "Name of the schema for DQX Studio storage" - default: "dqx_app" + default: "dqx_studio" tmp_schema_name: description: "Name of the schema for temporary views and intermediate processing" - default: "dqx_app_tmp" + default: "dqx_studio_tmp" admin_group: description: "Databricks workspace group name for bootstrap Admin access" default: "admins" @@ -27,6 +27,39 @@ variables: dqx_service_principal_application_id: description: "Application ID of the service principal for DQX tasks" default: "00000000-0000-0000-0000-000000000000" + # ------------------------------------------------------------------ + # Lakebase backend + # ------------------------------------------------------------------ + # The app stores its OLTP state (rules catalog, app settings, RBAC, + # comments, schedule configs, scheduler bookkeeping) in a Lakebase + # Postgres instance for sub-millisecond reads and to avoid SQL + # warehouse cold starts. Append-mostly observability tables + # (dq_validation_runs, dq_profiling_results, dq_metrics, + # dq_quarantine_records) stay in Delta — those are written by the + # Spark task runner and consumed by AI/BI dashboards. + # + # The Lakebase instance and its logical Postgres database are + # declared as bundle resources below with + # ``lifecycle.prevent_destroy: true``. New Lakebase instances are + # autoscaling by default (see + # https://docs.databricks.com/aws/en/oltp/upgrade-to-autoscaling). + lakebase_instance_name: + description: "Name of the Lakebase Postgres instance for OLTP state." + default: "dqx-studio-lakebase" + lakebase_database_name: + description: "Logical Postgres database the app connects to inside the Lakebase instance." 
+ default: "dqx_studio" + lakebase_uc_catalog_name: + description: > + UC catalog created by the database_catalogs resource. ``database_catalogs`` + surfaces the Lakebase Postgres database in Unity Catalog (so it's queryable + via UC SQL) and is the only DAB resource that can create the logical + Postgres database — that's why we use it. The app itself connects to + Postgres directly via psycopg, so this UC catalog is informational only. + default: "dqx_studio_lakebase" + lakebase_capacity: + description: "Lakebase compute capacity (CU_1, CU_2, CU_4, CU_8)" + default: "CU_1" app_config: type: complex description: "Configuration for DQX Studio" @@ -55,6 +88,14 @@ variables: value: "${var.admin_group}" - name: "DQX_WHEELS_VOLUME" value: "/Volumes/${var.catalog_name}/${var.schema_name}/${var.wheels_volume_name}" + # Lakebase wiring. The instance and logical Postgres database + # are declared as bundle resources further down (with + # ``lifecycle.prevent_destroy: true`` so ``bundle destroy`` + # cannot drop them). + - name: "DQX_LAKEBASE_INSTANCE_NAME" + value: "${var.lakebase_instance_name}" + - name: "DQX_LAKEBASE_DATABASE_NAME" + value: "${var.lakebase_database_name}" sync: include: @@ -62,8 +103,20 @@ sync: artifacts: default: + # ``apx build`` injects a build timestamp into the application + # wheel's *filename* but leaves the wheel's internal METADATA + # untouched. pip reads the version from METADATA, so successive + # rebuilds (without a new git commit) all advertise the same + # version and pip silently skips reinstall in the Databricks App + # container's persistent venv — the running app keeps executing + # stale code. ``_align_wheel_version.py`` rewrites METADATA, + # RECORD and the dist-info dir to match the timestamped filename + # so every rebuild produces a unique package version and pip + # always installs fresh. 
build: > uv run apx build && + uv run python scripts/_align_wheel_version.py + $(ls -1 .build/databricks_labs_dqx_app-*.whl | head -1) && uv build ../ --wheel --out-dir .build/ && uv build tasks/ --wheel --out-dir .build/tasks/ && dqx_whl=$(ls -1 .build/databricks_labs_dqx-*.whl | head -1 | xargs basename) && @@ -108,6 +161,17 @@ resources: job: id: ${resources.jobs.dqx_task_runner.id} permission: "CAN_MANAGE" + # Lakebase Postgres for OLTP state (rules catalog, app + # settings, RBAC, comments, schedule configs, scheduler + # bookkeeping). The instance + logical Postgres database are + # declared as bundle resources below; this entry binds the app + # to them so DABs sets up CAN_CONNECT_AND_CREATE automatically. + - name: "dqx-lakebase" + description: "Lakebase Postgres backend for app OLTP state" + database: + database_name: ${resources.database_catalogs.lakebase_db.database_name} + instance_name: ${resources.database_instances.lakebase.name} + permission: "CAN_CONNECT_AND_CREATE" permissions: - group_name: "account users" @@ -125,50 +189,64 @@ resources: - group_name: "users" level: "CAN_USE" - volumes: - dqx_wheels: - catalog_name: ${var.catalog_name} - schema_name: ${var.schema_name} - name: ${var.wheels_volume_name} - volume_type: MANAGED - grants: - - principal: ${resources.apps.dqx-studio.id} - privileges: - - ALL_PRIVILEGES - - principal: ${var.dqx_service_principal_application_id} - privileges: - - ALL_PRIVILEGES - + # ------------------------------------------------------------------ + # Stateful storage (schemas, volume, Lakebase instance + database). + # + # Every entry below is tagged ``lifecycle.prevent_destroy: true`` so + # that ``databricks bundle destroy`` (or a deploy that would + # otherwise replace the resource) fails fast instead of dropping the + # backing data. To intentionally tear something down: remove the + # flag, ``databricks bundle deployment unbind ``, then destroy. 
+ # + # For an existing workspace whose resources were provisioned + # out-of-band (e.g. from the previous bootstrap script), one-time + # binding is required before the first deploy — see ``make app-bind`` + # and DEPLOYMENT.md → "Migrating an existing workspace". + # ------------------------------------------------------------------ schemas: - dqx_schema: + main_schema: catalog_name: ${var.catalog_name} name: ${var.schema_name} - grants: - - principal: ${resources.apps.dqx-studio.id} - privileges: - - ALL_PRIVILEGES - - principal: ${var.dqx_service_principal_application_id} - privileges: - - ALL_PRIVILEGES - - dqx_tmp_schema: + comment: "DQX Studio main schema (rules, run history, profiling, metrics, quarantine, OLTP fallback)" + lifecycle: + prevent_destroy: true + + tmp_schema: catalog_name: ${var.catalog_name} name: ${var.tmp_schema_name} - grants: - - principal: ${resources.apps.dqx-studio.id} - privileges: - - ALL_PRIVILEGES - - MANAGE - - principal: ${var.dqx_service_principal_application_id} - privileges: - - ALL_PRIVILEGES - - MANAGE - # account users need CREATE_TABLE so the app can create per-user - # temporary views in this schema via OBO tokens. - - principal: "account users" - privileges: - - USE_SCHEMA - - CREATE_TABLE + comment: "DQX Studio temp schema for per-user dry-run views" + lifecycle: + prevent_destroy: true + + volumes: + wheels: + catalog_name: ${var.catalog_name} + schema_name: ${resources.schemas.main_schema.name} + name: ${var.wheels_volume_name} + volume_type: MANAGED + comment: "DQX Studio wheel storage for the task runner" + lifecycle: + prevent_destroy: true + + database_instances: + lakebase: + name: ${var.lakebase_instance_name} + capacity: ${var.lakebase_capacity} + lifecycle: + prevent_destroy: true + + database_catalogs: + # Creates the logical Postgres database AND a Unity Catalog catalog + # that surfaces it for ad-hoc UC SQL queries. 
The app itself + # connects to Postgres directly via psycopg; the UC catalog is + # informational and not on the request path. + lakebase_db: + database_instance_name: ${resources.database_instances.lakebase.name} + name: ${var.lakebase_uc_catalog_name} + database_name: ${var.lakebase_database_name} + create_database_if_not_exists: true + lifecycle: + prevent_destroy: true jobs: dqx_task_runner: @@ -223,18 +301,27 @@ resources: default: "unknown" targets: - dev: - workspace: - profile: - variables: - # Required - catalog_name: - dqx_service_principal_application_id: - - # Optional — uncomment and override defaults per target as needed - # admin_group: - # app_name: - # sql_warehouse_name: - # schema_name: - presets: - trigger_pause_status: PAUSED + # Template for adding a new target. Required variables are + # ``catalog_name`` and ``dqx_service_principal_application_id``; + # everything else has a sensible default in the ``variables:`` block + # at the top of this file and can be overridden per target below. + # + # example-target: + # workspace: + # profile: + # variables: + # catalog_name: + # dqx_service_principal_application_id: + # # Optional overrides: + # # admin_group: + # # app_name: + # # sql_warehouse_name: + # # schema_name: + # # tmp_schema_name: + # # wheels_volume_name: + # # lakebase_instance_name: + # # lakebase_database_name: + # # lakebase_uc_catalog_name: + # # lakebase_capacity: CU_1 | CU_2 | CU_4 | CU_8 + # presets: + # trigger_pause_status: PAUSED diff --git a/app/pyproject.toml b/app/pyproject.toml index 81c37e46e..d6679e9da 100644 --- a/app/pyproject.toml +++ b/app/pyproject.toml @@ -12,6 +12,11 @@ dependencies = [ "databricks-sdk~=0.73", "databricks-sql-connector[pyarrow]==4.2.5", "openpyxl>=3.1", + # Lakebase (Postgres) backend for OLTP state. ``binary`` ships a + # pre-built libpq so we don't need a system Postgres install at + # runtime; ``pool`` adds the threaded ConnectionPool used by + # PgExecutor. 
Both are optional features of the same package.
+    "psycopg[binary,pool]>=3.2",
 ]
 
 [dependency-groups]
diff --git a/app/scripts/_align_wheel_version.py b/app/scripts/_align_wheel_version.py
new file mode 100644
index 000000000..3e57331b3
--- /dev/null
+++ b/app/scripts/_align_wheel_version.py
@@ -0,0 +1,160 @@
+"""Re-align a wheel's METADATA / RECORD / dist-info dir to its filename.
+
+Background
+----------
+``apx build`` post-processes the freshly-built application wheel by
+injecting a build timestamp into the **filename** (e.g. renaming
+``databricks_labs_dqx_app-0.13.0.post38.dev0+65c9602-py3-none-any.whl``
+to
+``databricks_labs_dqx_app-0.13.0.post38.dev0+65c9602.post20260506102508-py3-none-any.whl``)
+but does **not** update the wheel's internal ``METADATA``, ``RECORD`` or
+``*.dist-info`` directory name.
+
+The result is a wheel whose filename advertises a new version each
+build but whose package metadata still reports the unchanged
+git-derived version. ``pip`` consults the metadata, sees the version
+is already installed, and silently skips reinstall — so a Databricks
+App container's persistent venv keeps running stale code on every
+redeploy.
+
+This script repairs the wheel produced by ``apx build`` by:
+
+1. parsing the version segment out of the filename
+2. unpacking the wheel
+3. renaming the ``*.dist-info`` directory to use that version
+4. rewriting ``METADATA`` so ``Version:`` matches
+5. regenerating ``RECORD`` checksums
+6. repacking the wheel in place
+
+After running this each rebuild produces a wheel with a unique
+metadata version, forcing pip to install fresh code on every deploy.
+
+Usage
+-----
+    python _align_wheel_version.py path/to/wheel.whl
+
+The script overwrites the input wheel; safe to run idempotently.
+"""
+
+from __future__ import annotations
+
+import base64
+import hashlib
+import re
+import shutil
+import sys
+import tempfile
+import zipfile
+from pathlib import Path
+
+# Wheel filename format (PEP 427):
+# {distribution}-{version}(-{build tag})?-{python tag}-{abi tag}-{platform tag}.whl
+# Distribution and version may both contain dots; the version segment
+# starts right after the leading "{distribution}-" and ends at the
+# next "-py" / "-cp" / "-pp" / "-ip" tag. Everything after that until the
+# trailing ``.whl`` is the compatibility tag triple.
+_WHEEL_FILENAME_RE = re.compile(
+    r"^(?P<dist>[A-Za-z0-9_]+)-(?P<version>[^-]+(?:\+[^-]+)?)-(?P<tags>(?:py|cp|pp|ip)\d.*?)\.whl$"
+)
+
+
+def _filename_version(wheel: Path) -> tuple[str, str]:
+    """Return ``(distribution, version)`` parsed from the wheel filename.
+
+    Uses a regex rather than ``packaging.utils.parse_wheel_filename``
+    because the latter normalises the distribution name and rejects
+    local versions that contain dots — both of which are present in the
+    wheels apx produces. Exits via ``SystemExit`` on an unparseable name.
+    """
+    m = _WHEEL_FILENAME_RE.match(wheel.name)
+    if not m:
+        raise SystemExit(f"unrecognised wheel filename: {wheel.name}")
+    return m.group("dist"), m.group("version")
+
+
+def _record_entry(rel_path: str, data: bytes) -> str:  # one RECORD row: path,sha256=<digest>,size
+    digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=").decode()  # unpadded urlsafe b64
+    return f"{rel_path},sha256={digest},{len(data)}"
+
+
+def align(wheel_path: Path) -> None:
+    """Rewrite ``wheel_path`` in place so its metadata version matches its filename (no-op when aligned)."""
+    dist, filename_version = _filename_version(wheel_path)
+
+    with tempfile.TemporaryDirectory() as raw_tmp:
+        tmp = Path(raw_tmp)
+        with zipfile.ZipFile(wheel_path, "r") as zf:
+            zf.extractall(tmp)
+
+        old_dist_infos = [p for p in tmp.iterdir() if p.is_dir() and p.name.endswith(".dist-info")]
+        if not old_dist_infos:
+            raise SystemExit(f"no .dist-info dir in {wheel_path.name}")
+        if len(old_dist_infos) > 1:
+            raise SystemExit(f"multiple .dist-info dirs in {wheel_path.name}: {old_dist_infos}")
+        old_dist_info = old_dist_infos[0]
+
+        # Hatchling preserves underscores in the distribution prefix
+        # of the dist-info directory, so reuse that prefix verbatim
+        # rather than recomputing from ``dist`` (which packaging.utils
+        # would normalise to dashes). The directory name is
+        # ``{prefix}-{version}.dist-info``; strip the literal suffix
+        # then split off the version.
+        stem = old_dist_info.name[: -len(".dist-info")]
+        existing_prefix = stem.rsplit("-", 1)[0]
+        new_dist_info = tmp / f"{existing_prefix}-{filename_version}.dist-info"
+        if new_dist_info != old_dist_info:  # same path => already aligned; deleting it would lose the metadata
+            shutil.rmtree(new_dist_info, ignore_errors=True)  # clear a stale target if one exists
+            old_dist_info.rename(new_dist_info)
+
+        metadata = new_dist_info / "METADATA"
+        if not metadata.exists():
+            raise SystemExit(f"missing METADATA in {wheel_path.name}")
+        text = metadata.read_text(encoding="utf-8")
+        new_text, n = re.subn(
+            r"^Version: .*$",
+            f"Version: {filename_version}",
+            text,
+            count=1,
+            flags=re.MULTILINE,
+        )
+        if n == 0:
+            raise SystemExit(f"no Version: line in METADATA of {wheel_path.name}")
+        metadata.write_text(new_text, encoding="utf-8")
+
+        record = new_dist_info / "RECORD"
+        record_rel = record.relative_to(tmp).as_posix()
+        entries: list[str] = []
+        for f in sorted(tmp.rglob("*")):
+            if not f.is_file():
+                continue
+            rel = f.relative_to(tmp).as_posix()
+            if rel == record_rel:
+                continue
+            entries.append(_record_entry(rel, f.read_bytes()))
+        entries.append(f"{record_rel},,")  # RECORD lists itself without hash or size
+        record.write_text("\n".join(entries) + "\n", encoding="utf-8")
+
+        tmp_wheel = wheel_path.with_suffix(wheel_path.suffix + ".tmp")
+        with zipfile.ZipFile(tmp_wheel, "w", zipfile.ZIP_DEFLATED) as zf:
+            for f in sorted(tmp.rglob("*")):
+                if f.is_file():
+                    zf.write(f, f.relative_to(tmp).as_posix())
+
+        tmp_wheel.replace(wheel_path)  # swap in the rebuilt archive only after it is fully written
+
+
+def main(argv: list[str]) -> int:  # CLI entry point; returns the process exit code
+    if len(argv) != 2:
+        print(f"usage: {Path(argv[0]).name} <wheel.whl>", file=sys.stderr)
+        return 2
+    wheel = Path(argv[1])
+    if not wheel.is_file():
+        print(f"not a file: {wheel}", file=sys.stderr)
+        return 1
+    align(wheel)
+    print(f" aligned wheel metadata to filename version: {wheel.name}")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main(sys.argv))
diff --git a/app/scripts/bind_resources.sh b/app/scripts/bind_resources.sh
new file mode 100755
index 000000000..a3cb10128
--- /dev/null
+++ b/app/scripts/bind_resources.sh
@@ -0,0 +1,112 @@
+#!/usr/bin/env bash
+#
+# 
One-time binding of pre-existing storage resources to the bundle.
+#
+# Use this when the schemas / volume / Lakebase instance / Lakebase
+# logical database already exist in the target workspace (e.g. from
+# the previous bootstrap-script flow, or from manual creation) and
+# you're adopting them into the bundle for the first time. Without
+# binding, ``databricks bundle deploy`` tries to CREATE the resources
+# and fails with "already exists".
+#
+# Bind is idempotent at the CLI level; re-running this script on a
+# fully-bound workspace is a no-op (the CLI replies "already bound").
+#
+# Skip this script for fresh workspaces — ``databricks bundle deploy``
+# creates the resources directly.
+#
+# Usage:
+# ./scripts/bind_resources.sh -p <profile> -t <target>
+#
+# Requirements:
+# - databricks CLI v0.268+ (lifecycle.prevent_destroy support)
+# - jq installed
+
+set -euo pipefail
+
+PROFILE=""
+TARGET=""
+
+usage() {
+    echo "Usage: $0 -p <profile> -t <target>"
+    exit 1
+}
+
+while getopts "p:t:" opt; do
+    case $opt in
+        p) PROFILE="$OPTARG" ;;
+        t) TARGET="$OPTARG" ;;
+        *) usage ;;
+    esac
+done
+
+[[ -z "$PROFILE" || -z "$TARGET" ]] && usage
+
+CLI=(databricks -p "$PROFILE")  # array, so the profile name is never word-split or globbed
+BUNDLE_FLAGS=(-t "$TARGET")
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+BUNDLE_DIR="$(dirname "$SCRIPT_DIR")"
+cd "$BUNDLE_DIR"
+
+# ---------------------------------------------------------------------------
+# Read resource identifiers from the bundle config. Identifiers must
+# match the ``name`` / ``catalog_name`` / etc. fields rendered for the
+# selected target, so we ask the CLI for the resolved bundle.
+# ---------------------------------------------------------------------------
+BUNDLE_VALIDATE_STDERR=$(mktemp)
+trap 'rm -f "$BUNDLE_VALIDATE_STDERR"' EXIT
+if ! BUNDLE_JSON=$("${CLI[@]}" bundle validate "${BUNDLE_FLAGS[@]}" -o json 2>"$BUNDLE_VALIDATE_STDERR"); then
+    echo "ERROR: 'databricks bundle validate' failed:" >&2
+    cat "$BUNDLE_VALIDATE_STDERR" >&2
+    exit 1
+fi
+
+CATALOG=$(echo "$BUNDLE_JSON" | jq -r '.variables.catalog_name.value // .variables.catalog_name.default // empty')
+SCHEMA=$(echo "$BUNDLE_JSON" | jq -r '.variables.schema_name.value // .variables.schema_name.default // "dqx_studio"')
+TMP_SCHEMA=$(echo "$BUNDLE_JSON" | jq -r '.variables.tmp_schema_name.value // .variables.tmp_schema_name.default // "dqx_studio_tmp"')
+VOLUME=$(echo "$BUNDLE_JSON" | jq -r '.variables.wheels_volume_name.value // .variables.wheels_volume_name.default // "wheels"')
+LB_INSTANCE=$(echo "$BUNDLE_JSON" | jq -r '.variables.lakebase_instance_name.value // .variables.lakebase_instance_name.default // empty')
+LB_UC_CATALOG=$(echo "$BUNDLE_JSON" | jq -r '.variables.lakebase_uc_catalog_name.value // .variables.lakebase_uc_catalog_name.default // empty')
+
+if [[ -z "$CATALOG" ]]; then
+    echo "ERROR: catalog_name is not configured in the bundle target." >&2
+    exit 1
+fi
+
+echo "==> Binding pre-existing resources (target=$TARGET, profile=$PROFILE)"
+echo " Catalog: $CATALOG"
+echo " Main schema: $SCHEMA"
+echo " Tmp schema: $TMP_SCHEMA"
+echo " Volume: $VOLUME"
+echo " Lakebase: $LB_INSTANCE"
+echo " Lakebase UC cat: $LB_UC_CATALOG"
+echo ""
+
+# ``databricks bundle deployment bind`` is interactive by default. We
+# pass ``--auto-approve`` so this runs unattended (piping ``yes`` would
+# trip ``set -o pipefail`` via SIGPIPE and misreport successful binds);
+# the prompt only confirms that bundle updates will be applied to the
+# existing remote resource on the next deploy, so approving is safe.
+bind() {
+    local key="$1"  # bundle resource key, e.g. main_schema
+    local id="$2"  # identifier of the existing remote resource
+    echo " binding ${key} -> ${id}"
+    if ! "${CLI[@]}" bundle deployment bind "$key" "$id" "${BUNDLE_FLAGS[@]}" --auto-approve; then
+        echo " WARNING: bind for ${key} failed. It may already be bound, or the remote resource may not exist yet." >&2
+    fi
+}
+
+bind main_schema "${CATALOG}.${SCHEMA}"
+bind tmp_schema "${CATALOG}.${TMP_SCHEMA}"
+bind wheels "${CATALOG}.${SCHEMA}.${VOLUME}"
+
+if [[ -n "$LB_INSTANCE" ]]; then
+    bind lakebase "$LB_INSTANCE"
+fi
+if [[ -n "$LB_UC_CATALOG" ]]; then
+    bind lakebase_db "$LB_UC_CATALOG"
+fi
+
+echo ""
+echo "==> Bind complete. Run 'make app-deploy PROFILE=$PROFILE TARGET=$TARGET' next."
diff --git a/app/scripts/post_deploy_grants.sh b/app/scripts/post_deploy_grants.sh index a09d8b23f..3cab55e59 100755 --- a/app/scripts/post_deploy_grants.sh +++ b/app/scripts/post_deploy_grants.sh @@ -91,8 +91,8 @@ fi echo " Warehouse: $WH_ID" CATALOG=$(echo "$BUNDLE_JSON" | jq -r '.variables.catalog_name.value // .variables.catalog_name.default // empty') -SCHEMA=$(echo "$BUNDLE_JSON" | jq -r '.variables.schema_name.value // .variables.schema_name.default // "dqx_app"') -TMP_SCHEMA=$(echo "$BUNDLE_JSON" | jq -r '.variables.tmp_schema_name.value // .variables.tmp_schema_name.default // "dqx_app_tmp"') +SCHEMA=$(echo "$BUNDLE_JSON" | jq -r '.variables.schema_name.value // .variables.schema_name.default // "dqx_studio"') +TMP_SCHEMA=$(echo "$BUNDLE_JSON" | jq -r '.variables.tmp_schema_name.value // .variables.tmp_schema_name.default // "dqx_studio_tmp"') VOLUME=$(echo "$BUNDLE_JSON" | jq -r '.variables.wheels_volume_name.value // .variables.wheels_volume_name.default // "wheels"') JOB_SP=$(echo "$BUNDLE_JSON" | jq -r '.variables.dqx_service_principal_application_id.value // .variables.dqx_service_principal_application_id.default // empty') diff --git a/app/src/databricks_labs_dqx_app/backend/CLAUDE.md b/app/src/databricks_labs_dqx_app/backend/CLAUDE.md index 0dd2651b4..245479fa8 100644 --- a/app/src/databricks_labs_dqx_app/backend/CLAUDE.md +++ b/app/src/databricks_labs_dqx_app/backend/CLAUDE.md @@ -12,11 +12,12 @@ backend/ ├── cache.py # CacheFactory — async in-memory TTL cache + @cached decorator ├── config.py # AppConfig (Pydantic BaseSettings, DQX_ env prefix) ├── dependencies.py 
# FastAPI Depends() — OBO/SP auth, RBAC, services -├── migrations/ # MigrationRunner — versioned DDL applied at startup +├── migrations/ # MigrationRunner (Delta) + PgMigrationRunner (Lakebase) ├── models.py # Pydantic request/response models ├── run_status_manager.py # Helpers for reading/updating dq_validation_runs status ├── settings.py # SettingsManager — per-user prefs in ~/.dqx/app.yml ├── sql_executor.py # SqlExecutor — Databricks Statement Execution API wrapper +├── pg_executor.py # PgExecutor — Lakebase Postgres wrapper (parity API w/ SqlExecutor) ├── sql_utils.py # Shared SQL helpers: escape_sql_string, validate_fqn, quote_fqn ├── runtime.py # Runtime singleton (lazy WorkspaceClient) ├── logger.py # Custom logging formatter @@ -168,3 +169,107 @@ uv run uvicorn databricks_labs_dqx_app.backend.app:app --reload # Dev server - **Scheduler:** runs in-process as an asyncio task, gated by an exclusive file lock (`/tmp/.dqx_scheduler.lock`) so only one uvicorn worker drives it. Disable with `DQX_SCHEDULER_DISABLED=1`. - **Caches:** `app_cache` (`cache.py`) is per-process in-memory with TTL. SP `WorkspaceClient`, OBO `WorkspaceClient`, and per-user catalog list are all cached. Use the `MISS` sentinel — never `is None` — to detect cache absence. - **SPA static files:** `spa_static.py` falls through to `index.html` only for non-asset paths (positive allowlist of asset extensions), so SPA routes containing dots still work. + +## Hybrid Storage Backend (Delta + Lakebase) + +The DQX Studio data model is split across two physical backends and the +choice is driven entirely by `databricks.yml`: + +| Backend | Tables | Why | +|---------|--------|-----| +| **Delta Lake** (always) | `dq_validation_runs`, `dq_profiling_results`, `dq_quarantine_records`, `dq_metrics` | Spark task runner writes these; high-volume append-mostly; columnar reads. 
| +| **Lakebase Postgres** *(default — opt-out via `lakebase_instance_name=""`)* | `dq_app_settings`, `dq_role_mappings`, `dq_quality_rules`, `dq_quality_rules_history`, `dq_comments`, `dq_schedule_configs`, `dq_schedule_configs_history`, `dq_schedule_runs` | Low-latency point reads/writes from FastAPI request handlers; row-level upserts; primary-key/foreign-key semantics. | + +When Lakebase is **disabled** (no `lakebase_instance_name` set), the OLTP +tables fall back to Delta — `MigrationRunner` runs both +`v1: Delta analytical baseline` *and* `v2: Delta OLTP fallback`. When +Lakebase is **enabled**, only `v1` runs on Delta and `PgMigrationRunner` +provisions the OLTP tables in Postgres. + +### Key types + +- `SqlExecutor` (`sql_executor.py`) wraps the Databricks Statement + Execution API for Delta. +- `PgExecutor` (`pg_executor.py`) wraps `psycopg` + a `psycopg_pool.ConnectionPool` + for Lakebase. It mirrors `SqlExecutor`'s public surface: `execute`, + `query`, `query_dicts`, `upsert`, plus the dialect helpers + `q(identifier)`, `json_literal_expr(json_str)`, `ts_text(col)`. A + background daemon thread refreshes the OAuth password every + `DQX_LAKEBASE_TOKEN_REFRESH_MINUTES` minutes (default 50; tokens + expire at 60). The pool's `kwargs["password"]` is mutated in place + so subsequent connects pick up the new credential, and existing + connections age out via `max_lifetime`. +- Services keep their `sql: SqlExecutor` annotation; the dependency + injection layer (`dependencies.get_sp_oltp_executor`) hands back + whichever executor is registered, casting to `SqlExecutor` because + the two classes share an identical method surface. +- The `SchedulerService` accepts `oltp_sql: SqlExecutor | PgExecutor | None` + and routes OLTP-table SQL (schedule configs, settings, rules) to + the OLTP executor while keeping retention/GC against the Delta + executor. 
+ +### Retention sweep (daily) + +The scheduler runs a `DELETE` pass against the analytical tables once +per `_RETENTION_INTERVAL_HOURS` (24h). Two knobs, both stored in +`dq_app_settings` and surfaced via `GET/PUT /api/v1/config/retention`: + +| Setting key | Default | Tables affected | +|------------------------------|--------:|-----------------| +| `retention_days` | 90 | `dq_validation_runs`, `dq_profiling_results`, `dq_metrics`, plus the OLTP history tables (`dq_quality_rules_history`, `dq_schedule_configs_history`). Picked to match what trend dashboards expect. | +| `quarantine_retention_days` | 30 | `dq_quarantine_records` only. Tighter because that table holds the full source row payload (PII surface). | + +Both resolvers share a `_RETENTION_DAYS_MIN = 7` floor so a +mis-typed setting can never wipe data inside the safety window. Reads +swallow exceptions and fall back to the compiled-in default so a +SQL-warehouse hiccup never crashes the scheduler tick. + +### Writing portable SQL inside services + +Always go through the executor's dialect helpers — never hard-code +backticks, `parse_json(...)`, or `CAST(... AS STRING)`: + +```python +self._sql.q("check") # `check` (Delta) | "check" (Postgres) +self._sql.json_literal_expr(j) # parse_json('...') | '...'::jsonb +self._sql.ts_text("created_at") # CAST(created_at AS STRING) | created_at +``` + +For upserts, `SqlExecutor.upsert(table, key_cols, value_cols)` and +`PgExecutor.upsert` take the same arguments. Pass +`RawSql("current_timestamp()")` for timestamps — both backends rewrite +to their native syntax. 
+ +### Bundle / DAB conventions + +All stateful resources are declared in `databricks.yml` with +`lifecycle.prevent_destroy: true` (Databricks CLI 0.268+): + +* `resources.schemas.main_schema` — `dqx_studio` schema +* `resources.schemas.tmp_schema` — `dqx_studio_tmp` schema +* `resources.volumes.wheels` — wheels volume +* `resources.database_instances.lakebase` — Lakebase Postgres instance + (autoscaling by default per [Lakebase Autoscaling](https://docs.databricks.com/aws/en/oltp/upgrade-to-autoscaling)) +* `resources.database_catalogs.lakebase_db` — logical Postgres database + via `create_database_if_not_exists: true`, plus a surrounding Unity + Catalog catalog (informational only; the app connects to Postgres + directly via psycopg) + +The app→database binding stays in `resources.apps.dqx-studio.resources`, +referencing the bundle resources so DABs orders the deploy correctly +(instance + logical DB created before the app binds to them). + +`prevent_destroy` blocks `databricks bundle destroy` and any deploy +that would force-replace the resource — the alternative is silent data +loss. To intentionally tear something down: remove the flag, run +`databricks bundle deployment unbind <resource-key>`, then destroy. + +For workspaces where the resources were provisioned out-of-band before +this layout existed (e.g. by the legacy bootstrap script), one-time +binding is required: `make app-bind PROFILE=... TARGET=...`. After bind, +`bundle deploy` adopts the existing resources instead of trying to +CREATE them. + +Privileges on UC objects for the auto-created app SP are still applied +by `scripts/post_deploy_grants.sh` after each deploy — the app SP's +UUID isn't known at bundle-write time. 
diff --git a/app/src/databricks_labs_dqx_app/backend/app.py b/app/src/databricks_labs_dqx_app/backend/app.py index 8ef44d5d7..81e2d54aa 100644 --- a/app/src/databricks_labs_dqx_app/backend/app.py +++ b/app/src/databricks_labs_dqx_app/backend/app.py @@ -10,9 +10,10 @@ from ._scheduler_registry import get_scheduler, set_scheduler from .config import conf -from .dependencies import get_sp_ws +from .dependencies import get_sp_ws, set_oltp_executor from .logger import logger from .migrations import MigrationRunner +from .migrations.postgres import PgMigrationRunner from .routes import api_router from .services.scheduler_service import SchedulerService from .services.view_service import mark_tmp_schema_ready @@ -173,12 +174,65 @@ async def lifespan(app: FastAPI): sp_ws = await get_sp_ws() wh_id = os.environ.get("DATABRICKS_WAREHOUSE_ID") or os.environ.get("DATABRICKS_SQL_WAREHOUSE_ID") or "" sp_sql = SqlExecutor(ws=sp_ws, warehouse_id=wh_id, catalog=conf.catalog, schema=conf.schema_name) + + # ------------------------------------------------------------------ + # Lakebase (optional) — open the pool, run Postgres migrations, and + # register the executor as the OLTP backend used by service DI. + # + # The block is entirely best-effort: if Lakebase is configured but + # the instance is misconfigured/down, we log loudly and fall back + # to UC-only mode so the app still serves. This matches how the + # rest of startup degrades (volume sync, scheduler) — partial + # functionality beats a hard crash loop. 
+ # ------------------------------------------------------------------ + pg_executor = None + if conf.lakebase_enabled: + try: + from .pg_executor import build_pg_executor + + pg_executor = await asyncio.to_thread( + build_pg_executor, + sp_ws, + instance_name=conf.lakebase_instance_name, + database=conf.lakebase_database_name, + schema=conf.lakebase_schema_name, + token_refresh_minutes=conf.lakebase_token_refresh_minutes, + pool_min_size=conf.lakebase_pool_min_size, + pool_max_size=conf.lakebase_pool_max_size, + ) + pg_runner = PgMigrationRunner(pg_executor) + pg_applied = await asyncio.to_thread(pg_runner.run_all) + if pg_applied: + logger.info("Applied %d Lakebase migration(s)", pg_applied) + else: + logger.info("Lakebase schema is up to date") + set_oltp_executor(pg_executor) + logger.info( + "Lakebase OLTP routing enabled (instance=%s, database=%s, schema=%s)", + conf.lakebase_instance_name, + conf.lakebase_database_name, + conf.lakebase_schema_name, + ) + except Exception: + logger.exception( + "Lakebase initialisation failed — falling back to Delta for OLTP tables. " + "Verify the database_instance is provisioned and the app SP has CAN_CONNECT_AND_CREATE." + ) + pg_executor = None + set_oltp_executor(None) + else: + logger.info("Lakebase not configured (DQX_LAKEBASE_INSTANCE_NAME is empty). " "OLTP tables will live on Delta.") + set_oltp_executor(None) + + # Delta migrations always run, but the OLTP fallback DDL is + # skipped when Lakebase owns those tables — the same data model + # is created in Postgres above. runner = MigrationRunner(sql=sp_sql) - applied = runner.run_all() + applied = runner.run_all(include_oltp_fallback=pg_executor is None) if applied: - logger.info("Applied %d database migration(s)", applied) + logger.info("Applied %d Delta migration(s)", applied) else: - logger.info("Database schema is up to date") + logger.info("Delta schema is up to date") # Best-effort below — the app can recover from these failing. 
@@ -234,6 +288,7 @@ async def lifespan(app: FastAPI): schema=conf.schema_name, tmp_schema=conf.tmp_schema_name, job_id=conf.job_id, + oltp_sql=pg_executor, ) set_scheduler(_scheduler) _scheduler.start() @@ -252,6 +307,16 @@ async def lifespan(app: FastAPI): await sched.stop() set_scheduler(None) + # Close the Lakebase pool last so any in-flight writes from + # ``sched.stop()`` finish first. + if pg_executor is not None: + try: + await asyncio.to_thread(pg_executor.close) + logger.info("Lakebase connection pool closed") + except Exception: # noqa: BLE001 + logger.warning("Error closing Lakebase pool", exc_info=True) + set_oltp_executor(None) + app = FastAPI(title=f"{conf.app_name}", lifespan=lifespan) diff --git a/app/src/databricks_labs_dqx_app/backend/config.py b/app/src/databricks_labs_dqx_app/backend/config.py index c0c79c1d3..b0b9eee64 100644 --- a/app/src/databricks_labs_dqx_app/backend/config.py +++ b/app/src/databricks_labs_dqx_app/backend/config.py @@ -27,8 +27,8 @@ class AppConfig(BaseSettings): app_name: str = Field(default=app_name) api_prefix: str = Field(default="/api") catalog: str = Field(default="dqx") - schema_name: str = Field(default="dqx_app", validation_alias="DQX_SCHEMA") - tmp_schema_name: str = Field(default="dqx_app_tmp", validation_alias="DQX_TMP_SCHEMA") + schema_name: str = Field(default="dqx_studio", validation_alias="DQX_SCHEMA") + tmp_schema_name: str = Field(default="dqx_studio_tmp", validation_alias="DQX_TMP_SCHEMA") job_id: str = Field(default="", validation_alias="DQX_JOB_ID") wheels_volume: str = Field(default="", validation_alias="DQX_WHEELS_VOLUME") llm_endpoint: str = Field(default="databricks-claude-sonnet-4-5", validation_alias="DQX_LLM_ENDPOINT") @@ -42,10 +42,57 @@ class AppConfig(BaseSettings): dryrun_max_sample_size: int = Field(default=10_000) dryrun_default_sample_size: int = Field(default=1_000) + # ------------------------------------------------------------------ + # Lakebase (Postgres) backend + # 
------------------------------------------------------------------ + # When ``lakebase_instance_name`` is set the OLTP-style tables + # (rules catalog, app settings, RBAC, comments, schedule configs, + # scheduler bookkeeping) are routed to a Lakebase Postgres instance + # instead of Delta. Bulk/append-only tables (validation runs, + # profiling results, metrics, quarantine records) always stay in + # Delta because they are written by the Spark task runner. + # + # Leaving these empty keeps the legacy "everything on Delta" + # behaviour, so existing deployments continue to work without + # changes. See ``app/databricks.yml`` for the deploy-time toggle. + lakebase_instance_name: str = Field( + default="", + validation_alias="DQX_LAKEBASE_INSTANCE_NAME", + description="Lakebase instance name. Empty disables Lakebase routing.", + ) + lakebase_database_name: str = Field( + default="dqx_studio", + validation_alias="DQX_LAKEBASE_DATABASE_NAME", + description="Database within the Lakebase instance the app connects to.", + ) + lakebase_schema_name: str = Field( + default="dqx_studio", + validation_alias="DQX_LAKEBASE_SCHEMA", + description="Postgres schema for app tables. Created at startup if missing.", + ) + lakebase_pool_min_size: int = Field(default=1, validation_alias="DQX_LAKEBASE_POOL_MIN_SIZE") + lakebase_pool_max_size: int = Field(default=10, validation_alias="DQX_LAKEBASE_POOL_MAX_SIZE") + # Lakebase OAuth tokens currently expire after one hour; refresh + # well before that so in-flight queries never see a 401. + lakebase_token_refresh_minutes: int = Field( + default=50, + validation_alias="DQX_LAKEBASE_TOKEN_REFRESH_MINUTES", + ) + @property def static_assets_path(self) -> Path: return Path(str(resources.files(app_slug))).joinpath("__dist__") + @property + def lakebase_enabled(self) -> bool: + """``True`` when the deployment was provisioned with Lakebase. 
+ + Falls back to ``False`` (legacy UC-only mode) when the + instance name is empty so existing tests and dev setups keep + working with no Postgres dependency. + """ + return bool(self.lakebase_instance_name.strip()) + conf = AppConfig() diff --git a/app/src/databricks_labs_dqx_app/backend/dependencies.py b/app/src/databricks_labs_dqx_app/backend/dependencies.py index 9521af2d5..beff16cfc 100644 --- a/app/src/databricks_labs_dqx_app/backend/dependencies.py +++ b/app/src/databricks_labs_dqx_app/backend/dependencies.py @@ -4,7 +4,7 @@ import hashlib import os from collections.abc import Callable -from typing import TYPE_CHECKING, Annotated, Any +from typing import TYPE_CHECKING, Annotated, Any, cast if TYPE_CHECKING: from .common.connectors.sql import SQLConnector @@ -31,6 +31,43 @@ from .services.view_service import ViewService from .sql_executor import SqlExecutor +if TYPE_CHECKING: + from .pg_executor import PgExecutor + +# Type alias used by every OLTP-touching service: either the legacy +# Delta-backed :class:`SqlExecutor` or, when Lakebase is enabled, the +# :class:`PgExecutor`. The two classes share the public surface +# (``execute``, ``query``, ``query_dicts``, ``upsert``, ``q``, +# ``json_literal_expr``, ``ts_text``, ``dialect``) so service code can +# stay backend-agnostic. +OltpExecutor = "SqlExecutor | PgExecutor" + +# Process-wide OLTP executor (Lakebase Postgres). Constructed once at +# app startup by ``app.lifespan`` and re-used across all requests so +# the psycopg connection pool isn't rebuilt per call. ``None`` means +# Lakebase is not configured and the legacy Delta executor handles +# OLTP traffic instead. Lower-cased to keep basedpyright from +# flagging it as an immutable module-level constant. +_pg_executor: "SqlExecutor | PgExecutor | None" = None + + +def set_oltp_executor(executor: "SqlExecutor | PgExecutor | None") -> None: + """Register (or clear) the process-wide OLTP executor. 
+ + Called from :func:`backend.app.lifespan` after the connection pool + is open. Keeping this in module state (rather than passing it + through every request) lets the FastAPI ``Depends`` graph stay + request-local while still sharing the pool. + """ + global _pg_executor + _pg_executor = executor + + +def get_oltp_executor() -> "SqlExecutor | PgExecutor | None": + """Return the registered OLTP executor or ``None`` if Lakebase is off.""" + return _pg_executor + + _SP_TTL = 45 * 60 # 45 minutes _OBO_TTL = 45 * 60 # 45 minutes @@ -116,6 +153,32 @@ async def get_obo_sql_executor( ) +async def get_sp_oltp_executor( + sp_sql: Annotated[SqlExecutor, Depends(get_sp_sql_executor)], +) -> SqlExecutor: + """Return the executor that owns the OLTP tables. + + When Lakebase is configured the lifespan handler registers a + :class:`backend.pg_executor.PgExecutor` via :func:`set_oltp_executor` + and we hand it back to every OLTP service. Otherwise we fall back + to the legacy Delta executor (``get_sp_sql_executor``) so existing + deployments keep working with no code changes on their side. + + The return type is annotated as :class:`SqlExecutor` so every + downstream service can keep its existing ``sql: SqlExecutor`` + parameter. :class:`PgExecutor` deliberately mirrors the public + surface (``execute``, ``query``, ``query_dicts``, ``upsert``, + ``q``, ``json_literal_expr``, ``ts_text``, ``dialect``) so the + cast is safe at the call sites we exercise — services only touch + that surface. See ``sql_executor.py`` and ``pg_executor.py`` for + the parity contract. 
+ """ + pg = get_oltp_executor() + if pg is None: + return sp_sql + return cast(SqlExecutor, pg) + + # --------------------------------------------------------------------------- # Service factories # --------------------------------------------------------------------------- @@ -124,21 +187,26 @@ async def get_obo_sql_executor( async def get_migration_runner( sql: Annotated[SqlExecutor, Depends(get_sp_sql_executor)], ) -> MigrationRunner: - """Create a MigrationRunner using app (SP) credentials.""" + """Create the Delta MigrationRunner using app (SP) credentials. + + The Postgres :class:`PgMigrationRunner` is constructed separately + in :func:`backend.app.lifespan` because it needs the running + :class:`PgExecutor`, not a SQL warehouse executor. + """ return MigrationRunner(sql=sql) async def get_app_settings_service( - sql: Annotated[SqlExecutor, Depends(get_sp_sql_executor)], + sql: Annotated[SqlExecutor, Depends(get_sp_oltp_executor)], ) -> AppSettingsService: - """Create an AppSettingsService using app (SP) credentials.""" + """Create an AppSettingsService routed at the OLTP executor.""" return AppSettingsService(sql=sql) async def get_role_service( - sql: Annotated[SqlExecutor, Depends(get_sp_sql_executor)], + sql: Annotated[SqlExecutor, Depends(get_sp_oltp_executor)], ) -> RoleService: - """Create a RoleService using app (SP) credentials.""" + """Create a RoleService routed at the OLTP executor.""" return RoleService(sql=sql) @@ -155,9 +223,9 @@ async def get_ai_rules_service( async def get_rules_catalog_service( - sql: Annotated[SqlExecutor, Depends(get_sp_sql_executor)], + sql: Annotated[SqlExecutor, Depends(get_sp_oltp_executor)], ) -> RulesCatalogService: - """Create a RulesCatalogService using app (SP) credentials.""" + """Create a RulesCatalogService routed at the OLTP executor.""" return RulesCatalogService(sql=sql) @@ -184,16 +252,16 @@ async def get_view_service( async def get_comments_service( - sql: Annotated[SqlExecutor, 
Depends(get_sp_sql_executor)], + sql: Annotated[SqlExecutor, Depends(get_sp_oltp_executor)], ) -> CommentsService: - """Create a CommentsService using app (SP) credentials.""" + """Create a CommentsService routed at the OLTP executor.""" return CommentsService(sql=sql) async def get_schedule_config_service( - sql: Annotated[SqlExecutor, Depends(get_sp_sql_executor)], + sql: Annotated[SqlExecutor, Depends(get_sp_oltp_executor)], ) -> ScheduleConfigService: - """Create a ScheduleConfigService using app (SP) credentials.""" + """Create a ScheduleConfigService routed at the OLTP executor.""" return ScheduleConfigService(sql=sql) @@ -372,6 +440,9 @@ async def get_user_catalog_names( "get_obo_ws", "get_sp_sql_executor", "get_obo_sql_executor", + "get_sp_oltp_executor", + "get_oltp_executor", + "set_oltp_executor", "get_conf", "get_check_validator", "get_migration_runner", diff --git a/app/src/databricks_labs_dqx_app/backend/migrations/__init__.py b/app/src/databricks_labs_dqx_app/backend/migrations/__init__.py index e2f164f9f..03e914daf 100644 --- a/app/src/databricks_labs_dqx_app/backend/migrations/__init__.py +++ b/app/src/databricks_labs_dqx_app/backend/migrations/__init__.py @@ -1,16 +1,47 @@ -"""Database migration runner for DQX Studio. +"""Database migration runner for DQX Studio (Delta). Migrations are versioned DDL statements applied in order against the -configured catalog/schema. The runner tracks every applied version in a -``dq_migrations`` meta-table, so re-starting the app never re-applies a -migration that already succeeded. - -Schema baseline ---------------- -The app has not shipped to external users, so the schema is delivered -as a **single consolidated baseline** rather than 36 incremental -migrations. Every table is created at its final shape, with liquid -clustering inlined into the ``CREATE TABLE`` statement. +configured Unity Catalog catalog/schema. 
The runner tracks every +applied version in a ``dq_migrations`` meta-table, so re-starting the +app never re-applies a migration that already succeeded. + +Hybrid backend split +-------------------- +The schema is delivered in **two parts** so the OLTP-style tables can +optionally live on Lakebase Postgres while the high-volume analytical +tables stay in Delta: + +- **v1 — Delta analytical baseline** (always applied). Holds the + Spark-written tables: ``dq_validation_runs``, + ``dq_profiling_results``, ``dq_quarantine_records``, + ``dq_metrics``. +- **v2 — Delta OLTP fallback** (only applied when Lakebase is + disabled, i.e. ``include_oltp_fallback=True``). Holds the + FastAPI-served tables: ``dq_app_settings``, ``dq_quality_rules``, + ``dq_quality_rules_history``, ``dq_role_mappings``, ``dq_comments``, + ``dq_schedule_configs``, ``dq_schedule_configs_history``, + ``dq_schedule_runs``. + +When Lakebase is enabled the same OLTP tables are created via +:mod:`backend.migrations.postgres` against the Postgres schema and v2 +is skipped on the Delta side. + +Status casing convention +------------------------ +Two status families intentionally use different casing: + +- **Run-lifecycle** (``dq_validation_runs.status``, + ``dq_profiling_results.status``) — UPPERCASE + (``RUNNING``/``SUCCESS``/``FAILED``/``CANCELED``). These mirror the + Databricks Jobs SDK ``life_cycle_state`` / ``result_state`` values + that are passed straight through ``RunStatusOut`` to the frontend. +- **App-domain workflow** (``dq_quality_rules.status``, + ``dq_schedule_runs.status``) — lowercase. These are pure DQX + vocabulary (``draft``/``approved``, ``pending``/``partial_failure``) + with no SDK counterpart. + +CHECK constraints enforce the agreed values per domain — see each +table's ``chk_*_status`` constraint below. 
 Adding a new table or schema change after baseline -------------------------------------------------- @@ -21,22 +52,16 @@ SQL warehouse versions; ``_apply`` instead catches and tolerates ``COLUMN_ALREADY_EXISTS`` so re-running is safe). -Example:: - - Migration( - version=2, - description="Add description column to dq_role_mappings", - sql_template=( - "ALTER TABLE {catalog}.{schema}.dq_role_mappings " - "ADD COLUMN description STRING" - ), - ) +If the change touches an OLTP table, mirror it in +:mod:`backend.migrations.postgres` so Lakebase deployments stay in +sync. Upgrading an existing dev workspace ----------------------------------- -A workspace that previously ran the legacy v1–v36 sequence will have -``dq_migrations`` rows for versions that no longer exist. The cleanest -path is:: +A workspace that previously ran the legacy migration sequence will have +``dq_migrations`` rows for versions that no longer exist, and tables +whose column types or constraints predate this baseline revision. The +cleanest path is:: DROP SCHEMA <catalog>.<schema> CASCADE; @@ -70,183 +95,322 @@ class Migration: # Order is significant. Never change or remove existing entries — only # append new ones. # -# v1 is the consolidated baseline created on 2026-05-03 by collapsing -# the original v1–v36 incremental sequence. Each table is defined at -# its final shape with liquid clustering inlined; the legacy -# wide-format ``dq_metrics`` (renamed to ``dq_metrics_v1_legacy`` in -# the original v32) is dropped from the baseline because it has no -# clean-install consumer. +# v1 is the consolidated baseline. Each table is defined at its final +# shape with liquid clustering and primary keys inlined (CHECK constraints +# follow as separate ALTER TABLE statements). Revisions to the baseline are allowed (and encouraged) until +# the app ships externally; existing dev workspaces upgrade by +# ``DROP SCHEMA … CASCADE`` and re-running migrations from scratch. 
+# +# Notes on column choices: +# - PRIMARY KEY constraints are informational (``NOT ENFORCED RELY``); +# Delta uses them for query optimization, lineage, AI/BI tooling. +# - CHECK constraints ARE enforced — picking the right value-set on +# day one is cheap; loosening later is just an ALTER TABLE. +# - ``VARIANT`` (DBR 15.3+ / serverless) replaces ad-hoc JSON-in-string +# for the largest blob columns (``dq_quality_rules.check``, +# ``dq_quarantine_records.row_data``/``errors``). +# - Run-lifecycle ``status`` columns use UPPERCASE values to mirror the +# Databricks Jobs SDK; app-domain ``status`` columns use lowercase. +# v1 always runs against Delta and only contains the high-volume +# analytical tables that the Spark task runner writes. Keeping these +# in Delta lets AI/BI dashboards consume them directly via SQL +# warehouse without round-tripping through Postgres. +_V1_ANALYTICAL_BASELINE = ( + # Profiler runs — one row per profile job. Mutable lifecycle + # (RUNNING → SUCCESS/FAILED/CANCELED). Status values mirror + # Databricks Jobs SDK convention. + # + # NOTE: Delta only allows PRIMARY KEY and FOREIGN KEY constraints + # inline in CREATE TABLE — every CHECK constraint must be added + # via a separate ALTER TABLE … ADD CONSTRAINT statement after the + # table exists. The migration runner swallows + # ``DELTA_CONSTRAINT_ALREADY_EXISTS`` so re-runs are idempotent. 
+ f"CREATE TABLE IF NOT EXISTS {_PLACEHOLDER}.dq_profiling_results (" + " run_id STRING NOT NULL," + " requesting_user STRING," + " source_table_fqn STRING NOT NULL," + " view_fqn STRING," + " sample_limit INT," + " rows_profiled INT," + " columns_profiled INT," + " duration_seconds DOUBLE," + " summary_json STRING," + " generated_rules_json STRING," + " status STRING NOT NULL," + " error_message STRING," + " canceled_by STRING," + " updated_at TIMESTAMP," + " job_run_id BIGINT," + " rule_set_fingerprint STRING," + " created_at TIMESTAMP" + ") CLUSTER BY (source_table_fqn, run_id, created_at);" + f"ALTER TABLE {_PLACEHOLDER}.dq_profiling_results " + f" ADD CONSTRAINT chk_dq_profiling_results_status " + f" CHECK (status IN ('RUNNING','SUCCESS','FAILED','CANCELED'));" + # + # Validation (dryrun + scheduled) runs — one row per run, mutable + # lifecycle. Joins to ``dq_metrics`` on + # ``(run_id, rule_set_fingerprint)``. + f"CREATE TABLE IF NOT EXISTS {_PLACEHOLDER}.dq_validation_runs (" + " run_id STRING NOT NULL," + " requesting_user STRING," + " source_table_fqn STRING NOT NULL," + " view_fqn STRING," + " checks_json STRING," + " sample_size INT," + " total_rows INT," + " valid_rows INT," + " invalid_rows INT," + " error_rows INT," + " warning_rows INT," + " error_summary_json STRING," + " sample_invalid_json STRING," + " status STRING NOT NULL," + " error_message STRING," + " canceled_by STRING," + " updated_at TIMESTAMP," + " run_type STRING," + " job_run_id BIGINT," + " rule_set_fingerprint STRING," + " created_at TIMESTAMP" + ") CLUSTER BY (source_table_fqn, run_id, created_at);" + f"ALTER TABLE {_PLACEHOLDER}.dq_validation_runs " + f" ADD CONSTRAINT chk_dq_validation_runs_status " + f" CHECK (status IN ('RUNNING','SUCCESS','FAILED','CANCELED'));" + f"ALTER TABLE {_PLACEHOLDER}.dq_validation_runs " + f" ADD CONSTRAINT chk_dq_validation_runs_run_type " + f" CHECK (run_type IS NULL OR run_type IN ('dryrun','scheduled','preview'));" + # + # Quarantined invalid rows 
captured during validation. ``row_data`` + # and ``errors`` are VARIANT for native JSON predicate pushdown and + # ~10x compression vs. STRING. + f"CREATE TABLE IF NOT EXISTS {_PLACEHOLDER}.dq_quarantine_records (" + " quarantine_id STRING NOT NULL," + " run_id STRING NOT NULL," + " source_table_fqn STRING NOT NULL," + " requesting_user STRING," + " row_data VARIANT," + " errors VARIANT," + " warnings VARIANT," + " created_at TIMESTAMP," + " CONSTRAINT pk_dq_quarantine_records PRIMARY KEY (quarantine_id) RELY" + ") CLUSTER BY (run_id);" + # + # Long-format observability events written by DQMetricsObserver. + # Schema mirrors the public DQX OBSERVATION_TABLE_SCHEMA so AI/BI + # dashboard templates targeting the spec drop straight in. + f"CREATE TABLE IF NOT EXISTS {_PLACEHOLDER}.dq_metrics (" + " run_id STRING NOT NULL," + " run_name STRING," + " input_location STRING," + " output_location STRING," + " quarantine_location STRING," + " checks_location STRING," + " rule_set_fingerprint STRING," + " metric_name STRING NOT NULL," + " metric_value STRING," + " run_time TIMESTAMP NOT NULL," + " error_column_name STRING," + " warning_column_name STRING," + " user_metadata MAP" + ") CLUSTER BY (input_location, run_id, run_time)" +) + + +# v2 is the Delta-only OLTP fallback. It is **only** applied when +# Lakebase is disabled (``include_oltp_fallback=True`` in +# :meth:`MigrationRunner.run_all`). When Lakebase is enabled, the same +# tables are created via :mod:`backend.migrations.postgres` against the +# Postgres backend. +_V2_OLTP_FALLBACK = ( + # Settings — single-row-per-key key/value store (workspace config, + # label catalog, custom metrics, timezone, ...). + f"CREATE TABLE IF NOT EXISTS {_PLACEHOLDER}.dq_app_settings (" + " setting_key STRING NOT NULL," + " setting_value STRING," + " updated_at TIMESTAMP," + " updated_by STRING," + " CONSTRAINT pk_dq_app_settings PRIMARY KEY (setting_key) RELY" + ") CLUSTER BY (setting_key);" + # + # Active rule catalog. 
``rule_id`` is a per-check stable identifier; + # each row holds exactly ONE check serialized as a VARIANT object + # (no array wrapper). ``source`` records which authoring path + # produced the rule. + f"CREATE TABLE IF NOT EXISTS {_PLACEHOLDER}.dq_quality_rules (" + " rule_id STRING NOT NULL," + " table_fqn STRING NOT NULL," + " `check` VARIANT NOT NULL," + " version INT NOT NULL," + " status STRING NOT NULL," + " source STRING NOT NULL," + " created_by STRING," + " created_at TIMESTAMP," + " updated_by STRING," + " updated_at TIMESTAMP," + " CONSTRAINT pk_dq_quality_rules PRIMARY KEY (rule_id) RELY" + ") CLUSTER BY (table_fqn, status, rule_id);" + f"ALTER TABLE {_PLACEHOLDER}.dq_quality_rules " + f" ADD CONSTRAINT chk_dq_quality_rules_status " + f" CHECK (status IN ('draft','pending_approval','approved','rejected'));" + f"ALTER TABLE {_PLACEHOLDER}.dq_quality_rules " + f" ADD CONSTRAINT chk_dq_quality_rules_source " + f" CHECK (source IN ('ui','sql','profiler','import','ai'));" + # + # Append-only audit trail for rule changes. Carries the post-state + # ``check`` payload on every row plus an explicit + # ``prev_status``/``new_status`` pair for status transitions. + f"CREATE TABLE IF NOT EXISTS {_PLACEHOLDER}.dq_quality_rules_history (" + " rule_id STRING," + " table_fqn STRING NOT NULL," + " `check` VARIANT," + " version INT," + " source STRING," + " action STRING NOT NULL," + " prev_status STRING," + " new_status STRING," + " changed_by STRING," + " changed_at TIMESTAMP" + ") CLUSTER BY (table_fqn, changed_at);" + # + # RBAC: maps app roles (admin/rule_approver/rule_author/viewer/ + # runner) to Databricks workspace groups. Tiny table — no + # clustering needed. 
+ f"CREATE TABLE IF NOT EXISTS {_PLACEHOLDER}.dq_role_mappings (" + " role STRING NOT NULL," + " group_name STRING NOT NULL," + " created_by STRING," + " created_at TIMESTAMP," + " updated_by STRING," + " updated_at TIMESTAMP," + " CONSTRAINT pk_dq_role_mappings PRIMARY KEY (role, group_name) RELY" + ");" + f"ALTER TABLE {_PLACEHOLDER}.dq_role_mappings " + f" ADD CONSTRAINT chk_dq_role_mappings_role " + f" CHECK (role IN ('admin','rule_approver','rule_author','viewer','runner'));" + # + # Per-entity comment threads (rules, runs, profiles, ...). + f"CREATE TABLE IF NOT EXISTS {_PLACEHOLDER}.dq_comments (" + " comment_id STRING NOT NULL," + " entity_type STRING NOT NULL," + " entity_id STRING NOT NULL," + " user_email STRING NOT NULL," + " comment STRING NOT NULL," + " created_at TIMESTAMP," + " CONSTRAINT pk_dq_comments PRIMARY KEY (comment_id) RELY" + ") CLUSTER BY (entity_type, entity_id);" + f"ALTER TABLE {_PLACEHOLDER}.dq_comments " + f" ADD CONSTRAINT chk_dq_comments_entity_type " + f" CHECK (entity_type IN ('run','rule'));" + # + # Scheduler bookkeeping: last/next run pointer per schedule. + # ``status`` is app-domain (lowercase). + f"CREATE TABLE IF NOT EXISTS {_PLACEHOLDER}.dq_schedule_runs (" + " schedule_name STRING NOT NULL," + " last_run_at TIMESTAMP," + " next_run_at TIMESTAMP," + " last_run_id STRING," + " status STRING," + " updated_at TIMESTAMP," + " CONSTRAINT pk_dq_schedule_runs PRIMARY KEY (schedule_name) RELY" + ") CLUSTER BY (schedule_name);" + f"ALTER TABLE {_PLACEHOLDER}.dq_schedule_runs " + f" ADD CONSTRAINT chk_dq_schedule_runs_status " + f" CHECK (status IS NULL OR status IN ('pending','success','partial_failure','failed'));" + # + # Per-schedule live config (cron/interval, scope filters). 
+ f"CREATE TABLE IF NOT EXISTS {_PLACEHOLDER}.dq_schedule_configs (" + " schedule_name STRING NOT NULL," + " config_json STRING NOT NULL," + " version INT NOT NULL," + " created_by STRING," + " created_at TIMESTAMP," + " updated_by STRING," + " updated_at TIMESTAMP," + " CONSTRAINT pk_dq_schedule_configs PRIMARY KEY (schedule_name) RELY" + ") CLUSTER BY (schedule_name);" + # + # Append-only audit trail for schedule changes. + f"CREATE TABLE IF NOT EXISTS {_PLACEHOLDER}.dq_schedule_configs_history (" + " schedule_name STRING NOT NULL," + " config_json STRING," + " version INT," + " action STRING NOT NULL," + " changed_by STRING," + " changed_at TIMESTAMP" + ") CLUSTER BY (schedule_name, changed_at)" +) + + +# Backfills ``warning_rows`` on workspaces deployed before v1 added it. +# On fresh deploys ``_apply`` swallows the ``COLUMN_ALREADY_EXISTS`` error +# per the column-addition rule documented at the top of this module. +_V3_VALIDATION_RUNS_WARNING_ROWS = f"ALTER TABLE {_PLACEHOLDER}.dq_validation_runs " f" ADD COLUMN warning_rows INT" + + +# Quarantine rows that fail only warning-level checks would otherwise +# show an empty ``errors`` column in the UI. We mirror DQX's row-level +# ``_warnings`` map into its own VARIANT so warnings can be rendered +# alongside errors in the dry-run sample table. +_V4_QUARANTINE_WARNINGS = f"ALTER TABLE {_PLACEHOLDER}.dq_quarantine_records " f" ADD COLUMN warnings VARIANT" + + +# ``invalid_rows`` (set from ``invalid_df.count()``) conflated "rows that +# failed any check" with "rows with errors" — and could over-count when +# certain DQX checks fan out internally. ``error_rows`` is the +# authoritative count from the DQX observer (``error_row_count``), so the +# UI now surfaces it as the primary "Errors" stat. ``invalid_rows`` is +# kept for backwards compatibility but no longer drives the UI. 
+_V5_VALIDATION_RUNS_ERROR_ROWS = f"ALTER TABLE {_PLACEHOLDER}.dq_validation_runs " f" ADD COLUMN error_rows INT" + + +# OLTP fallback migration is identified by ``oltp_fallback=True`` so +# the runner can skip it when Lakebase is enabled. Keeping the flag on +# the migration itself (rather than e.g. a hard-coded version number) +# makes it easy to add follow-up Delta-only OLTP migrations later +# without re-discovering the rule. +@dataclass(frozen=True) +class DeltaMigration(Migration): + """Migration variant that knows whether it carries OLTP fallback DDL. + + A subclass (rather than a flag on :class:`Migration`) keeps + backwards compatibility for any callers that still hand-build + ``Migration`` instances and don't care about the flag. + """ + + oltp_fallback: bool = False + + MIGRATIONS: list[Migration] = [ - Migration( + DeltaMigration( version=1, - description="Baseline schema — all DQX Studio tables at their final shape", - sql_template=( - # Settings — single-row-per-key key/value store (workspace - # config, label catalog, custom metrics, timezone, ...). - f"CREATE TABLE IF NOT EXISTS {_PLACEHOLDER}.dq_app_settings (" - " setting_key STRING NOT NULL," - " setting_value STRING," - " updated_at TIMESTAMP," - " updated_by STRING" - ") CLUSTER BY (setting_key);" - # - # Active rule catalog. ``rule_id`` is a per-check stable - # identifier; ``source`` records which authoring path - # produced the rule (single-table, sql, profiler, import). - f"CREATE TABLE IF NOT EXISTS {_PLACEHOLDER}.dq_quality_rules (" - " table_fqn STRING NOT NULL," - " checks STRING NOT NULL," - " version INT," - " status STRING," - " source STRING," - " rule_id STRING," - " created_by STRING," - " created_at TIMESTAMP," - " updated_by STRING," - " updated_at TIMESTAMP" - ") CLUSTER BY (table_fqn, status);" - # - # Append-only audit trail for rule changes. 
- f"CREATE TABLE IF NOT EXISTS {_PLACEHOLDER}.dq_quality_rules_history (" - " table_fqn STRING NOT NULL," - " checks STRING," - " version INT," - " source STRING," - " rule_id STRING," - " action STRING NOT NULL," - " changed_by STRING," - " changed_at TIMESTAMP" - ") CLUSTER BY (table_fqn, changed_at);" - # - # Profiler runs — one row per profile job. Mutable - # lifecycle (RUNNING → SUCCESS/FAILED/CANCELED). - f"CREATE TABLE IF NOT EXISTS {_PLACEHOLDER}.dq_profiling_results (" - " run_id STRING NOT NULL," - " requesting_user STRING," - " source_table_fqn STRING NOT NULL," - " view_fqn STRING," - " sample_limit INT," - " rows_profiled INT," - " columns_profiled INT," - " duration_seconds DOUBLE," - " summary_json STRING," - " generated_rules_json STRING," - " status STRING," - " error_message STRING," - " canceled_by STRING," - " updated_at STRING," - " job_run_id BIGINT," - " rule_set_fingerprint STRING," - " created_at STRING" - ") CLUSTER BY (source_table_fqn, created_at);" - # - # Validation (dryrun + scheduled) runs — one row per run, - # mutable lifecycle. Joins to ``dq_metrics`` on - # ``(run_id, rule_set_fingerprint)``. - f"CREATE TABLE IF NOT EXISTS {_PLACEHOLDER}.dq_validation_runs (" - " run_id STRING NOT NULL," - " requesting_user STRING," - " source_table_fqn STRING NOT NULL," - " view_fqn STRING," - " checks_json STRING," - " sample_size INT," - " total_rows INT," - " valid_rows INT," - " invalid_rows INT," - " error_summary_json STRING," - " sample_invalid_json STRING," - " status STRING," - " error_message STRING," - " canceled_by STRING," - " updated_at STRING," - " run_type STRING," - " job_run_id BIGINT," - " rule_set_fingerprint STRING," - " created_at STRING" - ") CLUSTER BY (source_table_fqn, created_at);" - # - # RBAC: maps app roles (ADMIN/RULE_APPROVER/RULE_AUTHOR/ - # VIEWER/RUNNER) to Databricks workspace groups. 
- f"CREATE TABLE IF NOT EXISTS {_PLACEHOLDER}.dq_role_mappings (" - " role STRING NOT NULL," - " group_name STRING NOT NULL," - " created_by STRING," - " created_at TIMESTAMP," - " updated_by STRING," - " updated_at TIMESTAMP" - ") CLUSTER BY (role);" - # - # Per-entity comment threads (rules, runs, profiles, ...). - f"CREATE TABLE IF NOT EXISTS {_PLACEHOLDER}.dq_comments (" - " comment_id STRING NOT NULL," - " entity_type STRING NOT NULL," - " entity_id STRING NOT NULL," - " user_email STRING NOT NULL," - " comment STRING NOT NULL," - " created_at TIMESTAMP" - ") CLUSTER BY (entity_type, entity_id);" - # - # Quarantined invalid rows captured during validation. - f"CREATE TABLE IF NOT EXISTS {_PLACEHOLDER}.dq_quarantine_records (" - " quarantine_id STRING NOT NULL," - " run_id STRING NOT NULL," - " source_table_fqn STRING NOT NULL," - " requesting_user STRING," - " row_data STRING," - " errors STRING," - " created_at STRING" - ") CLUSTER BY (run_id, source_table_fqn);" - # - # Long-format observability events written by - # DQMetricsObserver. Schema mirrors the public DQX - # OBSERVATION_TABLE_SCHEMA so AI/BI dashboard templates - # targeting the spec drop straight in. - f"CREATE TABLE IF NOT EXISTS {_PLACEHOLDER}.dq_metrics (" - " run_id STRING NOT NULL," - " run_name STRING," - " input_location STRING," - " output_location STRING," - " quarantine_location STRING," - " checks_location STRING," - " rule_set_fingerprint STRING," - " metric_name STRING NOT NULL," - " metric_value STRING," - " run_time TIMESTAMP NOT NULL," - " error_column_name STRING," - " warning_column_name STRING," - " user_metadata MAP<STRING, STRING>" - ") CLUSTER BY (input_location, run_time);" - # - # Scheduler bookkeeping: last/next run pointer per schedule.
- f"CREATE TABLE IF NOT EXISTS {_PLACEHOLDER}.dq_schedule_runs (" - " schedule_name STRING NOT NULL," - " last_run_at TIMESTAMP," - " next_run_at TIMESTAMP," - " last_run_id STRING," - " status STRING" - ");" - # - # Per-schedule live config (cron/interval, scope filters). - f"CREATE TABLE IF NOT EXISTS {_PLACEHOLDER}.dq_schedule_configs (" - " schedule_name STRING NOT NULL," - " config_json STRING NOT NULL," - " version INT," - " created_by STRING," - " created_at TIMESTAMP," - " updated_by STRING," - " updated_at TIMESTAMP" - ") CLUSTER BY (schedule_name);" - # - # Append-only audit trail for schedule changes. - f"CREATE TABLE IF NOT EXISTS {_PLACEHOLDER}.dq_schedule_configs_history (" - " schedule_name STRING NOT NULL," - " config_json STRING," - " version INT," - " action STRING NOT NULL," - " changed_by STRING," - " changed_at TIMESTAMP" - ") CLUSTER BY (schedule_name, changed_at)" - ), + description="Delta analytical baseline (validation, profiling, quarantine, metrics)", + sql_template=_V1_ANALYTICAL_BASELINE, + oltp_fallback=False, + ), + DeltaMigration( + version=2, + description="Delta OLTP fallback (rules, app settings, RBAC, schedules) — used only when Lakebase is disabled", + sql_template=_V2_OLTP_FALLBACK, + oltp_fallback=True, + ), + DeltaMigration( + version=3, + description="Add warning_rows column to dq_validation_runs (backfill for pre-v3 deploys)", + sql_template=_V3_VALIDATION_RUNS_WARNING_ROWS, + oltp_fallback=False, + ), + DeltaMigration( + version=4, + description="Add warnings VARIANT column to dq_quarantine_records (mirror DQX _warnings map)", + sql_template=_V4_QUARANTINE_WARNINGS, + oltp_fallback=False, + ), + DeltaMigration( + version=5, + description="Add error_rows column to dq_validation_runs (DQX error_row_count, replaces invalid_rows for UI)", + sql_template=_V5_VALIDATION_RUNS_ERROR_ROWS, + oltp_fallback=False, ), ] @@ -280,8 +444,21 @@ def __init__(self, sql: SqlExecutor) -> None: # Public API # 
------------------------------------------------------------------ - def run_all(self) -> int: - """Ensure the schema exists and apply all pending migrations. + def run_all(self, *, include_oltp_fallback: bool = True) -> int: + """Ensure the schema exists and apply all pending Delta migrations. + + Parameters + ---------- + include_oltp_fallback: + When ``True`` (legacy mode, no Lakebase) all migrations + run including the OLTP fallback DDL (v2 in the baseline). + When ``False`` (Lakebase enabled) migrations marked with + ``oltp_fallback=True`` are skipped — the same tables are + created in Postgres via :class:`PgMigrationRunner` instead. + + Skipped migrations are *not* recorded as applied, so + disabling Lakebase later will cause them to run on the + next deploy and create the Delta-side tables on demand. Returns: The number of migrations applied in this invocation. @@ -300,6 +477,15 @@ def run_all(self) -> int: ) continue + if not include_oltp_fallback and isinstance(migration, DeltaMigration) and migration.oltp_fallback: + logger.info( + "Skipping Delta OLTP fallback migration v%d " + "(Lakebase enabled — these tables live in Postgres): %s", + migration.version, + migration.description, + ) + continue + logger.info( "Applying migration v%d: %s", migration.version, @@ -373,10 +559,27 @@ def _applied_at_map(self) -> dict[int, str]: # the whole migration to abort. _IDEMPOTENT_ERROR_FRAGMENTS = ( "COLUMN_ALREADY_EXISTS", + # Databricks Delta surfaces an ``ADD COLUMN`` that targets an + # already-present column as ``FIELD_ALREADY_EXISTS`` (singular). + # Older versions used the plural ``FIELDS_ALREADY_EXISTS``; + # keep both to defend against runtime wording drift across + # workspaces / DBR versions. 
+ "FIELD_ALREADY_EXISTS", "FIELDS_ALREADY_EXISTS", "TABLE_OR_VIEW_ALREADY_EXISTS", "TABLE_ALREADY_EXISTS", "already has liquid clustering defined", + # CHECK constraints are added via ``ALTER TABLE … ADD CONSTRAINT`` + # in a separate statement after CREATE TABLE (Delta only allows + # PK/FK inline). When a previous migration attempt got past + # CREATE TABLE but failed before recording the version, the + # next run sees the constraint and emits this error — safe to + # swallow because the desired state is already in place. The + # second fragment guards against future error-message tweaks + # since Databricks has used both the SQLSTATE-prefixed code and + # plain English wording at different times. + "DELTA_CONSTRAINT_ALREADY_EXISTS", + "constraint already exists", ) def _apply(self, migration: Migration) -> None: diff --git a/app/src/databricks_labs_dqx_app/backend/migrations/postgres.py b/app/src/databricks_labs_dqx_app/backend/migrations/postgres.py new file mode 100644 index 000000000..5731380e7 --- /dev/null +++ b/app/src/databricks_labs_dqx_app/backend/migrations/postgres.py @@ -0,0 +1,294 @@ +"""Lakebase (Postgres) migration runner for DQX Studio OLTP tables. + +The OLTP subset of the schema (rules catalog, app settings, RBAC, +comments, schedule configs, scheduler bookkeeping) lives in Lakebase +Postgres when ``conf.lakebase_enabled`` is true. Append-mostly +analytical tables (``dq_validation_runs``, ``dq_profiling_results``, +``dq_metrics``, ``dq_quarantine_records``) stay in Delta because they +are written by the Spark task runner. + +The runner mirrors :class:`backend.migrations.MigrationRunner` so the +operational story (versioned, idempotent, recorded in a +``dq_migrations`` meta table) is identical regardless of backend. + +Schema mapping highlights +------------------------- +- Delta ``VARIANT`` (``check``, future blob columns) → Postgres + ``JSONB``. 
Both store JSON natively with predicate pushdown; JSONB + also supports GIN indexes for low-latency lookups. +- Delta ``TIMESTAMP`` → Postgres ``TIMESTAMPTZ``. Postgres' + timezone-naive ``TIMESTAMP`` would silently drop offsets and break + cross-region debugging. +- Delta ``BIGINT`` / ``INT`` / ``DOUBLE`` → ``BIGINT`` / ``INT`` / + ``DOUBLE PRECISION`` respectively. +- Delta ``CLUSTER BY`` is not exposed; Postgres uses indexes + declaratively where the access pattern justifies them. Each table + gets the small set of indexes the FastAPI services actually need. + +Adding a new migration +---------------------- +Append a new :class:`PgMigration` entry with the next monotonically +increasing version number. Postgres supports ``ALTER TABLE ... ADD +COLUMN IF NOT EXISTS`` natively so re-running is safe out of the box. +""" + +from __future__ import annotations + +import logging +from dataclasses import dataclass +from typing import Protocol + +logger = logging.getLogger(__name__) + + +# --------------------------------------------------------------------------- +# Migration protocol — keeps this module decoupled from PgExecutor so the +# unit tests can inject a mock with the same surface. +# --------------------------------------------------------------------------- + + +class _Executor(Protocol): + # Declared as properties so concrete classes are free to expose + # ``schema``/``database`` as either plain attributes or + # ``@property`` methods. Plain ``schema: str`` would have made + # property-based exposure incompatible (basedpyright treats the + # plain form as invariant write-able, while ``@property`` is + # readable-only). + @property + def schema(self) -> str: ... + @property + def database(self) -> str: ... + + def execute(self, sql: str, *, timeout_seconds: int = 120) -> None: ... + def query(self, sql: str, *, timeout_seconds: int = 120) -> list[list[str]]: ... 
+ + +@dataclass(frozen=True) +class PgMigration: + version: int + description: str + sql: str + + +# ``{schema}`` is substituted at apply-time so a single migration list +# can be re-targeted for tests/dev/prod without copy-paste. +_S = "{schema}" + + +PG_MIGRATIONS: list[PgMigration] = [ + PgMigration( + version=1, + description="Lakebase OLTP baseline (app_settings, rules, role mappings, comments, schedules)", + sql=( + # ---------------------------------------------------------- + # dq_app_settings — single-row-per-key KV store. + # ---------------------------------------------------------- + f"CREATE TABLE IF NOT EXISTS {_S}.dq_app_settings (" + " setting_key TEXT PRIMARY KEY," + " setting_value TEXT," + " updated_at TIMESTAMPTZ," + " updated_by TEXT" + ");" + # ---------------------------------------------------------- + # dq_quality_rules — active rule catalog. + # ---------------------------------------------------------- + f"CREATE TABLE IF NOT EXISTS {_S}.dq_quality_rules (" + " rule_id TEXT PRIMARY KEY," + " table_fqn TEXT NOT NULL," + ' "check" JSONB NOT NULL,' + " version INTEGER NOT NULL," + " status TEXT NOT NULL," + " source TEXT NOT NULL," + " created_by TEXT," + " created_at TIMESTAMPTZ," + " updated_by TEXT," + " updated_at TIMESTAMPTZ," + " CONSTRAINT chk_dq_quality_rules_status " + " CHECK (status IN ('draft','pending_approval','approved','rejected'))," + " CONSTRAINT chk_dq_quality_rules_source " + " CHECK (source IN ('ui','sql','profiler','import','ai'))" + ");" + # Two read-paths dominate: by table_fqn (rules-list page) and + # by status filter (review queue). The composite index serves + # table_fqn-only and table_fqn+status lookups through its leading + # column; a status-only filter cannot seek on it, so add a + # dedicated (status) index if the review queue becomes slow. + f"CREATE INDEX IF NOT EXISTS idx_dq_quality_rules_table_status " + f" ON {_S}.dq_quality_rules (table_fqn, status);" + # ---------------------------------------------------------- + # dq_quality_rules_history — append-only audit trail.
+ # ---------------------------------------------------------- + f"CREATE TABLE IF NOT EXISTS {_S}.dq_quality_rules_history (" + " history_id BIGSERIAL PRIMARY KEY," + " rule_id TEXT," + " table_fqn TEXT NOT NULL," + ' "check" JSONB,' + " version INTEGER," + " source TEXT," + " action TEXT NOT NULL," + " prev_status TEXT," + " new_status TEXT," + " changed_by TEXT," + " changed_at TIMESTAMPTZ" + ");" + f"CREATE INDEX IF NOT EXISTS idx_dq_quality_rules_history_rule_changed_at " + f" ON {_S}.dq_quality_rules_history (rule_id, changed_at DESC);" + f"CREATE INDEX IF NOT EXISTS idx_dq_quality_rules_history_table_changed_at " + f" ON {_S}.dq_quality_rules_history (table_fqn, changed_at DESC);" + # ---------------------------------------------------------- + # dq_role_mappings — RBAC. + # ---------------------------------------------------------- + f"CREATE TABLE IF NOT EXISTS {_S}.dq_role_mappings (" + " role TEXT NOT NULL," + " group_name TEXT NOT NULL," + " created_by TEXT," + " created_at TIMESTAMPTZ," + " updated_by TEXT," + " updated_at TIMESTAMPTZ," + " PRIMARY KEY (role, group_name)," + " CONSTRAINT chk_dq_role_mappings_role " + " CHECK (role IN ('admin','rule_approver','rule_author','viewer','runner'))" + ");" + # ---------------------------------------------------------- + # dq_comments — per-entity comment threads. + # ---------------------------------------------------------- + f"CREATE TABLE IF NOT EXISTS {_S}.dq_comments (" + " comment_id TEXT PRIMARY KEY," + " entity_type TEXT NOT NULL," + " entity_id TEXT NOT NULL," + " user_email TEXT NOT NULL," + " comment TEXT NOT NULL," + " created_at TIMESTAMPTZ," + " CONSTRAINT chk_dq_comments_entity_type " + " CHECK (entity_type IN ('run','rule'))" + ");" + f"CREATE INDEX IF NOT EXISTS idx_dq_comments_entity_created_at " + f" ON {_S}.dq_comments (entity_type, entity_id, created_at DESC);" + # ---------------------------------------------------------- + # dq_schedule_runs — last/next-run pointer per schedule. 
+ # ---------------------------------------------------------- + f"CREATE TABLE IF NOT EXISTS {_S}.dq_schedule_runs (" + " schedule_name TEXT PRIMARY KEY," + " last_run_at TIMESTAMPTZ," + " next_run_at TIMESTAMPTZ," + " last_run_id TEXT," + " status TEXT," + " updated_at TIMESTAMPTZ," + " CONSTRAINT chk_dq_schedule_runs_status " + " CHECK (status IS NULL OR status IN " + " ('pending','success','partial_failure','failed'))" + ");" + # The scheduler loop polls "next_run_at <= now() AND status + # IS NOT 'pending'" every tick; a btree index on + # next_run_at keeps that scan cheap as the schedule list + # grows. + f"CREATE INDEX IF NOT EXISTS idx_dq_schedule_runs_next_run_at " + f" ON {_S}.dq_schedule_runs (next_run_at);" + # ---------------------------------------------------------- + # dq_schedule_configs — live config per schedule. + # ---------------------------------------------------------- + f"CREATE TABLE IF NOT EXISTS {_S}.dq_schedule_configs (" + " schedule_name TEXT PRIMARY KEY," + " config_json TEXT NOT NULL," + " version INTEGER NOT NULL," + " created_by TEXT," + " created_at TIMESTAMPTZ," + " updated_by TEXT," + " updated_at TIMESTAMPTZ" + ");" + # ---------------------------------------------------------- + # dq_schedule_configs_history — append-only audit trail. + # ---------------------------------------------------------- + f"CREATE TABLE IF NOT EXISTS {_S}.dq_schedule_configs_history (" + " history_id BIGSERIAL PRIMARY KEY," + " schedule_name TEXT NOT NULL," + " config_json TEXT," + " version INTEGER," + " action TEXT NOT NULL," + " changed_by TEXT," + " changed_at TIMESTAMPTZ" + ");" + f"CREATE INDEX IF NOT EXISTS idx_dq_schedule_configs_history_schedule_changed_at " + f" ON {_S}.dq_schedule_configs_history (schedule_name, changed_at DESC);" + ), + ), +] + + +_META_TABLE = f"{_S}.dq_migrations" + + +class PgMigrationRunner: + """Applies pending Postgres migrations to the Lakebase OLTP schema. 
+ + Mirrors :class:`backend.migrations.MigrationRunner` so the lifespan + handler can call ``run_all`` against either backend with the same + contract. The schema is created (``CREATE SCHEMA IF NOT EXISTS``) + so the very first deploy doesn't need a separate bootstrap step on + the Postgres side — Lakebase is provisioned bare with just a + ``public`` schema by default. + """ + + def __init__(self, executor: _Executor) -> None: + self._exec = executor + self._schema = executor.schema + self._meta_table = _META_TABLE.format(schema=self._schema) + + def run_all(self) -> int: + self._ensure_schema() + self._ensure_meta_table() + applied = self._applied_versions() + count = 0 + for migration in PG_MIGRATIONS: + if migration.version in applied: + logger.debug( + "Postgres migration v%d (%s) already applied", + migration.version, + migration.description, + ) + continue + logger.info( + "Applying Postgres migration v%d: %s", + migration.version, + migration.description, + ) + self._apply(migration) + count += 1 + return count + + def _ensure_schema(self) -> None: + # Quoting the schema name keeps mixed-case names safe and is a + # no-op for plain identifiers. + self._exec.execute(f'CREATE SCHEMA IF NOT EXISTS "{self._schema}"') + + def _ensure_meta_table(self) -> None: + sql = ( + f"CREATE TABLE IF NOT EXISTS {self._meta_table} (" + " version INTEGER PRIMARY KEY," + " description TEXT NOT NULL," + " applied_at TIMESTAMPTZ NOT NULL" + ")" + ).format(schema=self._schema) + self._exec.execute(sql) + + def _applied_versions(self) -> set[int]: + rows = self._exec.query(f"SELECT version FROM {self._meta_table} ORDER BY version") + return {int(row[0]) for row in rows} + + def _apply(self, migration: PgMigration) -> None: + formatted = migration.sql.format(schema=self._schema) + # Postgres supports compound statements per ``execute`` call, + # but breaking on ``;`` keeps the error trace pinned to the + # specific DDL statement that failed — much easier to debug. 
+ for stmt in formatted.split(";"): + stmt = stmt.strip() + if stmt: + self._exec.execute(stmt) + + # ANSI-escape the description rather than parameterising the + # INSERT — the executor surface intentionally doesn't expose + # parameter binding to keep parity with SqlExecutor. + escaped_desc = migration.description.replace("'", "''") + self._exec.execute( + f"INSERT INTO {self._meta_table} (version, description, applied_at) " + f"VALUES ({migration.version}, '{escaped_desc}', CURRENT_TIMESTAMP)" + ) + logger.info("Postgres migration v%d applied", migration.version) diff --git a/app/src/databricks_labs_dqx_app/backend/models.py b/app/src/databricks_labs_dqx_app/backend/models.py index a0c176bf1..8b4d02920 100644 --- a/app/src/databricks_labs_dqx_app/backend/models.py +++ b/app/src/databricks_labs_dqx_app/backend/models.py @@ -141,7 +141,11 @@ class DryRunSubmitOut(BaseModel): class DryRunOut(BaseModel): total_rows: int valid_rows: int + # ``invalid_rows`` is kept for backwards compatibility but is no longer + # the primary count surfaced in the UI — see ``error_rows`` below. invalid_rows: int + error_rows: int = 0 + warning_rows: int = 0 error_summary: list[dict[str, Any]] sample_invalid: list[dict[str, Any]] @@ -261,6 +265,10 @@ class DryRunResultsOut(BaseModel): total_rows: int | None = None valid_rows: int | None = None invalid_rows: int | None = None + # ``error_rows`` / ``warning_rows`` are the authoritative DQX observer + # counts; ``invalid_rows`` is kept for backwards compatibility only. 
+ error_rows: int | None = None + warning_rows: int | None = None error_summary: list[dict[str, Any]] = Field(default_factory=list) sample_invalid: list[dict[str, Any]] = Field(default_factory=list) @@ -277,6 +285,8 @@ class ValidationRunSummaryOut(BaseModel): run_type: str | None = None valid_rows: int | None = None invalid_rows: int | None = None + error_rows: int | None = None + warning_rows: int | None = None created_at: str | None = None error_message: str | None = None checks: list[dict[str, Any]] = Field(default_factory=list) @@ -328,6 +338,7 @@ class QuarantineRecordOut(BaseModel): requesting_user: str | None = None row_data: dict[str, Any] | None = None errors: list[Any] | None = None + warnings: list[Any] | None = None created_at: str | None = None diff --git a/app/src/databricks_labs_dqx_app/backend/pg_executor.py b/app/src/databricks_labs_dqx_app/backend/pg_executor.py new file mode 100644 index 000000000..28029e21b --- /dev/null +++ b/app/src/databricks_labs_dqx_app/backend/pg_executor.py @@ -0,0 +1,429 @@ +"""Lakebase Postgres executor. + +Mirrors the public surface of :class:`SqlExecutor` so that services can +target either Delta (via SQL warehouse) or Lakebase (via psycopg) with +the same call signatures. + +Why a separate class instead of a generic SQLAlchemy abstraction? +We need extremely tight control over: + +- **OAuth token refresh.** Lakebase tokens expire after one hour. A + background daemon thread refreshes the password every + ``DQX_LAKEBASE_TOKEN_REFRESH_MINUTES`` minutes and the connection + pool's ``configure`` callback hands new connections the latest + password. Existing connections in the pool are recycled when they + exceed ``max_lifetime``. +- **String-typed result rows.** The legacy :class:`SqlExecutor` returns + ``list[list[str]]`` because the Statement Execution API serialises + cells via ``Format.JSON_ARRAY``. Service code expects to call + ``json.loads`` on JSON columns and to receive ISO-string timestamps. 
+ We coerce psycopg's natively-typed results to that shape so existing + services work unchanged. +- **Dialect helpers.** :meth:`q` and :meth:`json_literal_expr` produce + Postgres-flavoured identifiers/literals so portable service SQL + doesn't need a dialect branch. +""" + +from __future__ import annotations + +import json +import logging +import threading +import time +import uuid +from datetime import date, datetime +from decimal import Decimal +from typing import Any + +from databricks.sdk import WorkspaceClient +from psycopg_pool import ConnectionPool + +from databricks_labs_dqx_app.backend.sql_executor import RawSql, _render_value +from databricks_labs_dqx_app.backend.sql_utils import escape_sql_string + +logger = logging.getLogger(__name__) + + +class _TokenHolder: + """Thread-safe container for the rotating Lakebase OAuth token. + + Holds the most recently issued credential behind a lock so one + thread can publish a new token while others read it. + :class:`PgExecutor` hands the password to the pool through its + connect ``kwargs`` (no ``configure`` callback is registered), so a + refreshed token only affects connections opened afterwards. Existing + connections continue to work — Postgres only validates the + password during the SCRAM handshake, not on every query. + """ + + def __init__(self, token: str) -> None: + self._token = token + self._lock = threading.Lock() + + @property + def token(self) -> str: + with self._lock: + return self._token + + @token.setter + def token(self, value: str) -> None: + with self._lock: + self._token = value + + +def _generate_token(ws: WorkspaceClient, instance_name: str) -> str: + """Generate a fresh Lakebase OAuth token (1-hour TTL).""" + cred = ws.database.generate_database_credential( + request_id=str(uuid.uuid4()), + instance_names=[instance_name], + ) + if not cred.token: + raise RuntimeError(f"Lakebase credential response had no token (instance={instance_name})") + return cred.token + + +def _to_text(value: Any) -> str | None: + """Coerce a psycopg cell value to Delta-compatible string output. + + - ``None`` stays ``None``.
+ - ``dict``/``list`` → compact JSON (matching Delta's ``to_json``). + - ``datetime``/``date`` → ISO 8601. + - ``bool`` → ``"true"``/``"false"`` (lowercase, JSON-style). + - ``Decimal``/``int``/``float`` → ``str(value)``. + - ``bytes``/``bytearray``/``memoryview`` → lowercase hex string. + - Everything else → ``str(value)``. + """ + if value is None: + return None + if isinstance(value, (dict, list)): + return json.dumps(value, separators=(",", ":")) + if isinstance(value, (datetime, date)): + return value.isoformat() + if isinstance(value, bool): + return "true" if value else "false" + if isinstance(value, Decimal): + return str(value) + if isinstance(value, (bytes, bytearray, memoryview)): + # Binary columns are not used in any OLTP table today; surface + # them as hex if a future migration introduces one. + return bytes(value).hex() + return str(value) + + +def _pg_render_value(value: Any) -> str: + """Postgres-flavoured literal renderer for :meth:`PgExecutor.upsert`. + + Behaves identically to :func:`backend.sql_executor._render_value` + except that: + + - :class:`RawSql` payloads equal to ``current_timestamp()`` or + ``now()`` (compared case-insensitively after stripping + whitespace) are rewritten to ``CURRENT_TIMESTAMP``, so the + parenthesised Spark SQL form never reaches Postgres. Other + ``RawSql`` payloads pass through verbatim so callers can still + inject Postgres-specific expressions such as ``::jsonb`` casts. + - ``bool`` renders as ``TRUE``/``FALSE`` which Postgres accepts. + """ + if isinstance(value, RawSql): + expr = value.expr.strip() + # Common Spark idiom that doesn't parse in Postgres — translate. + if expr.lower() in {"current_timestamp()", "now()"}: + return "CURRENT_TIMESTAMP" + return value.expr + return _render_value(value) + + +class PgExecutor: + """Drop-in :class:`SqlExecutor` replacement backed by Lakebase Postgres. + + Constructed at app startup when ``conf.lakebase_enabled`` is true; + the constructor opens the pool and starts the token-refresh thread, + and the lifespan handler runs the Postgres migrations once before + traffic arrives.
+ """ + + dialect: str = "postgres" + + def __init__( + self, + *, + ws: WorkspaceClient, + instance_name: str, + database: str, + schema: str, + username: str, + host: str, + port: int = 5432, + token_refresh_minutes: int = 50, + pool_min_size: int = 1, + pool_max_size: int = 10, + ) -> None: + self._ws = ws + self._instance_name = instance_name + self._database = database + self._schema = schema + self._username = username + self._host = host + self._port = port + self._token_refresh_seconds = max(60, token_refresh_minutes * 60) + + # Bootstrap the first token before the pool starts so the very + # first connection has valid credentials. + self._token_holder = _TokenHolder(_generate_token(ws, instance_name)) + + # ``kwargs`` is the dict the pool hands to ``Connection.connect`` + # every time it opens a new physical connection. Mutating + # ``password`` on token refresh means subsequent connects pick + # up the fresh credential without restarting the pool. The + # ``options`` flag sets the Postgres ``search_path`` so + # unqualified table references resolve to the app schema. + self._connect_kwargs: dict[str, Any] = { + "host": host, + "port": port, + "dbname": database, + "user": username, + "password": self._token_holder.token, + "sslmode": "require", + "options": f"-c search_path={schema}", + } + + # ``max_lifetime`` recycles connections every 50 minutes which + # ensures we never hand out a connection authenticated with a + # near-expired token. ``check`` runs ``SELECT 1`` on idle pool + # members so a server-side disconnect doesn't poison the pool. 
+ self._pool: ConnectionPool = ConnectionPool( + conninfo="", + min_size=pool_min_size, + max_size=pool_max_size, + max_lifetime=self._token_refresh_seconds, + check=ConnectionPool.check_connection, + open=False, # opened explicitly below so failures surface eagerly + kwargs=self._connect_kwargs, + timeout=30.0, + name="dqx-lakebase", + ) + self._pool.open(wait=True, timeout=30.0) + logger.info( + "Lakebase connection pool open (host=%s db=%s schema=%s user=%s)", + host, + database, + schema, + username, + ) + + self._stop = threading.Event() + self._refresher = threading.Thread( + target=self._token_refresh_loop, + name="dqx-lakebase-token-refresh", + daemon=True, + ) + self._refresher.start() + + # ------------------------------------------------------------------ + # Public API mirrors SqlExecutor + # ------------------------------------------------------------------ + + @property + def warehouse_id(self) -> str: + # Kept for type compatibility with ``SqlExecutor``; Lakebase + # has no warehouse concept. + return "" + + @property + def catalog(self) -> str: + # Postgres has no Unity Catalog; return the database name so + # callers that build fully-qualified identifiers still get a + # 3-part name (``database.schema.table``) on Postgres. + return self._database + + @property + def schema(self) -> str: + return self._schema + + @property + def database(self) -> str: + return self._database + + def q(self, identifier: str) -> str: + """Quote a Postgres identifier (ANSI double quotes, doubled internal ``"``).""" + return '"' + identifier.replace('"', '""') + '"' + + def json_literal_expr(self, json_str: str) -> str: + """Return a Postgres expression that yields a JSONB value for *json_str*.""" + return f"'{escape_sql_string(json_str)}'::jsonb" + + def ts_text(self, col: str) -> str: + """Project a timestamp column as a string. 
+ + Postgres TIMESTAMPTZ values are converted to ISO strings by + :func:`_to_text` when the row leaves the cursor, so we just + select the column verbatim and let the row-level coercion do + the work. This keeps service SQL portable: callers always + write ``executor.ts_text('created_at')`` regardless of dialect. + """ + return col + + def execute(self, sql: str, *, timeout_seconds: int = 120) -> None: # noqa: ARG002 - parity with SqlExecutor + """Run a non-result-returning statement.""" + with self._pool.connection() as conn: + with conn.cursor() as cur: + # psycopg accepts ``str`` at runtime (``Query`` is a + # union of bytes/str/Composable/SQL); the published + # stubs only declare the ``Template`` overload so we + # silence basedpyright here. + cur.execute(sql) # pyright: ignore[reportCallIssue, reportArgumentType] + conn.commit() + + def execute_no_schema(self, sql: str) -> None: + """Parity stub. Postgres has no per-statement catalog/schema context. + + Schemas are created via ``CREATE SCHEMA IF NOT EXISTS`` like any + other DDL, so we simply delegate to :meth:`execute`. + """ + self.execute(sql) + + def query(self, sql: str, *, timeout_seconds: int = 120) -> list[list[str]]: # noqa: ARG002 + """Run a query and return rows as lists of strings (Delta-compatible). + + The return type is annotated as ``list[list[str]]`` to mirror + :meth:`SqlExecutor.query`, but at runtime NULL cells surface + as ``None`` (just like the JSON_ARRAY response format used by + the Statement Execution API). Services already handle both + — e.g. ``int(row[0]) if row[0] else 0`` — so we keep the + Optional shape rather than coercing NULL to ``""``. 
+ """ + with self._pool.connection() as conn: + with conn.cursor() as cur: + cur.execute(sql) # pyright: ignore[reportCallIssue, reportArgumentType] + rows = cur.fetchall() + return [[_to_text(cell) for cell in row] for row in rows] # pyright: ignore[reportReturnType] + + def query_dicts(self, sql: str, *, timeout_seconds: int = 120) -> list[dict[str, str | None]]: # noqa: ARG002 + """Run a query and return rows as ``{column: stringified value}`` dicts.""" + with self._pool.connection() as conn: + with conn.cursor() as cur: + cur.execute(sql) # pyright: ignore[reportCallIssue, reportArgumentType] + rows = cur.fetchall() + cols = [d.name for d in (cur.description or [])] + return [{col: _to_text(cell) for col, cell in zip(cols, row)} for row in rows] + + def upsert( + self, + table: str, + key_cols: dict[str, Any], + value_cols: dict[str, Any], + *, + timeout_seconds: int = 120, + ) -> None: + """Postgres ``INSERT ... ON CONFLICT ... DO UPDATE`` upsert. + + Keeps the same call shape as :meth:`SqlExecutor.upsert` so a + service can switch backends without code changes. The natural + key composing ``key_cols`` MUST have a UNIQUE/PRIMARY KEY index + in the migration. + """ + if not key_cols: + raise ValueError("upsert requires at least one key column") + + all_cols = list(key_cols.keys()) + list(value_cols.keys()) + all_vals = [_pg_render_value(v) for v in list(key_cols.values()) + list(value_cols.values())] + + # Natural-key columns get quoted via q() so reserved words like + # ``check`` survive. Service-provided keys are already validated + # in higher layers, but using q() also makes them dialect-safe. + quoted_cols = [self.q(c) for c in all_cols] + quoted_keys = [self.q(c) for c in key_cols] + update_set = ", ".join(f"{self.q(c)} = EXCLUDED.{self.q(c)}" for c in value_cols) + + if value_cols: + conflict_clause = f"ON CONFLICT ({', '.join(quoted_keys)}) DO UPDATE SET {update_set}" + else: + # Pure existence check — keys-only row, no update payload. 
+ conflict_clause = f"ON CONFLICT ({', '.join(quoted_keys)}) DO NOTHING" + + sql = f"INSERT INTO {table} ({', '.join(quoted_cols)}) " f"VALUES ({', '.join(all_vals)}) " f"{conflict_clause}" + self.execute(sql, timeout_seconds=timeout_seconds) + + # ------------------------------------------------------------------ + # Lifecycle + # ------------------------------------------------------------------ + + def close(self) -> None: + self._stop.set() + try: + self._pool.close() + except Exception: # noqa: BLE001 - best-effort shutdown + logger.warning("Error closing Lakebase pool", exc_info=True) + + def _token_refresh_loop(self) -> None: + """Background thread that rotates the Lakebase OAuth token. + + Runs every ``token_refresh_minutes`` and updates both the + shared :class:`_TokenHolder` and the pool's ``kwargs`` dict so + the very next physical connect uses the fresh credential. + Existing connections keep working until ``max_lifetime`` + recycles them — Postgres only validates the password during + the SCRAM handshake, not on every query. + """ + while not self._stop.is_set(): + # Wait first so we don't immediately re-issue a token after + # init. + if self._stop.wait(self._token_refresh_seconds): + return + try: + fresh = _generate_token(self._ws, self._instance_name) + self._token_holder.token = fresh + # Mutating the same dict the pool was constructed with + # is the supported way to inject rotating credentials + # (psycopg-pool re-reads ``kwargs`` on every connect). + self._connect_kwargs["password"] = fresh + logger.info("Lakebase OAuth token refreshed") + except Exception: # noqa: BLE001 + # Don't crash the app on a transient SDK failure — the + # next iteration retries and existing connections keep + # working until the previous token expires. + logger.warning("Failed to refresh Lakebase token; will retry", exc_info=True) + # Back off a bit so we don't tight-loop on persistent + # failures. 
+ time.sleep(60) + + +# --------------------------------------------------------------------------- +# Construction helper +# --------------------------------------------------------------------------- + + +def build_pg_executor( + ws: WorkspaceClient, + *, + instance_name: str, + database: str, + schema: str, + token_refresh_minutes: int = 50, + pool_min_size: int = 1, + pool_max_size: int = 10, +) -> PgExecutor: + """Construct a :class:`PgExecutor` from a Databricks workspace client. + + Resolves the instance's read/write DNS endpoint and the calling + identity's username (service principal in production, real user + locally) before opening the pool. + """ + instance = ws.database.get_database_instance(name=instance_name) + host = instance.read_write_dns + if not host: + raise RuntimeError( + f"Lakebase instance {instance_name!r} has no read_write_dns. " "Is it provisioned and running?" + ) + + me = ws.current_user.me() + username = me.user_name or me.id or "" + if not username: + raise RuntimeError("Could not determine workspace identity for Lakebase connection") + + return PgExecutor( + ws=ws, + instance_name=instance_name, + database=database, + schema=schema, + username=username, + host=host, + token_refresh_minutes=token_refresh_minutes, + pool_min_size=pool_min_size, + pool_max_size=pool_max_size, + ) diff --git a/app/src/databricks_labs_dqx_app/backend/routes/v1/config.py b/app/src/databricks_labs_dqx_app/backend/routes/v1/config.py index f353b503c..17b5fa0c8 100644 --- a/app/src/databricks_labs_dqx_app/backend/routes/v1/config.py +++ b/app/src/databricks_labs_dqx_app/backend/routes/v1/config.py @@ -4,7 +4,7 @@ from fastapi import APIRouter, Depends, HTTPException -from databricks_labs_dqx_app.backend.common.authorization import UserRole +from databricks_labs_dqx_app.backend.common.authorization import UserRole, get_user_email from databricks_labs_dqx_app.backend.dependencies import get_app_settings_service, require_role from 
databricks_labs_dqx_app.backend.logger import logger from pydantic import BaseModel, Field @@ -20,6 +20,17 @@ _TZ_SETTING_KEY = "display_timezone" _TZ_DEFAULT = "UTC" +# Defaults for the retention sweep — kept in sync with +# ``backend.services.scheduler_service``. Imported lazily inside the +# route to avoid pulling the scheduler module into the import graph +# of routes that have no scheduler dependency. +_RETENTION_DAYS_DEFAULT = 90 +_QUARANTINE_RETENTION_DAYS_DEFAULT = 30 +_RETENTION_DAYS_MIN = 7 +# Generous upper bound — anything past ~3 years is almost certainly a +# typo, and lets the UI render a meaningful slider/input range. +_RETENTION_DAYS_MAX = 3650 + _LABEL_DEFS_SETTING_KEY = "label_definitions" # Keys must be safe for YAML round-tripping and stable as DataFrame columns: # letters, digits, underscore, leading with a letter. @@ -101,10 +112,11 @@ def get_config( def save_config( body: ConfigIn, svc: Annotated[AppSettingsService, Depends(get_app_settings_service)], + email: Annotated[str, Depends(get_user_email)], ) -> ConfigOut: """Save workspace config to application state (admin only).""" try: - svc.save_config(body.config) + svc.save_config(body.config, user_email=email) _notify_scheduler() config = svc.get_config() return ConfigOut(config=config) @@ -140,6 +152,7 @@ def get_run_config( def save_run_config( body: RunConfigIn, svc: Annotated[AppSettingsService, Depends(get_app_settings_service)], + email: Annotated[str, Depends(get_user_email)], ) -> RunConfigOut: """Save a run config — creates or updates by name (admin only).""" config = svc.get_config() @@ -153,7 +166,7 @@ def save_run_config( if not updated: config.run_configs.append(body.config) - svc.save_config(config) + svc.save_config(config, user_email=email) _notify_scheduler() return RunConfigOut(config=body.config) @@ -167,6 +180,7 @@ def save_run_config( def delete_run_config( name: str, svc: Annotated[AppSettingsService, Depends(get_app_settings_service)], + email: Annotated[str, 
Depends(get_user_email)], ) -> ConfigOut: """Delete a run config by name (admin only).""" config = svc.get_config() @@ -176,7 +190,7 @@ def delete_run_config( if len(config.run_configs) == original_count: raise HTTPException(status_code=404, detail=f"Run config '{name}' not found") - svc.save_config(config) + svc.save_config(config, user_email=email) _notify_scheduler() return ConfigOut(config=config) @@ -203,12 +217,119 @@ def get_timezone( def save_timezone( body: TimezoneIn, svc: Annotated[AppSettingsService, Depends(get_app_settings_service)], + email: Annotated[str, Depends(get_user_email)], ) -> TimezoneOut: """Set the display timezone (admin only).""" - svc.save_setting(_TZ_SETTING_KEY, body.timezone) + svc.save_setting(_TZ_SETTING_KEY, body.timezone, user_email=email) return TimezoneOut(timezone=body.timezone) +# --------------------------------------------------------------------------- +# Retention — global vs. quarantine-specific DELETE windows surfaced for the +# admin UI. The scheduler reads the same keys directly from +# ``dq_app_settings`` (see ``SchedulerService._resolve_retention_days`` / +# ``_resolve_quarantine_retention_days``); these endpoints are the +# read/write surface and the only place we centralise validation. +# --------------------------------------------------------------------------- + + +class RetentionSettingsOut(BaseModel): + """Effective retention settings + the defaults the scheduler falls back to. + + ``retention_days`` / ``quarantine_retention_days`` reflect the + *current effective values* — the persisted setting if one exists, + otherwise the compiled-in default. The ``*_default`` and ``*_min`` + fields let the UI render hints and validation without duplicating + the constants on the frontend. 
+ """ + + retention_days: int + quarantine_retention_days: int + retention_days_default: int = _RETENTION_DAYS_DEFAULT + quarantine_retention_days_default: int = _QUARANTINE_RETENTION_DAYS_DEFAULT + retention_days_min: int = _RETENTION_DAYS_MIN + retention_days_max: int = _RETENTION_DAYS_MAX + retention_days_set: bool + quarantine_retention_days_set: bool + + +class RetentionSettingsIn(BaseModel): + """Update payload — either field omitted means *leave unchanged*.""" + + retention_days: int | None = None + quarantine_retention_days: int | None = None + + +def _validate_retention_days(value: int, *, field: str) -> int: + if value < _RETENTION_DAYS_MIN: + raise HTTPException( + status_code=400, + detail=(f"{field} must be at least {_RETENTION_DAYS_MIN} days " "to protect against accidental data loss."), + ) + if value > _RETENTION_DAYS_MAX: + raise HTTPException( + status_code=400, + detail=f"{field} must be at most {_RETENTION_DAYS_MAX} days.", + ) + return value + + +@router.get( + "/retention", + response_model=RetentionSettingsOut, + operation_id="getRetentionSettings", + dependencies=[require_role(UserRole.ADMIN)], +) +def get_retention_settings( + svc: Annotated[AppSettingsService, Depends(get_app_settings_service)], +) -> RetentionSettingsOut: + """Return the current retention windows + defaults (admin only).""" + rd = svc.get_retention_days() + qd = svc.get_quarantine_retention_days() + return RetentionSettingsOut( + retention_days=rd if rd is not None else _RETENTION_DAYS_DEFAULT, + quarantine_retention_days=qd if qd is not None else _QUARANTINE_RETENTION_DAYS_DEFAULT, + retention_days_set=rd is not None, + quarantine_retention_days_set=qd is not None, + ) + + +@router.put( + "/retention", + response_model=RetentionSettingsOut, + operation_id="saveRetentionSettings", + dependencies=[require_role(UserRole.ADMIN)], +) +def save_retention_settings( + body: RetentionSettingsIn, + svc: Annotated[AppSettingsService, Depends(get_app_settings_service)], + email: 
Annotated[str, Depends(get_user_email)], +) -> RetentionSettingsOut: + """Update one or both retention windows (admin only). + + Either field may be omitted to leave the existing value unchanged. + Both values are validated against the safety floor and ceiling + before being persisted. + """ + if body.retention_days is None and body.quarantine_retention_days is None: + raise HTTPException( + status_code=400, + detail="At least one of retention_days or quarantine_retention_days must be provided.", + ) + + if body.retention_days is not None: + validated = _validate_retention_days(body.retention_days, field="retention_days") + svc.save_retention_days(validated, user_email=email) + logger.info("Saved global retention_days=%d", validated) + + if body.quarantine_retention_days is not None: + validated_q = _validate_retention_days(body.quarantine_retention_days, field="quarantine_retention_days") + svc.save_quarantine_retention_days(validated_q, user_email=email) + logger.info("Saved quarantine_retention_days=%d", validated_q) + + return get_retention_settings(svc) + + # --------------------------------------------------------------------------- # Label definitions — admin-managed catalog of label keys + allowed values. # Powers the constrained-mode label picker on rule authoring pages, and @@ -265,6 +386,7 @@ def get_label_definitions( def save_label_definitions( body: LabelDefinitionsIn, svc: Annotated[AppSettingsService, Depends(get_app_settings_service)], + email: Annotated[str, Depends(get_user_email)], ) -> LabelDefinitionsOut: """Replace the full set of label definitions (admin only). 
@@ -306,7 +428,7 @@ def save_label_definitions( ) ) - svc.save_setting(_LABEL_DEFS_SETTING_KEY, json.dumps([d.model_dump() for d in cleaned])) + svc.save_setting(_LABEL_DEFS_SETTING_KEY, json.dumps([d.model_dump() for d in cleaned]), user_email=email) logger.info("Saved %d label definition(s)", len(cleaned)) return LabelDefinitionsOut(definitions=cleaned) @@ -386,6 +508,7 @@ def get_custom_metrics( def save_custom_metrics( body: CustomMetricsIn, svc: Annotated[AppSettingsService, Depends(get_app_settings_service)], + email: Annotated[str, Depends(get_user_email)], ) -> CustomMetricsOut: """Replace the global custom-metrics list (admin only). @@ -400,6 +523,6 @@ def save_custom_metrics( continue seen.add(expr) cleaned.append(expr) - saved = svc.save_custom_metrics(cleaned) + saved = svc.save_custom_metrics(cleaned, user_email=email) logger.info("Saved %d custom metric expression(s)", len(saved)) return CustomMetricsOut(metrics=saved) diff --git a/app/src/databricks_labs_dqx_app/backend/routes/v1/dryrun.py b/app/src/databricks_labs_dqx_app/backend/routes/v1/dryrun.py index ca47fb3fb..f98ce3856 100644 --- a/app/src/databricks_labs_dqx_app/backend/routes/v1/dryrun.py +++ b/app/src/databricks_labs_dqx_app/backend/routes/v1/dryrun.py @@ -109,6 +109,10 @@ async def list_validation_runs( total_rows=int(v) if (v := row.get("total_rows")) else None, valid_rows=int(v) if (v := row.get("valid_rows")) else None, invalid_rows=int(v) if (v := row.get("invalid_rows")) else None, + # ``is not None`` (vs the truthiness pattern above) so the UI + # can distinguish "0 errors / warnings" from pre-migration NULLs. 
+ error_rows=int(v) if (v := row.get("error_rows")) is not None else None, + warning_rows=int(v) if (v := row.get("warning_rows")) is not None else None, created_at=row.get("created_at"), run_type=row.get("run_type"), error_message=row.get("error_message"), @@ -511,6 +515,8 @@ def get_dry_run_results( total_rows=int(v) if (v := row.get("total_rows")) else None, valid_rows=int(v) if (v := row.get("valid_rows")) else None, invalid_rows=int(v) if (v := row.get("invalid_rows")) else None, + error_rows=int(v) if (v := row.get("error_rows")) is not None else None, + warning_rows=int(v) if (v := row.get("warning_rows")) is not None else None, error_summary=json.loads(error_summary_json), sample_invalid=json.loads(sample_invalid_json), ) diff --git a/app/src/databricks_labs_dqx_app/backend/routes/v1/quarantine.py b/app/src/databricks_labs_dqx_app/backend/routes/v1/quarantine.py index eb657cd3b..ca0aab44c 100644 --- a/app/src/databricks_labs_dqx_app/backend/routes/v1/quarantine.py +++ b/app/src/databricks_labs_dqx_app/backend/routes/v1/quarantine.py @@ -36,9 +36,14 @@ def _query_quarantine( count_rows = sql.query(count_sql) total_count = int(count_rows[0][0] or 0) if count_rows and count_rows[0] else 0 + # row_data / errors / warnings are VARIANT — render as JSON strings for + # the existing _row_to_record parser. created_at is TIMESTAMP — cast to + # STRING so query_dicts returns an ISO-formatted value. 
data_sql = ( f"SELECT quarantine_id, run_id, source_table_fqn, requesting_user, " - f"row_data, errors, created_at " + f"to_json(row_data) AS row_data, to_json(errors) AS errors, " + f"to_json(warnings) AS warnings, " + f"CAST(created_at AS STRING) AS created_at " f"FROM {table} WHERE run_id = '{er}' " f"ORDER BY created_at DESC LIMIT {limit} OFFSET {offset}" ) @@ -61,6 +66,18 @@ def _row_to_record(row: dict[str, Any]) -> QuarantineRecordOut: except (json.JSONDecodeError, TypeError): errors = [row["errors"]] + # ``warnings`` is missing on rows written before migration v4; the + # column is ``null`` for SQL-check quarantines. + warnings: list[Any] | None = None + raw_warnings = row.get("warnings") + if raw_warnings and raw_warnings != "null": + try: + parsed = json.loads(raw_warnings) + if parsed is not None: + warnings = parsed if isinstance(parsed, list) else [parsed] + except (json.JSONDecodeError, TypeError): + warnings = [raw_warnings] + return QuarantineRecordOut( quarantine_id=row.get("quarantine_id", ""), run_id=row.get("run_id", ""), @@ -68,6 +85,7 @@ def _row_to_record(row: dict[str, Any]) -> QuarantineRecordOut: requesting_user=row.get("requesting_user"), row_data=row_data, errors=errors, + warnings=warnings, created_at=row.get("created_at"), ) @@ -132,7 +150,9 @@ def export_quarantine_records( stmt = ( f"SELECT quarantine_id, run_id, source_table_fqn, requesting_user, " - f"row_data, errors, created_at " + f"to_json(row_data) AS row_data, to_json(errors) AS errors, " + f"to_json(warnings) AS warnings, " + f"CAST(created_at AS STRING) AS created_at " f"FROM {table} WHERE run_id = '{er}' ORDER BY created_at DESC " # noqa: S608 f"LIMIT {int(max_rows)}" ) @@ -164,15 +184,18 @@ def export_quarantine_records( except (json.JSONDecodeError, TypeError): pass data_keys = sorted(all_data_keys) - headers = ["quarantine_id", "run_id", "source_table_fqn", "errors"] + data_keys + ["created_at"] + headers = ["quarantine_id", "run_id", "source_table_fqn", "errors", 
"warnings"] + data_keys + ["created_at"] ws.append(headers) for r in rows: + raw_warn = r.get("warnings") + warn_str = "" if not raw_warn or raw_warn == "null" else raw_warn flat: dict[str, str] = { "quarantine_id": r.get("quarantine_id", "") or "", "run_id": r.get("run_id", "") or "", "source_table_fqn": r.get("source_table_fqn", "") or "", "errors": r.get("errors", "") or "", + "warnings": warn_str or "", "created_at": r.get("created_at", "") or "", } raw_rd = r.get("row_data") @@ -203,15 +226,18 @@ def export_quarantine_records( except (json.JSONDecodeError, TypeError): pass csv_dk = sorted(csv_data_keys) - fieldnames = ["quarantine_id", "run_id", "source_table_fqn", "errors"] + csv_dk + ["created_at"] + fieldnames = ["quarantine_id", "run_id", "source_table_fqn", "errors", "warnings"] + csv_dk + ["created_at"] writer = csv.DictWriter(buf, fieldnames=fieldnames, extrasaction="ignore") writer.writeheader() for r in rows: + raw_warn_csv = r.get("warnings") + warn_str_csv = "" if not raw_warn_csv or raw_warn_csv == "null" else raw_warn_csv csv_flat: dict[str, str] = { "quarantine_id": r.get("quarantine_id", "") or "", "run_id": r.get("run_id", "") or "", "source_table_fqn": r.get("source_table_fqn", "") or "", "errors": r.get("errors", "") or "", + "warnings": warn_str_csv or "", "created_at": r.get("created_at", "") or "", } raw_rd = r.get("row_data") diff --git a/app/src/databricks_labs_dqx_app/backend/run_status_manager.py b/app/src/databricks_labs_dqx_app/backend/run_status_manager.py index 27dd85e95..23135e68f 100644 --- a/app/src/databricks_labs_dqx_app/backend/run_status_manager.py +++ b/app/src/databricks_labs_dqx_app/backend/run_status_manager.py @@ -34,7 +34,9 @@ def update_run_status( es = escape_sql_string(status) em = escape_sql_string(error_message or "") - set_clause = f"status = '{es}', error_message = '{em}', updated_at = CAST(current_timestamp() AS STRING)" + # updated_at is TIMESTAMP in the baseline; pass current_timestamp() + # directly rather 
than casting to STRING. + set_clause = f"status = '{es}', error_message = '{em}', updated_at = current_timestamp()" if canceled_by: ec = escape_sql_string(canceled_by) set_clause += f", canceled_by = '{ec}'" diff --git a/app/src/databricks_labs_dqx_app/backend/services/app_settings_service.py b/app/src/databricks_labs_dqx_app/backend/services/app_settings_service.py index 641d1f738..bc1869813 100644 --- a/app/src/databricks_labs_dqx_app/backend/services/app_settings_service.py +++ b/app/src/databricks_labs_dqx_app/backend/services/app_settings_service.py @@ -4,7 +4,7 @@ from databricks.labs.blueprint.installation import Installation from databricks.labs.dqx.config import WorkspaceConfig -from databricks_labs_dqx_app.backend.sql_executor import SqlExecutor +from databricks_labs_dqx_app.backend.sql_executor import RawSql, SqlExecutor logger = logging.getLogger(__name__) @@ -12,33 +12,37 @@ class AppSettingsService: - """Manages app configuration in a Delta table using app (SP) credentials. + """Manages app configuration backed by ``dq_app_settings``. - Config is stored as a JSON blob in the dq_app_settings table keyed by a - well-known key. All operations use the app's service principal, not the - calling user's OBO token. + Config is stored as a JSON blob keyed by a well-known key. All + operations use the app's service principal, not the calling + user's OBO token. + + The ``dq_app_settings`` table is one of the OLTP tables that lives + in Lakebase Postgres when ``conf.lakebase_enabled`` is true and in + Delta otherwise. The injected executor decides which. 
""" def __init__(self, sql: SqlExecutor) -> None: self._sql = sql - self._table = f"{sql.catalog}.{sql.schema}.dq_app_settings" + if getattr(sql, "dialect", "delta") == "postgres": + self._table = f"{sql.schema}.dq_app_settings" + else: + self._table = f"{sql.catalog}.{sql.schema}.dq_app_settings" # ------------------------------------------------------------------ # Public API # ------------------------------------------------------------------ def ensure_table(self) -> None: - """Create the settings table if it doesn't exist.""" - sql = ( - f"CREATE TABLE IF NOT EXISTS {self._table} (" - " setting_key STRING NOT NULL," - " setting_value STRING," - " updated_at TIMESTAMP," - " updated_by STRING" - ")" - ) - self._sql.execute(sql) - logger.info(f"Ensured settings table exists: {self._table}") + """No-op kept for backwards compatibility. + + The migration runner now owns table creation for both backends + (see :mod:`backend.migrations` and + :mod:`backend.migrations.postgres`); calling code that still + invokes this method gets a quiet ``DEBUG`` log and we move on. 
+ """ + logger.debug("AppSettingsService.ensure_table() is a no-op; migrations handle DDL") def get_config(self) -> WorkspaceConfig: """Load the workspace config from the settings table.""" @@ -55,25 +59,10 @@ def get_config(self) -> WorkspaceConfig: return WorkspaceConfig(run_configs=[]) return result - def save_config(self, config: WorkspaceConfig) -> WorkspaceConfig: + def save_config(self, config: WorkspaceConfig, user_email: str | None = None) -> WorkspaceConfig: """Save the workspace config to the settings table.""" - from databricks_labs_dqx_app.backend.sql_utils import escape_sql_string - config_dict = config.as_dict() - value = json.dumps(config_dict) - escaped = escape_sql_string(value) - - sql = ( - f"MERGE INTO {self._table} AS target " - f"USING (SELECT '{_CONFIG_KEY}' AS setting_key) AS source " - "ON target.setting_key = source.setting_key " - "WHEN MATCHED THEN UPDATE SET " - f" setting_value = '{escaped}', " - " updated_at = current_timestamp() " - "WHEN NOT MATCHED THEN INSERT (setting_key, setting_value, updated_at) " - f"VALUES ('{_CONFIG_KEY}', '{escaped}', current_timestamp())" - ) - self._sql.execute(sql) + self.save_setting(_CONFIG_KEY, json.dumps(config_dict), user_email=user_email) logger.info("Saved workspace config to settings table") return config @@ -86,24 +75,18 @@ def get_setting(self, key: str) -> str | None: rows = self._sql.query(sql) return rows[0][0] if rows else None - def save_setting(self, key: str, value: str) -> None: - """Upsert a single setting value.""" - from databricks_labs_dqx_app.backend.sql_utils import escape_sql_string - - escaped_key = escape_sql_string(key) - escaped_val = escape_sql_string(value) - sql = ( - f"MERGE INTO {self._table} AS target " - f"USING (SELECT '{escaped_key}' AS setting_key) AS source " - "ON target.setting_key = source.setting_key " - "WHEN MATCHED THEN UPDATE SET " - f" setting_value = '{escaped_val}', " - " updated_at = current_timestamp() " - "WHEN NOT MATCHED THEN INSERT (setting_key, 
setting_value, updated_at) " - f"VALUES ('{escaped_key}', '{escaped_val}', current_timestamp())" + def save_setting(self, key: str, value: str, *, user_email: str | None = None) -> None: + """Upsert a single setting value, recording who wrote it.""" + self._sql.upsert( + self._table, + key_cols={"setting_key": key}, + value_cols={ + "setting_value": value, + "updated_at": RawSql("current_timestamp()"), + "updated_by": user_email, + }, ) - self._sql.execute(sql) - logger.info("Saved setting: %s", key) + logger.info("Saved setting: %s (by=%s)", key, user_email or "system") # ------------------------------------------------------------------ # Custom metrics — global SQL-expression list passed to DQMetricsObserver. @@ -127,8 +110,59 @@ def get_custom_metrics(self) -> list[str]: return [] return [s for s in parsed if isinstance(s, str) and s.strip()] - def save_custom_metrics(self, expressions: list[str]) -> list[str]: + def save_custom_metrics(self, expressions: list[str], *, user_email: str | None = None) -> list[str]: """Persist the global custom-metric list. Returns the cleaned list.""" cleaned = [s.strip() for s in expressions if isinstance(s, str) and s.strip()] - self.save_setting("custom_metrics_v1", json.dumps(cleaned)) + self.save_setting("custom_metrics_v1", json.dumps(cleaned), user_email=user_email) return cleaned + + # ------------------------------------------------------------------ + # Retention — daily DELETE sweep window for analytical tables. + # Two knobs: + # * ``retention_days`` — applied to dq_validation_runs, + # dq_profiling_results, dq_metrics + # and the OLTP history tables + # (default 90). + # * ``quarantine_retention_days`` — applied only to + # dq_quarantine_records, which + # stores the full source row + # payload (PII surface). Default + # 30 so row-level data ages out + # faster than trend tables. + # Both keys store a plain integer string. 
The scheduler reads them + # via ``SchedulerService._resolve_setting_days`` which floors at 7 + # days so a misconfiguration cannot wipe data inside the safety + # window. Returning ``None`` means the setting is unset and the + # consumer should fall back to its compiled-in default. + # ------------------------------------------------------------------ + + _RETENTION_KEY = "retention_days" + _QUARANTINE_RETENTION_KEY = "quarantine_retention_days" + + def get_retention_days(self) -> int | None: + """Return the configured global retention window, or ``None`` if unset.""" + return self._get_int_setting(self._RETENTION_KEY) + + def get_quarantine_retention_days(self) -> int | None: + """Return the configured quarantine retention window, or ``None`` if unset.""" + return self._get_int_setting(self._QUARANTINE_RETENTION_KEY) + + def save_retention_days(self, days: int, *, user_email: str | None = None) -> int: + """Persist the global retention window. Returns the saved value.""" + self.save_setting(self._RETENTION_KEY, str(int(days)), user_email=user_email) + return int(days) + + def save_quarantine_retention_days(self, days: int, *, user_email: str | None = None) -> int: + """Persist the quarantine retention window. 
Returns the saved value.""" + self.save_setting(self._QUARANTINE_RETENTION_KEY, str(int(days)), user_email=user_email) + return int(days) + + def _get_int_setting(self, key: str) -> int | None: + raw = self.get_setting(key) + if raw is None or raw == "": + return None + try: + return int(raw) + except (TypeError, ValueError): + logger.warning("Setting %s is not parseable as int (%r); ignoring", key, raw) + return None diff --git a/app/src/databricks_labs_dqx_app/backend/services/comments_service.py b/app/src/databricks_labs_dqx_app/backend/services/comments_service.py index 37f872523..7ae2348ba 100644 --- a/app/src/databricks_labs_dqx_app/backend/services/comments_service.py +++ b/app/src/databricks_labs_dqx_app/backend/services/comments_service.py @@ -32,7 +32,10 @@ class CommentsService: def __init__(self, sql: SqlExecutor) -> None: self._sql = sql - self._table = f"{sql.catalog}.{sql.schema}.dq_comments" + if getattr(sql, "dialect", "delta") == "postgres": + self._table = f"{sql.schema}.dq_comments" + else: + self._table = f"{sql.catalog}.{sql.schema}.dq_comments" def add_comment(self, entity_type: str, entity_id: str, user_email: str, comment: str) -> Comment: from databricks_labs_dqx_app.backend.sql_utils import escape_sql_string, validate_entity_type @@ -40,7 +43,6 @@ def add_comment(self, entity_type: str, entity_id: str, user_email: str, comment validate_entity_type(entity_type, self.VALID_ENTITY_TYPES) comment_id = uuid4().hex[:16] - now = datetime.now(timezone.utc).isoformat() e_comment = escape_sql_string(comment) e_email = escape_sql_string(user_email) e_entity_id = escape_sql_string(entity_id) @@ -49,7 +51,7 @@ def add_comment(self, entity_type: str, entity_id: str, user_email: str, comment sql = ( f"INSERT INTO {self._table} (comment_id, entity_type, entity_id, user_email, comment, created_at) " f"VALUES ('{comment_id}', '{e_type}', '{e_entity_id}', " - f"'{e_email}', '{e_comment}', '{now}')" + f"'{e_email}', '{e_comment}', now())" ) 
self._sql.execute(sql) logger.info("Added comment %s on %s/%s by %s", comment_id, entity_type, entity_id, user_email) @@ -60,7 +62,7 @@ def add_comment(self, entity_type: str, entity_id: str, user_email: str, comment entity_id=entity_id, user_email=user_email, comment=comment, - created_at=now, + created_at=datetime.now(timezone.utc).isoformat(), ) def list_comments(self, entity_type: str, entity_id: str) -> list[Comment]: @@ -71,7 +73,7 @@ def list_comments(self, entity_type: str, entity_id: str) -> list[Comment]: e_entity_id = escape_sql_string(entity_id) sql = ( f"SELECT comment_id, entity_type, entity_id, user_email, comment, " - f"CAST(created_at AS STRING) " + f"{self._sql.ts_text('created_at')} " f"FROM {self._table} " f"WHERE entity_type = '{e_type}' AND entity_id = '{e_entity_id}' " f"ORDER BY created_at ASC LIMIT 200" diff --git a/app/src/databricks_labs_dqx_app/backend/services/job_service.py b/app/src/databricks_labs_dqx_app/backend/services/job_service.py index 120c0b406..c2d15779f 100644 --- a/app/src/databricks_labs_dqx_app/backend/services/job_service.py +++ b/app/src/databricks_labs_dqx_app/backend/services/job_service.py @@ -129,18 +129,22 @@ def _record_running_placeholder( run_type: str | None = None, job_run_id: int | None = None, ) -> None: - """Insert a RUNNING placeholder row. Non-fatal on failure.""" - from datetime import datetime, timezone + """Insert a RUNNING placeholder row. Non-fatal on failure. + + ``created_at`` is now TIMESTAMP in the schema; we use + ``current_timestamp()`` rather than an ISO-string literal so the + warehouse stamps the value with its own clock and zone-mapping + works correctly on the cluster key. 
+ """ from databricks_labs_dqx_app.backend.sql_utils import escape_sql_string - now = datetime.now(timezone.utc).isoformat() er = escape_sql_string(run_id) eu = escape_sql_string(requesting_user) ef = escape_sql_string(source_table_fqn) ev = escape_sql_string(view_fqn) cols = f"run_id, requesting_user, source_table_fqn, view_fqn, {size_column}, status, created_at" - vals = f"'{er}', '{eu}', '{ef}', '{ev}', {int(size_value)}, 'RUNNING', '{now}'" + vals = f"'{er}', '{eu}', '{ef}', '{ev}', {int(size_value)}, 'RUNNING', current_timestamp()" if run_type: ert = escape_sql_string(run_type) cols += ", run_type" @@ -201,16 +205,23 @@ def record_dryrun_started( job_run_id=job_run_id, ) + # ``updated_at`` and ``created_at`` are TIMESTAMP — cast to STRING so + # the existing query_dicts → JSON serialization keeps producing ISO + # values for the frontend without further plumbing. _PROFILE_COLS = ( "run_id, requesting_user, source_table_fqn, view_fqn, sample_limit, " "rows_profiled, columns_profiled, duration_seconds, summary_json, " - "generated_rules_json, status, error_message, canceled_by, updated_at, created_at" + "generated_rules_json, status, error_message, canceled_by, " + "CAST(updated_at AS STRING) AS updated_at, " + "CAST(created_at AS STRING) AS created_at" ) _DRYRUN_COLS = ( "run_id, requesting_user, source_table_fqn, sample_size, " - "total_rows, valid_rows, invalid_rows, " - "status, error_message, canceled_by, updated_at, created_at, " + "total_rows, valid_rows, invalid_rows, error_rows, warning_rows, " + "status, error_message, canceled_by, " + "CAST(updated_at AS STRING) AS updated_at, " + "CAST(created_at AS STRING) AS created_at, " "COALESCE(run_type, 'dryrun') AS run_type, " "checks_json" ) diff --git a/app/src/databricks_labs_dqx_app/backend/services/role_service.py b/app/src/databricks_labs_dqx_app/backend/services/role_service.py index bc25eca91..6776bc3ee 100644 --- a/app/src/databricks_labs_dqx_app/backend/services/role_service.py +++ 
b/app/src/databricks_labs_dqx_app/backend/services/role_service.py @@ -31,15 +31,20 @@ class RoleMapping: class RoleService: - """Manages role-to-group mappings in a Delta table. + """Manages role-to-group mappings in ``dq_role_mappings``. - All operations use the app's service principal, not the calling user's - OBO token. + All operations use the app's service principal, not the calling + user's OBO token. The table lives on Lakebase Postgres when + Lakebase is enabled and on Delta otherwise — the injected + executor decides which. """ def __init__(self, sql: SqlExecutor) -> None: self._sql = sql - self._table = f"{sql.catalog}.{sql.schema}.dq_role_mappings" + if getattr(sql, "dialect", "delta") == "postgres": + self._table = f"{sql.schema}.dq_role_mappings" + else: + self._table = f"{sql.catalog}.{sql.schema}.dq_role_mappings" self._mappings_cache: list[RoleMapping] | None = None self._mappings_cache_expires: float = 0.0 @@ -50,7 +55,7 @@ def list_mappings(self, *, use_cache: bool = False) -> list[RoleMapping]: sql = ( f"SELECT role, group_name, created_by, " - f"CAST(created_at AS STRING), updated_by, CAST(updated_at AS STRING) " + f"{self._sql.ts_text('created_at')}, updated_by, {self._sql.ts_text('updated_at')} " f"FROM {self._table} ORDER BY role, group_name" ) rows = self._sql.query(sql) @@ -79,7 +84,7 @@ def get_mappings_for_role(self, role: str) -> list[RoleMapping]: escaped_role = escape_sql_string(role) sql = ( f"SELECT role, group_name, created_by, " - f"CAST(created_at AS STRING), updated_by, CAST(updated_at AS STRING) " + f"{self._sql.ts_text('created_at')}, updated_by, {self._sql.ts_text('updated_at')} " f"FROM {self._table} WHERE role = '{escaped_role}' ORDER BY group_name" ) rows = self._sql.query(sql) @@ -96,7 +101,13 @@ def get_mappings_for_role(self, role: str) -> list[RoleMapping]: ] def create_mapping(self, role: str, group_name: str, user_email: str) -> RoleMapping: - """Create or update a role-to-group mapping.""" + """Create or update a 
role-to-group mapping. + + On UPDATE we only refresh ``updated_*`` so the original + ``created_*`` survive — this is why we hand-write the upsert + statement rather than calling :meth:`SqlExecutor.upsert`, + which would clobber every column on the matched branch. + """ if role not in [r.value for r in UserRole]: raise ValueError(f"Invalid role: {role}. Must be one of {[r.value for r in UserRole]}") @@ -104,17 +115,28 @@ def create_mapping(self, role: str, group_name: str, user_email: str) -> RoleMap escaped_group = escape_sql_string(group_name) escaped_user = escape_sql_string(user_email) - sql = ( - f"MERGE INTO {self._table} AS target " - f"USING (SELECT '{escaped_role}' AS role, '{escaped_group}' AS group_name) AS source " - "ON target.role = source.role AND target.group_name = source.group_name " - "WHEN MATCHED THEN UPDATE SET " - f" updated_by = '{escaped_user}', " - " updated_at = current_timestamp() " - "WHEN NOT MATCHED THEN INSERT (role, group_name, created_by, created_at, updated_by, updated_at) " - f"VALUES ('{escaped_role}', '{escaped_group}', '{escaped_user}', current_timestamp(), " - f"'{escaped_user}', current_timestamp())" - ) + if getattr(self._sql, "dialect", "delta") == "postgres": + sql = ( + f"INSERT INTO {self._table} " + "(role, group_name, created_by, created_at, updated_by, updated_at) " + f"VALUES ('{escaped_role}', '{escaped_group}', '{escaped_user}', now(), " + f"'{escaped_user}', now()) " + "ON CONFLICT (role, group_name) DO UPDATE SET " + f" updated_by = '{escaped_user}', " + " updated_at = now()" + ) + else: + sql = ( + f"MERGE INTO {self._table} AS target " + f"USING (SELECT '{escaped_role}' AS role, '{escaped_group}' AS group_name) AS source " + "ON target.role = source.role AND target.group_name = source.group_name " + "WHEN MATCHED THEN UPDATE SET " + f" updated_by = '{escaped_user}', " + " updated_at = now() " + "WHEN NOT MATCHED THEN INSERT (role, group_name, created_by, created_at, updated_by, updated_at) " + f"VALUES 
('{escaped_role}', '{escaped_group}', '{escaped_user}', now(), " + f"'{escaped_user}', now())" + ) self._sql.execute(sql) self.invalidate_mappings_cache() logger.info(f"Created/updated role mapping: {role} -> {group_name}") diff --git a/app/src/databricks_labs_dqx_app/backend/services/rules_catalog_service.py b/app/src/databricks_labs_dqx_app/backend/services/rules_catalog_service.py index 5dd521cbc..a55f3d49e 100644 --- a/app/src/databricks_labs_dqx_app/backend/services/rules_catalog_service.py +++ b/app/src/databricks_labs_dqx_app/backend/services/rules_catalog_service.py @@ -13,7 +13,13 @@ class RuleCatalogEntry: - """Represents a single rule (individual check) in the rules catalog.""" + """Represents a single rule (one check) in the rules catalog. + + Note: ``checks`` is preserved as a one-element list for API/DTO + compatibility with existing routes and frontend types. The underlying + Delta column ``check`` is a VARIANT object holding the bare check — + the array wrapper is purely an in-memory representation. + """ def __init__( self, @@ -44,8 +50,9 @@ class RulesCatalogService: """Manages the rules catalog in a Delta table using app (SP) credentials. Each row represents a single rule (one check) identified by ``rule_id``. - Multiple rules can target the same ``table_fqn``. For execution the caller - aggregates all approved rules for a table into a single checks array. + The persisted ``check`` column is a VARIANT holding the bare check object + (no array wrapper); ``RuleCatalogEntry.checks`` exposes a one-element list + so existing callers don't have to change. 
""" VALID_STATUSES = {"draft", "pending_approval", "approved", "rejected"} @@ -57,16 +64,57 @@ class RulesCatalogService: "rejected": {"draft"}, } - _SELECT_COLS = ( - "table_fqn, checks, version, status, created_by, " - "CAST(created_at AS STRING), updated_by, CAST(updated_at AS STRING), " - "COALESCE(source, 'ui'), COALESCE(rule_id, '')" - ) - def __init__(self, sql: SqlExecutor) -> None: self._sql = sql - self._table = f"{sql.catalog}.{sql.schema}.dq_quality_rules" - self._history_table = f"{sql.catalog}.{sql.schema}.dq_quality_rules_history" + self._table = self._qualify(sql, "dq_quality_rules") + self._history_table = self._qualify(sql, "dq_quality_rules_history") + # ``check`` is a SQL reserved word in both Delta and Postgres, + # so quote it via the executor so we get backticks on Delta and + # double-quotes on Postgres. + self._check_col = sql.q("check") + self._select_cols = self._build_select_cols() + + @staticmethod + def _qualify(sql: SqlExecutor, table: str) -> str: + """Return the fully-qualified table path for either backend. + + Delta: ``catalog.schema.table``. Postgres: ``schema.table`` + (Postgres only has a single database per connection so we drop + the catalog component there). The Postgres executor exposes + the database via :attr:`PgExecutor.database` if a future + cross-database join is ever needed. + """ + if getattr(sql, "dialect", "delta") == "postgres": + return f"{sql.schema}.{table}" + return f"{sql.catalog}.{sql.schema}.{table}" + + def _build_select_cols(self) -> str: + """Build the column projection used by every SELECT. + + The projection deliberately differs per dialect: + + - **Delta** wraps ``check`` in ``to_json`` so the JSON_ARRAY + response format returns a string we can ``json.loads``, and + casts timestamps to STRING for the same reason. 
+ - **Postgres** doesn't need either: :class:`PgExecutor` runs + values through :func:`_to_text` on the way out, which JSON- + stringifies JSONB columns and ISO-formats timestamps for + free. + + The column *order* is identical so :meth:`_row_to_entry` + doesn't have to branch on dialect. + """ + check = self._check_col + if getattr(self._sql, "dialect", "delta") == "postgres": + return ( + f"table_fqn, {check} AS check_json, version, status, created_by, " + f"created_at, updated_by, updated_at, source, rule_id" + ) + return ( + f"table_fqn, to_json({check}) AS check_json, version, status, created_by, " + f"CAST(created_at AS STRING), updated_by, CAST(updated_at AS STRING), " + f"source, rule_id" + ) # ------------------------------------------------------------------ # Public API @@ -126,24 +174,36 @@ def save( if duplicates: logger.info("Skipped %d duplicate check(s) for table %s", len(duplicates), table_fqn) - now = datetime.now(timezone.utc).isoformat() e_table = escape_sql_string(table_fqn) e_source = escape_sql_string(source) e_user = escape_sql_string(user_email) created: list[RuleCatalogEntry] = [] + check_col = self._check_col for check in non_dup_checks: rule_id = uuid4().hex[:16] - single_check_json = json.dumps([check]) - e_checks = escape_sql_string(single_check_json) + check_json = json.dumps(check) + check_expr = self._sql.json_literal_expr(check_json) sql = ( f"INSERT INTO {self._table} " - "(table_fqn, checks, version, status, source, created_by, created_at, updated_by, updated_at, rule_id) " - f"VALUES ('{e_table}', '{e_checks}', 1, 'draft', '{e_source}', " - f"'{e_user}', '{now}', '{e_user}', '{now}', '{rule_id}')" + f"(rule_id, table_fqn, {check_col}, version, status, source, " + f"created_by, created_at, updated_by, updated_at) " + f"VALUES ('{rule_id}', '{e_table}', {check_expr}, 1, 'draft', '{e_source}', " + f"'{e_user}', now(), '{e_user}', now())" ) self._sql.execute(sql) - self._record_history(table_fqn, single_check_json, 1, source, 
user_email, now, "save", rule_id) + self._record_history( + table_fqn=table_fqn, + check_json=check_json, + version=1, + source=source, + user_email=user_email, + action="save", + rule_id=rule_id, + prev_status=None, + new_status="draft", + ) + now = datetime.now(timezone.utc).isoformat() created.append( RuleCatalogEntry( table_fqn=table_fqn, @@ -173,25 +233,37 @@ def update_rule( entry = self.get_by_rule_id(rule_id) if entry is None: raise RuntimeError(f"Rule not found: {rule_id}") - - checks_json = json.dumps(checks) - now = datetime.now(timezone.utc).isoformat() - e_checks = escape_sql_string(checks_json) + if not checks: + raise ValueError("update_rule requires exactly one check; got none") + + # update_rule is one-rule-at-a-time at the table layer; collapse to a + # single check object for the VARIANT/JSONB column. + check = checks[0] + check_json = json.dumps(check) + check_expr = self._sql.json_literal_expr(check_json) e_user = escape_sql_string(user_email) e_rule_id = escape_sql_string(rule_id) sql = ( f"UPDATE {self._table} SET " - f" checks = '{e_checks}', " - " version = version + 1, " - " status = 'draft', " + f" {self._check_col} = {check_expr}, " + f" version = version + 1, " + f" status = 'draft', " f" updated_by = '{e_user}', " - f" updated_at = '{now}' " + f" updated_at = now() " f"WHERE rule_id = '{e_rule_id}'" ) self._sql.execute(sql) self._record_history( - entry.table_fqn, checks_json, entry.version + 1, entry.source, user_email, now, "update", rule_id + table_fqn=entry.table_fqn, + check_json=check_json, + version=entry.version + 1, + source=entry.source, + user_email=user_email, + action="update", + rule_id=rule_id, + prev_status=entry.status, + new_status="draft", ) logger.info("Updated rule %s (table %s)", rule_id, entry.table_fqn) return self.get_by_rule_id(rule_id) or entry @@ -228,7 +300,7 @@ def get(self, table_fqn: str) -> RuleCatalogEntry | None: def get_by_rule_id(self, rule_id: str) -> RuleCatalogEntry | None: """Get a single rule 
by its rule_id.""" e_rule_id = escape_sql_string(rule_id) - sql = f"SELECT {self._SELECT_COLS} FROM {self._table} WHERE rule_id = '{e_rule_id}'" # noqa: S608 + sql = f"SELECT {self._select_cols} FROM {self._table} WHERE rule_id = '{e_rule_id}'" # noqa: S608 rows = self._sql.query(sql) if not rows: return None @@ -237,7 +309,7 @@ def get_by_rule_id(self, rule_id: str) -> RuleCatalogEntry | None: def list_rules_for_table(self, table_fqn: str, status: str | None = None) -> list[RuleCatalogEntry]: """List all individual rules for a given table, optionally filtered by status.""" e_table = escape_sql_string(table_fqn) - sql = f"SELECT {self._SELECT_COLS} FROM {self._table} WHERE table_fqn = '{e_table}'" # noqa: S608 + sql = f"SELECT {self._select_cols} FROM {self._table} WHERE table_fqn = '{e_table}'" # noqa: S608 if status: e_status = escape_sql_string(status) sql += f" AND status = '{e_status}'" @@ -247,7 +319,7 @@ def list_rules_for_table(self, table_fqn: str, status: str | None = None) -> lis def list_rules(self, status: str | None = None) -> list[RuleCatalogEntry]: """List all individual rules, optionally filtered by status.""" - sql = f"SELECT {self._SELECT_COLS} FROM {self._table}" + sql = f"SELECT {self._select_cols} FROM {self._table}" if status: e_status = escape_sql_string(status) sql += f" WHERE status = '{e_status}'" @@ -332,11 +404,24 @@ def delete(self, rule_id: str, user_email: str) -> None: e_rule_id = escape_sql_string(rule_id) sql = f"DELETE FROM {self._table} WHERE rule_id = '{e_rule_id}'" self._sql.execute(sql) - now = datetime.now(timezone.utc).isoformat() table_fqn = entry.table_fqn if entry else "unknown" version = entry.version if entry else 0 source = entry.source if entry else "ui" - self._record_history(table_fqn, None, version, source, user_email, now, "delete", rule_id) + # Preserve the post-state ``check`` payload in history so audit + # readers can reconstruct what was deleted without walking back to + # the prior save row. 
+ check_json = json.dumps(entry.checks[0]) if entry and entry.checks else None + self._record_history( + table_fqn=table_fqn, + check_json=check_json, + version=version, + source=source, + user_email=user_email, + action="delete", + rule_id=rule_id, + prev_status=entry.status if entry else None, + new_status=None, + ) logger.info("Deleted rule %s (table %s, by %s)", rule_id, table_fqn, user_email) def delete_by_table(self, table_fqn: str, user_email: str) -> None: @@ -344,8 +429,17 @@ def delete_by_table(self, table_fqn: str, user_email: str) -> None: e_table = escape_sql_string(table_fqn) sql = f"DELETE FROM {self._table} WHERE table_fqn = '{e_table}'" self._sql.execute(sql) - now = datetime.now(timezone.utc).isoformat() - self._record_history(table_fqn, None, 0, "ui", user_email, now, "delete_all") + self._record_history( + table_fqn=table_fqn, + check_json=None, + version=0, + source="ui", + user_email=user_email, + action="delete_all", + rule_id=None, + prev_status=None, + new_status=None, + ) logger.info("Deleted all rules for table %s (by %s)", table_fqn, user_email) def set_status( @@ -378,7 +472,6 @@ def set_status( f"but current is v{entry.version}. Another user may have modified the rule." ) - now = datetime.now(timezone.utc).isoformat() e_status = escape_sql_string(status) e_user = escape_sql_string(user_email) e_rule_id = escape_sql_string(rule_id) @@ -387,12 +480,24 @@ def set_status( f"UPDATE {self._table} SET " f" status = '{e_status}', " f" updated_by = '{e_user}', " - f" updated_at = '{now}' " + f" updated_at = now() " f"WHERE rule_id = '{e_rule_id}' AND version = {entry.version}" ) self._sql.execute(sql) + # Always include the post-state ``check`` payload + explicit + # prev/new status pair so dashboards reconstructing the trail + # don't have to walk back to the prior save row. 
+ check_json = json.dumps(entry.checks[0]) if entry.checks else None self._record_history( - entry.table_fqn, None, entry.version, entry.source, user_email, now, f"status:{status}", rule_id + table_fqn=entry.table_fqn, + check_json=check_json, + version=entry.version, + source=entry.source, + user_email=user_email, + action=f"status:{status}", + rule_id=rule_id, + prev_status=entry.status, + new_status=status, ) logger.info("Updated status for rule %s to %s (by %s)", rule_id, status, user_email) @@ -435,21 +540,14 @@ def get_approved_checks_for_table(self, table_fqn: str) -> list[dict[str, Any]]: def backfill_rule_ids(self) -> int: """Assign a rule_id to every row that currently has NULL or empty rule_id. - Returns the number of rows updated. + Retained as a no-op safety net: in the current baseline ``rule_id`` + is ``NOT NULL`` with a PK constraint, so this should always return 0. + Kept so legacy upgrade paths that ran the pre-baseline migrations + can still call it without an AttributeError. """ count_sql = f"SELECT COUNT(*) FROM {self._table} WHERE rule_id IS NULL OR rule_id = ''" rows = self._sql.query(count_sql) total = int(rows[0][0]) if rows and rows[0] else 0 - if total == 0: - return 0 - - sql = ( - f"UPDATE {self._table} " - "SET rule_id = SUBSTRING(MD5(CONCAT(table_fqn, checks, CAST(RAND() AS STRING))), 1, 16) " - "WHERE rule_id IS NULL OR rule_id = ''" - ) - self._sql.execute(sql) - logger.info("Backfilled rule_id for %d rule(s)", total) return total # ------------------------------------------------------------------ @@ -491,8 +589,27 @@ def _check_no_duplicate_pending_or_approved(self, entry: RuleCatalogEntry) -> No ) def _row_to_entry(self, row: list[str]) -> RuleCatalogEntry: - """Convert a query result row to a RuleCatalogEntry.""" - checks = json.loads(row[1], strict=False) if row[1] else [] + """Convert a query result row to a RuleCatalogEntry. 
+ + Row layout (see :meth:`_build_select_cols`): + ``[table_fqn, check_json, version, status, created_by, created_at, + updated_by, updated_at, source, rule_id]``. + + ``check_json`` is the JSON rendering of the VARIANT/JSONB + column (via Delta's ``to_json`` or PgExecutor's automatic + text coercion). We wrap it in a one-element list so the + in-memory ``checks`` shape keeps existing callers (route DTOs, + scheduler) unchanged. + """ + check_json = row[1] + if check_json: + try: + parsed = json.loads(check_json, strict=False) + except json.JSONDecodeError: + parsed = None + checks = [parsed] if isinstance(parsed, dict) else [] + else: + checks = [] return RuleCatalogEntry( table_fqn=row[0], checks=checks, @@ -508,28 +625,44 @@ def _row_to_entry(self, row: list[str]) -> RuleCatalogEntry: def _record_history( self, + *, table_fqn: str, - checks_json: str | None, + check_json: str | None, version: int, source: str, user_email: str, - timestamp: str, action: str, - rule_id: str | None = None, + rule_id: str | None, + prev_status: str | None, + new_status: str | None, ) -> None: - """Insert an audit row into the history table (best-effort).""" + """Insert an audit row into the history table (best-effort). + + Always carries the post-state ``check`` payload (when one exists) + and an explicit ``prev_status``/``new_status`` pair so callers + querying the audit trail don't have to walk backwards through the + log to reconstruct what changed. + + ``check`` is a SQL reserved word so the column name is quoted + via the executor's :meth:`q` helper (backticks for Delta, + double quotes for Postgres). 
+ """ try: e_table = escape_sql_string(table_fqn) - e_checks = escape_sql_string(checks_json or "") e_source = escape_sql_string(source) e_action = escape_sql_string(action) e_user = escape_sql_string(user_email) - e_rule_id = escape_sql_string(rule_id or "") + rule_id_sql = f"'{escape_sql_string(rule_id)}'" if rule_id else "NULL" + check_sql = self._sql.json_literal_expr(check_json) if check_json else "NULL" + prev_sql = f"'{escape_sql_string(prev_status)}'" if prev_status else "NULL" + new_sql = f"'{escape_sql_string(new_status)}'" if new_status else "NULL" + sql = ( f"INSERT INTO {self._history_table} " - "(table_fqn, checks, version, source, action, changed_by, changed_at, rule_id) VALUES " - f"('{e_table}', '{e_checks}', {version}, '{e_source}', '{e_action}', " - f"'{e_user}', '{timestamp}', '{e_rule_id}')" + f"(rule_id, table_fqn, {self._check_col}, version, source, action, " + f"prev_status, new_status, changed_by, changed_at) VALUES " + f"({rule_id_sql}, '{e_table}', {check_sql}, {version}, '{e_source}', " + f"'{e_action}', {prev_sql}, {new_sql}, '{e_user}', now())" ) self._sql.execute(sql) except Exception: diff --git a/app/src/databricks_labs_dqx_app/backend/services/schedule_config_service.py b/app/src/databricks_labs_dqx_app/backend/services/schedule_config_service.py index 941fde0e0..044bdee24 100644 --- a/app/src/databricks_labs_dqx_app/backend/services/schedule_config_service.py +++ b/app/src/databricks_labs_dqx_app/backend/services/schedule_config_service.py @@ -35,13 +35,18 @@ class ScheduleConfigService: def __init__(self, sql: SqlExecutor) -> None: self._sql = sql - self._table = f"{sql.catalog}.{sql.schema}.dq_schedule_configs" - self._history_table = f"{sql.catalog}.{sql.schema}.dq_schedule_configs_history" + if getattr(sql, "dialect", "delta") == "postgres": + self._table = f"{sql.schema}.dq_schedule_configs" + self._history_table = f"{sql.schema}.dq_schedule_configs_history" + else: + self._table = 
f"{sql.catalog}.{sql.schema}.dq_schedule_configs" + self._history_table = f"{sql.catalog}.{sql.schema}.dq_schedule_configs_history" def list_schedules(self) -> list[ScheduleConfigEntry]: + ts = self._sql.ts_text sql = ( f"SELECT schedule_name, config_json, version, created_by, " - f"CAST(created_at AS STRING), updated_by, CAST(updated_at AS STRING) " + f"{ts('created_at')}, updated_by, {ts('updated_at')} " f"FROM {self._table} ORDER BY schedule_name" ) rows = self._sql.query(sql) @@ -50,9 +55,10 @@ def list_schedules(self) -> list[ScheduleConfigEntry]: def get(self, name: str) -> ScheduleConfigEntry | None: validate_schedule_name(name) escaped = escape_sql_string(name) + ts = self._sql.ts_text sql = ( f"SELECT schedule_name, config_json, version, created_by, " - f"CAST(created_at AS STRING), updated_by, CAST(updated_at AS STRING) " + f"{ts('created_at')}, updated_by, {ts('updated_at')} " f"FROM {self._table} WHERE schedule_name = '{escaped}'" ) rows = self._sql.query(sql) @@ -66,33 +72,58 @@ def save( config: dict[str, Any], user_email: str, ) -> ScheduleConfigEntry: + """Upsert a schedule config row, incrementing ``version`` on update. + + Uses an explicit MERGE rather than ``SqlExecutor.upsert`` because: + 1. ``version`` increments rather than being clobbered, and + 2. ``created_*`` is preserved on update; only ``updated_*`` changes. 
+ """ validate_schedule_name(name) config_json = json.dumps(config) - now = datetime.now(timezone.utc).isoformat() escaped_name = escape_sql_string(name) escaped_json = escape_sql_string(config_json) escaped_user = escape_sql_string(user_email) - sql = ( - f"MERGE INTO {self._table} AS target " - f"USING (SELECT '{escaped_name}' AS schedule_name) AS source " - "ON target.schedule_name = source.schedule_name " - "WHEN MATCHED THEN UPDATE SET " - f" config_json = '{escaped_json}', " - " version = target.version + 1, " - f" updated_by = '{escaped_user}', " - f" updated_at = '{now}' " - "WHEN NOT MATCHED THEN INSERT " - "(schedule_name, config_json, version, created_by, created_at, updated_by, updated_at) " - f"VALUES ('{escaped_name}', '{escaped_json}', 1, '{escaped_user}', '{now}', '{escaped_user}', '{now}')" - ) + if getattr(self._sql, "dialect", "delta") == "postgres": + # On conflict we reference the existing row via the + # unqualified base table name (Postgres' convention for + # ON CONFLICT DO UPDATE), versus EXCLUDED.* which would + # surface the proposed-row values. 
+ base = self._table.split(".")[-1] + sql = ( + f"INSERT INTO {self._table} " + "(schedule_name, config_json, version, created_by, created_at, updated_by, updated_at) " + f"VALUES ('{escaped_name}', '{escaped_json}', 1, '{escaped_user}', now(), " + f"'{escaped_user}', now()) " + "ON CONFLICT (schedule_name) DO UPDATE SET " + f" config_json = '{escaped_json}', " + f" version = {base}.version + 1, " + f" updated_by = '{escaped_user}', " + " updated_at = now()" + ) + else: + sql = ( + f"MERGE INTO {self._table} AS target " + f"USING (SELECT '{escaped_name}' AS schedule_name) AS source " + "ON target.schedule_name = source.schedule_name " + "WHEN MATCHED THEN UPDATE SET " + f" config_json = '{escaped_json}', " + " version = target.version + 1, " + f" updated_by = '{escaped_user}', " + " updated_at = now() " + "WHEN NOT MATCHED THEN INSERT " + "(schedule_name, config_json, version, created_by, created_at, updated_by, updated_at) " + f"VALUES ('{escaped_name}', '{escaped_json}', 1, '{escaped_user}', now(), " + f"'{escaped_user}', now())" + ) self._sql.execute(sql) self._record_history(name, config_json, user_email, "save") logger.info("Saved schedule config: %s (user=%s)", name, user_email) entry = self.get(name) if entry is None: + now = datetime.now(timezone.utc).isoformat() return ScheduleConfigEntry( schedule_name=name, config=config, @@ -125,7 +156,7 @@ def get_history(self, name: str) -> list[dict[str, Any]]: escaped = escape_sql_string(name) sql = ( f"SELECT schedule_name, config_json, version, action, changed_by, " - f"CAST(changed_at AS STRING) " + f"{self._sql.ts_text('changed_at')} " f"FROM {self._history_table} " f"WHERE schedule_name = '{escaped}' " "ORDER BY changed_at DESC" @@ -162,7 +193,6 @@ def _record_history( version: int = 0, ) -> None: try: - now = datetime.now(timezone.utc).isoformat() escaped_name = escape_sql_string(name) escaped_json = escape_sql_string(config_json) escaped_user = escape_sql_string(user_email) @@ -171,7 +201,7 @@ def 
_record_history( f"INSERT INTO {self._history_table} " "(schedule_name, config_json, version, action, changed_by, changed_at) " f"VALUES ('{escaped_name}', '{escaped_json}', {version}, '{escaped_action}', " - f"'{escaped_user}', '{now}')" + f"'{escaped_user}', now())" ) self._sql.execute(sql) except Exception: diff --git a/app/src/databricks_labs_dqx_app/backend/services/scheduler_service.py b/app/src/databricks_labs_dqx_app/backend/services/scheduler_service.py index d788a770c..7864ce4f3 100644 --- a/app/src/databricks_labs_dqx_app/backend/services/scheduler_service.py +++ b/app/src/databricks_labs_dqx_app/backend/services/scheduler_service.py @@ -16,13 +16,16 @@ import json import re from datetime import datetime, timedelta, timezone -from typing import Any +from typing import TYPE_CHECKING, Any, cast from uuid import uuid4 from databricks.sdk import WorkspaceClient from databricks_labs_dqx_app.backend.logger import get_logger -from databricks_labs_dqx_app.backend.sql_executor import SqlExecutor +from databricks_labs_dqx_app.backend.sql_executor import RawSql, SqlExecutor + +if TYPE_CHECKING: + from databricks_labs_dqx_app.backend.pg_executor import PgExecutor logger = get_logger("scheduler") @@ -65,6 +68,45 @@ _GC_AGE_HOURS = 48 _GC_MAX_DROPS_PER_RUN = 500 +# Retention sweep — daily DELETE pass against the high-volume tables to +# keep them from growing without bound. Each (table, time-column) pair +# in :data:`_RETENTION_TABLES` is trimmed to ``RETENTION_DAYS`` worth of +# history. Defaults to 90 days; configurable via the ``retention_days`` +# setting in ``dq_app_settings``. The sweep runs at most once per +# ``_RETENTION_INTERVAL_HOURS`` so the warehouse isn't billed repeatedly +# for the same DELETE. +_RETENTION_DAYS_DEFAULT = 90 +_RETENTION_DAYS_MIN = 7 +_RETENTION_INTERVAL_HOURS = 24 + +# ``dq_quarantine_records`` is the only table that holds full row +# payloads (the source row + ``_errors`` / ``_warnings`` blobs). 
Those +# rows are PII-sensitive and tend to drive most of the Studio's storage +# growth, so we expose a *separate* retention knob with a tighter +# default (30 days) instead of subjecting them to the same window as +# trend tables like ``dq_metrics`` (which dashboards expect to look +# back ~3 months on). Set via the ``quarantine_retention_days`` key +# in ``dq_app_settings``; falls back here when unset. +_QUARANTINE_RETENTION_DAYS_DEFAULT = 30 +_QUARANTINE_TABLE_NAME = "dq_quarantine_records" + +# Retention is split per-backend: analytical (Delta) tables are +# trimmed via the SQL warehouse executor, OLTP tables via the OLTP +# executor (Lakebase if enabled, Delta otherwise). Both lists are +# walked on every retention sweep. ``dq_quarantine_records`` is in +# this list but resolves its cutoff via :meth:`_resolve_quarantine_retention_days` +# instead of the global :meth:`_resolve_retention_days`. +_DELTA_RETENTION_TABLES: tuple[tuple[str, str], ...] = ( + ("dq_validation_runs", "created_at"), + ("dq_profiling_results", "created_at"), + (_QUARANTINE_TABLE_NAME, "created_at"), + ("dq_metrics", "run_time"), +) +_OLTP_RETENTION_TABLES: tuple[tuple[str, str], ...] = ( + ("dq_quality_rules_history", "changed_at"), + ("dq_schedule_configs_history", "changed_at"), +) + class SchedulerService: """Manages a background loop that checks schedule configs and triggers runs.""" @@ -77,7 +119,23 @@ def __init__( schema: str, tmp_schema: str, job_id: str, + oltp_sql: "SqlExecutor | PgExecutor | None" = None, ) -> None: + """Construct the scheduler. + + Parameters + ---------- + oltp_sql: + Executor used for OLTP-table operations (schedule + tracking, schedule configs, app settings, rule reads). + When ``None`` (legacy mode, no Lakebase) the same Delta + executor is used for everything. When Lakebase is enabled, + callers pass a :class:`backend.pg_executor.PgExecutor` so + the high-frequency reads/writes hit Postgres. 
Internally + we cast to :class:`SqlExecutor` because :class:`PgExecutor` + mirrors that public surface — it's the same trick used at + the FastAPI dependency boundary. + """ self._ws = ws self._job_id = job_id self._catalog = catalog @@ -85,13 +143,20 @@ def __init__( self._tmp_schema = tmp_schema self._sql = SqlExecutor(ws=ws, warehouse_id=warehouse_id, catalog=catalog, schema=schema) self._tmp_sql = SqlExecutor(ws=ws, warehouse_id=warehouse_id, catalog=catalog, schema=tmp_schema) + # OLTP executor — either a PgExecutor (Lakebase) or the same + # Delta executor (legacy mode). All schedule / settings / + # rule access goes through this; only analytical table + # operations (retention sweep, orphan view GC) use ``self._sql``. + self._oltp_sql: SqlExecutor = cast(SqlExecutor, oltp_sql) if oltp_sql is not None else self._sql self._task: asyncio.Task[None] | None = None self._reload_event = asyncio.Event() self._force_recalc = False - self._table = f"{catalog}.{schema}.dq_schedule_runs" - self._configs_table = f"{catalog}.{schema}.dq_schedule_configs" - self._settings_table = f"{catalog}.{schema}.dq_app_settings" - self._rules_table = f"{catalog}.{schema}.dq_quality_rules" + # Both backend layouts qualify the table differently — let the + # OLTP executor's catalog/schema decide. + self._table = self._qualify_oltp("dq_schedule_runs") + self._configs_table = self._qualify_oltp("dq_schedule_configs") + self._settings_table = self._qualify_oltp("dq_app_settings") + self._rules_table = self._qualify_oltp("dq_quality_rules") # Orphan-tmp-view GC: fires every Saturday at 01:00 UTC. Held in # process memory rather than persisted — a missed Saturday (e.g. @@ -100,6 +165,17 @@ def __init__( # and orphans only accumulate slowly. self._next_view_gc_at: datetime = self._next_saturday_01_utc(datetime.now(timezone.utc)) + # Retention sweep: fires every ``_RETENTION_INTERVAL_HOURS`` + # (default 24h). 
Held in process memory like the view GC; a + # missed sweep is harmless since the next one catches up. + self._next_retention_at: datetime = datetime.now(timezone.utc) + timedelta(hours=_RETENTION_INTERVAL_HOURS) + + def _qualify_oltp(self, table: str) -> str: + """Fully-qualify *table* for whichever backend the OLTP executor uses.""" + if getattr(self._oltp_sql, "dialect", "delta") == "postgres": + return f"{self._oltp_sql.schema}.{table}" + return f"{self._oltp_sql.catalog}.{self._oltp_sql.schema}.{table}" + # ------------------------------------------------------------------ # Lifecycle # ------------------------------------------------------------------ @@ -140,6 +216,7 @@ async def _loop(self) -> None: self._force_recalc = False await self._tick(recalc=recalc) await self._maybe_gc_orphan_views(datetime.now(timezone.utc)) + await self._maybe_run_retention(datetime.now(timezone.utc)) except asyncio.CancelledError: raise except Exception: @@ -236,7 +313,7 @@ def _load_schedule_configs(self) -> dict[str, dict[str, Any]]: try: sql = f"SELECT schedule_name, config_json FROM {self._configs_table}" - rows = self._sql.query(sql) + rows = self._oltp_sql.query(sql) for row in rows: name = row[0] or "" if not name: @@ -256,7 +333,7 @@ def _load_schedule_configs(self) -> dict[str, dict[str, Any]]: # Legacy fallback: read from dq_app_settings blob try: sql = f"SELECT setting_value FROM {self._settings_table} WHERE setting_key = 'workspace_config'" - rows = self._sql.query(sql) + rows = self._oltp_sql.query(sql) if not rows: return {} data = json.loads(rows[0][0]) @@ -286,12 +363,13 @@ def _get_tracker(self, name: str) -> dict[str, str] | None: validate_schedule_name(name) escaped = escape_sql_string(name) + ts = self._oltp_sql.ts_text sql = ( - f"SELECT schedule_name, CAST(last_run_at AS STRING), CAST(next_run_at AS STRING), " + f"SELECT schedule_name, {ts('last_run_at')}, {ts('next_run_at')}, " f"last_run_id, status " f"FROM {self._table} WHERE schedule_name = '{escaped}'" 
) - rows = self._sql.query(sql) + rows = self._oltp_sql.query(sql) if not rows: return None row = rows[0] @@ -311,28 +389,34 @@ def _upsert_tracker( last_run_id: str | None, status: str, ) -> None: - from databricks_labs_dqx_app.backend.sql_utils import escape_sql_string, validate_schedule_name + from databricks_labs_dqx_app.backend.sql_utils import validate_schedule_name validate_schedule_name(name) if status not in _VALID_TRACKER_STATUSES: raise ValueError(f"Invalid tracker status: '{status}'. Must be one of {_VALID_TRACKER_STATUSES}") - escaped_name = escape_sql_string(name) - escaped_status = escape_sql_string(status) - last_str = f"'{last_run_at.isoformat()}'" if last_run_at else "NULL" - next_str = f"'{next_run_at.isoformat()}'" if next_run_at else "NULL" - run_id_str = f"'{escape_sql_string(last_run_id)}'" if last_run_id else "NULL" - sql = ( - f"MERGE INTO {self._table} AS target " - f"USING (SELECT '{escaped_name}' AS schedule_name) AS source " - "ON target.schedule_name = source.schedule_name " - "WHEN MATCHED THEN UPDATE SET " - f" last_run_at = {last_str}, next_run_at = {next_str}, " - f" last_run_id = {run_id_str}, status = '{escaped_status}' " - "WHEN NOT MATCHED THEN INSERT (schedule_name, last_run_at, next_run_at, last_run_id, status) " - f"VALUES ('{escaped_name}', {last_str}, {next_str}, {run_id_str}, '{escaped_status}')" + # Render datetimes as portable TIMESTAMP literals. The + # ``TIMESTAMP'...'`` form is ANSI SQL and works in both + # Delta and Postgres without modification. PgExecutor's + # upsert renderer treats ``RawSql("current_timestamp()")`` + # specially and rewrites it to ``CURRENT_TIMESTAMP`` so the + # same call works for both backends. 
+ def _ts(dt: datetime | None) -> RawSql: + if dt is None: + return RawSql("NULL") + return RawSql(f"TIMESTAMP'{dt.isoformat()}'") + + self._oltp_sql.upsert( + self._table, + key_cols={"schedule_name": name}, + value_cols={ + "last_run_at": _ts(last_run_at), + "next_run_at": _ts(next_run_at), + "last_run_id": last_run_id, + "status": status, + "updated_at": RawSql("current_timestamp()"), + }, ) - self._sql.execute(sql) # ------------------------------------------------------------------ # Trigger run @@ -413,29 +497,95 @@ def _trigger_run(self, schedule_name: str, cfg: dict[str, Any], run_id_prefix: s # ------------------------------------------------------------------ def _resolve_scope(self, cfg: dict[str, Any]) -> list[str]: - """Return list of unique table_fqn matching the schedule's scope from approved rules.""" + """Return list of unique table_fqn matching the schedule's scope from approved rules. + + Two orthogonal filters intersected: + * ``scope_mode`` / ``scope_catalogs|schemas|tables`` — FQN-based. + * ``scope_labels`` — keep only FQNs that have at least one approved + check carrying a matching ``user_metadata`` label. 
+ """ mode = cfg.get("scope_mode", "all") sql = f"SELECT DISTINCT table_fqn FROM {self._rules_table} WHERE status = 'approved'" - rows = self._sql.query(sql) + rows = self._oltp_sql.query(sql) fqns = [r[0] for r in rows if r[0]] - if mode == "all": - return fqns - if mode == "catalog": catalogs = set(cfg.get("scope_catalogs") or []) - return [f for f in fqns if self._fqn_part(f, 0) in catalogs] - - if mode == "schema": + fqns = [f for f in fqns if self._fqn_part(f, 0) in catalogs] + elif mode == "schema": schemas = set(cfg.get("scope_schemas") or []) - return [f for f in fqns if self._fqn_schema(f) in schemas] - - if mode == "tables": + fqns = [f for f in fqns if self._fqn_schema(f) in schemas] + elif mode == "tables": tables = set(cfg.get("scope_tables") or []) - return [f for f in fqns if f in tables] + fqns = [f for f in fqns if f in tables] + + label_filter = self._parse_scope_labels(cfg.get("scope_labels")) + if label_filter: + fqns = [f for f in fqns if self._fqn_has_matching_label(f, label_filter)] return fqns + @staticmethod + def _parse_scope_labels(raw: Any) -> set[tuple[str, str]]: + """Normalise the persisted ``scope_labels`` field to a set of (key, value). + + Accepts the canonical ``[{key, value}, ...]`` shape produced by the + UI plus a lenient ``["key=value", ...]`` shorthand for hand-edited + configs. Invalid entries are silently dropped — a malformed label + filter must never block a scheduled run. 
+ """ + if not isinstance(raw, list): + return set() + out: set[tuple[str, str]] = set() + for entry in raw: + if isinstance(entry, dict): + key = entry.get("key") + if isinstance(key, str) and key: + out.add((key, str(entry.get("value") or ""))) + elif isinstance(entry, str): + if not entry: + continue + idx = entry.find("=") + if idx < 0: + out.add((entry, "")) + else: + out.add((entry[:idx], entry[idx + 1 :])) + return out + + def _fqn_has_matching_label( + self, + table_fqn: str, + label_filter: set[tuple[str, str]], + ) -> bool: + """True iff any approved check on ``table_fqn`` carries a matching label.""" + rule = self._get_approved_rule(table_fqn) + if rule is None: + return False + for check in rule.get("checks") or []: + md = self._check_user_metadata(check) + for key, value in md.items(): + if (key, value) in label_filter: + return True + return False + + @staticmethod + def _check_user_metadata(check: Any) -> dict[str, str]: + """Pull the ``user_metadata`` map off a check payload regardless of shape. + + Mirrors the front-end ``getUserMetadata`` helper — checks come in + either as a top-level dict with ``user_metadata`` directly on them, + or wrapped under a ``check`` key (legacy export shape). + """ + if not isinstance(check, dict): + return {} + candidate = check.get("user_metadata") + if not isinstance(candidate, dict): + inner = check.get("check") + candidate = inner.get("user_metadata") if isinstance(inner, dict) else None + if not isinstance(candidate, dict): + return {} + return {str(k): str(v) for k, v in candidate.items() if k} + @staticmethod def _fqn_part(fqn: str, idx: int) -> str: parts = fqn.split(".") @@ -451,24 +601,48 @@ def _fqn_schema(fqn: str) -> str: # ------------------------------------------------------------------ def _get_approved_rule(self, table_fqn: str) -> dict[str, Any] | None: - """Get merged checks from all approved rule rows for a table.""" + """Get merged checks from all approved rule rows for a table. 
+ + After the v1 baseline split, each row stores a single check in + the VARIANT/JSONB ``check`` column rather than an array of + checks. The scheduler still presents one merged ``checks`` list + downstream (the task runner expects an array) so we collect + each row's bare object and append it. + """ from databricks_labs_dqx_app.backend.sql_utils import escape_sql_string e_fqn = escape_sql_string(table_fqn) + check_col = self._oltp_sql.q("check") + # Project the VARIANT/JSONB column as JSON text. Delta needs + # ``to_json`` to serialise the VARIANT through the JSON_ARRAY + # response format; Postgres returns JSONB cells as Python + # dicts which :func:`PgExecutor._to_text` already JSON-encodes, + # so a plain projection is enough there. + if getattr(self._oltp_sql, "dialect", "delta") == "postgres": + check_proj = f"{check_col} AS check_json" + else: + check_proj = f"to_json({check_col}) AS check_json" sql = ( - f"SELECT table_fqn, checks FROM {self._rules_table} " f"WHERE table_fqn = '{e_fqn}' AND status = 'approved'" + f"SELECT table_fqn, {check_proj} FROM {self._rules_table} " + f"WHERE table_fqn = '{e_fqn}' AND status = 'approved'" ) - rows = self._sql.query(sql) + rows = self._oltp_sql.query(sql) if not rows: return None merged_checks: list[dict[str, Any]] = [] for row in rows: try: - parsed = json.loads(row[1], strict=False) if row[1] else [] - if isinstance(parsed, list): - merged_checks.extend(parsed) + parsed = json.loads(row[1], strict=False) if row[1] else None except json.JSONDecodeError: continue + if isinstance(parsed, dict): + merged_checks.append(parsed) + elif isinstance(parsed, list): + # Defensive: pre-baseline rows wrapped the single check + # in a one-element list. Tolerate that on read so a + # workspace that hasn't run ``DROP SCHEMA CASCADE`` + # against legacy data doesn't lose its rules. 
+ merged_checks.extend([c for c in parsed if isinstance(c, dict)]) if not merged_checks: return None return {"table_fqn": rows[0][0], "checks": merged_checks} @@ -487,7 +661,7 @@ def _load_custom_metrics(self) -> list[str]: key = escape_sql_string("custom_metrics_v1") sql = f"SELECT setting_value FROM {self._settings_table} WHERE setting_key = '{key}'" # noqa: S608 - rows = self._sql.query(sql) + rows = self._oltp_sql.query(sql) if not rows or rows[0][0] is None: return [] parsed = json.loads(rows[0][0]) @@ -628,6 +802,136 @@ def _gc_orphan_views(self) -> None: skipped, ) + # ------------------------------------------------------------------ + # Retention — daily DELETE sweep against high-volume tables + # ------------------------------------------------------------------ + + def _resolve_retention_days(self) -> int: + """Return the configured retention window in days (>= 7). + + Looks up ``retention_days`` in ``dq_app_settings`` and falls back + to :data:`_RETENTION_DAYS_DEFAULT` (90 days) when unset or + unparseable. Capped at the lower bound :data:`_RETENTION_DAYS_MIN` + so a misconfiguration can't accidentally wipe live data. + """ + return self._resolve_setting_days("retention_days", _RETENTION_DAYS_DEFAULT) + + def _resolve_quarantine_retention_days(self) -> int: + """Return the quarantine-specific retention window in days (>= 7). + + Quarantine rows hold the full source row payload (PII surface) + so we maintain a separate, tighter default + (:data:`_QUARANTINE_RETENTION_DAYS_DEFAULT`, 30 days) than the + global retention. Configurable via ``quarantine_retention_days`` + in ``dq_app_settings``. Same min-floor protection as the global + resolver. + """ + return self._resolve_setting_days( + "quarantine_retention_days", + _QUARANTINE_RETENTION_DAYS_DEFAULT, + ) + + def _resolve_setting_days(self, key: str, default: int) -> int: + """Read an integer-day setting from ``dq_app_settings``. 
+ + Shared parsing/floor logic for the global and quarantine + retention knobs. Any read or parse failure falls back to + *default*; the returned value is always >= :data:`_RETENTION_DAYS_MIN` + so a misconfiguration can never wipe data inside the safety floor. + """ + try: + from databricks_labs_dqx_app.backend.sql_utils import escape_sql_string + + escaped_key = escape_sql_string(key) + sql = f"SELECT setting_value FROM {self._settings_table} WHERE setting_key = '{escaped_key}'" # noqa: S608 + rows = self._oltp_sql.query(sql) + if rows and rows[0] and rows[0][0]: + value = int(rows[0][0]) + return max(_RETENTION_DAYS_MIN, value) + except Exception: + logger.debug("Failed to read %s setting; falling back to default", key, exc_info=True) + return default + + async def _maybe_run_retention(self, now: datetime) -> None: + """Run the retention sweep if the daily timer has elapsed. + + Cheap to skip (one comparison) and runs in a background thread + so it doesn't block the loop. Failures are logged but never + fatal — the next tick re-tries. + """ + if now < self._next_retention_at: + return + + scheduled_for = self._next_retention_at + # Advance the timer first so a slow sweep can't double-fire. + self._next_retention_at = now + timedelta(hours=_RETENTION_INTERVAL_HOURS) + logger.info( + "Retention sweep: triggering daily cleanup (was due at %s); next run scheduled for %s", + scheduled_for.isoformat(), + self._next_retention_at.isoformat(), + ) + try: + await asyncio.to_thread(self._run_retention) + except Exception: + logger.exception("Retention sweep failed (non-fatal)") + + def _run_retention(self) -> None: + """DELETE rows older than ``retention_days`` from each high-volume table. + + Each table is processed independently — a failure on one + doesn't abort the others. The DELETE predicate uses an + INTERVAL literal so the backend stamps the cutoff against its + own clock (no Python-side time skew). 
+ + Tables are split between the analytical Delta executor and + the OLTP executor (Lakebase or Delta-fallback) because the + ``INTERVAL`` syntax differs between dialects: Delta uses + ``INTERVAL N DAY`` (no quotes); Postgres uses + ``INTERVAL 'N days'``. + """ + days = self._resolve_retention_days() + quarantine_days = self._resolve_quarantine_retention_days() + logger.info( + "Retention sweep: deleting rows older than %d days (quarantine: %d days)", + days, + quarantine_days, + ) + + total_deleted = 0 + # Delta tables — quoted with backticks so a future + # special-character schema name doesn't break the DELETE. + # ``dq_quarantine_records`` honours its own cutoff so PII row + # payloads can be aged out faster than the trend tables. + for table_name, time_col in _DELTA_RETENTION_TABLES: + table = f"`{self._catalog}`.`{self._schema}`.{table_name}" + cutoff = quarantine_days if table_name == _QUARANTINE_TABLE_NAME else days + stmt = f"DELETE FROM {table} " f"WHERE {time_col} < current_timestamp() - INTERVAL {cutoff} DAY" + try: + self._sql.execute(stmt) + logger.info("Retention sweep (Delta): cleaned %s (cutoff=%dd)", table_name, cutoff) + total_deleted += 1 + except Exception as exc: + logger.warning("Retention sweep: %s failed (%s); continuing", table_name, exc) + + # OLTP tables — quoted by the executor's q() helper so + # backticks/double-quotes follow the dialect. 
+ is_postgres = getattr(self._oltp_sql, "dialect", "delta") == "postgres" + for table_name, time_col in _OLTP_RETENTION_TABLES: + table = self._qualify_oltp(table_name) + if is_postgres: + interval = f"INTERVAL '{days} days'" + else: + interval = f"INTERVAL {days} DAY" + stmt = f"DELETE FROM {table} " f"WHERE {time_col} < CURRENT_TIMESTAMP - {interval}" + try: + self._oltp_sql.execute(stmt) + logger.info("Retention sweep (OLTP): cleaned %s (cutoff=%dd)", table_name, days) + total_deleted += 1 + except Exception as exc: + logger.warning("Retention sweep: %s failed (%s); continuing", table_name, exc) + + logger.info("Retention sweep complete: %d table(s) processed", total_deleted) + # ------------------------------------------------------------------ # View creation (SP credentials) # ------------------------------------------------------------------ diff --git a/app/src/databricks_labs_dqx_app/backend/sql_executor.py b/app/src/databricks_labs_dqx_app/backend/sql_executor.py index 73a1e7f7b..1c464bd9f 100644 --- a/app/src/databricks_labs_dqx_app/backend/sql_executor.py +++ b/app/src/databricks_labs_dqx_app/backend/sql_executor.py @@ -10,23 +10,66 @@ import logging import time +from typing import Any from databricks.sdk import WorkspaceClient from databricks.sdk.service.sql import Disposition, Format, StatementState +from databricks_labs_dqx_app.backend.sql_utils import escape_sql_string + logger = logging.getLogger(__name__) _TERMINAL_STATES = {StatementState.SUCCEEDED, StatementState.FAILED, StatementState.CANCELED, StatementState.CLOSED} +# Sentinel value for raw SQL expressions in upsert() — anything wrapped with +# ``RawSql("current_timestamp()")`` is interpolated verbatim instead of being +# escaped as a string literal. 
+class RawSql: + """Marker that wraps a raw SQL expression so :meth:`SqlExecutor.upsert` does not escape it.""" + + __slots__ = ("expr",) + + def __init__(self, expr: str) -> None: + self.expr = expr + + +def _render_value(value: Any) -> str: + """Convert a Python value to a SQL literal or raw expression. + + Strings are wrapped in single quotes and ANSI-escaped (`'` → `''`). + ``None`` becomes ``NULL``. Booleans are rendered as ``TRUE``/``FALSE``. + Numerics pass through. :class:`RawSql` is returned verbatim — this is + how callers inject ``current_timestamp()``, ``parse_json('...')``, etc. + """ + if isinstance(value, RawSql): + return value.expr + if value is None: + return "NULL" + if isinstance(value, bool): + return "TRUE" if value else "FALSE" + if isinstance(value, (int, float)): + return repr(value) + return f"'{escape_sql_string(str(value))}'" + + class SqlExecutor: """Thin wrapper around the Databricks Statement Execution API. Provides ``execute``, ``query``, ``query_dicts``, and ``execute_no_schema`` covering all usage patterns in the DQX Studio backend. + + The class also exposes a tiny dialect surface (:attr:`dialect`, + :meth:`q`, :meth:`json_literal_expr`) so that services which can + target either Delta or Lakebase Postgres can share the same SQL + while staying portable. See :class:`PgExecutor` for the Postgres + implementation. """ + # Cheap identifier for routing decisions in shared service code. + dialect: str = "delta" + def __init__( self, ws: WorkspaceClient, @@ -51,6 +94,40 @@ def catalog(self) -> str: def schema(self) -> str: return self._schema + # ------------------------------------------------------------------ + # Dialect helpers — kept identical-named on every executor so callers + # can hand-write portable SQL without an "if dialect" branch. + # ------------------------------------------------------------------ + def q(self, identifier: str) -> str: + """Quote an identifier for this dialect. 
+ + Delta uses backticks (``` `check` ```); Postgres uses double + quotes (``"check"``). Always quote columns/tables that share + a name with a reserved word — ``check``, ``user``, ``order``… + """ + return f"`{identifier}`" + + def json_literal_expr(self, json_str: str) -> str: + """Return the SQL expression that turns *json_str* into a JSON value. + + Delta uses :func:`parse_json`; Postgres uses ``::jsonb``. The + returned expression is safe to inline into a larger statement + as it already includes the proper escaping. + """ + return f"parse_json('{escape_sql_string(json_str)}')" + + def ts_text(self, col: str) -> str: + """Project a timestamp column as an ISO-formatted string. + + Delta wraps the column in ``CAST(... AS STRING)`` because the + Statement Execution API otherwise returns timestamps as their + Spark string repr (which uses a space separator instead of + ``T``). On Postgres the column is selected verbatim and the + :class:`PgExecutor` row converter ISO-formats it on the way + out. + """ + return f"CAST({col} AS STRING)" + def execute(self, sql: str, *, timeout_seconds: int = 120) -> None: """Execute a SQL statement that does not return rows. @@ -187,6 +264,50 @@ def query_dicts(self, sql: str, *, timeout_seconds: int = 120) -> list[dict[str, ] return [dict(zip(columns, row)) for row in resp.result.data_array] + def upsert( + self, + table: str, + key_cols: dict[str, Any], + value_cols: dict[str, Any], + *, + timeout_seconds: int = 120, + ) -> None: + """Idempotently insert-or-update one row identified by *key_cols*. + + Centralises the ``MERGE INTO ... USING (SELECT)`` pattern that + otherwise gets duplicated across every service. *key_cols* are + the natural-key columns used in the ON clause; *value_cols* are + the non-key columns set on update / inserted on miss. + + Each value can be one of: + - ``str`` / ``int`` / ``float`` / ``bool`` / ``None`` — bound as + a SQL literal (strings are ANSI-escaped). 
+ - :class:`RawSql` — interpolated verbatim, e.g. + ``RawSql("current_timestamp()")``. + + The full row written on insert is the union of *key_cols* and + *value_cols*. Updates only touch *value_cols* (the keys are + immutable by definition of the merge predicate). + """ + if not key_cols: + raise ValueError("upsert requires at least one key column") + + on_clause = " AND ".join(f"target.{k} = source.{k}" for k in key_cols) + source_select = ", ".join(f"{_render_value(v)} AS {k}" for k, v in key_cols.items()) + update_set = ", ".join(f"{k} = {_render_value(v)}" for k, v in value_cols.items()) + all_cols = list(key_cols.keys()) + list(value_cols.keys()) + all_vals = [_render_value(v) for v in list(key_cols.values()) + list(value_cols.values())] + insert_cols = ", ".join(all_cols) + insert_vals = ", ".join(all_vals) + + sql = ( + f"MERGE INTO {table} AS target " + f"USING (SELECT {source_select}) AS source ON {on_clause} " + f"WHEN MATCHED THEN UPDATE SET {update_set} " + f"WHEN NOT MATCHED THEN INSERT ({insert_cols}) VALUES ({insert_vals})" + ) + self.execute(sql, timeout_seconds=timeout_seconds) + def _wait_for_completion(self, statement_id: str, timeout_seconds: int) -> StatementState: """Poll statement status until it reaches a terminal state.""" start = time.time() diff --git a/app/src/databricks_labs_dqx_app/ui/components/DryRunResults.tsx b/app/src/databricks_labs_dqx_app/ui/components/DryRunResults.tsx index 822edfd6f..fa41885b1 100644 --- a/app/src/databricks_labs_dqx_app/ui/components/DryRunResults.tsx +++ b/app/src/databricks_labs_dqx_app/ui/components/DryRunResults.tsx @@ -93,7 +93,20 @@ function downloadAsExcel(rows: Record[], filename: string): voi export function DryRunResults({ result }: DryRunResultsProps) { const totalRows = result.total_rows ?? 0; const validRows = result.valid_rows ?? 0; - const invalidRows = result.invalid_rows ?? 
0; + // ``invalid_rows`` (rows that failed any check) is kept on the model for + // backwards compatibility, but we surface DQX's authoritative observer + // counts instead: ``error_rows`` (= ``error_row_count``) and + // ``warning_rows`` (= ``warning_row_count``). Pre-v5 history rows have + // ``error_rows = null`` — fall back to ``invalid_rows`` so the card + // doesn't show ``0`` for runs created before the rename. + const errorRows = result.error_rows ?? result.invalid_rows ?? 0; + const warningRows = result.warning_rows; + const showWarnings = warningRows != null; + // The bottom data table still drives off the quarantine endpoint, which + // contains rows that failed any check (errors OR warnings). We use + // ``hasFailedRows`` so the table renders whenever there's anything to + // show — not just when there are errors. + const hasFailedRows = errorRows > 0 || (warningRows ?? 0) > 0; const errorSummary = result.error_summary ?? []; const sampleInvalid = result.sample_invalid ?? []; @@ -111,6 +124,7 @@ export function DryRunResults({ result }: DryRunResultsProps) { return { ...rest, errors: Array.isArray(_errors) ? _errors.map(formatError).join("; ") : String(_errors ?? ""), + warnings: Array.isArray(_warnings) ? _warnings.map(formatError).join("; ") : String(_warnings ?? ""), }; }); }, [sampleInvalid]); @@ -129,10 +143,10 @@ export function DryRunResults({ result }: DryRunResultsProps) { const { data: quarantineResp, isLoading: quarantineLoading } = useListQuarantineRecords(result.run_id, { offset, limit: pageSize }, { - query: { enabled: invalidRows > 0 }, + query: { enabled: hasFailedRows }, }); const { data: countResp } = useQuarantineCount(result.run_id, { - query: { enabled: invalidRows > 0 }, + query: { enabled: hasFailedRows }, }); const quarantineRecords: QuarantineRecordOut[] = quarantineResp?.data?.records ?? 
[]; @@ -147,11 +161,19 @@ export function DryRunResults({ result }: DryRunResultsProps) { return sampleInvalid.slice(offset, offset + pageSize); }, [hasQuarantine, sampleInvalid, offset, pageSize]); - const rows: Array<{ rowData: Record; errors: unknown[] }> = useMemo(() => { + const rows: Array<{ + rowData: Record; + errors: unknown[]; + warnings: unknown[]; + }> = useMemo(() => { if (hasQuarantine) { return displayRecords.map((r) => ({ rowData: r.row_data ?? {}, errors: r.errors ?? [], + // ``warnings`` is null for rows written before migration v4 and + // for SQL-check quarantines. Render an empty array so the column + // shows nothing rather than ``null``. + warnings: r.warnings ?? [], })); } return fallbackRows.map((r) => { @@ -159,10 +181,19 @@ export function DryRunResults({ result }: DryRunResultsProps) { return { rowData: rest, errors: Array.isArray(_errors) ? (_errors as unknown[]) : [], + warnings: Array.isArray(_warnings) ? (_warnings as unknown[]) : [], }; }); }, [hasQuarantine, displayRecords, fallbackRows]); + // Only render the Warnings column when at least one displayed row has a + // warning payload. Pre-v4 quarantine rows have ``null`` warnings — hiding + // the column keeps the table compact for runs that have only errors. + const hasAnyWarning = useMemo( + () => rows.some((r) => Array.isArray(r.warnings) && r.warnings.length > 0), + [rows], + ); + const dataColumns = useMemo(() => { const keys = new Set(); for (const r of rows) { @@ -175,8 +206,12 @@ export function DryRunResults({ result }: DryRunResultsProps) { return (
- {/* Summary stats */} -
+ {/* ``Errors`` and ``Warnings`` are independent buckets — a row can be in + both. Both come from DQX's observer (``error_row_count`` / + ``warning_row_count``) so they're bounded by the input row count + and don't suffer from the fan-out artefacts that ``invalid_rows`` + can have on certain check types. */} +
{totalRows}
Total Rows
@@ -189,12 +224,21 @@ export function DryRunResults({ result }: DryRunResultsProps) {
-
{invalidRows}
+
{errorRows}
- Invalid + Errors
+ {showWarnings && ( +
+
{warningRows}
+
+ + Warnings +
+
+ )}
{/* Pass rate bar */} @@ -213,12 +257,17 @@ export function DryRunResults({ result }: DryRunResultsProps) {
- {/* Error summary */} + {/* Check failure summary — each row is one approved check that + produced at least one error or warning. We split the total + into ``error_count`` / ``warning_count`` columns so a + warning-level check is visually distinct from an error-level + one (otherwise warning-only checks look identical to errors, + which is what users hit in practice). */} {errorSummary.length > 0 && (

- Error Summary + Failed checks ({errorSummary.length} distinct) @@ -227,23 +276,44 @@ export function DryRunResults({ result }: DryRunResultsProps) { - - + + + - {visibleErrorSummary.map((item, idx) => ( - - - - - ))} + {visibleErrorSummary.map((item, idx) => { + const errCount = Number(item.error_count ?? 0); + const warnCount = Number(item.warning_count ?? 0); + return ( + + + + + + ); + })}
ErrorCountCheckErrorsWarnings
- {summarizeErrorText(String(item.error ?? ""))} - - - {String(item.count ?? 0)} - -
+ {summarizeErrorText(String(item.error ?? ""))} + + {errCount > 0 ? ( + + {errCount} + + ) : ( + + )} + + {warnCount > 0 ? ( + + {warnCount} + + ) : ( + + )} +
{hiddenErrorCount > 0 && ( @@ -258,13 +328,15 @@ export function DryRunResults({ result }: DryRunResultsProps) {

)} - {/* Invalid rows data table */} - {invalidRows > 0 && ( + {/* Failed rows data table — includes both error rows and warning rows + since DQX's split puts anything that failed a check into the same + quarantine bucket. */} + {hasFailedRows && (

- - Invalid Rows + + Failed rows ({hasQuarantine ? `${quarantineTotal} quarantined` : `${sampleInvalid.length} samples`}) @@ -345,6 +417,9 @@ export function DryRunResults({ result }: DryRunResultsProps) { ))} Errors + {hasAnyWarning && ( + Warnings + )} @@ -365,6 +440,21 @@ export function DryRunResults({ result }: DryRunResultsProps) { ))}

+ {hasAnyWarning && ( + +
+ {(row.warnings ?? []).map((w, wi) => ( + + {formatError(w)} + + ))} +
+ + )} ))} diff --git a/app/src/databricks_labs_dqx_app/ui/lib/api-custom.ts b/app/src/databricks_labs_dqx_app/ui/lib/api-custom.ts index a5779b944..0130fe9d8 100644 --- a/app/src/databricks_labs_dqx_app/ui/lib/api-custom.ts +++ b/app/src/databricks_labs_dqx_app/ui/lib/api-custom.ts @@ -174,6 +174,8 @@ export interface ValidationRunSummaryOut { total_rows: number | null; valid_rows: number | null; invalid_rows: number | null; + error_rows: number | null; + warning_rows: number | null; created_at: string | null; error_message: string | null; checks: Record[]; @@ -567,6 +569,7 @@ export interface QuarantineRecordOut { requesting_user: string | null; row_data: Record | null; errors: unknown[] | null; + warnings: unknown[] | null; created_at: string | null; } @@ -891,3 +894,83 @@ export const useSaveLabelDefinitions = < ...mutationOptions, }); }; + +// --------------------------------------------------------------------------- +// Retention settings — global vs. quarantine-specific DELETE windows +// surfaced for the admin Configuration page. Mirrors +// ``backend/routes/v1/config.py``. 
+// --------------------------------------------------------------------------- + +export interface RetentionSettingsOut { + retention_days: number; + quarantine_retention_days: number; + retention_days_default: number; + quarantine_retention_days_default: number; + retention_days_min: number; + retention_days_max: number; + retention_days_set: boolean; + quarantine_retention_days_set: boolean; +} + +export interface RetentionSettingsIn { + retention_days?: number | null; + quarantine_retention_days?: number | null; +} + +export const getRetentionSettings = ( + options?: AxiosRequestConfig, +): Promise> => + axios.default.get("/api/v1/config/retention", options); + +export const getRetentionSettingsQueryKey = () => ["retention-settings"] as const; + +export const useRetentionSettings = < + TData = Awaited>["data"], + TError = AxiosError, +>( + options?: { + query?: Partial>, TError, TData>>; + axios?: AxiosRequestConfig; + }, +): UseQueryResult => { + const { query: queryOptions, axios: axiosOptions } = options ?? {}; + return useQuery({ + queryKey: queryOptions?.queryKey ?? getRetentionSettingsQueryKey(), + queryFn: () => getRetentionSettings(axiosOptions), + select: ((resp: Awaited>) => resp.data) as never, + staleTime: 5 * 60 * 1000, + ...queryOptions, + }) as UseQueryResult; +}; + +export const saveRetentionSettings = ( + body: RetentionSettingsIn, + options?: AxiosRequestConfig, +): Promise> => + axios.default.put("/api/v1/config/retention", body, options); + +export const useSaveRetentionSettings = < + TError = AxiosError, + TContext = unknown, +>( + options?: { + mutation?: UseMutationOptions< + Awaited>, + TError, + { data: RetentionSettingsIn }, + TContext + >; + axios?: AxiosRequestConfig; + }, +): UseMutationResult< + Awaited>, + TError, + { data: RetentionSettingsIn }, + TContext +> => { + const { mutation: mutationOptions, axios: axiosOptions } = options ?? 
{}; + return useMutation({ + mutationFn: ({ data }: { data: RetentionSettingsIn }) => saveRetentionSettings(data, axiosOptions), + ...mutationOptions, + }); +}; diff --git a/app/src/databricks_labs_dqx_app/ui/lib/api.ts b/app/src/databricks_labs_dqx_app/ui/lib/api.ts index a4eafc60a..ea29b040f 100644 --- a/app/src/databricks_labs_dqx_app/ui/lib/api.ts +++ b/app/src/databricks_labs_dqx_app/ui/lib/api.ts @@ -309,6 +309,10 @@ export type DryRunResultsOutValidRows = number | null; export type DryRunResultsOutInvalidRows = number | null; +export type DryRunResultsOutErrorRows = number | null; + +export type DryRunResultsOutWarningRows = number | null; + export type DryRunResultsOutErrorSummaryItem = { [key: string]: unknown }; export type DryRunResultsOutSampleInvalidItem = { [key: string]: unknown }; @@ -319,6 +323,8 @@ export interface DryRunResultsOut { total_rows?: DryRunResultsOutTotalRows; valid_rows?: DryRunResultsOutValidRows; invalid_rows?: DryRunResultsOutInvalidRows; + error_rows?: DryRunResultsOutErrorRows; + warning_rows?: DryRunResultsOutWarningRows; error_summary?: DryRunResultsOutErrorSummaryItem[]; sample_invalid?: DryRunResultsOutSampleInvalidItem[]; } @@ -689,6 +695,8 @@ export type QuarantineRecordOutRowData = QuarantineRecordOutRowDataAnyOf | null; export type QuarantineRecordOutErrors = unknown[] | null; +export type QuarantineRecordOutWarnings = unknown[] | null; + export type QuarantineRecordOutCreatedAt = string | null; export interface QuarantineRecordOut { @@ -698,9 +706,42 @@ export interface QuarantineRecordOut { requesting_user?: QuarantineRecordOutRequestingUser; row_data?: QuarantineRecordOutRowData; errors?: QuarantineRecordOutErrors; + warnings?: QuarantineRecordOutWarnings; created_at?: QuarantineRecordOutCreatedAt; } +export type RetentionSettingsInRetentionDays = number | null; + +export type RetentionSettingsInQuarantineRetentionDays = number | null; + +/** + * Update payload — either field omitted means *leave unchanged*. 
+ */ +export interface RetentionSettingsIn { + retention_days?: RetentionSettingsInRetentionDays; + quarantine_retention_days?: RetentionSettingsInQuarantineRetentionDays; +} + +/** + * Effective retention settings + the defaults the scheduler falls back to. + +``retention_days`` / ``quarantine_retention_days`` reflect the +*current effective values* — the persisted setting if one exists, +otherwise the compiled-in default. The ``*_default`` and ``*_min`` +fields let the UI render hints and validation without duplicating +the constants on the frontend. + */ +export interface RetentionSettingsOut { + retention_days: number; + quarantine_retention_days: number; + retention_days_default?: number; + quarantine_retention_days_default?: number; + retention_days_min?: number; + retention_days_max?: number; + retention_days_set: boolean; + quarantine_retention_days_set: boolean; +} + export type RoleMappingOutCreatedBy = string | null; export type RoleMappingOutCreatedAt = string | null; @@ -1026,6 +1067,10 @@ export type ValidationRunSummaryOutValidRows = number | null; export type ValidationRunSummaryOutInvalidRows = number | null; +export type ValidationRunSummaryOutErrorRows = number | null; + +export type ValidationRunSummaryOutWarningRows = number | null; + export type ValidationRunSummaryOutCreatedAt = string | null; export type ValidationRunSummaryOutErrorMessage = string | null; @@ -1044,6 +1089,8 @@ export interface ValidationRunSummaryOut { run_type?: ValidationRunSummaryOutRunType; valid_rows?: ValidationRunSummaryOutValidRows; invalid_rows?: ValidationRunSummaryOutInvalidRows; + error_rows?: ValidationRunSummaryOutErrorRows; + warning_rows?: ValidationRunSummaryOutWarningRows; created_at?: ValidationRunSummaryOutCreatedAt; error_message?: ValidationRunSummaryOutErrorMessage; checks?: ValidationRunSummaryOutChecksItem[]; @@ -3173,6 +3220,373 @@ export const useSaveTimezone = < return useMutation(mutationOptions, queryClient); }; +/** + * Return the current 
retention windows + defaults (admin only). + * @summary Get Retention Settings + */ +export const getRetentionSettings = ( + options?: AxiosRequestConfig, +): Promise> => { + return axios.default.get(`/api/v1/config/retention`, options); +}; + +export const getGetRetentionSettingsQueryKey = () => { + return [`/api/v1/config/retention`] as const; +}; + +export const getGetRetentionSettingsQueryOptions = < + TData = Awaited>, + TError = AxiosError, +>(options?: { + query?: Partial< + UseQueryOptions< + Awaited>, + TError, + TData + > + >; + axios?: AxiosRequestConfig; +}) => { + const { query: queryOptions, axios: axiosOptions } = options ?? {}; + + const queryKey = queryOptions?.queryKey ?? getGetRetentionSettingsQueryKey(); + + const queryFn: QueryFunction< + Awaited> + > = ({ signal }) => getRetentionSettings({ signal, ...axiosOptions }); + + return { queryKey, queryFn, ...queryOptions } as UseQueryOptions< + Awaited>, + TError, + TData + > & { queryKey: DataTag }; +}; + +export type GetRetentionSettingsQueryResult = NonNullable< + Awaited> +>; +export type GetRetentionSettingsQueryError = AxiosError; + +export function useGetRetentionSettings< + TData = Awaited>, + TError = AxiosError, +>( + options: { + query: Partial< + UseQueryOptions< + Awaited>, + TError, + TData + > + > & + Pick< + DefinedInitialDataOptions< + Awaited>, + TError, + Awaited> + >, + "initialData" + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): DefinedUseQueryResult & { + queryKey: DataTag; +}; +export function useGetRetentionSettings< + TData = Awaited>, + TError = AxiosError, +>( + options?: { + query?: Partial< + UseQueryOptions< + Awaited>, + TError, + TData + > + > & + Pick< + UndefinedInitialDataOptions< + Awaited>, + TError, + Awaited> + >, + "initialData" + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): UseQueryResult & { + queryKey: DataTag; +}; +export function useGetRetentionSettings< + TData = Awaited>, + TError = AxiosError, +>( + 
options?: { + query?: Partial< + UseQueryOptions< + Awaited>, + TError, + TData + > + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): UseQueryResult & { + queryKey: DataTag; +}; +/** + * @summary Get Retention Settings + */ + +export function useGetRetentionSettings< + TData = Awaited>, + TError = AxiosError, +>( + options?: { + query?: Partial< + UseQueryOptions< + Awaited>, + TError, + TData + > + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): UseQueryResult & { + queryKey: DataTag; +} { + const queryOptions = getGetRetentionSettingsQueryOptions(options); + + const query = useQuery(queryOptions, queryClient) as UseQueryResult< + TData, + TError + > & { queryKey: DataTag }; + + query.queryKey = queryOptions.queryKey; + + return query; +} + +export const getGetRetentionSettingsSuspenseQueryOptions = < + TData = Awaited>, + TError = AxiosError, +>(options?: { + query?: Partial< + UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > + >; + axios?: AxiosRequestConfig; +}) => { + const { query: queryOptions, axios: axiosOptions } = options ?? {}; + + const queryKey = queryOptions?.queryKey ?? 
getGetRetentionSettingsQueryKey(); + + const queryFn: QueryFunction< + Awaited> + > = ({ signal }) => getRetentionSettings({ signal, ...axiosOptions }); + + return { queryKey, queryFn, ...queryOptions } as UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > & { queryKey: DataTag }; +}; + +export type GetRetentionSettingsSuspenseQueryResult = NonNullable< + Awaited> +>; +export type GetRetentionSettingsSuspenseQueryError = + AxiosError; + +export function useGetRetentionSettingsSuspense< + TData = Awaited>, + TError = AxiosError, +>( + options: { + query: Partial< + UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): UseSuspenseQueryResult & { + queryKey: DataTag; +}; +export function useGetRetentionSettingsSuspense< + TData = Awaited>, + TError = AxiosError, +>( + options?: { + query?: Partial< + UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): UseSuspenseQueryResult & { + queryKey: DataTag; +}; +export function useGetRetentionSettingsSuspense< + TData = Awaited>, + TError = AxiosError, +>( + options?: { + query?: Partial< + UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): UseSuspenseQueryResult & { + queryKey: DataTag; +}; +/** + * @summary Get Retention Settings + */ + +export function useGetRetentionSettingsSuspense< + TData = Awaited>, + TError = AxiosError, +>( + options?: { + query?: Partial< + UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): UseSuspenseQueryResult & { + queryKey: DataTag; +} { + const queryOptions = getGetRetentionSettingsSuspenseQueryOptions(options); + + const query = useSuspenseQuery( + queryOptions, + queryClient, + ) as UseSuspenseQueryResult & { + queryKey: DataTag; + }; + + query.queryKey = 
queryOptions.queryKey; + + return query; +} + +/** + * Update one or both retention windows (admin only). + +Either field may be omitted to leave the existing value unchanged. +Both values are validated against the safety floor and ceiling +before being persisted. + * @summary Save Retention Settings + */ +export const saveRetentionSettings = ( + retentionSettingsIn: RetentionSettingsIn, + options?: AxiosRequestConfig, +): Promise> => { + return axios.default.put( + `/api/v1/config/retention`, + retentionSettingsIn, + options, + ); +}; + +export const getSaveRetentionSettingsMutationOptions = < + TError = AxiosError, + TContext = unknown, +>(options?: { + mutation?: UseMutationOptions< + Awaited>, + TError, + { data: RetentionSettingsIn }, + TContext + >; + axios?: AxiosRequestConfig; +}): UseMutationOptions< + Awaited>, + TError, + { data: RetentionSettingsIn }, + TContext +> => { + const mutationKey = ["saveRetentionSettings"]; + const { mutation: mutationOptions, axios: axiosOptions } = options + ? options.mutation && + "mutationKey" in options.mutation && + options.mutation.mutationKey + ? options + : { ...options, mutation: { ...options.mutation, mutationKey } } + : { mutation: { mutationKey }, axios: undefined }; + + const mutationFn: MutationFunction< + Awaited>, + { data: RetentionSettingsIn } + > = (props) => { + const { data } = props ?? 
{}; + + return saveRetentionSettings(data, axiosOptions); + }; + + return { mutationFn, ...mutationOptions }; +}; + +export type SaveRetentionSettingsMutationResult = NonNullable< + Awaited> +>; +export type SaveRetentionSettingsMutationBody = RetentionSettingsIn; +export type SaveRetentionSettingsMutationError = + AxiosError; + +/** + * @summary Save Retention Settings + */ +export const useSaveRetentionSettings = < + TError = AxiosError, + TContext = unknown, +>( + options?: { + mutation?: UseMutationOptions< + Awaited>, + TError, + { data: RetentionSettingsIn }, + TContext + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): UseMutationResult< + Awaited>, + TError, + { data: RetentionSettingsIn }, + TContext +> => { + const mutationOptions = getSaveRetentionSettingsMutationOptions(options); + + return useMutation(mutationOptions, queryClient); +}; + /** * Return all admin-defined label definitions. diff --git a/app/src/databricks_labs_dqx_app/ui/routes/_sidebar/config.tsx b/app/src/databricks_labs_dqx_app/ui/routes/_sidebar/config.tsx index ec53be77c..997467709 100644 --- a/app/src/databricks_labs_dqx_app/ui/routes/_sidebar/config.tsx +++ b/app/src/databricks_labs_dqx_app/ui/routes/_sidebar/config.tsx @@ -3,7 +3,7 @@ import { QueryErrorResetBoundary, useQueryClient } from "@tanstack/react-query"; import { ErrorBoundary } from "react-error-boundary"; import { Button } from "@/components/ui/button"; import { PageBreadcrumb } from "@/components/apx/PageBreadcrumb"; -import { AlertCircle, Globe, Loader2, Search, Tags, Plus, Trash2, X } from "lucide-react"; +import { AlertCircle, Clock, Globe, Loader2, Search, Tags, Plus, Trash2, X } from "lucide-react"; import { FadeIn } from "@/components/anim/FadeIn"; import { ShinyText } from "@/components/anim/ShinyText"; import { RoleManagement } from "@/components/RoleManagement"; @@ -25,7 +25,11 @@ import { useLabelDefinitions, useSaveLabelDefinitions, getLabelDefinitionsQueryKey, + useRetentionSettings, + 
useSaveRetentionSettings, + getRetentionSettingsQueryKey, type LabelDefinition, + type RetentionSettingsOut, } from "@/lib/api-custom"; import type { AxiosError } from "axios"; import { toast } from "sonner"; @@ -590,6 +594,217 @@ function DefinitionEditorCard({ ); } +// ───────────────────────────────────────────────────────────────────────────── +// Retention Settings — admin-controlled DELETE windows for the daily sweep. +// Two knobs: a global retention applied to dq_validation_runs, dq_metrics, +// dq_profiling_results + the OLTP history tables; and a tighter +// quarantine-specific retention applied only to dq_quarantine_records (which +// holds full source row payloads + errors/warnings). The split exists so PII +// can age out faster than trend tables that the dashboards look back on. +// ───────────────────────────────────────────────────────────────────────────── + +function RetentionSettings() { + const { data, isLoading } = useRetentionSettings(); + const queryClient = useQueryClient(); + const saveMutation = useSaveRetentionSettings(); + const { data: role } = useCurrentUserRoleSuspense(); + const isAdmin = role?.data?.role === "admin"; + + const settings = data as RetentionSettingsOut | undefined; + const [global, setGlobal] = useState(""); + const [quarantine, setQuarantine] = useState(""); + const [hydrated, setHydrated] = useState(false); + + useEffect(() => { + if (settings && !hydrated) { + setGlobal(String(settings.retention_days)); + setQuarantine(String(settings.quarantine_retention_days)); + setHydrated(true); + } + }, [settings, hydrated]); + + const min = settings?.retention_days_min ?? 7; + const max = settings?.retention_days_max ?? 
3650; + + const parsedGlobal = Number.parseInt(global, 10); + const parsedQuarantine = Number.parseInt(quarantine, 10); + + const validation = useMemo(() => { + const errors: string[] = []; + const check = (label: string, value: number) => { + if (Number.isNaN(value)) { + errors.push(`${label} must be a whole number of days.`); + return; + } + if (value < min) errors.push(`${label} must be at least ${min} days.`); + if (value > max) errors.push(`${label} must be at most ${max} days.`); + }; + check("Global retention", parsedGlobal); + check("Quarantine retention", parsedQuarantine); + return errors; + }, [parsedGlobal, parsedQuarantine, min, max]); + + const isDirty = useMemo(() => { + if (!settings) return false; + return ( + parsedGlobal !== settings.retention_days || + parsedQuarantine !== settings.quarantine_retention_days + ); + }, [settings, parsedGlobal, parsedQuarantine]); + + const handleSave = () => { + if (!settings || validation.length > 0) return; + const payload: { retention_days?: number; quarantine_retention_days?: number } = {}; + if (parsedGlobal !== settings.retention_days) payload.retention_days = parsedGlobal; + if (parsedQuarantine !== settings.quarantine_retention_days) { + payload.quarantine_retention_days = parsedQuarantine; + } + saveMutation.mutate( + { data: payload }, + { + onSuccess: (resp) => { + queryClient.invalidateQueries({ queryKey: getRetentionSettingsQueryKey() }); + setGlobal(String(resp.data.retention_days)); + setQuarantine(String(resp.data.quarantine_retention_days)); + toast.success("Retention settings saved."); + }, + onError: (err: unknown) => { + const axErr = err as AxiosError<{ detail?: string }>; + toast.error(axErr?.response?.data?.detail ?? 
"Failed to save retention settings."); + }, + }, + ); + }; + + const handleReset = () => { + if (!settings) return; + setGlobal(String(settings.retention_days)); + setQuarantine(String(settings.quarantine_retention_days)); + }; + + const resetToDefaults = () => { + if (!settings) return; + setGlobal(String(settings.retention_days_default)); + setQuarantine(String(settings.quarantine_retention_days_default)); + }; + + if (isLoading || !settings) return ; + + return ( + + + + + Data Retention + + + +

+ The scheduler runs a daily DELETE pass against the analytical tables. + Quarantine holds the full source + row payload (errors, warnings, and the row itself) so its window is kept + tighter than the trend tables by default. Both values are floored at{" "} + {min} days to protect against accidental data loss. +

+ +
+
+ + setGlobal(e.target.value)} + className="h-8" + /> +

+ Applies to dq_validation_runs, dq_profiling_results,{" "} + dq_metrics, and the OLTP history tables. +
+ Default: {settings.retention_days_default} days + {!settings.retention_days_set && " (not yet customised)"} +

+
+ +
+ + setQuarantine(e.target.value)} + className="h-8" + /> +

+ Applies only to dq_quarantine_records (the table that + stores per-row failures, including the source row payload). +
+ Default: {settings.quarantine_retention_days_default} days + {!settings.quarantine_retention_days_set && " (not yet customised)"} +

+
+
+ + {validation.length > 0 && ( +
+ {validation.map((msg, i) => ( +

+ + {msg} +

+ ))} +
+ )} + +
+ + + + {!isAdmin && ( + + Only admins can change retention. + + )} +
+
+
+ ); +} + function ConfigPage() { const { isAdmin } = usePermissions(); const navigate = useNavigate(); @@ -636,6 +851,13 @@ function ConfigPage() { + + }> + + + + + diff --git a/app/src/databricks_labs_dqx_app/ui/routes/_sidebar/runs-history.tsx b/app/src/databricks_labs_dqx_app/ui/routes/_sidebar/runs-history.tsx index 21fce4727..722a05dbc 100644 --- a/app/src/databricks_labs_dqx_app/ui/routes/_sidebar/runs-history.tsx +++ b/app/src/databricks_labs_dqx_app/ui/routes/_sidebar/runs-history.tsx @@ -165,7 +165,7 @@ function RunsHistoryPage() { ); } -type RunsSortKey = "table" | "type" | "status" | "requested_by" | "total" | "valid" | "invalid" | "run_date"; +type RunsSortKey = "table" | "type" | "status" | "requested_by" | "total" | "valid" | "errors" | "warnings" | "run_date"; function RunHistoryContent() { const { data: currentUser } = useCurrentUserSuspense(selector()); @@ -178,7 +178,11 @@ function RunHistoryContent() { const [tableSearch, setTableSearch] = useState(""); const [statusFilter, setStatusFilter] = useState("all"); const [runTypeFilter, setRunTypeFilter] = useState("all"); - const [invalidOnly, setInvalidOnly] = useState(false); + // ``failedOnly`` keeps a row if it has either errors or warnings — the + // user-facing "Has failures" toggle that replaced the old "Has invalid" + // filter. We still tolerate ``error_rows`` being ``null`` on pre-v5 rows + // by falling back to ``invalid_rows``. + const [failedOnly, setFailedOnly] = useState(false); const [myRunsOnly, setMyRunsOnly] = useState(false); const [labelFilter, setLabelFilter] = useState>(new Set()); const [expandedRunId, setExpandedRunId] = useState(null); @@ -285,7 +289,12 @@ function RunHistoryContent() { } if (statusFilter !== "all" && run.status !== statusFilter) return false; if (runTypeFilter !== "all" && (run.run_type ?? 
"dryrun") !== runTypeFilter) return false; - if (invalidOnly && !(run.invalid_rows != null && run.invalid_rows > 0)) return false; + if (failedOnly) { + const errors = run.error_rows ?? run.invalid_rows; + const warnings = run.warning_rows; + const hasFailures = (errors != null && errors > 0) || (warnings != null && warnings > 0); + if (!hasFailures) return false; + } if (myRunsOnly && currentUserEmail && run.requesting_user !== currentUserEmail) return false; if (labelFilter.size > 0) { // Match the label filter against any check captured on the run; an @@ -321,8 +330,11 @@ function RunHistoryContent() { case "valid": cmp = (a.valid_rows ?? 0) - (b.valid_rows ?? 0); break; - case "invalid": - cmp = (a.invalid_rows ?? 0) - (b.invalid_rows ?? 0); + case "errors": + cmp = (a.error_rows ?? a.invalid_rows ?? 0) - (b.error_rows ?? b.invalid_rows ?? 0); + break; + case "warnings": + cmp = (a.warning_rows ?? 0) - (b.warning_rows ?? 0); break; case "run_date": cmp = (a.created_at ?? "").localeCompare(b.created_at ?? ""); @@ -330,7 +342,7 @@ function RunHistoryContent() { } return cmp * dir; }); - }, [allRuns, catalogFilter, schemaFilter, tableFilter, tableSearch, statusFilter, runTypeFilter, invalidOnly, myRunsOnly, currentUserEmail, rSortKey, rSortDir, labelFilter]); + }, [allRuns, catalogFilter, schemaFilter, tableFilter, tableSearch, statusFilter, runTypeFilter, failedOnly, myRunsOnly, currentUserEmail, rSortKey, rSortDir, labelFilter]); // Distinct labels seen across all runs' checks. Drives the LabelFilter // dropdown content for this page. 
@@ -368,7 +380,7 @@ function RunHistoryContent() { setTableFilter("all"); }; - const hasActiveFilters = catalogFilter !== "all" || schemaFilter !== "all" || tableFilter !== "all" || tableSearch !== "" || statusFilter !== "all" || runTypeFilter !== "all" || invalidOnly || myRunsOnly || labelFilter.size > 0; + const hasActiveFilters = catalogFilter !== "all" || schemaFilter !== "all" || tableFilter !== "all" || tableSearch !== "" || statusFilter !== "all" || runTypeFilter !== "all" || failedOnly || myRunsOnly || labelFilter.size > 0; const PAGE_SIZE = 25; const [currentPage, setCurrentPage] = useState(1); @@ -378,7 +390,7 @@ function RunHistoryContent() { [runs, currentPage], ); - useEffect(() => { setCurrentPage(1); }, [catalogFilter, schemaFilter, tableFilter, tableSearch, statusFilter, runTypeFilter, invalidOnly, myRunsOnly, rSortKey, rSortDir]); + useEffect(() => { setCurrentPage(1); }, [catalogFilter, schemaFilter, tableFilter, tableSearch, statusFilter, runTypeFilter, failedOnly, myRunsOnly, rSortKey, rSortDir]); return (
@@ -474,13 +486,14 @@ function RunHistoryContent() {
+ +
e.stopPropagation()} /> - {rule.display_name || rule.table_fqn} + +
+ {rule.display_name || rule.table_fqn} + +
+ {rule.checks.length}
@@ -1618,17 +1743,23 @@ function RunEditorError({ resetErrorBoundary }: { resetErrorBoundary: () => void // =========================================================================== function scopeLabel(cfg: ScheduleConfig): string { - switch (cfg.scope_mode) { - case "all": return "All approved rules"; - case "catalog": return (cfg.scope_catalogs ?? []).join(", ") || "No catalogs"; - case "schema": return (cfg.scope_schemas ?? []).join(", ") || "No schemas"; - case "tables": { - const tables = cfg.scope_tables ?? []; - if (tables.length <= 2) return tables.join(", ") || "No tables"; - return `${tables[0]}, ${tables[1]} +${tables.length - 2} more`; + const base = (() => { + switch (cfg.scope_mode) { + case "all": return "All approved rules"; + case "catalog": return (cfg.scope_catalogs ?? []).join(", ") || "No catalogs"; + case "schema": return (cfg.scope_schemas ?? []).join(", ") || "No schemas"; + case "tables": { + const tables = cfg.scope_tables ?? []; + if (tables.length <= 2) return tables.join(", ") || "No tables"; + return `${tables[0]}, ${tables[1]} +${tables.length - 2} more`; + } + default: return "All"; } - default: return "All"; - } + })(); + const labels = cfg.scope_labels ?? []; + if (labels.length === 0) return base; + const labelStr = labels.map(({ key, value }) => formatLabel(key, value)).join(", "); + return `${base} · labels: ${labelStr}`; } function SchedulesListView({ isDeleting }: { isDeleting?: boolean }) { @@ -1739,7 +1870,7 @@ function SchedulesListView({ isDeleting }: { isDeleting?: boolean }) { - {sched.config.sample_size != null ? sched.config.sample_size.toLocaleString() : "All rows"} + {sched.config.sample_size ? sched.config.sample_size.toLocaleString() : "All rows"} {sched.updated_at ? formatDate(sched.updated_at) : sched.created_at ? 
formatDate(sched.created_at) : "—"} @@ -2177,6 +2308,26 @@ function FormEditor({ if (parsed.scope_catalogs) schedFromYaml.scope_catalogs = parsed.scope_catalogs; if (parsed.scope_schemas) schedFromYaml.scope_schemas = parsed.scope_schemas; if (parsed.scope_tables) schedFromYaml.scope_tables = parsed.scope_tables; + if (Array.isArray(parsed.scope_labels)) { + // Tolerate both the canonical ``[{key, value}]`` form and a + // ``["key=value"]`` shorthand that admins might hand-edit. + schedFromYaml.scope_labels = parsed.scope_labels + .map((entry: unknown) => { + if (entry && typeof entry === "object" && !Array.isArray(entry)) { + const e = entry as Record<string, unknown>; + if (typeof e.key === "string") { + return { key: e.key, value: String(e.value ?? "") }; + } + } + if (typeof entry === "string") { + const idx = entry.indexOf("="); + if (idx < 0) return { key: entry, value: "" }; + return { key: entry.slice(0, idx), value: entry.slice(idx + 1) }; + } + return null; + }) + .filter((v: unknown): v is { key: string; value: string } => v !== null); + } if (parsed.sample_size != null) schedFromYaml.sample_size = parsed.sample_size; if (parsed.cron_expression) schedFromYaml.cron_expression = parsed.cron_expression; setScheduleConfig({ ...DEFAULT_SCHEDULE, ...schedFromYaml }); diff --git a/app/tasks/src/dqx_task_runner/runner.py b/app/tasks/src/dqx_task_runner/runner.py index 984db577f..01c7a4785 100644 --- a/app/tasks/src/dqx_task_runner/runner.py +++ b/app/tasks/src/dqx_task_runner/runner.py @@ -18,7 +18,7 @@ import sys import time import re -from datetime import datetime, timezone, date +from datetime import date, datetime from decimal import Decimal from typing import Any, cast @@ -267,7 +267,12 @@ def _run_profile( # Write result row. Profiling has no checks yet, but we still record a # null fingerprint slot so later pipeline stages that join on # rule_set_fingerprint don't have to special-case profile rows. 
- now = datetime.now(timezone.utc).isoformat() + # + # ``created_at`` is TIMESTAMP per the baseline schema — we materialise + # via ``current_timestamp()`` instead of an ISO string so cluster-key + # zone maps stay tight and downstream filters behave correctly. + from pyspark.sql import functions as F + result_row = spark.createDataFrame( [ ( @@ -283,7 +288,6 @@ def _run_profile( _json_dumps(rules) if rules else "[]", "SUCCESS", None, - now, None, ) ], @@ -291,10 +295,10 @@ def _run_profile( "run_id STRING, requesting_user STRING, source_table_fqn STRING, " "view_fqn STRING, sample_limit INT, rows_profiled INT, columns_profiled INT, " "duration_seconds DOUBLE, summary_json STRING, generated_rules_json STRING, " - "status STRING, error_message STRING, created_at STRING, " + "status STRING, error_message STRING, " "rule_set_fingerprint STRING" ), - ) + ).withColumn("created_at", F.current_timestamp()) result_row.writeTo(result_table).append() logger.info("Profile results written to %s (run_id=%s)", result_table, run_id) @@ -353,7 +357,11 @@ def _run_dryrun( ) engine = DQEngine(workspace_client=ws, spark=spark, observer=observer) - df = _read_view_with_retry(spark, view_fqn).limit(sample_size) + # ``sample_size = 0`` is the UI convention for "All rows" — skip + # ``.limit`` for it (passing 0 would short-circuit to an empty DataFrame). + df = _read_view_with_retry(spark, view_fqn) + if sample_size: + df = df.limit(sample_size) # ``apply_checks_by_metadata_and_split`` returns ``(valid, invalid)`` # when no observer is attached and ``(valid, invalid, observation)`` # when one is. We always construct the engine with ``observer=...`` @@ -368,21 +376,29 @@ def _run_dryrun( # observation. ``invalid_df.count()`` triggers metric collection. 
invalid_rows = invalid_df.count() - sample_invalid: list[dict[str, Any]] = [] - if invalid_rows > 0: - sample_rows = invalid_df.limit(10).collect() - sample_invalid = [row.asDict(recursive=True) for row in sample_rows] - - observed = observation.get if observation is not None else {} + # CRITICAL: snapshot the observation IMMEDIATELY after the first + # full-scan action and BEFORE any limit/partial action. On Spark + # Connect (Databricks), ``Observation.get`` returns a reference to a + # live dict that gets mutated by every subsequent action — including + # ``invalid_df.limit(10).collect()`` below, whose limit pushdown + # would otherwise overwrite ``input_row_count`` with ~10. We also + # ``dict(...)`` the result to detach our copy from the live dict so + # the later quarantine write (which re-fires the observer) cannot + # change what we pass to ``_persist_observed_metrics``. + observed: dict[str, Any] = dict(observation.get) if observation is not None else {} total_rows = int(observed.get("input_row_count", 0) or 0) valid_rows = int(observed.get("valid_row_count", 0) or 0) error_rows = int(observed.get("error_row_count", 0) or 0) warning_rows = int(observed.get("warning_row_count", 0) or 0) - - # Backwards-compatible error_summary derived from check_metrics. 
error_summary = _check_metrics_to_error_summary(observed.get("check_metrics")) - now = datetime.now(timezone.utc).isoformat() + sample_invalid: list[dict[str, Any]] = [] + if invalid_rows > 0: + sample_rows = invalid_df.limit(10).collect() + sample_invalid = [row.asDict(recursive=True) for row in sample_rows] + + from pyspark.sql import functions as F + result_row = spark.createDataFrame( [ ( @@ -395,11 +411,12 @@ def _run_dryrun( total_rows, valid_rows, invalid_rows, + error_rows, + warning_rows, _json_dumps(error_summary), _json_dumps(sample_invalid), "SUCCESS", None, - now, run_type, fingerprint, ) @@ -407,12 +424,12 @@ def _run_dryrun( schema=( "run_id STRING, requesting_user STRING, source_table_fqn STRING, " "view_fqn STRING, checks_json STRING, sample_size INT, " - "total_rows INT, valid_rows INT, invalid_rows INT, " + "total_rows INT, valid_rows INT, invalid_rows INT, error_rows INT, warning_rows INT, " "error_summary_json STRING, sample_invalid_json STRING, " - "status STRING, error_message STRING, created_at STRING, " + "status STRING, error_message STRING, " "run_type STRING, rule_set_fingerprint STRING" ), - ) + ).withColumn("created_at", F.current_timestamp()) result_row.writeTo(result_table).append() logger.info( "Dry-run results written to %s (run_id=%s, run_type=%s, errors=%d, warnings=%d)", @@ -485,9 +502,12 @@ def _run_dryrun_sql_check( sample_rows = violation_df.limit(10).collect() sample_invalid = [row.asDict(recursive=True) for row in sample_rows] - now = datetime.now(timezone.utc).isoformat() + from pyspark.sql import functions as F + if run_type == "dryrun" and sample_size == 0: run_type = "scheduled" + # SQL checks treat every violation as an error (no warning concept), so + # ``error_rows == invalid_rows`` and ``warning_rows == 0``. 
result_row = spark.createDataFrame( [ ( @@ -500,11 +520,12 @@ def _run_dryrun_sql_check( total_rows, valid_rows, invalid_rows, + invalid_rows, + 0, _json_dumps(error_summary), _json_dumps(sample_invalid), "SUCCESS", None, - now, run_type, fingerprint, ) @@ -512,12 +533,12 @@ def _run_dryrun_sql_check( schema=( "run_id STRING, requesting_user STRING, source_table_fqn STRING, " "view_fqn STRING, checks_json STRING, sample_size INT, " - "total_rows INT, valid_rows INT, invalid_rows INT, " + "total_rows INT, valid_rows INT, invalid_rows INT, error_rows INT, warning_rows INT, " "error_summary_json STRING, sample_invalid_json STRING, " - "status STRING, error_message STRING, created_at STRING, " + "status STRING, error_message STRING, " "run_type STRING, rule_set_fingerprint STRING" ), - ) + ).withColumn("created_at", F.current_timestamp()) result_row.writeTo(result_table).append() logger.info( "SQL-check %s results written to %s (run_id=%s, violations=%d)", run_type, result_table, run_id, invalid_rows @@ -582,7 +603,13 @@ def _write_quarantine_records( result_catalog: str, result_schema: str, ) -> None: - """Persist every invalid row to dq_quarantine_records.""" + """Persist every invalid row to dq_quarantine_records. + + ``row_data`` and ``errors`` columns are VARIANT in the baseline + schema. We materialise them via ``parse_json(to_json(...))`` so the + row payload becomes a typed JSON value (queryable with + ``variant_get``) rather than an opaque STRING. + """ from pyspark.sql import functions as F invalid_count = invalid_df.count() @@ -591,18 +618,34 @@ def _write_quarantine_records( return quarantine_table = f"{result_catalog}.{result_schema}.dq_quarantine_records" - now = datetime.now(timezone.utc).isoformat() data_cols = [c for c in invalid_df.columns if c not in ("_warnings", "_errors", "_rule_name")] + # ``_warnings`` may be absent if every check is error-level; default to a + # JSON null so the column stays well-typed in the VARIANT. 
+ warnings_expr = ( + F.parse_json(F.to_json(F.col("_warnings"))) + if "_warnings" in invalid_df.columns + else F.parse_json(F.lit("null")) + ) quarantine_df = ( invalid_df.withColumn("quarantine_id", F.expr("uuid()")) .withColumn("run_id", F.lit(run_id)) .withColumn("source_table_fqn", F.lit(source_table_fqn)) .withColumn("requesting_user", F.lit(requesting_user)) - .withColumn("row_data", F.to_json(F.struct(*data_cols))) - .withColumn("errors", F.to_json(F.col("_errors"))) - .withColumn("created_at", F.lit(now)) - .select("quarantine_id", "run_id", "source_table_fqn", "requesting_user", "row_data", "errors", "created_at") + .withColumn("row_data", F.parse_json(F.to_json(F.struct(*data_cols)))) + .withColumn("errors", F.parse_json(F.to_json(F.col("_errors")))) + .withColumn("warnings", warnings_expr) + .withColumn("created_at", F.current_timestamp()) + .select( + "quarantine_id", + "run_id", + "source_table_fqn", + "requesting_user", + "row_data", + "errors", + "warnings", + "created_at", + ) ) quarantine_df.writeTo(quarantine_table).append() logger.info("Wrote %d quarantine rows to %s (run_id=%s)", invalid_count, quarantine_table, run_id) @@ -650,23 +693,28 @@ def _write_sql_quarantine_records( capped_df = violation_df.limit(persisted_target) if persisted_target < invalid_count else violation_df quarantine_table = f"{result_catalog}.{result_schema}.dq_quarantine_records" - now = datetime.now(timezone.utc).isoformat() # Every column on the violation view is part of the row payload — # there are no DQX-internal columns to strip (the ``not in`` filter # is just defensive in case a check author re-uses those names). + # ``row_data``/``errors`` are VARIANT — wrap with parse_json so the + # values land as typed JSON rather than opaque strings. 
data_cols = [c for c in capped_df.columns if c not in ("_warnings", "_errors", "_rule_name")] - row_data_expr = F.to_json(F.struct(*data_cols)) if data_cols else F.lit("{}") - errors_json = _json_dumps({check_name: "SQL check violation"}) + row_data_expr = F.parse_json(F.to_json(F.struct(*data_cols))) if data_cols else F.parse_json(F.lit("{}")) + errors_expr = F.parse_json(F.lit(_json_dumps({check_name: "SQL check violation"}))) + # SQL checks have no warning-level distinction — every violation is + # treated as an error. Persist ``warnings`` as JSON null so the + # column shape matches row-level quarantine writes. quarantine_df = ( capped_df.withColumn("quarantine_id", F.expr("uuid()")) .withColumn("run_id", F.lit(run_id)) .withColumn("source_table_fqn", F.lit(source_table_fqn)) .withColumn("requesting_user", F.lit(requesting_user)) .withColumn("row_data", row_data_expr) - .withColumn("errors", F.lit(errors_json)) - .withColumn("created_at", F.lit(now)) + .withColumn("errors", errors_expr) + .withColumn("warnings", F.parse_json(F.lit("null"))) + .withColumn("created_at", F.current_timestamp()) .select( "quarantine_id", "run_id", @@ -674,6 +722,7 @@ def _write_sql_quarantine_records( "requesting_user", "row_data", "errors", + "warnings", "created_at", ) ) @@ -923,14 +972,19 @@ def _run_scheduled( invalid_rows = invalid_df.count() # triggers the observation - observed = observation.get if observation is not None else {} + # Defensive snapshot — on Spark Connect, ``Observation.get`` returns + # a reference to a live dict that subsequent actions (the quarantine + # write below) re-fire and can mutate. ``dict(...)`` detaches our + # copy. 
+ observed: dict[str, Any] = dict(observation.get) if observation is not None else {} total_rows = int(observed.get("input_row_count", 0) or 0) valid_rows = int(observed.get("valid_row_count", 0) or 0) error_rows = int(observed.get("error_row_count", 0) or 0) warning_rows = int(observed.get("warning_row_count", 0) or 0) error_summary = _check_metrics_to_error_summary(observed.get("check_metrics")) - now = datetime.now(timezone.utc).isoformat() + from pyspark.sql import functions as F + result_table = f"{result_catalog}.{result_schema}.dq_validation_runs" result_row = spark.createDataFrame( [ @@ -944,11 +998,12 @@ def _run_scheduled( total_rows, valid_rows, invalid_rows, + error_rows, + warning_rows, _json_dumps(error_summary), None, "SUCCESS", None, - now, "scheduled", fingerprint, ) @@ -956,12 +1011,12 @@ def _run_scheduled( schema=( "run_id STRING, requesting_user STRING, source_table_fqn STRING, " "view_fqn STRING, checks_json STRING, sample_size INT, " - "total_rows INT, valid_rows INT, invalid_rows INT, " + "total_rows INT, valid_rows INT, invalid_rows INT, error_rows INT, warning_rows INT, " "error_summary_json STRING, sample_invalid_json STRING, " - "status STRING, error_message STRING, created_at STRING, " + "status STRING, error_message STRING, " "run_type STRING, rule_set_fingerprint STRING" ), - ) + ).withColumn("created_at", F.current_timestamp()) result_row.writeTo(result_table).append() logger.info( "Scheduled run results written to %s (run_id=%s, errors=%d, warnings=%d)", @@ -1009,7 +1064,8 @@ def _write_error( skip_history: bool = False, ) -> None: """Write a FAILED status row so the app can report the error.""" - now = datetime.now(timezone.utc).isoformat() + from pyspark.sql import functions as F + checks_str = _json_dumps(checks) if checks else None fingerprint = _compute_fingerprint(checks or []) if task_type == "profile": @@ -1029,7 +1085,6 @@ def _write_error( None, "FAILED", error_message, - now, fingerprint, ) ], @@ -1037,10 +1092,10 @@ def 
_write_error( "run_id STRING, requesting_user STRING, source_table_fqn STRING, " "view_fqn STRING, sample_limit INT, rows_profiled INT, columns_profiled INT, " "duration_seconds DOUBLE, summary_json STRING, generated_rules_json STRING, " - "status STRING, error_message STRING, created_at STRING, " + "status STRING, error_message STRING, " "rule_set_fingerprint STRING" ), - ) + ).withColumn("created_at", F.current_timestamp()) else: table = f"{result_catalog}.{result_schema}.dq_validation_runs" if skip_history: @@ -1061,11 +1116,12 @@ def _write_error( None, None, None, + None, # error_rows + None, # warning_rows None, None, "FAILED", error_message, - now, error_run_type, fingerprint, ) @@ -1073,12 +1129,12 @@ def _write_error( schema=( "run_id STRING, requesting_user STRING, source_table_fqn STRING, " "view_fqn STRING, checks_json STRING, sample_size INT, " - "total_rows INT, valid_rows INT, invalid_rows INT, " + "total_rows INT, valid_rows INT, invalid_rows INT, error_rows INT, warning_rows INT, " "error_summary_json STRING, sample_invalid_json STRING, " - "status STRING, error_message STRING, created_at STRING, " + "status STRING, error_message STRING, " "run_type STRING, rule_set_fingerprint STRING" ), - ) + ).withColumn("created_at", F.current_timestamp()) row.writeTo(table).append() logger.info("Error result written to %s (run_id=%s)", table, run_id) diff --git a/app/tests/test_custom_metrics.py b/app/tests/test_custom_metrics.py index 1271c8e8a..a296047c4 100644 --- a/app/tests/test_custom_metrics.py +++ b/app/tests/test_custom_metrics.py @@ -67,17 +67,22 @@ def test_save_normalises_and_persists(self, svc): s, sql = svc result = s.save_custom_metrics([" sum(x) as total ", None, "", "count(*) as n"]) assert result == ["sum(x) as total", "count(*) as n"] - # ``save_setting`` performs a MERGE — confirm the JSON payload - # matches what we expect. 
- assert sql.execute.called - sql_arg = sql.execute.call_args.args[0] - assert "sum(x) as total" in sql_arg - assert "count(*) as n" in sql_arg + # ``save_setting`` now goes through SqlExecutor.upsert(); the + # JSON payload lands on the value-cols dict. + assert sql.upsert.called + kwargs = sql.upsert.call_args.kwargs + # Either positional (table, key_cols, value_cols) or keyword form. + if "value_cols" in kwargs: + payload = kwargs["value_cols"]["setting_value"] + else: + payload = sql.upsert.call_args.args[2]["setting_value"] + assert "sum(x) as total" in payload + assert "count(*) as n" in payload def test_save_empty_list_writes_empty_json(self, svc): s, sql = svc assert s.save_custom_metrics([]) == [] - assert sql.execute.called + assert sql.upsert.called # --------------------------------------------------------------------------- @@ -155,8 +160,13 @@ def scheduler(self): # __init__ wires SqlExecutor for itself; we bypass it and inject # the minimal attributes ``_load_custom_metrics`` actually touches. + # The OLTP executor (``_oltp_sql``) handles ``dq_app_settings``; + # for legacy mode it's the same instance as ``_sql`` so existing + # tests that only set ``_sql`` keep working when both names point + # at the same mock. svc = SchedulerService.__new__(SchedulerService) svc._sql = MagicMock() + svc._oltp_sql = svc._sql svc._settings_table = "dqx.public.dq_app_settings" return svc diff --git a/app/tests/test_retention.py b/app/tests/test_retention.py new file mode 100644 index 000000000..b8543d57a --- /dev/null +++ b/app/tests/test_retention.py @@ -0,0 +1,247 @@ +"""Tests for the retention sweep + admin settings surface. + +Three layers exercised: +* ``AppSettingsService.get_retention_days`` / + ``get_quarantine_retention_days`` and their setters — round-trip the + integer-day setting through ``dq_app_settings``. +* ``backend.routes.v1.config._validate_retention_days`` — request-side + bounds checking for the admin PUT endpoint. 
+* ``SchedulerService._resolve_retention_days`` and + ``_resolve_quarantine_retention_days`` — fall-back behaviour when the + setting is missing / unparseable / read fails, plus the safety floor. + +The retention defaults differ on purpose — global is 90 days (trend +dashboards) and quarantine is 30 days (tighter PII window) — so the +tests exercise both resolvers separately. +""" + +from __future__ import annotations + +from unittest.mock import MagicMock + +import pytest + + +# --------------------------------------------------------------------------- +# AppSettingsService — storage layer +# --------------------------------------------------------------------------- + + +class TestAppSettingsRetention: + @pytest.fixture + def svc(self, sql_executor_mock): + from databricks_labs_dqx_app.backend.services.app_settings_service import AppSettingsService + + return AppSettingsService(sql_executor_mock), sql_executor_mock + + def test_get_returns_none_when_unset(self, svc): + s, sql = svc + sql.query.return_value = [] + assert s.get_retention_days() is None + assert s.get_quarantine_retention_days() is None + + def test_get_returns_none_when_blank(self, svc): + s, sql = svc + sql.query.return_value = [(None,)] + assert s.get_retention_days() is None + sql.query.return_value = [("",)] + assert s.get_retention_days() is None + + def test_get_parses_integer(self, svc): + s, sql = svc + sql.query.return_value = [("45",)] + assert s.get_retention_days() == 45 + sql.query.return_value = [("14",)] + assert s.get_quarantine_retention_days() == 14 + + def test_get_returns_none_on_garbage_value(self, svc): + s, sql = svc + sql.query.return_value = [("not-a-number",)] + assert s.get_retention_days() is None + + def test_save_persists_integer_string(self, svc): + s, sql = svc + assert s.save_retention_days(60) == 60 + assert sql.upsert.called + kwargs = sql.upsert.call_args.kwargs + if "value_cols" in kwargs: + payload = kwargs["value_cols"]["setting_value"] + else: + payload = 
sql.upsert.call_args.args[2]["setting_value"] + assert payload == "60" + + def test_save_quarantine_persists_integer_string(self, svc): + s, sql = svc + assert s.save_quarantine_retention_days(21) == 21 + # The most recent upsert should carry the new key. + kwargs = sql.upsert.call_args.kwargs + if "key_cols" in kwargs: + keys = kwargs["key_cols"] + else: + keys = sql.upsert.call_args.args[1] + assert keys["setting_key"] == "quarantine_retention_days" + + +# --------------------------------------------------------------------------- +# Route-level validator +# --------------------------------------------------------------------------- + + +class TestRetentionDaysValidator: + @pytest.fixture + def validate(self): + from databricks_labs_dqx_app.backend.routes.v1.config import _validate_retention_days + + return _validate_retention_days + + def test_min_floor_enforced(self, validate): + from fastapi import HTTPException + + with pytest.raises(HTTPException) as exc: + validate(1, field="retention_days") + assert exc.value.status_code == 400 + assert "at least" in exc.value.detail.lower() + + def test_max_ceiling_enforced(self, validate): + from fastapi import HTTPException + + with pytest.raises(HTTPException) as exc: + validate(10_000, field="retention_days") + assert exc.value.status_code == 400 + assert "at most" in exc.value.detail.lower() + + def test_valid_value_passes_through(self, validate): + assert validate(30, field="quarantine_retention_days") == 30 + assert validate(90, field="retention_days") == 90 + + def test_field_label_appears_in_message(self, validate): + from fastapi import HTTPException + + with pytest.raises(HTTPException) as exc: + validate(2, field="quarantine_retention_days") + assert "quarantine_retention_days" in exc.value.detail + + +# --------------------------------------------------------------------------- +# Scheduler service — both resolvers honour the floor and fall back safely +# 
--------------------------------------------------------------------------- + + +class TestSchedulerResolveRetention: + """Scheduler must never crash on a missing / corrupt retention setting.""" + + @pytest.fixture + def scheduler(self): + from databricks_labs_dqx_app.backend.services.scheduler_service import SchedulerService + + # Bypass __init__ — only the attributes the resolver touches. + svc = SchedulerService.__new__(SchedulerService) + svc._oltp_sql = MagicMock() + svc._sql = svc._oltp_sql + svc._settings_table = "dqx.public.dq_app_settings" + return svc + + def test_global_falls_back_to_default_when_unset(self, scheduler): + from databricks_labs_dqx_app.backend.services.scheduler_service import _RETENTION_DAYS_DEFAULT + + scheduler._oltp_sql.query.return_value = [] + assert scheduler._resolve_retention_days() == _RETENTION_DAYS_DEFAULT + + def test_quarantine_falls_back_to_its_own_default(self, scheduler): + from databricks_labs_dqx_app.backend.services.scheduler_service import ( + _QUARANTINE_RETENTION_DAYS_DEFAULT, + ) + + scheduler._oltp_sql.query.return_value = [] + assert scheduler._resolve_quarantine_retention_days() == _QUARANTINE_RETENTION_DAYS_DEFAULT + + def test_quarantine_default_is_tighter_than_global(self): + """Sanity check on the constants: quarantine (PII) ages out faster.""" + from databricks_labs_dqx_app.backend.services.scheduler_service import ( + _QUARANTINE_RETENTION_DAYS_DEFAULT, + _RETENTION_DAYS_DEFAULT, + ) + + assert _QUARANTINE_RETENTION_DAYS_DEFAULT < _RETENTION_DAYS_DEFAULT + + def test_returns_persisted_value(self, scheduler): + scheduler._oltp_sql.query.return_value = [("45",)] + assert scheduler._resolve_retention_days() == 45 + + def test_floor_protects_against_too_small_value(self, scheduler): + from databricks_labs_dqx_app.backend.services.scheduler_service import _RETENTION_DAYS_MIN + + scheduler._oltp_sql.query.return_value = [("1",)] + # Stored value of 1 must be raised to the floor — never wipe inside the safety 
window. + assert scheduler._resolve_retention_days() == _RETENTION_DAYS_MIN + assert scheduler._resolve_quarantine_retention_days() == _RETENTION_DAYS_MIN + + def test_swallows_query_exception(self, scheduler): + from databricks_labs_dqx_app.backend.services.scheduler_service import ( + _QUARANTINE_RETENTION_DAYS_DEFAULT, + _RETENTION_DAYS_DEFAULT, + ) + + scheduler._oltp_sql.query.side_effect = RuntimeError("warehouse offline") + # Must not propagate — sweep retries on the next tick. + assert scheduler._resolve_retention_days() == _RETENTION_DAYS_DEFAULT + assert scheduler._resolve_quarantine_retention_days() == _QUARANTINE_RETENTION_DAYS_DEFAULT + + def test_unparseable_value_returns_default(self, scheduler): + from databricks_labs_dqx_app.backend.services.scheduler_service import _RETENTION_DAYS_DEFAULT + + scheduler._oltp_sql.query.return_value = [("not-a-number",)] + assert scheduler._resolve_retention_days() == _RETENTION_DAYS_DEFAULT + + +# --------------------------------------------------------------------------- +# Scheduler service — _run_retention applies the right cutoff per table +# --------------------------------------------------------------------------- + + +class TestRunRetentionUsesQuarantineCutoff: + """The Delta sweep must use the quarantine-specific cutoff for that one table.""" + + @pytest.fixture + def scheduler(self): + from databricks_labs_dqx_app.backend.services.scheduler_service import SchedulerService + + svc = SchedulerService.__new__(SchedulerService) + svc._sql = MagicMock() + svc._oltp_sql = MagicMock() + # ``_oltp_sql`` is also the one resolvers query for the + # configured day values; stub a missing row so both resolvers + # fall back to their respective defaults. + svc._oltp_sql.query.return_value = [] + # ``dialect`` lookup happens in the OLTP loop. 
+ svc._oltp_sql.dialect = "delta" + svc._settings_table = "dqx.public.dq_app_settings" + svc._catalog = "dqx" + svc._schema = "public" + # ``_qualify_oltp`` is invoked for the OLTP DELETE loop. + svc._qualify_oltp = lambda t: f"dqx.public.{t}" # type: ignore[method-assign] + return svc + + def test_quarantine_table_uses_its_own_default(self, scheduler): + from databricks_labs_dqx_app.backend.services.scheduler_service import ( + _QUARANTINE_RETENTION_DAYS_DEFAULT, + _RETENTION_DAYS_DEFAULT, + ) + + scheduler._run_retention() + + # Collect every DELETE statement that actually fired. + delta_stmts = [c.args[0] for c in scheduler._sql.execute.call_args_list] + quarantine_stmts = [s for s in delta_stmts if "dq_quarantine_records" in s] + non_quarantine_delta = [s for s in delta_stmts if "dq_quarantine_records" not in s] + + assert quarantine_stmts, "Expected at least one DELETE for the quarantine table" + # Every quarantine DELETE should use the *quarantine* default, + # not the global one. + for stmt in quarantine_stmts: + assert f"INTERVAL {_QUARANTINE_RETENTION_DAYS_DEFAULT} DAY" in stmt + assert f"INTERVAL {_RETENTION_DAYS_DEFAULT} DAY" not in stmt + + # And the other Delta tables should still use the global default. + for stmt in non_quarantine_delta: + assert f"INTERVAL {_RETENTION_DAYS_DEFAULT} DAY" in stmt diff --git a/app/tests/test_rules_catalog_service.py b/app/tests/test_rules_catalog_service.py index e8dbb3b90..398375562 100644 --- a/app/tests/test_rules_catalog_service.py +++ b/app/tests/test_rules_catalog_service.py @@ -220,10 +220,11 @@ def test_returns_empty_when_table_has_no_rules(self, svc, sql_executor_mock): def test_detects_existing_match(self, svc, sql_executor_mock): existing_check = {"check": {"function": "is_not_null", "arguments": {"column": "x"}}} # _row_to_entry expects 10 columns from _SELECT_COLS — emulate that. + # Column 1 is to_json(`check`): a bare check object, not an array. 
sql_executor_mock.query.return_value = [ ( "a.b.c", - json.dumps([existing_check]), + json.dumps(existing_check), 1, "draft", "u@x", @@ -243,7 +244,7 @@ def test_rejected_rules_do_not_count(self, svc, sql_executor_mock): sql_executor_mock.query.return_value = [ ( "a.b.c", - json.dumps([existing_check]), + json.dumps(existing_check), 1, "rejected", # rejected → not a duplicate "u@x", @@ -261,7 +262,7 @@ def test_exclude_rule_id_skips_self(self, svc, sql_executor_mock): sql_executor_mock.query.return_value = [ ( "a.b.c", - json.dumps([existing_check]), + json.dumps(existing_check), 1, "approved", "u@x", diff --git a/app/uv.lock b/app/uv.lock index b4f399763..553feb804 100644 --- a/app/uv.lock +++ b/app/uv.lock @@ -9,7 +9,7 @@ resolution-markers = [ ] [options] -exclude-newer = "2026-04-27T20:27:01.708704Z" +exclude-newer = "2026-05-04T20:36:45.499629Z" exclude-newer-span = "P7D" [options.exclude-newer-package] @@ -900,6 +900,7 @@ dependencies = [ { name = "databricks-sql-connector", extra = ["pyarrow"] }, { name = "fastapi" }, { name = "openpyxl" }, + { name = "psycopg", extra = ["binary", "pool"] }, { name = "pydantic-settings" }, { name = "uvicorn" }, ] @@ -924,6 +925,7 @@ requires-dist = [ { name = "databricks-sql-connector", extras = ["pyarrow"], specifier = "==4.2.5" }, { name = "fastapi", specifier = "~=0.119" }, { name = "openpyxl", specifier = ">=3.1" }, + { name = "psycopg", extras = ["binary", "pool"], specifier = ">=3.2" }, { name = "pydantic-settings", specifier = "~=2.11" }, { name = "uvicorn", specifier = "~=0.37" }, ] @@ -3270,6 +3272,90 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c9/ad/33b2ccec09bf96c2b2ef3f9a6f66baac8253d7565d8839e024a6b905d45d/psutil-7.1.3-cp37-abi3-win_arm64.whl", hash = "sha256:bd0d69cee829226a761e92f28140bec9a5ee9d5b4fb4b0cc589068dbfff559b1", size = 244608, upload-time = "2025-11-02T12:26:36.136Z" }, ] +[[package]] +name = "psycopg" +version = "3.3.3" +source = { registry = "https://pypi.org/simple" } 
+dependencies = [ + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, + { name = "tzdata", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d3/b6/379d0a960f8f435ec78720462fd94c4863e7a31237cf81bf76d0af5883bf/psycopg-3.3.3.tar.gz", hash = "sha256:5e9a47458b3c1583326513b2556a2a9473a1001a56c9efe9e587245b43148dd9", size = 165624, upload-time = "2026-02-18T16:52:16.546Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c8/5b/181e2e3becb7672b502f0ed7f16ed7352aca7c109cfb94cf3878a9186db9/psycopg-3.3.3-py3-none-any.whl", hash = "sha256:f96525a72bcfade6584ab17e89de415ff360748c766f0106959144dcbb38c698", size = 212768, upload-time = "2026-02-18T16:46:27.365Z" }, +] + +[package.optional-dependencies] +binary = [ + { name = "psycopg-binary", marker = "implementation_name != 'pypy'" }, +] +pool = [ + { name = "psycopg-pool" }, +] + +[[package]] +name = "psycopg-binary" +version = "3.3.3" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/be/c0/b389119dd754483d316805260f3e73cdcad97925839107cc7a296f6132b1/psycopg_binary-3.3.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a89bb9ee11177b2995d87186b1d9fa892d8ea725e85eab28c6525e4cc14ee048", size = 4609740, upload-time = "2026-02-18T16:47:51.093Z" }, + { url = "https://files.pythonhosted.org/packages/cf/e3/9976eef20f61840285174d360da4c820a311ab39d6b82fa09fbb545be825/psycopg_binary-3.3.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9f7d0cf072c6fbac3795b08c98ef9ea013f11db609659dcfc6b1f6cc31f9e181", size = 4676837, upload-time = "2026-02-18T16:47:55.523Z" }, + { url = "https://files.pythonhosted.org/packages/9f/f2/d28ba2f7404fd7f68d41e8a11df86313bd646258244cb12a8dd83b868a97/psycopg_binary-3.3.3-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:90eecd93073922f085967f3ed3a98ba8c325cbbc8c1a204e300282abd2369e13", size = 5497070, upload-time = 
"2026-02-18T16:47:59.929Z" }, + { url = "https://files.pythonhosted.org/packages/de/2f/6c5c54b815edeb30a281cfcea96dc93b3bb6be939aea022f00cab7aa1420/psycopg_binary-3.3.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:dac7ee2f88b4d7bb12837989ca354c38d400eeb21bce3b73dac02622f0a3c8d6", size = 5172410, upload-time = "2026-02-18T16:48:05.665Z" }, + { url = "https://files.pythonhosted.org/packages/51/75/8206c7008b57de03c1ada46bd3110cc3743f3fd9ed52031c4601401d766d/psycopg_binary-3.3.3-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b62cf8784eb6d35beaee1056d54caf94ec6ecf2b7552395e305518ab61eb8fd2", size = 6763408, upload-time = "2026-02-18T16:48:13.541Z" }, + { url = "https://files.pythonhosted.org/packages/d4/5a/ea1641a1e6c8c8b3454b0fcb43c3045133a8b703e6e824fae134088e63bd/psycopg_binary-3.3.3-cp311-cp311-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a39f34c9b18e8f6794cca17bfbcd64572ca2482318db644268049f8c738f35a6", size = 5006255, upload-time = "2026-02-18T16:48:22.176Z" }, + { url = "https://files.pythonhosted.org/packages/aa/fb/538df099bf55ae1637d52d7ccb6b9620b535a40f4c733897ac2b7bb9e14c/psycopg_binary-3.3.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:883d68d48ca9ff3cb3d10c5fdebea02c79b48eecacdddbf7cce6e7cdbdc216b8", size = 4532694, upload-time = "2026-02-18T16:48:27.338Z" }, + { url = "https://files.pythonhosted.org/packages/a1/d1/00780c0e187ea3c13dfc53bd7060654b2232cd30df562aac91a5f1c545ac/psycopg_binary-3.3.3-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:cab7bc3d288d37a80aa8c0820033250c95e40b1c2b5c57cf59827b19c2a8b69d", size = 4222833, upload-time = "2026-02-18T16:48:31.221Z" }, + { url = "https://files.pythonhosted.org/packages/7a/34/a07f1ff713c51d64dc9f19f2c32be80299a2055d5d109d5853662b922cb4/psycopg_binary-3.3.3-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:56c767007ca959ca32f796b42379fc7e1ae2ed085d29f20b05b3fc394f3715cc", size = 3952818, upload-time = 
"2026-02-18T16:48:35.869Z" }, + { url = "https://files.pythonhosted.org/packages/d3/67/d33f268a7759b4445f3c9b5a181039b01af8c8263c865c1be7a6444d4749/psycopg_binary-3.3.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:da2f331a01af232259a21573a01338530c6016dcfad74626c01330535bcd8628", size = 4258061, upload-time = "2026-02-18T16:48:41.365Z" }, + { url = "https://files.pythonhosted.org/packages/b4/3b/0d8d2c5e8e29ccc07d28c8af38445d9d9abcd238d590186cac82ee71fc84/psycopg_binary-3.3.3-cp311-cp311-win_amd64.whl", hash = "sha256:19f93235ece6dbfc4036b5e4f6d8b13f0b8f2b3eeb8b0bd2936d406991bcdd40", size = 3558915, upload-time = "2026-02-18T16:48:46.679Z" }, + { url = "https://files.pythonhosted.org/packages/90/15/021be5c0cbc5b7c1ab46e91cc3434eb42569f79a0592e67b8d25e66d844d/psycopg_binary-3.3.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6698dbab5bcef8fdb570fc9d35fd9ac52041771bfcfe6fd0fc5f5c4e36f1e99d", size = 4591170, upload-time = "2026-02-18T16:48:55.594Z" }, + { url = "https://files.pythonhosted.org/packages/f1/54/a60211c346c9a2f8c6b272b5f2bbe21f6e11800ce7f61e99ba75cf8b63e1/psycopg_binary-3.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:329ff393441e75f10b673ae99ab45276887993d49e65f141da20d915c05aafd8", size = 4670009, upload-time = "2026-02-18T16:49:03.608Z" }, + { url = "https://files.pythonhosted.org/packages/c1/53/ac7c18671347c553362aadbf65f92786eef9540676ca24114cc02f5be405/psycopg_binary-3.3.3-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:eb072949b8ebf4082ae24289a2b0fd724da9adc8f22743409d6fd718ddb379df", size = 5469735, upload-time = "2026-02-18T16:49:10.128Z" }, + { url = "https://files.pythonhosted.org/packages/7f/c3/4f4e040902b82a344eff1c736cde2f2720f127fe939c7e7565706f96dd44/psycopg_binary-3.3.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:263a24f39f26e19ed7fc982d7859a36f17841b05bebad3eb47bb9cd2dd785351", size = 5152919, upload-time = "2026-02-18T16:49:16.335Z" }, + { url = 
"https://files.pythonhosted.org/packages/0c/e7/d929679c6a5c212bcf738806c7c89f5b3d0919f2e1685a0e08d6ff877945/psycopg_binary-3.3.3-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5152d50798c2fa5bd9b68ec68eb68a1b71b95126c1d70adaa1a08cd5eefdc23d", size = 6738785, upload-time = "2026-02-18T16:49:22.687Z" }, + { url = "https://files.pythonhosted.org/packages/69/b0/09703aeb69a9443d232d7b5318d58742e8ca51ff79f90ffe6b88f1db45e7/psycopg_binary-3.3.3-cp312-cp312-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:9d6a1e56dd267848edb824dbeb08cf5bac649e02ee0b03ba883ba3f4f0bd54f2", size = 4979008, upload-time = "2026-02-18T16:49:27.313Z" }, + { url = "https://files.pythonhosted.org/packages/cc/a6/e662558b793c6e13a7473b970fee327d635270e41eded3090ef14045a6a5/psycopg_binary-3.3.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:73eaaf4bb04709f545606c1db2f65f4000e8a04cdbf3e00d165a23004692093e", size = 4508255, upload-time = "2026-02-18T16:49:31.575Z" }, + { url = "https://files.pythonhosted.org/packages/5f/7f/0f8b2e1d5e0093921b6f324a948a5c740c1447fbb45e97acaf50241d0f39/psycopg_binary-3.3.3-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:162e5675efb4704192411eaf8e00d07f7960b679cd3306e7efb120bb8d9456cc", size = 4189166, upload-time = "2026-02-18T16:49:35.801Z" }, + { url = "https://files.pythonhosted.org/packages/92/ec/ce2e91c33bc8d10b00c87e2f6b0fb570641a6a60042d6a9ae35658a3a797/psycopg_binary-3.3.3-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:fab6b5e37715885c69f5d091f6ff229be71e235f272ebaa35158d5a46fd548a0", size = 3924544, upload-time = "2026-02-18T16:49:41.129Z" }, + { url = "https://files.pythonhosted.org/packages/c5/2f/7718141485f73a924205af60041c392938852aa447a94c8cbd222ff389a1/psycopg_binary-3.3.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a4aab31bd6d1057f287c96c0effca3a25584eb9cc702f282ecb96ded7814e830", size = 4235297, upload-time = "2026-02-18T16:49:46.726Z" }, + { url = 
"https://files.pythonhosted.org/packages/57/f9/1add717e2643a003bbde31b1b220172e64fbc0cb09f06429820c9173f7fc/psycopg_binary-3.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:59aa31fe11a0e1d1bcc2ce37ed35fe2ac84cd65bb9036d049b1a1c39064d0f14", size = 3547659, upload-time = "2026-02-18T16:49:52.999Z" }, + { url = "https://files.pythonhosted.org/packages/03/0a/cac9fdf1df16a269ba0e5f0f06cac61f826c94cadb39df028cdfe19d3a33/psycopg_binary-3.3.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:05f32239aec25c5fb15f7948cffdc2dc0dac098e48b80a140e4ba32b572a2e7d", size = 4590414, upload-time = "2026-02-18T16:50:01.441Z" }, + { url = "https://files.pythonhosted.org/packages/9c/c0/d8f8508fbf440edbc0099b1abff33003cd80c9e66eb3a1e78834e3fb4fb9/psycopg_binary-3.3.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7c84f9d214f2d1de2fafebc17fa68ac3f6561a59e291553dfc45ad299f4898c1", size = 4669021, upload-time = "2026-02-18T16:50:08.803Z" }, + { url = "https://files.pythonhosted.org/packages/04/05/097016b77e343b4568feddf12c72171fc513acef9a4214d21b9478569068/psycopg_binary-3.3.3-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:e77957d2ba17cada11be09a5066d93026cdb61ada7c8893101d7fe1c6e1f3925", size = 5467453, upload-time = "2026-02-18T16:50:14.985Z" }, + { url = "https://files.pythonhosted.org/packages/91/23/73244e5feb55b5ca109cede6e97f32ef45189f0fdac4c80d75c99862729d/psycopg_binary-3.3.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:42961609ac07c232a427da7c87a468d3c82fee6762c220f38e37cfdacb2b178d", size = 5151135, upload-time = "2026-02-18T16:50:24.82Z" }, + { url = "https://files.pythonhosted.org/packages/11/49/5309473b9803b207682095201d8708bbc7842ddf3f192488a69204e36455/psycopg_binary-3.3.3-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ae07a3114313dd91fce686cab2f4c44af094398519af0e0f854bc707e1aeedf1", size = 6737315, upload-time = "2026-02-18T16:50:35.106Z" }, + { url = 
"https://files.pythonhosted.org/packages/d4/5d/03abe74ef34d460b33c4d9662bf6ec1dd38888324323c1a1752133c10377/psycopg_binary-3.3.3-cp313-cp313-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:d257c58d7b36a621dcce1d01476ad8b60f12d80eb1406aee4cf796f88b2ae482", size = 4979783, upload-time = "2026-02-18T16:50:42.067Z" }, + { url = "https://files.pythonhosted.org/packages/f0/6c/3fbf8e604e15f2f3752900434046c00c90bb8764305a1b81112bff30ba24/psycopg_binary-3.3.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:07c7211f9327d522c9c47560cae00a4ecf6687f4e02d779d035dd3177b41cb12", size = 4509023, upload-time = "2026-02-18T16:50:50.116Z" }, + { url = "https://files.pythonhosted.org/packages/9c/6b/1a06b43b7c7af756c80b67eac8bfaa51d77e68635a8a8d246e4f0bb7604a/psycopg_binary-3.3.3-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:8e7e9eca9b363dbedeceeadd8be97149d2499081f3c52d141d7cd1f395a91f83", size = 4185874, upload-time = "2026-02-18T16:50:55.97Z" }, + { url = "https://files.pythonhosted.org/packages/2b/d3/bf49e3dcaadba510170c8d111e5e69e5ae3f981c1554c5bb71c75ce354bb/psycopg_binary-3.3.3-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:cb85b1d5702877c16f28d7b92ba030c1f49ebcc9b87d03d8c10bf45a2f1c7508", size = 3925668, upload-time = "2026-02-18T16:51:03.299Z" }, + { url = "https://files.pythonhosted.org/packages/f8/92/0aac830ed6a944fe334404e1687a074e4215630725753f0e3e9a9a595b62/psycopg_binary-3.3.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4d4606c84d04b80f9138d72f1e28c6c02dc5ae0c7b8f3f8aaf89c681ce1cd1b1", size = 4234973, upload-time = "2026-02-18T16:51:09.097Z" }, + { url = "https://files.pythonhosted.org/packages/2e/96/102244653ee5a143ece5afe33f00f52fe64e389dfce8dbc87580c6d70d3d/psycopg_binary-3.3.3-cp313-cp313-win_amd64.whl", hash = "sha256:74eae563166ebf74e8d950ff359be037b85723d99ca83f57d9b244a871d6c13b", size = 3551342, upload-time = "2026-02-18T16:51:13.892Z" }, + { url = 
"https://files.pythonhosted.org/packages/a2/71/7a57e5b12275fe7e7d84d54113f0226080423a869118419c9106c083a21c/psycopg_binary-3.3.3-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:497852c5eaf1f0c2d88ab74a64a8097c099deac0c71de1cbcf18659a8a04a4b2", size = 4607368, upload-time = "2026-02-18T16:51:19.295Z" }, + { url = "https://files.pythonhosted.org/packages/c7/04/cb834f120f2b2c10d4003515ef9ca9d688115b9431735e3936ae48549af8/psycopg_binary-3.3.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:258d1ea53464d29768bf25930f43291949f4c7becc706f6e220c515a63a24edd", size = 4687047, upload-time = "2026-02-18T16:51:23.84Z" }, + { url = "https://files.pythonhosted.org/packages/40/e9/47a69692d3da9704468041aa5ed3ad6fc7f6bb1a5ae788d261a26bbca6c7/psycopg_binary-3.3.3-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:111c59897a452196116db12e7f608da472fbff000693a21040e35fc978b23430", size = 5487096, upload-time = "2026-02-18T16:51:29.645Z" }, + { url = "https://files.pythonhosted.org/packages/0b/b6/0e0dd6a2f802864a4ae3dbadf4ec620f05e3904c7842b326aafc43e5f464/psycopg_binary-3.3.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:17bb6600e2455993946385249a3c3d0af52cd70c1c1cdbf712e9d696d0b0bf1b", size = 5168720, upload-time = "2026-02-18T16:51:36.499Z" }, + { url = "https://files.pythonhosted.org/packages/6f/0d/977af38ac19a6b55d22dff508bd743fd7c1901e1b73657e7937c7cccb0a3/psycopg_binary-3.3.3-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:642050398583d61c9856210568eb09a8e4f2fe8224bf3be21b67a370e677eead", size = 6762076, upload-time = "2026-02-18T16:51:43.167Z" }, + { url = "https://files.pythonhosted.org/packages/34/40/912a39d48322cf86895c0eaf2d5b95cb899402443faefd4b09abbba6b6e1/psycopg_binary-3.3.3-cp314-cp314-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:533efe6dc3a7cba5e2a84e38970786bb966306863e45f3db152007e9f48638a6", size = 4997623, upload-time = "2026-02-18T16:51:47.707Z" 
}, + { url = "https://files.pythonhosted.org/packages/98/0c/c14d0e259c65dc7be854d926993f151077887391d5a081118907a9d89603/psycopg_binary-3.3.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:5958dbf28b77ce2033482f6cb9ef04d43f5d8f4b7636e6963d5626f000efb23e", size = 4532096, upload-time = "2026-02-18T16:51:51.421Z" }, + { url = "https://files.pythonhosted.org/packages/39/21/8b7c50a194cfca6ea0fd4d1f276158307785775426e90700ab2eba5cd623/psycopg_binary-3.3.3-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:a6af77b6626ce92b5817bf294b4d45ec1a6161dba80fc2d82cdffdd6814fd023", size = 4208884, upload-time = "2026-02-18T16:51:57.336Z" }, + { url = "https://files.pythonhosted.org/packages/c7/2c/a4981bf42cf30ebba0424971d7ce70a222ae9b82594c42fc3f2105d7b525/psycopg_binary-3.3.3-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:47f06fcbe8542b4d96d7392c476a74ada521c5aebdb41c3c0155f6595fc14c8d", size = 3944542, upload-time = "2026-02-18T16:52:04.266Z" }, + { url = "https://files.pythonhosted.org/packages/60/e9/b7c29b56aa0b85a4e0c4d89db691c1ceef08f46a356369144430c155a2f5/psycopg_binary-3.3.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:e7800e6c6b5dc4b0ca7cc7370f770f53ac83886b76afda0848065a674231e856", size = 4254339, upload-time = "2026-02-18T16:52:10.444Z" }, + { url = "https://files.pythonhosted.org/packages/98/5a/291d89f44d3820fffb7a04ebc8f3ef5dda4f542f44a5daea0c55a84abf45/psycopg_binary-3.3.3-cp314-cp314-win_amd64.whl", hash = "sha256:165f22ab5a9513a3d7425ffb7fcc7955ed8ccaeef6d37e369d6cc1dff1582383", size = 3652796, upload-time = "2026-02-18T16:52:14.02Z" }, +] + +[[package]] +name = "psycopg-pool" +version = "3.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/56/9a/9470d013d0d50af0da9c4251614aeb3c1823635cab3edc211e3839db0bcf/psycopg_pool-3.3.0.tar.gz", hash = "sha256:fa115eb2860bd88fce1717d75611f41490dec6135efb619611142b24da3f6db5", size 
= 31606, upload-time = "2025-12-01T11:34:33.11Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e7/c3/26b8a0908a9db249de3b4169692e1c7c19048a9bc41a4d3209cee7dbb758/psycopg_pool-3.3.0-py3-none-any.whl", hash = "sha256:2e44329155c410b5e8666372db44276a8b1ebd8c90f1c3026ebba40d4bc81063", size = 39995, upload-time = "2025-12-01T11:34:29.761Z" }, +] + [[package]] name = "py4j" version = "0.10.9.7" diff --git a/app/yarn.lock b/app/yarn.lock index ac1bf5216..c1753d1ed 100644 --- a/app/yarn.lock +++ b/app/yarn.lock @@ -43,8 +43,8 @@ picocolors "^1.1.1" "@babel/compat-data@^7.28.6": - version "7.29.0" - integrity sha512-T1NCJqT/j9+cn8fvkt7jtwbLBfLC/1y1c7NtCeXFRgzGTsafi68MRv8yzkYSapBnFA6L3U2VSc02ciDzoAJhJg== + version "7.29.3" + integrity sha512-LIVqM46zQWZhj17qA8wb4nW/ixr2y1Nw+r1etiAWgRM6U1IqP+LNhL1yg440jYZR72jCWcWbLWzIosH+uP1fqg== "@babel/core@^7.23.7", "@babel/core@^7.28.5", "@babel/core@^7.29.0": version "7.29.0" @@ -129,8 +129,8 @@ "@babel/types" "^7.29.0" "@babel/parser@^7.1.0", "@babel/parser@^7.20.7", "@babel/parser@^7.23.6", "@babel/parser@^7.28.5", "@babel/parser@^7.28.6", "@babel/parser@^7.29.0": - version "7.29.2" - integrity sha512-4GgRzy/+fsBa72/RZVJmGKPmZu9Byn8o4MoLpmNe1m8ZfYnz5emHLQz3U4gLud6Zwl0RZIcgiLD7Uq7ySFuDLA== + version "7.29.3" + integrity sha512-b3ctpQwp+PROvU/cttc4OYl4MzfJUWy6FZg+PMXfzmt/+39iHVF0sDfqay8TQM3JA2EUOyKcFZt75jWriQijsA== dependencies: "@babel/types" "^7.29.0" @@ -969,105 +969,105 @@ version "1.0.0-rc.3" integrity sha512-eybk3TjzzzV97Dlj5c+XrBFW57eTNhzod66y9HrBlzJ6NsCrWCp/2kaPS3K9wJmurBC0Tdw4yPjXKZqlznim3Q== -"@rollup/rollup-android-arm-eabi@4.60.2": - version "4.60.2" - integrity sha512-dnlp69efPPg6Uaw2dVqzWRfAWRnYVb1XJ8CyyhIbZeaq4CA5/mLeZ1IEt9QqQxmbdvagjLIm2ZL8BxXv5lH4Yw== +"@rollup/rollup-android-arm-eabi@4.60.3": + version "4.60.3" + integrity sha512-x35CNW/ANXG3hE/EZpRU8MXX1JDN86hBb2wMGAtltkz7pc6cxgjpy1OMMfDosOQ+2hWqIkag/fGok1Yady9nGw== -"@rollup/rollup-android-arm64@4.60.2": - version "4.60.2" - integrity 
sha512-OqZTwDRDchGRHHm/hwLOL7uVPB9aUvI0am/eQuWMNyFHf5PSEQmyEeYYheA0EPPKUO/l0uigCp+iaTjoLjVoHg== +"@rollup/rollup-android-arm64@4.60.3": + version "4.60.3" + integrity sha512-xw3xtkDApIOGayehp2+Rz4zimfkaX65r4t47iy+ymQB2G4iJCBBfj0ogVg5jpvjpn8UWn/+q9tprxleYeNp3Hw== -"@rollup/rollup-darwin-arm64@4.60.2": - version "4.60.2" - integrity sha512-UwRE7CGpvSVEQS8gUMBe1uADWjNnVgP3Iusyda1nSRwNDCsRjnGc7w6El6WLQsXmZTbLZx9cecegumcitNfpmA== +"@rollup/rollup-darwin-arm64@4.60.3": + version "4.60.3" + integrity sha512-vo6Y5Qfpx7/5EaamIwi0WqW2+zfiusVihKatLvtN1VFVy3D13uERk/6gZLU1UiHRL6fDXqj/ELIeVRGnvcTE1g== -"@rollup/rollup-darwin-x64@4.60.2": - version "4.60.2" - integrity sha512-gjEtURKLCC5VXm1I+2i1u9OhxFsKAQJKTVB8WvDAHF+oZlq0GTVFOlTlO1q3AlCTE/DF32c16ESvfgqR7343/g== +"@rollup/rollup-darwin-x64@4.60.3": + version "4.60.3" + integrity sha512-D+0QGcZhBzTN82weOnsSlY7V7+RMmPuF1CkbxyMAGE8+ZHeUjyb76ZiWmBlCu//AQQONvxcqRbwZTajZKqjuOw== -"@rollup/rollup-freebsd-arm64@4.60.2": - version "4.60.2" - integrity sha512-Bcl6CYDeAgE70cqZaMojOi/eK63h5Me97ZqAQoh77VPjMysA/4ORQBRGo3rRy45x4MzVlU9uZxs8Uwy7ZaKnBw== +"@rollup/rollup-freebsd-arm64@4.60.3": + version "4.60.3" + integrity sha512-6HnvHCT7fDyj6R0Ph7A6x8dQS/S38MClRWeDLqc0MdfWkxjiu1HSDYrdPhqSILzjTIC/pnXbbJbo+ft+gy/9hQ== -"@rollup/rollup-freebsd-x64@4.60.2": - version "4.60.2" - integrity sha512-LU+TPda3mAE2QB0/Hp5VyeKJivpC6+tlOXd1VMoXV/YFMvk/MNk5iXeBfB4MQGRWyOYVJ01625vjkr0Az98OJQ== +"@rollup/rollup-freebsd-x64@4.60.3": + version "4.60.3" + integrity sha512-KHLgC3WKlUYW3ShFKnnosZDOJ0xjg9zp7au3sIm2bs/tGBeC2ipmvRh/N7JKi0t9Ue20C0dpEshi8WUubg+cnA== -"@rollup/rollup-linux-arm-gnueabihf@4.60.2": - version "4.60.2" - integrity sha512-2QxQrM+KQ7DAW4o22j+XZ6RKdxjLD7BOWTP0Bv0tmjdyhXSsr2Ul1oJDQqh9Zf5qOwTuTc7Ek83mOFaKnodPjg== +"@rollup/rollup-linux-arm-gnueabihf@4.60.3": + version "4.60.3" + integrity sha512-DV6fJoxEYWJOvaZIsok7KrYl0tPvga5OZ2yvKHNNYyk/2roMLqQAbGhr78EQ5YhHpnhLKJD3S1WFusAkmUuV5g== -"@rollup/rollup-linux-arm-musleabihf@4.60.2": - version "4.60.2" 
- integrity sha512-TbziEu2DVsTEOPif2mKWkMeDMLoYjx95oESa9fkQQK7r/Orta0gnkcDpzwufEcAO2BLBsD7mZkXGFqEdMRRwfw== +"@rollup/rollup-linux-arm-musleabihf@4.60.3": + version "4.60.3" + integrity sha512-mQKoJAzvuOs6F+TZybQO4GOTSMUu7v0WdxEk24krQ/uUxXoPTtHjuaUuPmFhtBcM4K0ons8nrE3JyhTuCFtT/w== -"@rollup/rollup-linux-arm64-gnu@4.60.2": - version "4.60.2" - integrity sha512-bO/rVDiDUuM2YfuCUwZ1t1cP+/yqjqz+Xf2VtkdppefuOFS2OSeAfgafaHNkFn0t02hEyXngZkxtGqXcXwO8Rg== +"@rollup/rollup-linux-arm64-gnu@4.60.3": + version "4.60.3" + integrity sha512-Whjj2qoiJ6+OOJMGptTYazaJvjOJm+iKHpXQM1P3LzGjt7Ff++Tp7nH4N8J/BUA7R9IHfDyx4DJIflifwnbmIA== -"@rollup/rollup-linux-arm64-musl@4.60.2": - version "4.60.2" - integrity sha512-hr26p7e93Rl0Za+JwW7EAnwAvKkehh12BU1Llm9Ykiibg4uIr2rbpxG9WCf56GuvidlTG9KiiQT/TXT1yAWxTA== +"@rollup/rollup-linux-arm64-musl@4.60.3": + version "4.60.3" + integrity sha512-4YTNHKqGng5+yiZt3mg77nmyuCfmNfX4fPmyUapBcIk+BdwSwmCWGXOUxhXbBEkFHtoN5boLj/5NON+u5QC9tg== -"@rollup/rollup-linux-loong64-gnu@4.60.2": - version "4.60.2" - integrity sha512-pOjB/uSIyDt+ow3k/RcLvUAOGpysT2phDn7TTUB3n75SlIgZzM6NKAqlErPhoFU+npgY3/n+2HYIQVbF70P9/A== +"@rollup/rollup-linux-loong64-gnu@4.60.3": + version "4.60.3" + integrity sha512-SU3kNlhkpI4UqlUc2VXPGK9o886ZsSeGfMAX2ba2b8DKmMXq4AL7KUrkSWVbb7koVqx41Yczx6dx5PNargIrEA== -"@rollup/rollup-linux-loong64-musl@4.60.2": - version "4.60.2" - integrity sha512-2/w+q8jszv9Ww1c+6uJT3OwqhdmGP2/4T17cu8WuwyUuuaCDDJ2ojdyYwZzCxx0GcsZBhzi3HmH+J5pZNXnd+Q== +"@rollup/rollup-linux-loong64-musl@4.60.3": + version "4.60.3" + integrity sha512-6lDLl5h4TXpB1mTf2rQWnAk/LcXrx9vBfu/DT5TIPhvMhRWaZ5MxkIc8u4lJAmBo6klTe1ywXIUHFjylW505sg== -"@rollup/rollup-linux-ppc64-gnu@4.60.2": - version "4.60.2" - integrity sha512-11+aL5vKheYgczxtPVVRhdptAM2H7fcDR5Gw4/bTcteuZBlH4oP9f5s9zYO9aGZvoGeBpqXI/9TZZihZ609wKw== +"@rollup/rollup-linux-ppc64-gnu@4.60.3": + version "4.60.3" + integrity sha512-BMo8bOw8evlup/8G+cj5xWtPyp93xPdyoSN16Zy90Q2QZ0ZYRhCt6ZJSwbrRzG9HApFabjwj2p25TUPDWrhzqQ== 
-"@rollup/rollup-linux-ppc64-musl@4.60.2": - version "4.60.2" - integrity sha512-i16fokAGK46IVZuV8LIIwMdtqhin9hfYkCh8pf8iC3QU3LpwL+1FSFGej+O7l3E/AoknL6Dclh2oTdnRMpTzFQ== +"@rollup/rollup-linux-ppc64-musl@4.60.3": + version "4.60.3" + integrity sha512-E0L8X1dZN1/Rph+5VPF6Xj2G7JJvMACVXtamTJIDrVI44Y3K+G8gQaMEAavbqCGTa16InptiVrX6eM6pmJ+7qA== -"@rollup/rollup-linux-riscv64-gnu@4.60.2": - version "4.60.2" - integrity sha512-49FkKS6RGQoriDSK/6E2GkAsAuU5kETFCh7pG4yD/ylj9rKhTmO3elsnmBvRD4PgJPds5W2PkhC82aVwmUcJ7A== +"@rollup/rollup-linux-riscv64-gnu@4.60.3": + version "4.60.3" + integrity sha512-oZJ/WHaVfHUiRAtmTAeo3DcevNsVvH8mbvodjZy7D5QKvCefO371SiKRpxoDcCxB3PTRTLayWBkvmDQKTcX/sw== -"@rollup/rollup-linux-riscv64-musl@4.60.2": - version "4.60.2" - integrity sha512-mjYNkHPfGpUR00DuM1ZZIgs64Hpf4bWcz9Z41+4Q+pgDx73UwWdAYyf6EG/lRFldmdHHzgrYyge5akFUW0D3mQ== +"@rollup/rollup-linux-riscv64-musl@4.60.3": + version "4.60.3" + integrity sha512-Dhbyh7j9FybM3YaTgaHmVALwA8AkUwTPccyCQ79TG9AJUsMQqgN1DDEZNr4+QUfwiWvLDumW5vdwzoeUF+TNxQ== -"@rollup/rollup-linux-s390x-gnu@4.60.2": - version "4.60.2" - integrity sha512-ALyvJz965BQk8E9Al/JDKKDLH2kfKFLTGMlgkAbbYtZuJt9LU8DW3ZoDMCtQpXAltZxwBHevXz5u+gf0yA0YoA== +"@rollup/rollup-linux-s390x-gnu@4.60.3": + version "4.60.3" + integrity sha512-cJd1X5XhHHlltkaypz1UcWLA8AcoIi1aWhsvaWDskD1oz2eKCypnqvTQ8ykMNI0RSmm7NkTdSqSSD7zM0xa6Ig== -"@rollup/rollup-linux-x64-gnu@4.60.2": - version "4.60.2" - integrity sha512-UQjrkIdWrKI626Du8lCQ6MJp/6V1LAo2bOK9OTu4mSn8GGXIkPXk/Vsp4bLHCd9Z9Iz2OTEaokUE90VweJgIYQ== +"@rollup/rollup-linux-x64-gnu@4.60.3": + version "4.60.3" + integrity sha512-DAZDBHQfG2oQuhY7mc6I3/qB4LU2fQCjRvxbDwd/Jdvb9fypP4IJ4qmtu6lNjes6B531AI8cg1aKC2di97bUxA== -"@rollup/rollup-linux-x64-musl@4.60.2": - version "4.60.2" - integrity sha512-bTsRGj6VlSdn/XD4CGyzMnzaBs9bsRxy79eTqTCBsA8TMIEky7qg48aPkvJvFe1HyzQ5oMZdg7AnVlWQSKLTnw== +"@rollup/rollup-linux-x64-musl@4.60.3": + version "4.60.3" + integrity 
sha512-cRxsE8c13mZOh3vP+wLDxpQBRrOHDIGOWyDL93Sy0Ga8y515fBcC2pjUfFwUe5T7tqvTvWbCpg1URM/AXdWIXA== -"@rollup/rollup-openbsd-x64@4.60.2": - version "4.60.2" - integrity sha512-6d4Z3534xitaA1FcMWP7mQPq5zGwBmGbhphh2DwaA1aNIXUu3KTOfwrWpbwI4/Gr0uANo7NTtaykFyO2hPuFLg== +"@rollup/rollup-openbsd-x64@4.60.3": + version "4.60.3" + integrity sha512-QaWcIgRxqEdQdhJqW4DJctsH6HCmo5vHxY0krHSX4jMtOqfzC+dqDGuHM87bu4H8JBeibWx7jFz+h6/4C8wA5Q== -"@rollup/rollup-openharmony-arm64@4.60.2": - version "4.60.2" - integrity sha512-NetAg5iO2uN7eB8zE5qrZ3CSil+7IJt4WDFLcC75Ymywq1VZVD6qJ6EvNLjZ3rEm6gB7XW5JdT60c6MN35Z85Q== +"@rollup/rollup-openharmony-arm64@4.60.3": + version "4.60.3" + integrity sha512-AaXwSvUi3QIPtroAUw1t5yHGIyqKEXwH54WUocFolZhpGDruJcs8c+xPNDRn4XiQsS7MEwnYsHW2l0MBLDMkWg== -"@rollup/rollup-win32-arm64-msvc@4.60.2": - version "4.60.2" - integrity sha512-NCYhOotpgWZ5kdxCZsv6Iudx0wX8980Q/oW4pNFNihpBKsDbEA1zpkfxJGC0yugsUuyDZ7gL37dbzwhR0VI7pQ== +"@rollup/rollup-win32-arm64-msvc@4.60.3": + version "4.60.3" + integrity sha512-65LAKM/bAWDqKNEelHlcHvm2V+Vfb8C6INFxQXRHCvaVN1rJfwr4NvdP4FyzUaLqWfaCGaadf6UbTm8xJeYfEg== -"@rollup/rollup-win32-ia32-msvc@4.60.2": - version "4.60.2" - integrity sha512-RXsaOqXxfoUBQoOgvmmijVxJnW2IGB0eoMO7F8FAjaj0UTywUO/luSqimWBJn04WNgUkeNhh7fs7pESXajWmkg== +"@rollup/rollup-win32-ia32-msvc@4.60.3": + version "4.60.3" + integrity sha512-EEM2gyhBF5MFnI6vMKdX1LAosE627RGBzIoGMdLloPZkXrUN0Ckqgr2Qi8+J3zip/8NVVro3/FjB+tjhZUgUHA== -"@rollup/rollup-win32-x64-gnu@4.60.2": - version "4.60.2" - integrity sha512-qdAzEULD+/hzObedtmV6iBpdL5TIbKVztGiK7O3/KYSf+HIzU257+MX1EXJcyIiDbMAqmbwaufcYPvyRryeZtA== +"@rollup/rollup-win32-x64-gnu@4.60.3": + version "4.60.3" + integrity sha512-E5Eb5H/DpxaoXH++Qkv28RcUJboMopmdDUALBczvHMf7hNIxaDZqwY5lK12UK1BHacSmvupoEWGu+n993Z0y1A== -"@rollup/rollup-win32-x64-msvc@4.60.2": - version "4.60.2" - integrity sha512-Nd/SgG27WoA9e+/TdK74KnHz852TLa94ovOYySo/yMPuTmpckK/jIF2jSwS3g7ELSKXK13/cVdmg1Z/DaCWKxA== +"@rollup/rollup-win32-x64-msvc@4.60.3": + version 
"4.60.3" + integrity sha512-hPt/bgL5cE+Qp+/TPHBqptcAgPzgj46mPcg/16zNUmbQk0j+mOEQV/+Lqu8QRtDV3Ek95Q6FeFITpuhl6OTsAA== "@shikijs/engine-oniguruma@^3.23.0": version "3.23.0" @@ -1379,15 +1379,15 @@ version "1.161.6" integrity sha512-NaOGLRrddszbQj9upGat6HG/4TKvXLvu+osAIgfxPYA+eIvYKv8GKDJOrY2D3/U9MRnKfMWD7bU4jeD4xmqyIg== -"@tanstack/query-core@5.100.5": - version "5.100.5" - integrity sha512-t20KrhKkf0HXzqQkPbJ5erhFesup68BAbwFgYmTrS7bxMF7O5MdmL8jUkik4thsG7Hg00fblz30h6yF1d5TxGg== +"@tanstack/query-core@5.100.9": + version "5.100.9" + integrity sha512-SJSFw1S8+kQ0+knv/XGfrbocWoAlT7vDKsSImtLx3ZPQmEcR46hkDjLSvynSy25N8Ms4tIEini1FuBd5k7IscQ== "@tanstack/react-query@^5.90.5": - version "5.100.5" - integrity sha512-aNwj1mi2v2bQ9IxkyR1grLOUkv3BYWoykHy9KDyLNbjC3tsahbOHJibK+Wjtr1wRhG59/AvJhiJG5OlthaCgJA== + version "5.100.9" + integrity sha512-Oa44XkaI3kCNN6ME0KByU3xT3SEUNOMfZpHxL6+wFoTm+OeUFYHKdeYVe0aOXlRDm/f15sgLwEt2HDorIdW8+A== dependencies: - "@tanstack/query-core" "5.100.5" + "@tanstack/query-core" "5.100.9" "@tanstack/react-router-devtools@^1.133.36": version "1.166.13" @@ -1396,12 +1396,12 @@ "@tanstack/router-devtools-core" "1.167.3" "@tanstack/react-router@^1.133.36": - version "1.168.25" - integrity sha512-4U/E76dc+fYuLixjV1RLNfqrkQoexSL8MqGNpIHOodtvY3fMPGaALrvDVtBDQYBEU4z5r5fHaV6+kclWAVFP9A== + version "1.169.1" + integrity sha512-MBtQKSvac3OCcsSa6oBpDrrN90IV47I6Gtv05NxhbFVh+gVjtqvs6HSU4XM9+y5sHZPgS+35eArflX4vM8GEnQ== dependencies: "@tanstack/history" "1.161.6" "@tanstack/react-store" "^0.9.3" - "@tanstack/router-core" "1.168.17" + "@tanstack/router-core" "1.169.1" isbot "^5.1.22" "@tanstack/react-store@^0.9.3": @@ -1411,9 +1411,9 @@ "@tanstack/store" "0.9.3" use-sync-external-store "^1.6.0" -"@tanstack/router-core@1.168.17": - version "1.168.17" - integrity sha512-VDq7HCqRK3sdpxoETwYoTXTaYi+OVQC197g1fdzaiZBUmhntfjn+PQc15OzTqNNhf8Menk6r6ftmuphybMKdig== +"@tanstack/router-core@1.169.1": + version "1.169.1" + integrity 
sha512-x+2gIGKTTE1qAn7tLieGfrB5ciOviDmmi2ox9fAWUubRV+yTU5ruGFXocoCIWF+lB+SOtnHjo2E9BLSWyYoEmA== dependencies: "@tanstack/history" "1.161.6" cookie-es "^3.0.0" @@ -1427,12 +1427,12 @@ clsx "^2.1.1" goober "^2.1.16" -"@tanstack/router-generator@1.166.36": - version "1.166.36" - integrity sha512-ce8Sg+ONwdd483kXJBYhTcdIAjEwSlWUOkoLsgPdNUIfA05hdnd9JkNnM4X1OnzpFL8/+TBSMo4WYQp9CHhDPg== +"@tanstack/router-generator@1.166.39": + version "1.166.39" + integrity sha512-j2OW/UvpjM/DT9tHVmuhWW1k6UOezTRrPqBPZFFmIth0fY7iTPqK+Erqpo8r5yGTRGCbMvOS4sL3H2MldnIZew== dependencies: "@babel/types" "^7.28.5" - "@tanstack/router-core" "1.168.17" + "@tanstack/router-core" "1.169.1" "@tanstack/router-utils" "1.161.7" "@tanstack/virtual-file-routes" "1.161.7" jiti "^2.6.1" @@ -1441,8 +1441,8 @@ zod "^3.24.2" "@tanstack/router-plugin@^1.133.36": - version "1.167.28" - integrity sha512-O23ba7JaKvx5Eu0l6iTpknu79QcdkMmoW1VtZdsZe5NoQ6dHHru6caoapDc/uOxmz7h7VYfSuLjs/UYg7EA1cA== + version "1.167.32" + integrity sha512-i9BA6GzUCoM20UYZ77orXzHwD5zM0OQTtLuPNbqTTSG38CvR6viRFP/d+QFo2aRNyCvun8PR7zSa49bslSggEQ== dependencies: "@babel/core" "^7.28.5" "@babel/plugin-syntax-jsx" "^7.27.1" @@ -1450,8 +1450,8 @@ "@babel/template" "^7.27.2" "@babel/traverse" "^7.28.5" "@babel/types" "^7.28.5" - "@tanstack/router-core" "1.168.17" - "@tanstack/router-generator" "1.166.36" + "@tanstack/router-core" "1.169.1" + "@tanstack/router-generator" "1.166.39" "@tanstack/router-utils" "1.161.7" "@tanstack/virtual-file-routes" "1.161.7" chokidar "^3.6.0" @@ -1481,8 +1481,8 @@ integrity sha512-olW33+Cn+bsCsZKPwEGhlkqS6w3M2slFv11JIobdnCFKMLG97oAI2kWKdx5/zsywTL8flpnoIgaZZPlQTFYhdQ== "@tybys/wasm-util@^0.10.1": - version "0.10.1" - integrity sha512-9tTaPJLSiejZKx+Bmog4uSubteqTvFrVrURwkmHixBo0G4seD0zUxp98E1DzUBJxLQ3NPwXrGKDiVjwx/DpPsg== + version "0.10.2" + integrity sha512-RoBvJ2X0wuKlWFIjrwffGw1IqZHKQqzIchKaadZZfnNpsAYp2mM0h36JtPCjNDAHGgYez/15uMBpfGwchhiMgg== dependencies: tslib "^2.4.0" @@ -1690,10 +1690,10 @@ 
available-typed-arrays@^1.0.7: possible-typed-array-names "^1.0.0" axios@^1.13.1: - version "1.15.2" - integrity sha512-wLrXxPtcrPTsNlJmKjkPnNPK2Ihe0hn0wGSaTEiHRPxwjvJwT3hKmXF4dpqxmPO9SoNb2FsYXj/xEo0gHN+D5A== + version "1.16.0" + integrity sha512-6hp5CwvTPlN2A31g5dxnwAX0orzM7pmCRDLnZSX772mv8WDqICwFjowHuPs04Mc8deIld1+ejhtaMn5vp6b+1w== dependencies: - follow-redirects "^1.15.11" + follow-redirects "^1.16.0" form-data "^4.0.5" proxy-from-env "^2.1.0" @@ -1715,8 +1715,8 @@ balanced-match@^4.0.2: integrity sha512-BLrgEcRTwX2o6gGxGOCNyMvGSp35YofuYzw9h1IMTRmKqttAZZVU67bdb9Pr2vUHA8+j3i2tJfjO6C6+4myGTA== baseline-browser-mapping@^2.10.12: - version "2.10.23" - integrity sha512-xwVXGqevyKPsiuQdLj+dZMVjidjJV508TBqexND5HrF89cGdCYCJFB3qhcxRHSeMctdCfbR1jrxBajhDy7o29g== + version "2.10.27" + integrity sha512-zEs/ufmZoUd7WftKpKyXaT6RFxpQ5Qm9xytKRHvJfxFV9DFJkZph9RvJ1LcOUi0Z1ZVijMte65JbILeV+8QQEA== binary-extensions@^2.0.0: version "2.3.0" @@ -1956,8 +1956,8 @@ dunder-proto@^1.0.0, dunder-proto@^1.0.1: gopd "^1.2.0" electron-to-chromium@^1.5.328: - version "1.5.344" - integrity sha512-4MxfbmNDm+KPh066EZy+eUnkcDPcZ35wNmOWzFuh/ijvHsve6kbLTLURy88uCNK5FbpN+yk2nQY6BYh1GEt+wg== + version "1.5.349" + integrity sha512-QsWVGyRuY07Aqb234QytTfwd5d9AJlfNIQ5wIOl1L+PZDzI9d9+Fn0FRale/QYlFxt/bUnB0/nLd1jFPGxGK1A== emoji-regex@^8.0.0: version "8.0.0" @@ -2203,8 +2203,8 @@ fast-safe-stringify@^2.0.7: integrity sha512-W+KJc2dmILlPplD/H4K9l9LcAHAfPtP6BY84uVLXQ6Evcz9Lcg33Y2z1IVblT6xdY54PXYVHEv+0Wpq8Io6zkA== fast-uri@^3.0.1: - version "3.1.0" - integrity sha512-iPeeDKJSWf4IEOasVVrknXpaBV0IApz/gp7S2bb7Z4Lljbl2MGJRqInZiUrQwV16cpzw/D3S5j5Julj/gT52AA== + version "3.1.1" + integrity sha512-h2r7rcm6Ee/J8o0LD5djLuFVcfbZxhvho4vvsbeV0aMvXjUgqv4YpxpkEx0d68l6+IleVfLAdVEfhR7QNMkGHQ== fastq@^1.6.0: version "1.20.1" @@ -2229,7 +2229,7 @@ find-up@5.0.0: locate-path "^6.0.0" path-exists "^4.0.0" -follow-redirects@^1.15.11: +follow-redirects@^1.16.0: version "1.16.0" integrity 
sha512-y5rN/uOsadFT/JfYwhxRS5R7Qce+g3zG97+JrtFZlC9klX/W5hD7iiLzScI4nZqUS7DNUdhPgw4xI8W2LuXlUw== @@ -2890,8 +2890,8 @@ ms@^2.1.3: integrity sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA== nanoid@^3.3.11: - version "3.3.11" - integrity sha512-N8SpfPUnUp1bK+PMYW8qSWdl9U+wwNWI4QKxOYDy9JAro3WMX7p2OeVRF9v+347pnakNevPmiHhNmZ2HbFA76w== + version "3.3.12" + integrity sha512-ZB9RH/39qpq5Vu6Y+NmUaFhQR6pp+M2Xt76XBnEwDaGcVAqhlvxrl3B2bKS5D3NH3QR76v3aSrKaF/Kiy7lEtQ== nimma@0.2.3: version "0.2.3" @@ -3099,8 +3099,8 @@ possible-typed-array-names@^1.0.0: integrity sha512-/+5VFTchJDoVj3bhoqi6UeymcD00DAwb1nJwamzPvHEszJ4FpF6SNNbUbOS8yI56qHzdV8eK0qEfOSiodkTdxg== postcss@^8.5.6: - version "8.5.12" - integrity sha512-W62t/Se6rA0Az3DfCL0AqJwXuKwBeYg6nOaIgzP+xZ7N5BFCI7DYi1qs6ygUYT6rvfi6t9k65UMLJC+PHZpDAA== + version "8.5.14" + integrity sha512-SoSL4+OSEtR99LHFZQiJLkT59C5B1amGO1NzTwj7TT1qCUgUO6hxOvzkOYxD+vMrXBM3XJIKzokoERdqQq/Zmg== dependencies: nanoid "^3.3.11" picocolors "^1.1.1" @@ -3215,36 +3215,36 @@ reusify@^1.0.4: integrity sha512-g6QUff04oZpHs0eG5p83rFLhHeV00ug/Yf9nZM6fLeUrPguBTkTQOdpAWWspMh55TZfVQDPaN3NQJfbVRAxdIw== rollup@^4.43.0: - version "4.60.2" - integrity sha512-J9qZyW++QK/09NyN/zeO0dG/1GdGfyp9lV8ajHnRVLfo/uFsbji5mHnDgn/qYdUHyCkM2N+8VyspgZclfAh0eQ== + version "4.60.3" + integrity sha512-pAQK9HalE84QSm4Po3EmWIZPd3FnjkShVkiMlz1iligWYkWQ7wHYd1PF/T7QZ5TVSD6uSTon5gBVMSM4JfBV+A== dependencies: "@types/estree" "1.0.8" optionalDependencies: - "@rollup/rollup-android-arm-eabi" "4.60.2" - "@rollup/rollup-android-arm64" "4.60.2" - "@rollup/rollup-darwin-arm64" "4.60.2" - "@rollup/rollup-darwin-x64" "4.60.2" - "@rollup/rollup-freebsd-arm64" "4.60.2" - "@rollup/rollup-freebsd-x64" "4.60.2" - "@rollup/rollup-linux-arm-gnueabihf" "4.60.2" - "@rollup/rollup-linux-arm-musleabihf" "4.60.2" - "@rollup/rollup-linux-arm64-gnu" "4.60.2" - "@rollup/rollup-linux-arm64-musl" "4.60.2" - "@rollup/rollup-linux-loong64-gnu" "4.60.2" - 
"@rollup/rollup-linux-loong64-musl" "4.60.2" - "@rollup/rollup-linux-ppc64-gnu" "4.60.2" - "@rollup/rollup-linux-ppc64-musl" "4.60.2" - "@rollup/rollup-linux-riscv64-gnu" "4.60.2" - "@rollup/rollup-linux-riscv64-musl" "4.60.2" - "@rollup/rollup-linux-s390x-gnu" "4.60.2" - "@rollup/rollup-linux-x64-gnu" "4.60.2" - "@rollup/rollup-linux-x64-musl" "4.60.2" - "@rollup/rollup-openbsd-x64" "4.60.2" - "@rollup/rollup-openharmony-arm64" "4.60.2" - "@rollup/rollup-win32-arm64-msvc" "4.60.2" - "@rollup/rollup-win32-ia32-msvc" "4.60.2" - "@rollup/rollup-win32-x64-gnu" "4.60.2" - "@rollup/rollup-win32-x64-msvc" "4.60.2" + "@rollup/rollup-android-arm-eabi" "4.60.3" + "@rollup/rollup-android-arm64" "4.60.3" + "@rollup/rollup-darwin-arm64" "4.60.3" + "@rollup/rollup-darwin-x64" "4.60.3" + "@rollup/rollup-freebsd-arm64" "4.60.3" + "@rollup/rollup-freebsd-x64" "4.60.3" + "@rollup/rollup-linux-arm-gnueabihf" "4.60.3" + "@rollup/rollup-linux-arm-musleabihf" "4.60.3" + "@rollup/rollup-linux-arm64-gnu" "4.60.3" + "@rollup/rollup-linux-arm64-musl" "4.60.3" + "@rollup/rollup-linux-loong64-gnu" "4.60.3" + "@rollup/rollup-linux-loong64-musl" "4.60.3" + "@rollup/rollup-linux-ppc64-gnu" "4.60.3" + "@rollup/rollup-linux-ppc64-musl" "4.60.3" + "@rollup/rollup-linux-riscv64-gnu" "4.60.3" + "@rollup/rollup-linux-riscv64-musl" "4.60.3" + "@rollup/rollup-linux-s390x-gnu" "4.60.3" + "@rollup/rollup-linux-x64-gnu" "4.60.3" + "@rollup/rollup-linux-x64-musl" "4.60.3" + "@rollup/rollup-openbsd-x64" "4.60.3" + "@rollup/rollup-openharmony-arm64" "4.60.3" + "@rollup/rollup-win32-arm64-msvc" "4.60.3" + "@rollup/rollup-win32-ia32-msvc" "4.60.3" + "@rollup/rollup-win32-x64-gnu" "4.60.3" + "@rollup/rollup-win32-x64-msvc" "4.60.3" fsevents "~2.3.2" run-parallel@^1.1.9: @@ -3291,12 +3291,12 @@ semver@^6.3.1: integrity sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA== seroval-plugins@^1.5.0: - version "1.5.2" - integrity 
sha512-qpY0Cl+fKYFn4GOf3cMiq6l72CpuVaawb6ILjubOQ+diJ54LfOWaSSPsaswN8DRPIPW4Yq+tE1k5aKd7ILyaFg== + version "1.5.3" + integrity sha512-LhVh4KjjkKmCxOUjoaUwtqbDjyMfnA535yEmmGDuwZcIYtw8ns6tZmeszNTECeUg/3sJpnEjsz/KhQrcPXPw1Q== seroval@^1.5.0: - version "1.5.2" - integrity sha512-xcRN39BdsnO9Tf+VzsE7b3JyTJASItIV1FVFewJKCFcW4s4haIKS3e6vj8PGB9qBwC7tnuOywQMdv5N4qkzi7Q== + version "1.5.3" + integrity sha512-BXe0x4buEeYiIKaRUnth1WqCILQ3k4O67KP/B4pC3pVz0Mv2c96ngA9QDREUYxWY1sb2RZVRqwI9RcpVMyHCVw== set-function-length@^1.2.2: version "1.2.2" @@ -3791,8 +3791,8 @@ yaml@^1.10.0: integrity sha512-vIYeF1u3CjlhAFekPPAk2h/Kv4T3mAkMox5OymRiJQB0spDP10LHvt+K7G9Ny6NuuMAb25/6n1qyUjAcGNf/AA== yaml@^2.8.0, yaml@^2.8.3: - version "2.8.3" - integrity sha512-AvbaCLOO2Otw/lW5bmh9d/WEdcDFdQp2Z2ZUH3pX9U2ihyUY0nvLv7J6TrWowklRGPYbB/IuIMfYgxaCPg5Bpg== + version "2.8.4" + integrity sha512-ml/JPOj9fOQK8RNnWojA67GbZ0ApXAUlN2UQclwv2eVgTgn7O9gg9o7paZWKMp4g0H3nTLtS9LVzhkpOFIKzog== yargs-parser@^21.1.1: version "21.1.1" diff --git a/docs/dqx/docs/dev/contributing.mdx b/docs/dqx/docs/dev/contributing.mdx index 181b2eae0..3da30d43d 100644 --- a/docs/dqx/docs/dev/contributing.mdx +++ b/docs/dqx/docs/dev/contributing.mdx @@ -200,8 +200,11 @@ Before running `make app-start-dev`, configure authentication to a Databricks wo DATABRICKS_CONFIG_PROFILE= # from ~/.databrickscfg DATABRICKS_WAREHOUSE_ID= # SQL Warehouses → connection details DQX_JOB_ID= # optional locally; required for profiler / dry-run +DQX_LAKEBASE_INSTANCE_NAME= # optional locally; empty = OLTP tables run on Delta ``` +Leave `DQX_LAKEBASE_INSTANCE_NAME` empty for most local dev — the app falls back to Delta for the OLTP tables (rules catalog, app settings, RBAC, comments, schedules) so you don't need a Lakebase instance to iterate. To exercise the Lakebase path locally, deploy the bundle once and point the variable at the resulting instance. 
See [app/DEVELOPMENT.md](https://github.com/databrickslabs/dqx/blob/v0.13.0/app/DEVELOPMENT.md) for the full set of `DQX_LAKEBASE_*` variables. + If you don't have a profile yet, run `databricks auth login --host -p ` first. See the [Development Mode](https://github.com/databrickslabs/dqx/blob/v0.13.0/app/README.md#development-mode) section of the app README for more detail. @@ -227,7 +230,7 @@ Deploying DQX Studio to a workspace is required when you want to: - verify a change behaves correctly under the production identity model (service principal + on-behalf-of), or - run a review pass against a deployed app before merging. -For the full step-by-step (service principal creation, asset-bundle deploy, schema/volume permission grants, app start, troubleshooting) follow [app/DEPLOYMENT.md](https://github.com/databrickslabs/dqx/blob/v0.13.0/app/DEPLOYMENT.md). +For the full step-by-step (service principal creation, one-time storage bootstrap, asset-bundle deploy, Lakebase opt-out, post-deploy grants, app start, troubleshooting) follow [app/DEPLOYMENT.md](https://github.com/databrickslabs/dqx/blob/v0.13.0/app/DEPLOYMENT.md). ### Running integration tests and code coverage diff --git a/docs/dqx/docs/installation.mdx b/docs/dqx/docs/installation.mdx index 9184e53bc..db0631e2c 100644 --- a/docs/dqx/docs/installation.mdx +++ b/docs/dqx/docs/installation.mdx @@ -438,7 +438,8 @@ Databricks CLI will confirm a few options: - **Databricks Apps** feature enabled on the workspace. - **User token passthrough** enabled for Databricks Apps — DQX Studio uses On-Behalf-Of (OBO) tokens to access Unity Catalog with the end user's identity. - **Serverless compute** enabled on the workspace — the `dqx-studio-task-runner` job is serverless-only. -- An **existing Unity Catalog catalog** where the studio's schemas and volumes will be created. The bundle does not create the catalog itself. +- An **existing Unity Catalog catalog** where the studio's schemas and volumes will be provisioned. 
The bundle does not create the catalog itself; `make app-deploy` provisions the schemas and wheels volume *inside* the catalog automatically. +- **Lakebase Postgres** enabled on the workspace if you keep the default backend layout. DQX Studio stores its OLTP state (rules catalog, app settings, RBAC, comments, schedule configs, scheduler bookkeeping) in a Lakebase instance for sub-millisecond reads. The bundle creates the instance for you. If your workspace doesn't have Lakebase, you can opt out and run everything on Delta — see the **Hybrid storage backend** admonition under the install steps below. ### Install DQX Studio using a Declarative Automation Bundle @@ -464,26 +465,45 @@ Databricks CLI will confirm a few options: # admin_group: # default: admins # app_name: # default: dqx-studio # sql_warehouse_name: # default: dqx-studio-sql-warehouse - # schema_name: # default: dqx_app + # schema_name: # default: dqx_studio + + # Lakebase backend (default: enabled). Set lakebase_instance_name + # to "" to disable Lakebase and run all OLTP tables on Delta. + # lakebase_instance_name: dqx-studio-lakebase + # lakebase_database_name: dqx_studio + # lakebase_capacity: CU_1 # CU_1 / CU_2 / CU_4 / CU_8 presets: trigger_pause_status: PAUSED ``` - See the [DQX Studio deployment guide](https://github.com/databrickslabs/dqx/blob/v0.13.0/app/DEPLOYMENT.md#step-3-configure-databricksyml) for the full reference of each variable, including security implications of `admin_group` and when to override per target. + See the [DQX Studio deployment guide](https://github.com/databrickslabs/dqx/blob/v0.13.0/app/DEPLOYMENT.md#step-4-configure-databricksyml) for the full reference of each variable, including security implications of `admin_group` and when to override per target. + +4. 
(One-time, only on a workspace whose schemas / volume / Lakebase instance were created out-of-band before adopting this layout) adopt them into bundle management: + ```commandline + make app-bind PROFILE= TARGET= + ``` -4. Build, deploy, grant permissions, and start the studio in a single command: +5. Build, deploy, grant permissions, and start the studio in a single command: ```commandline make app-deploy PROFILE= TARGET= ``` - This runs `make app-build`, `databricks bundle deploy`, `app/scripts/post_deploy_grants.sh`, and `databricks bundle run` in sequence. + This runs `make app-build`, `databricks bundle deploy` (provisions the schemas, wheels volume, Lakebase Postgres instance, logical Postgres database, SQL warehouse, task-runner job, and Databricks App in dependency order; all stateful resources carry `lifecycle.prevent_destroy: true`), `app/scripts/post_deploy_grants.sh`, and `databricks bundle run` in sequence. -5. Open the deployed app from the **Apps** page in your Databricks workspace. +6. Open the deployed app from the **Apps** page in your Databricks workspace. -For the full walkthrough — including step-by-step commands, manual `GRANT` statements, troubleshooting, and target-specific configuration — see the [DQX Studio deployment guide](https://github.com/databrickslabs/dqx/blob/v0.13.0/app/DEPLOYMENT.md). + +The studio's schemas (`dqx_studio`, `dqx_studio_tmp`), wheels volume, Lakebase Postgres instance, and the logical Postgres database inside it are all declared as bundle resources with `lifecycle.prevent_destroy: true` (Databricks CLI 0.268+). `databricks bundle destroy` is blocked from dropping them, so production data survives accidental destroy/replace operations. To intentionally tear something down, remove the flag, `databricks bundle deployment unbind `, then destroy manually. 
+ + +For the full walkthrough — including step-by-step commands, manual `GRANT` statements, troubleshooting, target-specific configuration, and the bind workflow for adopting existing resources — see the [DQX Studio deployment guide](https://github.com/databrickslabs/dqx/blob/v0.13.0/app/DEPLOYMENT.md). + + +DQX Studio splits its data across two physical backends: high-volume append-mostly tables (`dq_validation_runs`, `dq_profiling_results`, `dq_quarantine_records`, `dq_metrics`) live in **Delta Lake** because they're written by Spark; transactional tables (rules catalog, app settings, RBAC, comments, schedule configs) live in **Lakebase Postgres** for fast row-level reads/writes from the FastAPI request handlers. + -On its first start, DQX Studio runs database migrations and uploads its DQX wheel files to the Unity Catalog volume. If the task-runner job is triggered before the app has fully started at least once, it will fail to find its wheels. Wait for `Uploaded databricks_labs_dqx-...` in the app logs before triggering any profiler or dry-run jobs. +On its first start, DQX Studio runs database migrations (Delta and, if enabled, Lakebase) and uploads its DQX wheel files to the Unity Catalog volume. If the task-runner job is triggered before the app has fully started at least once, it will fail to find its wheels. Wait for `Uploaded databricks_labs_dqx-...` in the app logs before triggering any profiler or dry-run jobs. If Lakebase is enabled, also wait for `Lakebase OLTP routing enabled` before opening the UI — the app falls back to UC-only mode if Lakebase init fails. ### Upgrade DQX Studio @@ -495,17 +515,36 @@ git pull make app-deploy PROFILE= TARGET= ``` -Database migrations run automatically on app startup and preserve existing rules, runs, schedules, and configuration. +Database migrations run automatically on app startup and preserve existing rules, runs, schedules, and configuration. 
All stateful resources are declared with `lifecycle.prevent_destroy: true`, so even an accidental `databricks bundle destroy` won't drop them. ### Uninstall DQX Studio -Destroy the bundle to remove the app, the task-runner job, and the SQL warehouse: - -```commandline -cd app && databricks bundle destroy -p -t -``` +`databricks bundle destroy` removes the app, the task-runner job, and the SQL warehouse, but is **blocked** from dropping the stateful resources (schemas, volume, Lakebase instance, Lakebase logical DB) by their `prevent_destroy` flag. To fully uninstall, you have to opt in to the destroy: + +1. Edit `app/databricks.yml` and remove `lifecycle.prevent_destroy: true` from each of the stateful resources you want to drop. +2. Unbind the resources so the bundle no longer tracks them, then destroy: + ```bash + cd app + databricks bundle deployment unbind main_schema -t + databricks bundle deployment unbind tmp_schema -t + databricks bundle deployment unbind wheels -t + databricks bundle deployment unbind lakebase_db -t + databricks bundle deployment unbind lakebase -t + databricks bundle destroy -p -t + ``` +3. Drop the now-unbound resources manually if desired: + ```sql + DROP VOLUME IF EXISTS .dqx_studio.wheels; + DROP SCHEMA IF EXISTS .dqx_studio CASCADE; + DROP SCHEMA IF EXISTS .dqx_studio_tmp CASCADE; + ``` + ```bash + databricks database delete-database-instance dqx-studio-lakebase -p + ``` -The Unity Catalog catalog and the studio's schemas (which contain historical rules and run results) are not removed by `bundle destroy` — drop them manually if you no longer need the data. + +`prevent_destroy` is what stops a `databricks bundle destroy` (or a forced replace from a deploy) from wiping your rules, schedules, role mappings, and comments. The multi-step uninstall above is by design — silent data loss is the bigger risk to protect against. 
+ ## Installing Dashboard From 1f6e53d6b150bb883a66e820ef5e16970075c6b4 Mon Sep 17 00:00:00 2001 From: Tasha Date: Tue, 12 May 2026 15:13:49 +0200 Subject: [PATCH 02/12] =?UTF-8?q?feat(app):=20simplify=20Lakebase=20backen?= =?UTF-8?q?d=20=E2=80=94=20connect=20to=20databricks=5Fpostgres,=20drop=20?= =?UTF-8?q?database=5Fcatalogs?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The database_catalogs DAB resource is the only one that creates a logical Postgres database, but it also creates a Unity Catalog catalog as a side effect and therefore requires CREATE CATALOG on the metastore — a permission most app deployers don't hold. Drop it. Connect the app to the always-present databricks_postgres admin database instead; per-app isolation comes from the dedicated dqx_studio Postgres schema the app creates inside it on first start. The bundle stays fully declarative with no out-of-band bootstrap steps. - databricks.yml: remove database_catalogs.lakebase_db and the lakebase_uc_catalog_name variable. Default lakebase_database_name to 'databricks_postgres'. App's database: binding now references ${var.lakebase_database_name} directly. - Makefile: add BUNDLE_VARS forwarding to 'make app-deploy' so one-off CLI overrides (e.g. lakebase_instance_name during Lakebase's 7-day soft-delete name retention) don't require ad-hoc databricks.yml edits. - bind_resources.sh: pass --auto-approve explicitly (newer databricks CLI versions reject piped 'yes' confirmation). Drop the lakebase_db bind step (no longer a bundle resource). - Documentation updates across DEPLOYMENT.md, CLAUDE.md files, and installation.mdx to reflect the new layout: no separate logical-DB provisioning step, uninstall drops a Postgres schema instead of a database. 
--- Makefile | 13 +++- app/CLAUDE.md | 9 +-- app/DEPLOYMENT.md | 27 ++++---- app/databricks.yml | 66 +++++++++---------- app/scripts/bind_resources.sh | 36 +++++----- .../databricks_labs_dqx_app/backend/CLAUDE.md | 47 ++++++++----- docs/dqx/docs/installation.mdx | 11 +++- 7 files changed, 120 insertions(+), 89 deletions(-) diff --git a/Makefile b/Makefile index 9b48afb50..8589d1ac9 100644 --- a/Makefile +++ b/Makefile @@ -149,12 +149,21 @@ app-bind: # the existing resources and fail. # # Usage: make app-deploy PROFILE=my-profile TARGET=dev +# make app-deploy PROFILE=my-profile TARGET=dev \ +# BUNDLE_VARS='--var=lakebase_instance_name=' +# +# BUNDLE_VARS forwards arbitrary ``--var key=value`` arguments to +# ``bundle deploy`` and ``bundle run``. The common use case is +# overriding ``lakebase_instance_name`` when the original name is +# locked by Lakebase's ~7-day soft-delete retention after a manual +# delete (the deploy errors out with "Instance name is not unique" +# otherwise). Per-deploy CLI overrides keep ``databricks.yml`` clean. 
app-deploy: app-build @test -n "$(PROFILE)" || (echo "Usage: make app-deploy PROFILE= TARGET="; exit 1) @test -n "$(TARGET)" || (echo "Usage: make app-deploy PROFILE= TARGET="; exit 1) - cd app && databricks bundle deploy -p $(PROFILE) -t $(TARGET) + cd app && databricks bundle deploy -p $(PROFILE) -t $(TARGET) $(BUNDLE_VARS) app/scripts/post_deploy_grants.sh -p $(PROFILE) -t $(TARGET) - cd app && databricks bundle run $(APP_NAME) -p $(PROFILE) -t $(TARGET) + cd app && databricks bundle run $(APP_NAME) -p $(PROFILE) -t $(TARGET) $(BUNDLE_VARS) APP_NAME ?= dqx-studio diff --git a/app/CLAUDE.md b/app/CLAUDE.md index f0eec0006..471dd75d2 100644 --- a/app/CLAUDE.md +++ b/app/CLAUDE.md @@ -86,17 +86,18 @@ Lakebase database (when enabled, default = `dqx-studio-lakebase`): ## Bundle conventions -All stateful resources are declared in `databricks.yml`: +Stateful resources declared in `databricks.yml`: - `resources.schemas.main_schema` — `dqx_studio` schema - `resources.schemas.tmp_schema` — `dqx_studio_tmp` schema - `resources.volumes.wheels` — wheels volume - `resources.database_instances.lakebase` — Lakebase Postgres instance (autoscaling) -- `resources.database_catalogs.lakebase_db` — logical Postgres database (via `create_database_if_not_exists: true`) and a surrounding UC catalog (informational; the app connects directly via psycopg) -Every one of them carries `lifecycle.prevent_destroy: true` (Databricks CLI 0.268+), which blocks `databricks bundle destroy` and any deploy that would force-replace the resource. To intentionally tear something down: drop the flag, `databricks bundle deployment unbind -t `, then destroy. +Each carries `lifecycle.prevent_destroy: true` (Databricks CLI 0.268+), which blocks `databricks bundle destroy` and any deploy that would force-replace the resource. To intentionally tear something down: drop the flag, `databricks bundle deployment unbind -t `, then destroy. -For workspaces where these resources already exist (e.g. 
created out-of-band before this layout existed), run `make app-bind PROFILE=... TARGET=...` once per target to adopt them — otherwise `databricks bundle deploy` errors out with "already exists" / "Instance name is not unique". +The app connects to the always-present `databricks_postgres` admin database on the Lakebase instance (set as the default `lakebase_database_name`) and creates its own `dqx_studio` Postgres schema there on first start. No DAB resource is needed to provision a per-app logical database; the bundle stays fully declarative. We deliberately do not use `database_catalogs` because it also creates a Unity Catalog catalog and therefore requires `CREATE CATALOG` on the metastore — a permission most app deployers don't hold. + +For workspaces where the schemas / volume / Lakebase instance already exist (e.g. created out-of-band before this layout existed), run `make app-bind PROFILE=... TARGET=...` once per target to adopt them — otherwise `databricks bundle deploy` errors out with "already exists" / "Instance name is not unique". Privileges on UC objects for the auto-created app SP are still reapplied with `scripts/post_deploy_grants.sh` after each deploy, because the app SP's UUID isn't known at bundle-write time. diff --git a/app/DEPLOYMENT.md b/app/DEPLOYMENT.md index c54c3356e..ef40a3a92 100644 --- a/app/DEPLOYMENT.md +++ b/app/DEPLOYMENT.md @@ -22,7 +22,7 @@ The deploying user (you) needs the permissions below. 
They are **all** consumed | 2 | **Databricks SQL access** entitlement | You, in the workspace | `bundle deploy` (creates the X-Small SQL warehouse) | `Error: not authorized to create SQL Endpoint` | | 3 | **Allow cluster create** entitlement | You, in the workspace | `bundle deploy` (warehouse + job clusters) | Warehouse / job creation rejected | | 4 | **Databricks Apps: Can Manage** workspace permission | You, in the workspace | `bundle deploy` of the App resource | App creation rejected | -| 5 | **Databricks Database (Lakebase): Manager** entitlement | You, in the workspace | `bundle deploy` of the `database_instances` and `database_catalogs` resources | `Error: User does not have permission to create database instances` | +| 5 | **Databricks Database (Lakebase): Manager** entitlement | You, in the workspace | `bundle deploy` of the `database_instances` resource | `Error: User does not have permission to create database instances` | | 6 | **USE CATALOG** + **CREATE SCHEMA** on `` | Your user or an admin group you're in | `bundle deploy` of the `schemas` and `volumes` resources | `Error: User does not have CREATE_SCHEMA on catalog ''` | | 7 | **MANAGE** on `` (or be the catalog owner) | Your user or an admin group you're in | `post_deploy_grants.sh` (issues `GRANT USE CATALOG / ALL PRIVILEGES … TO ` and `… TO account users`) | `Error: User does not have privilege MANAGE on catalog ''` | | 8 | **Service Principal: User** role on the task-runner SP | Your user, on the SP you'll use as `dqx_service_principal_application_id` | `bundle deploy` of the `jobs.dqx_task_runner` resource (sets `run_as.service_principal_name`) | `Error: User is not authorized to use this service principal` | @@ -41,11 +41,11 @@ These are configured at the workspace or account level — not by you, not by th - **Databricks Apps** is enabled on the workspace - **User token passthrough** (a.k.a. 
user authorization / OBO) is enabled for Databricks Apps — see [Step 2](#step-2-enable-user-token-passthrough). Without this the app can't make OBO calls and Unity Catalog browsing fails. - **Serverless compute** is enabled on the workspace — the task-runner job runs exclusively on serverless -- **Lakebase Postgres** is enabled on the workspace (default OLTP backend). The Lakebase instance, logical Postgres database, and surrounding UC catalog are declared as bundle resources and provisioned by `databricks bundle deploy`. They carry `lifecycle.prevent_destroy: true` so a `bundle destroy` cannot drop them and wipe OLTP state — see [Stateful storage and destroy protection](#stateful-storage-and-destroy-protection). +- **Lakebase Postgres** is enabled on the workspace (default OLTP backend). The Lakebase instance is declared as a bundle resource (`resources.database_instances.lakebase`) with `lifecycle.prevent_destroy: true` so a `bundle destroy` cannot drop it and wipe OLTP state — see [Stateful storage and destroy protection](#step-3-stateful-storage-and-destroy-protection). The app connects to the always-present `databricks_postgres` admin database on the instance and creates its own `dqx_studio` Postgres schema inside it on first connection — no separate logical-DB provisioning step. ### The catalog must already exist -The bundle **does not create the catalog itself** — that's deliberate. Catalogs are typically owned by a governance team and creating them requires `CREATE CATALOG` on the metastore. Pick an existing catalog you (or an admin group you're in) have rights on, and set `catalog_name` in [Step 4](#step-4-configure-databricksyml). The bundle creates the schemas (`dqx_studio`, `dqx_studio_tmp`) and the wheels volume *inside* that catalog. 
The Lakebase-backed UC catalog (`dqx_studio_lakebase` by default) is created at the metastore level by the `database_catalogs` resource — you need `CREATE CATALOG` on the metastore for the first deploy if it doesn't already exist. +The bundle **does not create the catalog itself** — that's deliberate. Catalogs are typically owned by a governance team and creating them requires `CREATE CATALOG` on the metastore. Pick an existing catalog you (or an admin group you're in) have rights on, and set `catalog_name` in [Step 4](#step-4-configure-databricksyml). The bundle creates the schemas (`dqx_studio`, `dqx_studio_tmp`) and the wheels volume *inside* that catalog — no `CREATE CATALOG` permission required at the metastore level. ## Step 1: Create a Service Principal @@ -73,17 +73,19 @@ Contact your workspace admin or enable it via the workspace settings if not alre ## Step 3: Stateful storage and destroy protection -DQX Studio's stateful resources — the two schemas (`dqx_studio`, `dqx_studio_tmp`), the wheels volume, the Lakebase instance, and the Lakebase logical Postgres database — are all declared as bundle resources in `app/databricks.yml`. Each one carries `lifecycle.prevent_destroy: true` (Databricks CLI 0.268+), which **blocks `databricks bundle destroy` from dropping the resource** and wiping the data. Use this command line to verify: +DQX Studio's stateful resources — the two schemas (`dqx_studio`, `dqx_studio_tmp`), the wheels volume, and the Lakebase instance — are all declared as bundle resources in `app/databricks.yml`. Each one carries `lifecycle.prevent_destroy: true` (Databricks CLI 0.268+), which **blocks `databricks bundle destroy` from dropping the resource** and wiping the data. Use this command line to verify: ```bash grep -A1 'lifecycle:' app/databricks.yml | head ``` -You'll see one `prevent_destroy: true` for each of: `schemas.main_schema`, `schemas.tmp_schema`, `volumes.wheels`, `database_instances.lakebase`, `database_catalogs.lakebase_db`. 
+You'll see one `prevent_destroy: true` for each of: `schemas.main_schema`, `schemas.tmp_schema`, `volumes.wheels`, `database_instances.lakebase`. + +> **The app's `dqx_studio` Postgres schema** (inside the `databricks_postgres` admin database on the Lakebase instance) is created by the app at first start. It's stateful but lives below the resource layer DABs models, so `prevent_destroy` doesn't apply to it directly. The instance-level guard above is what protects it: as long as `database_instances.lakebase` survives, the schema and its tables survive. What this means in practice: -- **Fresh workspace** — `databricks bundle deploy` creates everything in the right order (schemas → volume → Lakebase instance → Lakebase logical DB → SQL warehouse → job → app). No extra bootstrap step. +- **Fresh workspace** — `make app-deploy` does everything in one command: `databricks bundle deploy` provisions the schemas → volume → Lakebase instance → SQL warehouse → job → app in dependency order, then `post_deploy_grants.sh` issues catalog/schema/volume GRANTs. - **Existing workspace** where these resources were created out-of-band (e.g. from a previous version of this app that used a bootstrap script) — you must **bind** them into bundle management once per target. See [Migrating an existing workspace](#migrating-an-existing-workspace). - **Schema drift** — if you change `catalog_name`, `schema_name`, or `lakebase_instance_name` in a way that would force the bundle to delete and recreate the resource, `prevent_destroy` blocks the destroy step and the deploy fails fast (good — the alternative is silent data loss). Treat those names as immutable. - **Intentional teardown** — to drop a protected resource, remove `lifecycle.prevent_destroy: true` from `databricks.yml`, run `databricks bundle deployment unbind -t ` to detach it from bundle state, then destroy it manually. 
@@ -129,11 +131,10 @@ All target-level variables, their defaults, and what they control: | `tmp_schema_name` | `dqx_studio_tmp` | No | Per-user temp-view schema. Declared as `resources.schemas.tmp_schema` with `lifecycle.prevent_destroy: true`. | | `wheels_volume_name` | `wheels` | No | UC volume under `.` for the DQX + task-runner wheels. Declared as `resources.volumes.wheels` with `lifecycle.prevent_destroy: true`. | | `lakebase_instance_name` | `dqx-studio-lakebase` | No | Lakebase Postgres instance for OLTP state. Declared as `resources.database_instances.lakebase` with `lifecycle.prevent_destroy: true`. Autoscaling by default per [Lakebase Autoscaling](https://docs.databricks.com/aws/en/oltp/upgrade-to-autoscaling). | -| `lakebase_database_name` | `dqx_studio` | No | Logical Postgres database inside the Lakebase instance. Created by `resources.database_catalogs.lakebase_db` (`create_database_if_not_exists: true`). | -| `lakebase_uc_catalog_name` | `dqx_studio_lakebase` | No | UC catalog created by the `database_catalogs` resource. The app connects to Postgres directly via psycopg, so this UC catalog is informational only — it lets you ad-hoc query the Postgres tables via UC SQL. | +| `lakebase_database_name` | `databricks_postgres` | No | Logical Postgres database inside the Lakebase instance the app connects to. Defaults to `databricks_postgres` (always present, no provisioning step). All DQX tables live in a dedicated `dqx_studio` Postgres schema inside this database, so multiple apps can safely share the same `databricks_postgres` on one Lakebase instance. Override only if you've manually created a different logical DB you want to use. | | `lakebase_capacity` | `CU_1` | No | Lakebase compute capacity. Valid values: `CU_1`, `CU_2`, `CU_4`, `CU_8`. To resize an existing instance, change this value and redeploy. Bump up if Lakebase queries queue in the app logs. 
| -> **Note on duplicate names in Databricks:** SQL warehouses, jobs, and apps within the same workspace are tracked by ID, not by name, so technically duplicates are allowed. Lakebase database instances are looked up by name in the bootstrap script, so they're effectively unique-per-workspace. Operators browse the Jobs / Apps / Warehouses / Databases UI by name, so distinct names per target are strongly recommended when you deploy more than one target to the same workspace. +> **Note on duplicate names in Databricks:** SQL warehouses, jobs, and apps within the same workspace are tracked by ID, not by name, so technically duplicates are allowed. Lakebase database instances are looked up by name by the app at runtime, so they're effectively unique-per-workspace. Operators browse the Jobs / Apps / Warehouses / Databases UI by name, so distinct names per target are strongly recommended when you deploy more than one target to the same workspace. ## Step 5: One-Command Deploy (recommended) @@ -150,7 +151,7 @@ make app-deploy PROFILE= TARGET= `make app-deploy` runs the following steps automatically: 1. `make app-build` — builds the frontend and wheels. -2. `databricks bundle deploy` — provisions or updates the schemas, wheels volume, Lakebase instance, Lakebase logical Postgres database, SQL warehouse, task-runner job, and Databricks App in dependency order. Stateful resources carry `lifecycle.prevent_destroy: true` so a future destroy can't drop them — see [Step 3](#step-3-stateful-storage-and-destroy-protection). +2. `databricks bundle deploy` — provisions or updates the schemas, wheels volume, Lakebase instance, SQL warehouse, task-runner job, and Databricks App in dependency order. Stateful resources carry `lifecycle.prevent_destroy: true` so a future destroy can't drop them — see [Step 3](#step-3-stateful-storage-and-destroy-protection). 3. 
`app/scripts/post_deploy_grants.sh` — discovers both service principals and executes the `GRANT` statements on the catalog, schemas, and volume (the auto-created app SP's UUID isn't known at bundle-write time, which is why grants live in a post-deploy script). Lakebase grants are handled by the bundle's `database` resource binding. 4. `databricks bundle run` — starts the app. @@ -167,8 +168,8 @@ make app-build # (One-time, only on a workspace whose storage was created out-of-band) make app-bind PROFILE= TARGET= -# Deploy the bundle (creates / updates all resources, including -# schemas, volume, Lakebase instance and logical DB) +# Deploy the bundle (creates / updates schemas, volume, Lakebase +# instance, SQL warehouse, task-runner job, app) cd app && databricks bundle deploy -p -t # Grant permissions to the app SP (auto-discovered after deploy) @@ -220,7 +221,7 @@ DQX Studio stores its **OLTP state** — rules catalog, app settings, RBAC, comm | Delta Lake | `dq_validation_runs`, `dq_profiling_results`, `dq_quarantine_records`, `dq_metrics` | High-volume append; Spark task runner writes them; columnar reads. | | Lakebase Postgres | `dq_app_settings`, `dq_role_mappings`, `dq_quality_rules`, `dq_quality_rules_history`, `dq_comments`, `dq_schedule_configs`, `dq_schedule_configs_history`, `dq_schedule_runs` | OLTP — sub-ms reads from FastAPI handlers, row-level upserts, primary keys. | -The Lakebase instance, logical Postgres database, and surrounding UC catalog are declared as bundle resources (`database_instances.lakebase`, `database_catalogs.lakebase_db`) and provisioned by `databricks bundle deploy`. All three carry `lifecycle.prevent_destroy: true` — see [Step 3](#step-3-stateful-storage-and-destroy-protection). +The Lakebase instance is declared as a bundle resource (`database_instances.lakebase`) and provisioned by `databricks bundle deploy` with `lifecycle.prevent_destroy: true` — see [Step 3](#step-3-stateful-storage-and-destroy-protection). 
The app connects to the always-present `databricks_postgres` admin database on the instance and creates its own `dqx_studio` Postgres schema there on first start; nothing else needs to be provisioned. ### Lakebase token rotation diff --git a/app/databricks.yml b/app/databricks.yml index 6c075a76f..c17447c18 100644 --- a/app/databricks.yml +++ b/app/databricks.yml @@ -38,25 +38,33 @@ variables: # dq_quarantine_records) stay in Delta — those are written by the # Spark task runner and consumed by AI/BI dashboards. # - # The Lakebase instance and its logical Postgres database are - # declared as bundle resources below with + # The Lakebase instance is declared as a bundle resource below with # ``lifecycle.prevent_destroy: true``. New Lakebase instances are # autoscaling by default (see # https://docs.databricks.com/aws/en/oltp/upgrade-to-autoscaling). + # + # Connection target: ``databricks_postgres`` — the always-present + # admin database every Lakebase instance ships with. We don't create + # a per-app logical Postgres database because the only DAB resource + # that creates one (``database_catalogs``) also creates a Unity + # Catalog catalog and therefore requires ``CREATE CATALOG`` on the + # metastore — a permission most app deployers don't hold. Using + # ``databricks_postgres`` keeps the bundle fully declarative with no + # external bootstrap steps, and we still get per-app isolation via a + # dedicated Postgres schema (``DQX_LAKEBASE_SCHEMA_NAME`` in the app + # config, defaults to ``dqx_studio``). lakebase_instance_name: description: "Name of the Lakebase Postgres instance for OLTP state." default: "dqx-studio-lakebase" lakebase_database_name: - description: "Logical Postgres database the app connects to inside the Lakebase instance." - default: "dqx_studio" - lakebase_uc_catalog_name: description: > - UC catalog created by the database_catalogs resource. 
``database_catalogs`` - surfaces the Lakebase Postgres database in Unity Catalog (so it's queryable - via UC SQL) and is the only DAB resource that can create the logical - Postgres database — that's why we use it. The app itself connects to - Postgres directly via psycopg, so this UC catalog is informational only. - default: "dqx_studio_lakebase" + Logical Postgres database the app connects to inside the Lakebase + instance. Defaults to ``databricks_postgres`` (always present, no + creation required). All DQX tables live inside a dedicated + ``dqx_studio`` schema within this database for isolation, so + multiple apps can safely share ``databricks_postgres`` on the + same Lakebase instance. + default: "databricks_postgres" lakebase_capacity: description: "Lakebase compute capacity (CU_1, CU_2, CU_4, CU_8)" default: "CU_1" @@ -88,10 +96,12 @@ variables: value: "${var.admin_group}" - name: "DQX_WHEELS_VOLUME" value: "/Volumes/${var.catalog_name}/${var.schema_name}/${var.wheels_volume_name}" - # Lakebase wiring. The instance and logical Postgres database - # are declared as bundle resources further down (with - # ``lifecycle.prevent_destroy: true`` so ``bundle destroy`` - # cannot drop them). + # Lakebase wiring. The instance is declared as a bundle + # resource further down (with ``lifecycle.prevent_destroy: true`` + # so ``bundle destroy`` cannot drop it). The logical database + # defaults to the always-present ``databricks_postgres``; the + # app creates its ``dqx_studio`` Postgres schema inside it on + # first connection. - name: "DQX_LAKEBASE_INSTANCE_NAME" value: "${var.lakebase_instance_name}" - name: "DQX_LAKEBASE_DATABASE_NAME" @@ -163,13 +173,17 @@ resources: permission: "CAN_MANAGE" # Lakebase Postgres for OLTP state (rules catalog, app # settings, RBAC, comments, schedule configs, scheduler - # bookkeeping). 
The instance + logical Postgres database are - # declared as bundle resources below; this entry binds the app - # to them so DABs sets up CAN_CONNECT_AND_CREATE automatically. + # bookkeeping). The instance is declared as a bundle resource + # below; the database defaults to the always-present + # ``databricks_postgres`` admin DB. This entry binds the app + # to the (instance, database) pair so DABs configures + # ``CAN_CONNECT_AND_CREATE`` automatically — that lets the app + # SP create its ``dqx_studio`` schema inside the bound DB on + # first connection. - name: "dqx-lakebase" description: "Lakebase Postgres backend for app OLTP state" database: - database_name: ${resources.database_catalogs.lakebase_db.database_name} + database_name: ${var.lakebase_database_name} instance_name: ${resources.database_instances.lakebase.name} permission: "CAN_CONNECT_AND_CREATE" @@ -235,19 +249,6 @@ resources: lifecycle: prevent_destroy: true - database_catalogs: - # Creates the logical Postgres database AND a Unity Catalog catalog - # that surfaces it for ad-hoc UC SQL queries. The app itself - # connects to Postgres directly via psycopg; the UC catalog is - # informational and not on the request path. 
- lakebase_db: - database_instance_name: ${resources.database_instances.lakebase.name} - name: ${var.lakebase_uc_catalog_name} - database_name: ${var.lakebase_database_name} - create_database_if_not_exists: true - lifecycle: - prevent_destroy: true - jobs: dqx_task_runner: name: "dqx-studio-task-runner" @@ -321,7 +322,6 @@ targets: # # wheels_volume_name: # # lakebase_instance_name: # # lakebase_database_name: - # # lakebase_uc_catalog_name: # # lakebase_capacity: CU_1 | CU_2 | CU_4 | CU_8 # presets: # trigger_pause_status: PAUSED diff --git a/app/scripts/bind_resources.sh b/app/scripts/bind_resources.sh index a3cb10128..e8e4d1eac 100755 --- a/app/scripts/bind_resources.sh +++ b/app/scripts/bind_resources.sh @@ -2,12 +2,17 @@ # # One-time binding of pre-existing storage resources to the bundle. # -# Use this when the schemas / volume / Lakebase instance / Lakebase -# logical database already exist in the target workspace (e.g. from -# the previous bootstrap-script flow, or from manual creation) and -# you're adopting them into the bundle for the first time. Without -# binding, ``databricks bundle deploy`` tries to CREATE the resources -# and fails with "already exists". +# Use this when the schemas / volume / Lakebase instance already +# exist in the target workspace (e.g. from the previous bootstrap- +# script flow, or from manual creation) and you're adopting them into +# the bundle for the first time. Without binding, ``databricks bundle +# deploy`` tries to CREATE the resources and fails with "already +# exists". +# +# The app connects to the always-present ``databricks_postgres`` admin +# database on the Lakebase instance (no separate logical database to +# provision or bind), and creates its ``dqx_studio`` Postgres schema +# there on first start. # # Bind is idempotent at the CLI level; re-running this script on a # fully-bound workspace is a no-op (the CLI replies "already bound"). 
@@ -67,7 +72,6 @@ SCHEMA=$(echo "$BUNDLE_JSON" | jq -r '.variables.schema_name.value // .variables TMP_SCHEMA=$(echo "$BUNDLE_JSON" | jq -r '.variables.tmp_schema_name.value // .variables.tmp_schema_name.default // "dqx_studio_tmp"') VOLUME=$(echo "$BUNDLE_JSON" | jq -r '.variables.wheels_volume_name.value // .variables.wheels_volume_name.default // "wheels"') LB_INSTANCE=$(echo "$BUNDLE_JSON" | jq -r '.variables.lakebase_instance_name.value // .variables.lakebase_instance_name.default // empty') -LB_UC_CATALOG=$(echo "$BUNDLE_JSON" | jq -r '.variables.lakebase_uc_catalog_name.value // .variables.lakebase_uc_catalog_name.default // empty') if [[ -z "$CATALOG" ]]; then echo "ERROR: catalog_name is not configured in the bundle target." >&2 @@ -80,19 +84,20 @@ echo " Main schema: $SCHEMA" echo " Tmp schema: $TMP_SCHEMA" echo " Volume: $VOLUME" echo " Lakebase: $LB_INSTANCE" -echo " Lakebase UC cat: $LB_UC_CATALOG" echo "" -# ``databricks bundle deployment bind`` is interactive by default. We -# pipe ``yes`` so this runs unattended; the prompt only confirms that -# updates to the resource in the bundle will be applied to the -# existing remote resource on the next deploy — which is exactly what -# we want, so auto-confirming is safe. +# ``databricks bundle deployment bind`` requires explicit ``--auto-approve`` +# when stdin is not a TTY (newer CLI versions stopped accepting piped +# ``yes`` and now error out with "current console does not support +# prompting"). The confirmation it would otherwise prompt for is just +# "apply bundle resource updates to the existing remote resource on the +# next deploy" — which is exactly what we want, so auto-approving is +# safe. bind() { local key="$1" local id="$2" echo " binding ${key} -> ${id}" - if ! yes | $CLI bundle deployment bind "$key" "$id" "${BUNDLE_FLAGS[@]}"; then + if ! $CLI bundle deployment bind "$key" "$id" --auto-approve "${BUNDLE_FLAGS[@]}"; then echo " WARNING: bind for ${key} failed. 
It may already be bound, or the remote resource may not exist yet." >&2 fi } @@ -104,9 +109,6 @@ bind wheels "${CATALOG}.${SCHEMA}.${VOLUME}" if [[ -n "$LB_INSTANCE" ]]; then bind lakebase "$LB_INSTANCE" fi -if [[ -n "$LB_UC_CATALOG" ]]; then - bind lakebase_db "$LB_UC_CATALOG" -fi echo "" echo "==> Bind complete. Run 'make app-deploy PROFILE=$PROFILE TARGET=$TARGET' next." diff --git a/app/src/databricks_labs_dqx_app/backend/CLAUDE.md b/app/src/databricks_labs_dqx_app/backend/CLAUDE.md index 245479fa8..7b6b46fe7 100644 --- a/app/src/databricks_labs_dqx_app/backend/CLAUDE.md +++ b/app/src/databricks_labs_dqx_app/backend/CLAUDE.md @@ -242,7 +242,7 @@ to their native syntax. ### Bundle / DAB conventions -All stateful resources are declared in `databricks.yml` with +Stateful resources declared in `databricks.yml` with `lifecycle.prevent_destroy: true` (Databricks CLI 0.268+): * `resources.schemas.main_schema` — `dqx_studio` schema @@ -250,25 +250,38 @@ All stateful resources are declared in `databricks.yml` with * `resources.volumes.wheels` — wheels volume * `resources.database_instances.lakebase` — Lakebase Postgres instance (autoscaling by default per [Lakebase Autoscaling](https://docs.databricks.com/aws/en/oltp/upgrade-to-autoscaling)) -* `resources.database_catalogs.lakebase_db` — logical Postgres database - via `create_database_if_not_exists: true`, plus a surrounding Unity - Catalog catalog (informational only; the app connects to Postgres - directly via psycopg) -The app→database binding stays in `resources.apps.dqx-studio.resources`, -referencing the bundle resources so DABs orders the deploy correctly -(instance + logical DB created before the app binds to them). +The app connects to the always-present `databricks_postgres` admin +database on the Lakebase instance — that's the default value of +`lakebase_database_name` and the value the app→database binding +wires up. 
On first start, the app creates its own `dqx_studio` +Postgres schema inside `databricks_postgres` and runs migrations +against it. Multiple apps can therefore share the same +`databricks_postgres` on one Lakebase instance safely; each gets its +own schema namespace. + +The bundle deliberately does NOT use `database_catalogs`. That DAB +resource is the only way to *create* a custom logical Postgres +database, but it also creates a Unity Catalog catalog as a side +effect and therefore requires `CREATE CATALOG` on the metastore — a +permission most app deployers don't hold. Connecting to the +pre-existing `databricks_postgres` instead keeps the bundle fully +declarative with no out-of-band bootstrap step and no metastore-level +permissions assumed. `prevent_destroy` blocks `databricks bundle destroy` and any deploy -that would force-replace the resource — the alternative is silent data -loss. To intentionally tear something down: remove the flag, run -`databricks bundle deployment unbind `, then destroy. - -For workspaces where the resources were provisioned out-of-band before -this layout existed (e.g. by the legacy bootstrap script), one-time -binding is required: `make app-bind PROFILE=... TARGET=...`. After bind, -`bundle deploy` adopts the existing resources instead of trying to -CREATE them. +that would force-replace a bundle-managed resource — the alternative +is silent data loss. To intentionally tear one down: remove the flag, +run `databricks bundle deployment unbind `, then destroy. The +app's `dqx_studio` Postgres schema lives below the resource layer +DABs models, so `prevent_destroy` doesn't apply to it directly; the +instance-level guard is what protects it. + +For workspaces where the schemas, volume, or Lakebase instance were +provisioned out-of-band (e.g. by the legacy bootstrap script), +one-time binding is required: `make app-bind PROFILE=... TARGET=...`. +After bind, `bundle deploy` adopts the existing resources instead of +trying to CREATE them. 
Privileges on UC objects for the auto-created app SP are still applied by `scripts/post_deploy_grants.sh` after each deploy — the app SP's diff --git a/docs/dqx/docs/installation.mdx b/docs/dqx/docs/installation.mdx index db0631e2c..cf9fce93b 100644 --- a/docs/dqx/docs/installation.mdx +++ b/docs/dqx/docs/installation.mdx @@ -519,7 +519,7 @@ Database migrations run automatically on app startup and preserve existing rules ### Uninstall DQX Studio -`databricks bundle destroy` removes the app, the task-runner job, and the SQL warehouse, but is **blocked** from dropping the stateful resources (schemas, volume, Lakebase instance, Lakebase logical DB) by their `prevent_destroy` flag. To fully uninstall, you have to opt-in to the destroy: +`databricks bundle destroy` removes the app, the task-runner job, and the SQL warehouse, but is **blocked** from dropping the bundle-managed stateful resources (schemas, volume, Lakebase instance) by their `prevent_destroy` flag. The DQX `dqx_studio` Postgres schema inside `databricks_postgres` lives below the resource layer DABs models and is therefore unaffected by `bundle destroy` — drop it manually if you want a fully clean wipe. To fully uninstall: 1. Edit `app/databricks.yml` and remove `lifecycle.prevent_destroy: true` from each of the stateful resources you want to drop. 2. Unbind the resources so the bundle no longer tracks them, then destroy: @@ -528,17 +528,22 @@ Database migrations run automatically on app startup and preserve existing rules databricks bundle deployment unbind main_schema -t databricks bundle deployment unbind tmp_schema -t databricks bundle deployment unbind wheels -t - databricks bundle deployment unbind lakebase_db -t databricks bundle deployment unbind lakebase -t databricks bundle destroy -p -t ``` -3. Drop the now-unbound resources manually if desired: +3. 
Drop the now-unbound UC resources manually if desired: ```sql DROP VOLUME IF EXISTS .dqx_studio.wheels; DROP SCHEMA IF EXISTS .dqx_studio CASCADE; DROP SCHEMA IF EXISTS .dqx_studio_tmp CASCADE; ``` +4. Drop the DQX Postgres schema and the Lakebase instance if desired: ```bash + # DQX schema inside ``databricks_postgres`` (connect to the + # Lakebase instance with psql via ``databricks_postgres``): + # DROP SCHEMA IF EXISTS dqx_studio CASCADE; + + # Lakebase instance: databricks database delete-database-instance dqx-studio-lakebase -p ``` From 8c8f78579c95e1dbdab7d22ce4f84a05fb12787e Mon Sep 17 00:00:00 2001 From: Tasha Date: Tue, 12 May 2026 16:26:20 +0200 Subject: [PATCH 03/12] =?UTF-8?q?docs:=20bump=20v0.13.0=20=E2=86=92=20v0.1?= =?UTF-8?q?4.0=20GitHub=20URLs=20to=20satisfy=20fmt=20check?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit make fmt rewrites GitHub source URLs to match the version in __about__.py. CI merges this branch onto main (now at v0.14.0) and the lingering v0.13.0 refs added on this branch trip git diff --exit-code. Bump them to v0.14.0 explicitly. --- docs/dqx/docs/dev/contributing.mdx | 6 +++--- docs/dqx/docs/installation.mdx | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/dqx/docs/dev/contributing.mdx b/docs/dqx/docs/dev/contributing.mdx index 3da30d43d..7c89a3245 100644 --- a/docs/dqx/docs/dev/contributing.mdx +++ b/docs/dqx/docs/dev/contributing.mdx @@ -203,9 +203,9 @@ DQX_JOB_ID= # optional locally; required for DQX_LAKEBASE_INSTANCE_NAME= # optional locally; empty = OLTP tables run on Delta ``` -Leave `DQX_LAKEBASE_INSTANCE_NAME` empty for most local dev — the app falls back to Delta for the OLTP tables (rules catalog, app settings, RBAC, comments, schedules) so you don't need a Lakebase instance to iterate. To exercise the Lakebase path locally, deploy the bundle once and point the variable at the resulting instance. 
See [app/DEVELOPMENT.md](https://github.com/databrickslabs/dqx/blob/v0.13.0/app/DEVELOPMENT.md) for the full set of `DQX_LAKEBASE_*` variables. +Leave `DQX_LAKEBASE_INSTANCE_NAME` empty for most local dev — the app falls back to Delta for the OLTP tables (rules catalog, app settings, RBAC, comments, schedules) so you don't need a Lakebase instance to iterate. To exercise the Lakebase path locally, deploy the bundle once and point the variable at the resulting instance. See [app/DEVELOPMENT.md](https://github.com/databrickslabs/dqx/blob/v0.14.0/app/DEVELOPMENT.md) for the full set of `DQX_LAKEBASE_*` variables. -If you don't have a profile yet, run `databricks auth login --host -p ` first. See the [Development Mode](https://github.com/databrickslabs/dqx/blob/v0.13.0/app/README.md#development-mode) section of the app README for more detail. +If you don't have a profile yet, run `databricks auth login --host -p ` first. See the [Development Mode](https://github.com/databrickslabs/dqx/blob/v0.14.0/app/README.md#development-mode) section of the app README for more detail. The **profiler** and **dry-run** features rely on a Databricks Job (`dqx-app-task-runner`) that only exists after you deploy the app bundle to a workspace. For local UI and backend development (routes, components, auth wiring, config), you can skip this — `DQX_JOB_ID` is not required and the app will start without it. All other features will work locally. @@ -230,7 +230,7 @@ Deploying DQX Studio to a workspace is required when you want to: - verify a change behaves correctly under the production identity model (service principal + on-behalf-of), or - run a review pass against a deployed app before merging. -For the full step-by-step (service principal creation, one-time storage bootstrap, asset-bundle deploy, Lakebase opt-out, post-deploy grants, app start, troubleshooting) follow [app/DEPLOYMENT.md](https://github.com/databrickslabs/dqx/blob/v0.13.0/app/DEPLOYMENT.md). 
+For the full step-by-step (service principal creation, one-time storage bootstrap, asset-bundle deploy, Lakebase opt-out, post-deploy grants, app start, troubleshooting) follow [app/DEPLOYMENT.md](https://github.com/databrickslabs/dqx/blob/v0.14.0/app/DEPLOYMENT.md). ### Running integration tests and code coverage diff --git a/docs/dqx/docs/installation.mdx b/docs/dqx/docs/installation.mdx index cf9fce93b..e6b503199 100644 --- a/docs/dqx/docs/installation.mdx +++ b/docs/dqx/docs/installation.mdx @@ -476,7 +476,7 @@ Databricks CLI will confirm a few options: trigger_pause_status: PAUSED ``` - See the [DQX Studio deployment guide](https://github.com/databrickslabs/dqx/blob/v0.13.0/app/DEPLOYMENT.md#step-4-configure-databricksyml) for the full reference of each variable, including security implications of `admin_group` and when to override per target. + See the [DQX Studio deployment guide](https://github.com/databrickslabs/dqx/blob/v0.14.0/app/DEPLOYMENT.md#step-4-configure-databricksyml) for the full reference of each variable, including security implications of `admin_group` and when to override per target. 4. (One-time, only on a workspace whose schemas / volume / Lakebase instance were created out-of-band before adopting this layout) adopt them into bundle management: ```commandline @@ -496,7 +496,7 @@ Databricks CLI will confirm a few options: The studio's schemas (`dqx_studio`, `dqx_studio_tmp`), wheels volume, Lakebase Postgres instance, and the logical Postgres database inside it are all declared as bundle resources with `lifecycle.prevent_destroy: true` (Databricks CLI 0.268+). `databricks bundle destroy` is blocked from dropping them, so production data survives accidental destroy/replace operations. To intentionally tear something down, remove the flag, `databricks bundle deployment unbind `, then destroy manually. 
-For the full walkthrough — including step-by-step commands, manual `GRANT` statements, troubleshooting, target-specific configuration, and the bind workflow for adopting existing resources — see the [DQX Studio deployment guide](https://github.com/databrickslabs/dqx/blob/v0.13.0/app/DEPLOYMENT.md). +For the full walkthrough — including step-by-step commands, manual `GRANT` statements, troubleshooting, target-specific configuration, and the bind workflow for adopting existing resources — see the [DQX Studio deployment guide](https://github.com/databrickslabs/dqx/blob/v0.14.0/app/DEPLOYMENT.md). DQX Studio splits its data across two physical backends: high-volume append-mostly tables (`dq_validation_runs`, `dq_profiling_results`, `dq_quarantine_records`, `dq_metrics`) live in **Delta Lake** because they're written by Spark; transactional tables (rules catalog, app settings, RBAC, comments, schedule configs) live in **Lakebase Postgres** for fast row-level reads/writes from the FastAPI request handlers. From 71563f95c1911767a9b3ad65fbc49b6bfb0a006b Mon Sep 17 00:00:00 2001 From: Tasha Date: Tue, 12 May 2026 16:29:51 +0200 Subject: [PATCH 04/12] fix(deps): make OltpExecutor a real Union type alias and actually use it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous OltpExecutor = "SqlExecutor | PgExecutor" was just a string assignment, not a type alias — mypy/pyright treated it as str. Switch to Union["SqlExecutor", "PgExecutor"] (string forward refs because PgExecutor is TYPE_CHECKING-only) and use OltpExecutor | None for _pg_executor and the get/set helpers, so type checkers actually enforce the parity contract between the two executors. 
--- .../backend/dependencies.py | 29 +++++++++---------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/app/src/databricks_labs_dqx_app/backend/dependencies.py b/app/src/databricks_labs_dqx_app/backend/dependencies.py index beff16cfc..a275a8983 100644 --- a/app/src/databricks_labs_dqx_app/backend/dependencies.py +++ b/app/src/databricks_labs_dqx_app/backend/dependencies.py @@ -4,7 +4,7 @@ import hashlib import os from collections.abc import Callable -from typing import TYPE_CHECKING, Annotated, Any, cast +from typing import TYPE_CHECKING, Annotated, Any, Union, cast if TYPE_CHECKING: from .common.connectors.sql import SQLConnector @@ -34,24 +34,21 @@ if TYPE_CHECKING: from .pg_executor import PgExecutor -# Type alias used by every OLTP-touching service: either the legacy -# Delta-backed :class:`SqlExecutor` or, when Lakebase is enabled, the -# :class:`PgExecutor`. The two classes share the public surface +# Union of the two executors that share the OLTP service surface # (``execute``, ``query``, ``query_dicts``, ``upsert``, ``q``, -# ``json_literal_expr``, ``ts_text``, ``dialect``) so service code can -# stay backend-agnostic. -OltpExecutor = "SqlExecutor | PgExecutor" +# ``json_literal_expr``, ``ts_text``, ``dialect``). String forward refs +# because ``PgExecutor`` is TYPE_CHECKING-only to avoid importing psycopg +# at module load when Lakebase isn't configured. +OltpExecutor = Union["SqlExecutor", "PgExecutor"] -# Process-wide OLTP executor (Lakebase Postgres). Constructed once at -# app startup by ``app.lifespan`` and re-used across all requests so -# the psycopg connection pool isn't rebuilt per call. ``None`` means -# Lakebase is not configured and the legacy Delta executor handles -# OLTP traffic instead. Lower-cased to keep basedpyright from -# flagging it as an immutable module-level constant. -_pg_executor: "SqlExecutor | PgExecutor | None" = None +# Process-wide OLTP executor. 
Constructed once at app startup by +# ``app.lifespan`` and reused across all requests so the psycopg pool +# isn't rebuilt per call. ``None`` means Lakebase is not configured and +# the legacy Delta executor handles OLTP traffic instead. +_pg_executor: OltpExecutor | None = None -def set_oltp_executor(executor: "SqlExecutor | PgExecutor | None") -> None: +def set_oltp_executor(executor: OltpExecutor | None) -> None: """Register (or clear) the process-wide OLTP executor. Called from :func:`backend.app.lifespan` after the connection pool @@ -63,7 +60,7 @@ def set_oltp_executor(executor: "SqlExecutor | PgExecutor | None") -> None: _pg_executor = executor -def get_oltp_executor() -> "SqlExecutor | PgExecutor | None": +def get_oltp_executor() -> OltpExecutor | None: """Return the registered OLTP executor or ``None`` if Lakebase is off.""" return _pg_executor From 87c8d38313d44e5d84b8e3b21e41b6dc55dd09cb Mon Sep 17 00:00:00 2001 From: Tasha Date: Tue, 12 May 2026 17:56:59 +0200 Subject: [PATCH 05/12] refactor(app): atomic Pg migrations, fqn() helper, bind/wheel script hardening, plus PgMigrationRunner unit tests and CI fixes --- Makefile | 10 +- app/scripts/_align_wheel_version.py | 15 +- app/scripts/bind_resources.sh | 42 ++- app/src/databricks_labs_dqx_app/_metadata.py | 2 +- .../backend/migrations/postgres.py | 51 ++- .../backend/pg_executor.py | 29 +- .../backend/services/app_settings_service.py | 5 +- .../backend/services/comments_service.py | 5 +- .../backend/services/role_service.py | 5 +- .../backend/services/rules_catalog_service.py | 18 +- .../services/schedule_config_service.py | 8 +- .../backend/services/scheduler_service.py | 16 +- .../backend/sql_executor.py | 14 + app/tests/test_pg_migration_runner.py | 304 ++++++++++++++++++ app/tests/test_retention.py | 5 +- docs/dqx/docs/dev/contributing.mdx | 6 +- docs/dqx/docs/installation.mdx | 4 +- tests/unit/test_app_backend.py | 8 +- 18 files changed, 469 insertions(+), 78 deletions(-) create mode 100644 
app/tests/test_pg_migration_runner.py diff --git a/Makefile b/Makefile index 8589d1ac9..808300590 100644 --- a/Makefile +++ b/Makefile @@ -136,10 +136,18 @@ app-grant-permissions: # creates the resources directly. # # Usage: make app-bind PROFILE=my-profile TARGET=dev +# make app-bind PROFILE=my-profile TARGET=dev \ +# BUNDLE_VARS='--var=lakebase_instance_name=' +# +# BUNDLE_VARS forwards arbitrary ``--var key=value`` arguments through +# to the bundle CLI. Use the same override here as you intend to pass +# to ``make app-deploy`` — the bind step reads the resolved bundle +# variables to know which instance/schema name to bind to, so an +# override applied only at deploy time would bind the wrong resource. app-bind: @test -n "$(PROFILE)" || (echo "Usage: make app-bind PROFILE= TARGET="; exit 1) @test -n "$(TARGET)" || (echo "Usage: make app-bind PROFILE= TARGET="; exit 1) - app/scripts/bind_resources.sh -p $(PROFILE) -t $(TARGET) + app/scripts/bind_resources.sh -p $(PROFILE) -t $(TARGET) $(if $(BUNDLE_VARS),-- $(BUNDLE_VARS)) # Full deploy: build, bundle deploy (creates storage on fresh # workspaces, updates managed resources otherwise), grant permissions diff --git a/app/scripts/_align_wheel_version.py b/app/scripts/_align_wheel_version.py index 3e57331b3..aa7ed3375 100644 --- a/app/scripts/_align_wheel_version.py +++ b/app/scripts/_align_wheel_version.py @@ -102,9 +102,18 @@ def align(wheel_path: Path) -> None: stem = old_dist_info.name[: -len(".dist-info")] existing_prefix = stem.rsplit("-", 1)[0] new_dist_info = tmp / f"{existing_prefix}-{filename_version}.dist-info" - if new_dist_info.exists(): - shutil.rmtree(new_dist_info) - old_dist_info.rename(new_dist_info) + # Skip the rename when the dist-info dir is already correctly + # named (e.g. apx didn't inject a build tag on this rebuild so + # the filename version matches what's already inside the + # wheel). 
Without this guard, the ``rmtree`` below would delete + # the source directory — both paths point at the same dir — and + # the subsequent ``rename`` would crash with FileNotFoundError. + # The METADATA/RECORD rewrites further down are themselves + # idempotent, so we still run them to repair any drift. + if new_dist_info != old_dist_info: + if new_dist_info.exists(): + shutil.rmtree(new_dist_info) + old_dist_info.rename(new_dist_info) metadata = new_dist_info / "METADATA" if not metadata.exists(): diff --git a/app/scripts/bind_resources.sh b/app/scripts/bind_resources.sh index e8e4d1eac..a105ef427 100755 --- a/app/scripts/bind_resources.sh +++ b/app/scripts/bind_resources.sh @@ -44,9 +44,18 @@ while getopts "p:t:" opt; do *) usage ;; esac done +shift $((OPTIND - 1)) [[ -z "$PROFILE" || -z "$TARGET" ]] && usage +# Everything after a ``--`` separator is forwarded to every bundle +# subcommand as extra ``--var key=value`` overrides. Threading them +# into ``bundle validate`` matters: that call is what produces the +# JSON we parse below to learn which instance/schema name to bind to. +# Without forwarding, a deploy-time override would bind the wrong +# resource and the next deploy would still see a state-vs-config drift. +EXTRA_VARS=("$@") + CLI="databricks -p $PROFILE" BUNDLE_FLAGS=(-t "$TARGET") @@ -61,7 +70,7 @@ cd "$BUNDLE_DIR" # --------------------------------------------------------------------------- BUNDLE_VALIDATE_STDERR=$(mktemp) trap 'rm -f "$BUNDLE_VALIDATE_STDERR"' EXIT -if ! BUNDLE_JSON=$($CLI bundle validate "${BUNDLE_FLAGS[@]}" -o json 2>"$BUNDLE_VALIDATE_STDERR"); then +if ! 
BUNDLE_JSON=$($CLI bundle validate "${BUNDLE_FLAGS[@]}" "${EXTRA_VARS[@]}" -o json 2>"$BUNDLE_VALIDATE_STDERR"); then echo "ERROR: 'databricks bundle validate' failed:" >&2 cat "$BUNDLE_VALIDATE_STDERR" >&2 exit 1 @@ -93,12 +102,39 @@ echo "" # "apply bundle resource updates to the existing remote resource on the # next deploy" — which is exactly what we want, so auto-approving is # safe. +# +# Bind is idempotent on the CLI side: re-binding an already-bound +# resource exits 0. Any non-zero exit from ``bundle deployment bind`` +# therefore signals a genuine problem (the resource doesn't exist, the +# principal lacks permission, the target points at the wrong workspace, +# …) rather than a benign "already bound" state. We surface it loudly +# instead of warning-and-continuing so the user sees the real cause +# now, not a confusing "resource already exists" failure inside the +# next ``bundle deploy``. bind() { local key="$1" local id="$2" echo " binding ${key} -> ${id}" - if ! $CLI bundle deployment bind "$key" "$id" --auto-approve "${BUNDLE_FLAGS[@]}"; then - echo " WARNING: bind for ${key} failed. It may already be bound, or the remote resource may not exist yet." >&2 + if ! $CLI bundle deployment bind "$key" "$id" --auto-approve "${BUNDLE_FLAGS[@]}" "${EXTRA_VARS[@]}"; then + echo "" >&2 + echo "ERROR: failed to bind ${key} -> ${id}" >&2 + echo "" >&2 + echo "Common causes:" >&2 + echo " - The remote resource does not exist yet. This script adopts" >&2 + echo " pre-existing resources; on a fresh workspace, skip it and" >&2 + echo " run 'make app-deploy' directly — the bundle creates the" >&2 + echo " resources for you." >&2 + echo " - The current principal (profile=${PROFILE}) lacks permission" >&2 + echo " to read or bind the resource." >&2 + echo " - The bundle target (${TARGET}) points at a different" >&2 + echo " workspace than the one where the resource was created." 
>&2 + echo "" >&2 + echo "Re-run after fixing the underlying cause, or run this single" >&2 + echo "bind manually if you're certain the resource exists and is" >&2 + echo "accessible:" >&2 + echo " databricks -p ${PROFILE} bundle deployment bind \\" >&2 + echo " ${key} ${id} --auto-approve -t ${TARGET}" >&2 + exit 1 fi } diff --git a/app/src/databricks_labs_dqx_app/_metadata.py b/app/src/databricks_labs_dqx_app/_metadata.py index b517c9e11..c4f660c7f 100644 --- a/app/src/databricks_labs_dqx_app/_metadata.py +++ b/app/src/databricks_labs_dqx_app/_metadata.py @@ -1,4 +1,4 @@ app_name = "DQX Studio" app_module = "databricks_labs_dqx_app.backend.app:app" app_slug = "databricks_labs_dqx_app" -api_prefix = "/api" +api_prefix = "/api" \ No newline at end of file diff --git a/app/src/databricks_labs_dqx_app/backend/migrations/postgres.py b/app/src/databricks_labs_dqx_app/backend/migrations/postgres.py index 5731380e7..c2a1a2d24 100644 --- a/app/src/databricks_labs_dqx_app/backend/migrations/postgres.py +++ b/app/src/databricks_labs_dqx_app/backend/migrations/postgres.py @@ -35,8 +35,9 @@ from __future__ import annotations import logging +from contextlib import AbstractContextManager from dataclasses import dataclass -from typing import Protocol +from typing import Any, Protocol logger = logging.getLogger(__name__) @@ -62,6 +63,10 @@ def database(self) -> str: ... def execute(self, sql: str, *, timeout_seconds: int = 120) -> None: ... def query(self, sql: str, *, timeout_seconds: int = 120) -> list[list[str]]: ... + # Yields a psycopg Connection. Typed loosely so this Protocol stays + # free of a psycopg dependency. + def connection(self) -> AbstractContextManager[Any]: ... + @dataclass(frozen=True) class PgMigration: @@ -274,21 +279,35 @@ def _applied_versions(self) -> set[int]: return {int(row[0]) for row in rows} def _apply(self, migration: PgMigration) -> None: + """Apply *migration* atomically. 
+ + Postgres DDL is transactional (CREATE TABLE / INDEX inside + BEGIN/COMMIT all roll back together on error — modulo + ``CREATE INDEX CONCURRENTLY`` which we don't use). We run every + DDL statement in the migration **and** the ``dq_migrations`` + INSERT inside a single transaction so a half-applied migration + can never end up with committed DDL but no version row. If any + statement fails the whole migration rolls back and the next run + retries cleanly from the beginning. + + Statements are still split on ``;`` and executed one at a time + through a single cursor so an error message pinpoints the exact + failing DDL rather than a position inside a multi-kilobyte + compound string. + """ formatted = migration.sql.format(schema=self._schema) - # Postgres supports compound statements per ``execute`` call, - # but breaking on ``;`` keeps the error trace pinned to the - # specific DDL statement that failed — much easier to debug. - for stmt in formatted.split(";"): - stmt = stmt.strip() - if stmt: - self._exec.execute(stmt) - - # ANSI-escape the description rather than parameterising the - # INSERT — the executor surface intentionally doesn't expose - # parameter binding to keep parity with SqlExecutor. escaped_desc = migration.description.replace("'", "''") - self._exec.execute( - f"INSERT INTO {self._meta_table} (version, description, applied_at) " - f"VALUES ({migration.version}, '{escaped_desc}', CURRENT_TIMESTAMP)" - ) + with self._exec.connection() as conn: + with conn.cursor() as cur: + for stmt in formatted.split(";"): + stmt = stmt.strip() + if stmt: + cur.execute(stmt) + # Same transaction: either the whole migration lands + # *and* its version row is recorded, or nothing does. 
+ cur.execute( + f"INSERT INTO {self._meta_table} (version, description, applied_at) " + f"VALUES ({migration.version}, '{escaped_desc}', CURRENT_TIMESTAMP)" + ) + conn.commit() logger.info("Postgres migration v%d applied", migration.version) diff --git a/app/src/databricks_labs_dqx_app/backend/pg_executor.py b/app/src/databricks_labs_dqx_app/backend/pg_executor.py index 28029e21b..e94224eb5 100644 --- a/app/src/databricks_labs_dqx_app/backend/pg_executor.py +++ b/app/src/databricks_labs_dqx_app/backend/pg_executor.py @@ -31,11 +31,14 @@ import threading import time import uuid +from collections.abc import Iterator +from contextlib import contextmanager from datetime import date, datetime from decimal import Decimal from typing import Any from databricks.sdk import WorkspaceClient +from psycopg import Connection from psycopg_pool import ConnectionPool from databricks_labs_dqx_app.backend.sql_executor import RawSql, _render_value @@ -239,6 +242,15 @@ def schema(self) -> str: def database(self) -> str: return self._database + def fqn(self, table: str) -> str: + """Return the schema-qualified path for *table*. + + Postgres only has one catalog per connection so we return + ``schema.table``; :meth:`SqlExecutor.fqn` returns three parts. + See :meth:`SqlExecutor.fqn` for the parity contract. + """ + return f"{self._schema}.{table}" + def q(self, identifier: str) -> str: """Quote a Postgres identifier (ANSI double quotes, doubled internal ``"``).""" return '"' + identifier.replace('"', '""') + '"' @@ -258,8 +270,23 @@ def ts_text(self, col: str) -> str: """ return col + @contextmanager + def connection(self) -> Iterator[Connection]: + """Yield a pooled connection for multi-statement transactional work. + + Use this when several statements MUST be atomic — e.g. DDL + a + bookkeeping INSERT that record their joint application. 
The + caller should call ``conn.commit()`` explicitly: psycopg-pool + commits an open transaction when the ``with`` block exits normally + and rolls it back when the block exits with an exception, then + returns the connection to the pool. Single one-shot statements + should keep using :meth:`execute`, which commits per call. + """ + with self._pool.connection() as conn: + yield conn + def execute(self, sql: str, *, timeout_seconds: int = 120) -> None: # noqa: ARG002 - parity with SqlExecutor - """Run a non-result-returning statement.""" + """Run a single non-result-returning statement and commit it.""" with self._pool.connection() as conn: with conn.cursor() as cur: # psycopg accepts ``str`` at runtime (``Query`` is a diff --git a/app/src/databricks_labs_dqx_app/backend/services/app_settings_service.py b/app/src/databricks_labs_dqx_app/backend/services/app_settings_service.py index bc1869813..2333e075a 100644 --- a/app/src/databricks_labs_dqx_app/backend/services/app_settings_service.py +++ b/app/src/databricks_labs_dqx_app/backend/services/app_settings_service.py @@ -25,10 +25,7 @@ class AppSettingsService: def __init__(self, sql: SqlExecutor) -> None: self._sql = sql - if getattr(sql, "dialect", "delta") == "postgres": - self._table = f"{sql.schema}.dq_app_settings" - else: - self._table = f"{sql.catalog}.{sql.schema}.dq_app_settings" + self._table = sql.fqn("dq_app_settings") # ------------------------------------------------------------------ # Public API diff --git a/app/src/databricks_labs_dqx_app/backend/services/comments_service.py b/app/src/databricks_labs_dqx_app/backend/services/comments_service.py index 7ae2348ba..33480eb6d 100644 --- a/app/src/databricks_labs_dqx_app/backend/services/comments_service.py +++ b/app/src/databricks_labs_dqx_app/backend/services/comments_service.py @@ -32,10 +32,7 @@ class CommentsService: def __init__(self, sql: SqlExecutor) -> None: self._sql = sql - if getattr(sql, "dialect", "delta") == "postgres": - self._table = f"{sql.schema}.dq_comments" - else: -
self._table = f"{sql.catalog}.{sql.schema}.dq_comments" + self._table = sql.fqn("dq_comments") def add_comment(self, entity_type: str, entity_id: str, user_email: str, comment: str) -> Comment: from databricks_labs_dqx_app.backend.sql_utils import escape_sql_string, validate_entity_type diff --git a/app/src/databricks_labs_dqx_app/backend/services/role_service.py b/app/src/databricks_labs_dqx_app/backend/services/role_service.py index 6776bc3ee..5fc8eb34f 100644 --- a/app/src/databricks_labs_dqx_app/backend/services/role_service.py +++ b/app/src/databricks_labs_dqx_app/backend/services/role_service.py @@ -41,10 +41,7 @@ class RoleService: def __init__(self, sql: SqlExecutor) -> None: self._sql = sql - if getattr(sql, "dialect", "delta") == "postgres": - self._table = f"{sql.schema}.dq_role_mappings" - else: - self._table = f"{sql.catalog}.{sql.schema}.dq_role_mappings" + self._table = sql.fqn("dq_role_mappings") self._mappings_cache: list[RoleMapping] | None = None self._mappings_cache_expires: float = 0.0 diff --git a/app/src/databricks_labs_dqx_app/backend/services/rules_catalog_service.py b/app/src/databricks_labs_dqx_app/backend/services/rules_catalog_service.py index a55f3d49e..b3e4663d9 100644 --- a/app/src/databricks_labs_dqx_app/backend/services/rules_catalog_service.py +++ b/app/src/databricks_labs_dqx_app/backend/services/rules_catalog_service.py @@ -66,28 +66,14 @@ class RulesCatalogService: def __init__(self, sql: SqlExecutor) -> None: self._sql = sql - self._table = self._qualify(sql, "dq_quality_rules") - self._history_table = self._qualify(sql, "dq_quality_rules_history") + self._table = sql.fqn("dq_quality_rules") + self._history_table = sql.fqn("dq_quality_rules_history") # ``check`` is a SQL reserved word in both Delta and Postgres, # so quote it via the executor so we get backticks on Delta and # double-quotes on Postgres. 
self._check_col = sql.q("check") self._select_cols = self._build_select_cols() - @staticmethod - def _qualify(sql: SqlExecutor, table: str) -> str: - """Return the fully-qualified table path for either backend. - - Delta: ``catalog.schema.table``. Postgres: ``schema.table`` - (Postgres only has a single database per connection so we drop - the catalog component there). The Postgres executor exposes - the database via :attr:`PgExecutor.database` if a future - cross-database join is ever needed. - """ - if getattr(sql, "dialect", "delta") == "postgres": - return f"{sql.schema}.{table}" - return f"{sql.catalog}.{sql.schema}.{table}" - def _build_select_cols(self) -> str: """Build the column projection used by every SELECT. diff --git a/app/src/databricks_labs_dqx_app/backend/services/schedule_config_service.py b/app/src/databricks_labs_dqx_app/backend/services/schedule_config_service.py index 044bdee24..f6daad7a0 100644 --- a/app/src/databricks_labs_dqx_app/backend/services/schedule_config_service.py +++ b/app/src/databricks_labs_dqx_app/backend/services/schedule_config_service.py @@ -35,12 +35,8 @@ class ScheduleConfigService: def __init__(self, sql: SqlExecutor) -> None: self._sql = sql - if getattr(sql, "dialect", "delta") == "postgres": - self._table = f"{sql.schema}.dq_schedule_configs" - self._history_table = f"{sql.schema}.dq_schedule_configs_history" - else: - self._table = f"{sql.catalog}.{sql.schema}.dq_schedule_configs" - self._history_table = f"{sql.catalog}.{sql.schema}.dq_schedule_configs_history" + self._table = sql.fqn("dq_schedule_configs") + self._history_table = sql.fqn("dq_schedule_configs_history") def list_schedules(self) -> list[ScheduleConfigEntry]: ts = self._sql.ts_text diff --git a/app/src/databricks_labs_dqx_app/backend/services/scheduler_service.py b/app/src/databricks_labs_dqx_app/backend/services/scheduler_service.py index 7864ce4f3..199b42cc6 100644 --- a/app/src/databricks_labs_dqx_app/backend/services/scheduler_service.py +++ 
b/app/src/databricks_labs_dqx_app/backend/services/scheduler_service.py @@ -153,10 +153,10 @@ def __init__( self._force_recalc = False # Both backend layouts qualify the table differently — let the # OLTP executor's catalog/schema decide. - self._table = self._qualify_oltp("dq_schedule_runs") - self._configs_table = self._qualify_oltp("dq_schedule_configs") - self._settings_table = self._qualify_oltp("dq_app_settings") - self._rules_table = self._qualify_oltp("dq_quality_rules") + self._table = self._oltp_sql.fqn("dq_schedule_runs") + self._configs_table = self._oltp_sql.fqn("dq_schedule_configs") + self._settings_table = self._oltp_sql.fqn("dq_app_settings") + self._rules_table = self._oltp_sql.fqn("dq_quality_rules") # Orphan-tmp-view GC: fires every Saturday at 01:00 UTC. Held in # process memory rather than persisted — a missed Saturday (e.g. @@ -170,12 +170,6 @@ def __init__( # missed sweep is harmless since the next one catches up. self._next_retention_at: datetime = datetime.now(timezone.utc) + timedelta(hours=_RETENTION_INTERVAL_HOURS) - def _qualify_oltp(self, table: str) -> str: - """Fully-qualify *table* for whichever backend the OLTP executor uses.""" - if getattr(self._oltp_sql, "dialect", "delta") == "postgres": - return f"{self._oltp_sql.schema}.{table}" - return f"{self._oltp_sql.catalog}.{self._oltp_sql.schema}.{table}" - # ------------------------------------------------------------------ # Lifecycle # ------------------------------------------------------------------ @@ -917,7 +911,7 @@ def _run_retention(self) -> None: # backticks/double-quotes follow the dialect. 
is_postgres = getattr(self._oltp_sql, "dialect", "delta") == "postgres" for table_name, time_col in _OLTP_RETENTION_TABLES: - table = self._qualify_oltp(table_name) + table = self._oltp_sql.fqn(table_name) if is_postgres: interval = f"INTERVAL '{days} days'" else: diff --git a/app/src/databricks_labs_dqx_app/backend/sql_executor.py b/app/src/databricks_labs_dqx_app/backend/sql_executor.py index 1c464bd9f..bded013c1 100644 --- a/app/src/databricks_labs_dqx_app/backend/sql_executor.py +++ b/app/src/databricks_labs_dqx_app/backend/sql_executor.py @@ -98,6 +98,20 @@ def schema(self) -> str: # Dialect helpers — kept identical-named on every executor so callers # can hand-write portable SQL without an "if dialect" branch. # ------------------------------------------------------------------ + def fqn(self, table: str) -> str: + """Return the fully-qualified path for *table* on this backend. + + Delta uses three-part names (``catalog.schema.table``) because + Unity Catalog organises tables under a catalog. Postgres has + only one catalog per connection so :meth:`PgExecutor.fqn` drops + the catalog component and returns ``schema.table``. Services + that just need an addressable table identifier should call this + instead of inlining ``f"{sql.catalog}.{sql.schema}.{table}"`` — + the dialect branch lives here once instead of being repeated at + every call site. + """ + return f"{self._catalog}.{self._schema}.{table}" + def q(self, identifier: str) -> str: """Quote an identifier for this dialect. diff --git a/app/tests/test_pg_migration_runner.py b/app/tests/test_pg_migration_runner.py new file mode 100644 index 000000000..0a0712711 --- /dev/null +++ b/app/tests/test_pg_migration_runner.py @@ -0,0 +1,304 @@ +"""Unit tests for :class:`PgMigrationRunner` (Lakebase Postgres migrations). + +The runner is tested in pure isolation against a mock satisfying the +``_Executor`` Protocol. The real :class:`PgExecutor` is exercised in +integration tests against a live Lakebase instance.
+ +Two non-obvious things the helpers handle: + +1. ``MagicMock`` preconfigures ``__exit__`` to return ``False``, so + exceptions raised inside a ``with`` block already propagate. We + still pin ``__exit__`` to a falsy ``None`` explicitly on both the + connection and cursor context managers so that a real + ``RuntimeError`` from ``cur.execute`` is guaranteed to propagate + out and the partial-failure tests do not rely on mock defaults. + +2. The runner uses ``with self._exec.connection() as conn: with + conn.cursor() as cur: cur.execute(...)``. The cursor used inside + ``_apply`` therefore lives several attribute hops deep in the mock + chain. :func:`_cursor_of` and :func:`_connection_of` walk that chain + so assertions stay legible. +""" + +from __future__ import annotations + +import dataclasses +from unittest.mock import MagicMock + +import pytest + +from databricks_labs_dqx_app.backend.migrations.postgres import ( + PG_MIGRATIONS, + PgMigration, + PgMigrationRunner, +) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _make_executor(*, applied_versions: tuple[int, ...] = (), schema: str = "public") -> MagicMock: + """Build a mock ``_Executor`` pre-seeded with the given applied-versions list.""" + exec_mock = MagicMock(name="PgExecutor") + exec_mock.schema = schema + exec_mock.database = "test_db" + # _applied_versions issues one SELECT and parses ``[[str(v)], ...]``. + exec_mock.query.return_value = [[str(v)] for v in applied_versions] + + # Pin a falsy ``__exit__`` result on both context managers so + # exceptions inside ``with self._exec.connection() as conn:`` and + # ``with conn.cursor() as cur:`` propagate normally.
+ conn_cm = exec_mock.connection.return_value + conn_cm.__exit__.return_value = None + conn = conn_cm.__enter__.return_value + cur_cm = conn.cursor.return_value + cur_cm.__exit__.return_value = None + + return exec_mock + + +def _connection_of(exec_mock: MagicMock) -> MagicMock: + """Return the ``conn`` object the runner sees inside its ``with`` block.""" + return exec_mock.connection.return_value.__enter__.return_value + + +def _cursor_of(exec_mock: MagicMock) -> MagicMock: + """Return the ``cur`` object the runner uses inside :meth:`_apply`.""" + return _connection_of(exec_mock).cursor.return_value.__enter__.return_value + + +def _executed_sqls(cursor_mock: MagicMock) -> list[str]: + """All SQL strings that hit the inner cursor, in call order.""" + return [c.args[0] for c in cursor_mock.execute.call_args_list] + + +def _insert_meta_sqls(cursor_mock: MagicMock) -> list[str]: + """Just the ``INSERT INTO ...dq_migrations`` calls.""" + return [s for s in _executed_sqls(cursor_mock) if "INSERT INTO" in s and "dq_migrations" in s] + + +# --------------------------------------------------------------------------- +# PG_MIGRATIONS catalogue invariants +# --------------------------------------------------------------------------- + + +class TestPgMigrationsCatalogue: + def test_versions_are_unique(self): + versions = [m.version for m in PG_MIGRATIONS] + assert len(versions) == len(set(versions)), "PG_MIGRATIONS version numbers must be unique" + + def test_versions_are_monotonically_increasing(self): + versions = [m.version for m in PG_MIGRATIONS] + assert versions == sorted(versions), "PG_MIGRATIONS entries must be listed in ascending version order" + + def test_every_migration_has_a_description(self): + # Empty descriptions break the dq_migrations INSERT (NOT NULL). 
+ for m in PG_MIGRATIONS: + assert m.description.strip(), f"v{m.version} migration has an empty description" + + +# --------------------------------------------------------------------------- +# PgMigration dataclass behaviour +# --------------------------------------------------------------------------- + + +class TestPgMigrationDataclass: + def test_is_frozen(self): + m = PgMigration(version=99, description="test", sql="SELECT 1") + with pytest.raises(dataclasses.FrozenInstanceError): + m.version = 100 # type: ignore[misc] + + def test_equality_is_by_value(self): + a = PgMigration(version=1, description="d", sql="s") + b = PgMigration(version=1, description="d", sql="s") + c = PgMigration(version=2, description="d", sql="s") + assert a == b + assert a != c + + +# --------------------------------------------------------------------------- +# run_all — pending/applied dispatch +# --------------------------------------------------------------------------- + + +class TestRunAllDispatch: + def test_applies_all_when_none_applied(self): + exec_mock = _make_executor(applied_versions=()) + runner = PgMigrationRunner(exec_mock) + + applied = runner.run_all() + + assert applied == len(PG_MIGRATIONS) + # Each migration records its version in dq_migrations. + assert len(_insert_meta_sqls(_cursor_of(exec_mock))) == len(PG_MIGRATIONS) + + def test_skips_all_when_already_applied(self): + all_versions = tuple(m.version for m in PG_MIGRATIONS) + exec_mock = _make_executor(applied_versions=all_versions) + runner = PgMigrationRunner(exec_mock) + + applied = runner.run_all() + + assert applied == 0 + # _apply is never entered, so connection() must not be touched. 
+ exec_mock.connection.assert_not_called() + + def test_runs_only_pending_when_some_already_applied(self, monkeypatch): + """With v1 already applied, only the new v2 should run.""" + fake_migrations = [ + PgMigration(version=1, description="v1", sql="CREATE TABLE {schema}.t1 (id int);"), + PgMigration(version=2, description="v2", sql="CREATE TABLE {schema}.t2 (id int);"), + ] + monkeypatch.setattr( + "databricks_labs_dqx_app.backend.migrations.postgres.PG_MIGRATIONS", + fake_migrations, + ) + exec_mock = _make_executor(applied_versions=(1,)) + runner = PgMigrationRunner(exec_mock) + + applied = runner.run_all() + + assert applied == 1 + cur = _cursor_of(exec_mock) + inserts = _insert_meta_sqls(cur) + assert len(inserts) == 1 + assert "VALUES (2," in inserts[0] + # v1's DDL must not be re-applied. + sqls = _executed_sqls(cur) + assert not any(".t1 " in s for s in sqls), "v1 DDL ran a second time" + assert any(".t2 " in s for s in sqls), "v2 DDL did not run" + + def test_bootstraps_schema_and_meta_table_before_any_migration(self): + exec_mock = _make_executor(applied_versions=()) + runner = PgMigrationRunner(exec_mock) + runner.run_all() + + # _ensure_schema and _ensure_meta_table go through executor.execute + # (NOT cursor.execute) — they are one-shot statements applied + # before the transactional _apply loop. 
+ bootstrap = [c.args[0] for c in exec_mock.execute.call_args_list] + assert any("CREATE SCHEMA IF NOT EXISTS" in s for s in bootstrap) + assert any("CREATE TABLE IF NOT EXISTS" in s and "dq_migrations" in s for s in bootstrap) + + def test_schema_placeholder_is_substituted(self): + """Every {schema} placeholder must be replaced before SQL leaves the runner.""" + exec_mock = _make_executor(applied_versions=(), schema="custom_app_schema") + runner = PgMigrationRunner(exec_mock) + runner.run_all() + + cur = _cursor_of(exec_mock) + for sql in _executed_sqls(cur): + assert "{schema}" not in sql, f"Unsubstituted placeholder in: {sql!r}" + # And the configured schema name actually appears somewhere. + assert any("custom_app_schema" in s for s in _executed_sqls(cur)) + + +# --------------------------------------------------------------------------- +# _apply — atomicity contract +# --------------------------------------------------------------------------- + + +class TestApplyAtomicity: + """The contract Laurence's review surfaced: DDL + version row are atomic.""" + + def test_one_commit_per_migration(self): + """Each migration ends with exactly one ``conn.commit()`` — the + DDL and the dq_migrations INSERT share that single transaction.""" + exec_mock = _make_executor(applied_versions=()) + runner = PgMigrationRunner(exec_mock) + runner.run_all() + + conn = _connection_of(exec_mock) + assert conn.commit.call_count == len(PG_MIGRATIONS) + + def test_per_statement_split_emits_multiple_cursor_executes(self): + """The runner still splits on ``;`` so cursor errors pinpoint the + exact failing DDL statement rather than a position inside a + multi-kilobyte compound string.""" + exec_mock = _make_executor(applied_versions=()) + runner = PgMigrationRunner(exec_mock) + runner.run_all() + + cur = _cursor_of(exec_mock) + # Floor: every migration produces at least one DDL execute plus + # the bookkeeping INSERT. 
+ assert cur.execute.call_count >= 2 * len(PG_MIGRATIONS) + + def test_version_row_insert_runs_through_the_same_cursor(self): + """The dq_migrations INSERT must reach the same transactional + cursor as the DDL — never the executor's auto-committing + ``execute`` path.""" + exec_mock = _make_executor(applied_versions=()) + runner = PgMigrationRunner(exec_mock) + runner.run_all() + + cur_inserts = _insert_meta_sqls(_cursor_of(exec_mock)) + exec_inserts = [c.args[0] for c in exec_mock.execute.call_args_list if "INSERT INTO" in c.args[0]] + assert cur_inserts, "dq_migrations INSERT must go through the transactional cursor" + assert not any("dq_migrations" in s for s in exec_inserts), ( + "dq_migrations INSERT leaked onto the auto-committing executor.execute path — " + "would break migration atomicity" + ) + + def test_failure_during_ddl_aborts_transaction_and_propagates(self, monkeypatch): + """If a DDL statement raises, the transaction rolls back (no + commit), the version row is never written, and the exception + propagates so the next start retries cleanly.""" + # Replace PG_MIGRATIONS with a deterministic single migration so + # we know exactly which cur.execute call should fail. + fake_migration = PgMigration( + version=42, + description="failing test", + sql=( + "CREATE TABLE {schema}.t1 (id int);" + "CREATE TABLE {schema}.t2 (id int);" + "CREATE TABLE {schema}.t3 (id int);" + ), + ) + monkeypatch.setattr( + "databricks_labs_dqx_app.backend.migrations.postgres.PG_MIGRATIONS", + [fake_migration], + ) + + exec_mock = _make_executor(applied_versions=()) + cur = _cursor_of(exec_mock) + # Pass: t1 → succeed, t2 → succeed, t3 → boom. INSERT never reached. 
+ cur.execute.side_effect = [None, None, RuntimeError("boom"), None, None] + + runner = PgMigrationRunner(exec_mock) + with pytest.raises(RuntimeError, match="boom"): + runner.run_all() + + conn = _connection_of(exec_mock) + # commit() must NOT have run — Postgres would roll the + # transaction back when the connection returns to the pool. + conn.commit.assert_not_called() + # And the version row must never have been requested either. + assert not _insert_meta_sqls(cur) + + def test_failure_in_first_migration_skips_subsequent_migrations(self, monkeypatch): + """A failed migration must short-circuit the rest of the run — + otherwise a later migration could land on top of a partially- + rolled-back schema.""" + fake_migrations = [ + PgMigration(version=1, description="v1", sql="BROKEN;"), + PgMigration(version=2, description="v2", sql="CREATE TABLE {schema}.t2 (id int);"), + ] + monkeypatch.setattr( + "databricks_labs_dqx_app.backend.migrations.postgres.PG_MIGRATIONS", + fake_migrations, + ) + + exec_mock = _make_executor(applied_versions=()) + cur = _cursor_of(exec_mock) + cur.execute.side_effect = RuntimeError("boom") # every execute fails + + runner = PgMigrationRunner(exec_mock) + with pytest.raises(RuntimeError, match="boom"): + runner.run_all() + + # v2's DDL must not have been attempted. + sqls = _executed_sqls(cur) + assert not any(".t2 " in s for s in sqls), "v2 ran despite v1 failing — short-circuit broken" diff --git a/app/tests/test_retention.py b/app/tests/test_retention.py index b8543d57a..14dc55e84 100644 --- a/app/tests/test_retention.py +++ b/app/tests/test_retention.py @@ -218,8 +218,9 @@ def scheduler(self): svc._settings_table = "dqx.public.dq_app_settings" svc._catalog = "dqx" svc._schema = "public" - # ``_qualify_oltp`` is invoked for the OLTP DELETE loop. 
- svc._qualify_oltp = lambda t: f"dqx.public.{t}" # type: ignore[method-assign] + # OLTP DELETE loop calls ``self._oltp_sql.fqn(table)`` to build + # each fully-qualified path; stub the executor's helper directly. + svc._oltp_sql.fqn = lambda t: f"dqx.public.{t}" return svc def test_quarantine_table_uses_its_own_default(self, scheduler): diff --git a/docs/dqx/docs/dev/contributing.mdx b/docs/dqx/docs/dev/contributing.mdx index 7c89a3245..3da30d43d 100644 --- a/docs/dqx/docs/dev/contributing.mdx +++ b/docs/dqx/docs/dev/contributing.mdx @@ -203,9 +203,9 @@ DQX_JOB_ID= # optional locally; required for DQX_LAKEBASE_INSTANCE_NAME= # optional locally; empty = OLTP tables run on Delta ``` -Leave `DQX_LAKEBASE_INSTANCE_NAME` empty for most local dev — the app falls back to Delta for the OLTP tables (rules catalog, app settings, RBAC, comments, schedules) so you don't need a Lakebase instance to iterate. To exercise the Lakebase path locally, deploy the bundle once and point the variable at the resulting instance. See [app/DEVELOPMENT.md](https://github.com/databrickslabs/dqx/blob/v0.14.0/app/DEVELOPMENT.md) for the full set of `DQX_LAKEBASE_*` variables. +Leave `DQX_LAKEBASE_INSTANCE_NAME` empty for most local dev — the app falls back to Delta for the OLTP tables (rules catalog, app settings, RBAC, comments, schedules) so you don't need a Lakebase instance to iterate. To exercise the Lakebase path locally, deploy the bundle once and point the variable at the resulting instance. See [app/DEVELOPMENT.md](https://github.com/databrickslabs/dqx/blob/v0.13.0/app/DEVELOPMENT.md) for the full set of `DQX_LAKEBASE_*` variables. -If you don't have a profile yet, run `databricks auth login --host -p ` first. See the [Development Mode](https://github.com/databrickslabs/dqx/blob/v0.14.0/app/README.md#development-mode) section of the app README for more detail. +If you don't have a profile yet, run `databricks auth login --host -p ` first. 
See the [Development Mode](https://github.com/databrickslabs/dqx/blob/v0.13.0/app/README.md#development-mode) section of the app README for more detail. The **profiler** and **dry-run** features rely on a Databricks Job (`dqx-app-task-runner`) that only exists after you deploy the app bundle to a workspace. For local UI and backend development (routes, components, auth wiring, config), you can skip this — `DQX_JOB_ID` is not required and the app will start without it. All other features will work locally. @@ -230,7 +230,7 @@ Deploying DQX Studio to a workspace is required when you want to: - verify a change behaves correctly under the production identity model (service principal + on-behalf-of), or - run a review pass against a deployed app before merging. -For the full step-by-step (service principal creation, one-time storage bootstrap, asset-bundle deploy, Lakebase opt-out, post-deploy grants, app start, troubleshooting) follow [app/DEPLOYMENT.md](https://github.com/databrickslabs/dqx/blob/v0.14.0/app/DEPLOYMENT.md). +For the full step-by-step (service principal creation, one-time storage bootstrap, asset-bundle deploy, Lakebase opt-out, post-deploy grants, app start, troubleshooting) follow [app/DEPLOYMENT.md](https://github.com/databrickslabs/dqx/blob/v0.13.0/app/DEPLOYMENT.md). ### Running integration tests and code coverage diff --git a/docs/dqx/docs/installation.mdx b/docs/dqx/docs/installation.mdx index e6b503199..cf9fce93b 100644 --- a/docs/dqx/docs/installation.mdx +++ b/docs/dqx/docs/installation.mdx @@ -476,7 +476,7 @@ Databricks CLI will confirm a few options: trigger_pause_status: PAUSED ``` - See the [DQX Studio deployment guide](https://github.com/databrickslabs/dqx/blob/v0.14.0/app/DEPLOYMENT.md#step-4-configure-databricksyml) for the full reference of each variable, including security implications of `admin_group` and when to override per target. 
+ See the [DQX Studio deployment guide](https://github.com/databrickslabs/dqx/blob/v0.13.0/app/DEPLOYMENT.md#step-4-configure-databricksyml) for the full reference of each variable, including security implications of `admin_group` and when to override per target. 4. (One-time, only on a workspace whose schemas / volume / Lakebase instance were created out-of-band before adopting this layout) adopt them into bundle management: ```commandline @@ -496,7 +496,7 @@ Databricks CLI will confirm a few options: The studio's schemas (`dqx_studio`, `dqx_studio_tmp`), wheels volume, Lakebase Postgres instance, and the logical Postgres database inside it are all declared as bundle resources with `lifecycle.prevent_destroy: true` (Databricks CLI 0.268+). `databricks bundle destroy` is blocked from dropping them, so production data survives accidental destroy/replace operations. To intentionally tear something down, remove the flag, `databricks bundle deployment unbind `, then destroy manually. -For the full walkthrough — including step-by-step commands, manual `GRANT` statements, troubleshooting, target-specific configuration, and the bind workflow for adopting existing resources — see the [DQX Studio deployment guide](https://github.com/databrickslabs/dqx/blob/v0.14.0/app/DEPLOYMENT.md). +For the full walkthrough — including step-by-step commands, manual `GRANT` statements, troubleshooting, target-specific configuration, and the bind workflow for adopting existing resources — see the [DQX Studio deployment guide](https://github.com/databrickslabs/dqx/blob/v0.13.0/app/DEPLOYMENT.md). 
DQX Studio splits its data across two physical backends: high-volume append-mostly tables (`dq_validation_runs`, `dq_profiling_results`, `dq_quarantine_records`, `dq_metrics`) live in **Delta Lake** because they're written by Spark; transactional tables (rules catalog, app settings, RBAC, comments, schedule configs) live in **Lakebase Postgres** for fast row-level reads/writes from the FastAPI request handlers. diff --git a/tests/unit/test_app_backend.py b/tests/unit/test_app_backend.py index f66a3f182..69dea0c9f 100644 --- a/tests/unit/test_app_backend.py +++ b/tests/unit/test_app_backend.py @@ -116,9 +116,15 @@ def _failed_response(message: str = "boom") -> StatementResponse: _SAMPLE_CHECKS = [{"criticality": "error", "check": {"function": "is_not_null", "arguments": {"column": "id"}}}] +# Row[1] must JSON-decode to a *bare* check object, not a list. After +# the v1 baseline split each catalog row stores one check in the +# VARIANT/JSONB ``check`` column rather than an array of checks +# (see RulesCatalogService._row_to_entry — anything that decodes to a +# non-dict is dropped). ``_SAMPLE_CHECKS`` keeps its list shape because +# it is the *input* to ``save`` (which writes one row per element). 
_SAMPLE_ROW = [ "catalog.schema.table", - json.dumps(_SAMPLE_CHECKS), + json.dumps(_SAMPLE_CHECKS[0]), "3", "draft", "alice@example.com", From 3f247de658f25e11e96b3dc8d369c0d52aaffd30 Mon Sep 17 00:00:00 2001 From: Tasha Date: Tue, 12 May 2026 18:16:14 +0200 Subject: [PATCH 06/12] fix(ci): restore trailing newline in _metadata.py and bump v0.13.0 GitHub URLs to v0.14.0 in docs --- app/src/databricks_labs_dqx_app/_metadata.py | 2 +- docs/dqx/docs/dev/contributing.mdx | 6 +++--- docs/dqx/docs/installation.mdx | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/app/src/databricks_labs_dqx_app/_metadata.py b/app/src/databricks_labs_dqx_app/_metadata.py index c4f660c7f..b517c9e11 100644 --- a/app/src/databricks_labs_dqx_app/_metadata.py +++ b/app/src/databricks_labs_dqx_app/_metadata.py @@ -1,4 +1,4 @@ app_name = "DQX Studio" app_module = "databricks_labs_dqx_app.backend.app:app" app_slug = "databricks_labs_dqx_app" -api_prefix = "/api" \ No newline at end of file +api_prefix = "/api" diff --git a/docs/dqx/docs/dev/contributing.mdx b/docs/dqx/docs/dev/contributing.mdx index 444b45c50..4fc7b883b 100644 --- a/docs/dqx/docs/dev/contributing.mdx +++ b/docs/dqx/docs/dev/contributing.mdx @@ -203,9 +203,9 @@ DQX_JOB_ID= # optional locally; required for DQX_LAKEBASE_INSTANCE_NAME= # optional locally; empty = OLTP tables run on Delta ``` -Leave `DQX_LAKEBASE_INSTANCE_NAME` empty for most local dev — the app falls back to Delta for the OLTP tables (rules catalog, app settings, RBAC, comments, schedules) so you don't need a Lakebase instance to iterate. To exercise the Lakebase path locally, deploy the bundle once and point the variable at the resulting instance. See [app/DEVELOPMENT.md](https://github.com/databrickslabs/dqx/blob/v0.13.0/app/DEVELOPMENT.md) for the full set of `DQX_LAKEBASE_*` variables. 
+Leave `DQX_LAKEBASE_INSTANCE_NAME` empty for most local dev — the app falls back to Delta for the OLTP tables (rules catalog, app settings, RBAC, comments, schedules) so you don't need a Lakebase instance to iterate. To exercise the Lakebase path locally, deploy the bundle once and point the variable at the resulting instance. See [app/DEVELOPMENT.md](https://github.com/databrickslabs/dqx/blob/v0.14.0/app/DEVELOPMENT.md) for the full set of `DQX_LAKEBASE_*` variables. -If you don't have a profile yet, run `databricks auth login --host -p ` first. See the [Development Mode](https://github.com/databrickslabs/dqx/blob/v0.13.0/app/README.md#development-mode) section of the app README for more detail. +If you don't have a profile yet, run `databricks auth login --host -p ` first. See the [Development Mode](https://github.com/databrickslabs/dqx/blob/v0.14.0/app/README.md#development-mode) section of the app README for more detail. The **profiler** and **dry-run** features rely on a Databricks Job (`dqx-app-task-runner`) that only exists after you deploy the app bundle to a workspace. For local UI and backend development (routes, components, auth wiring, config), you can skip this — `DQX_JOB_ID` is not required and the app will start without it. All other features will work locally. @@ -230,7 +230,7 @@ Deploying DQX Studio to a workspace is required when you want to: - verify a change behaves correctly under the production identity model (service principal + on-behalf-of), or - run a review pass against a deployed app before merging. -For the full step-by-step (service principal creation, one-time storage bootstrap, asset-bundle deploy, Lakebase opt-out, post-deploy grants, app start, troubleshooting) follow [app/DEPLOYMENT.md](https://github.com/databrickslabs/dqx/blob/v0.13.0/app/DEPLOYMENT.md). 
+For the full step-by-step (service principal creation, one-time storage bootstrap, asset-bundle deploy, Lakebase opt-out, post-deploy grants, app start, troubleshooting) follow [app/DEPLOYMENT.md](https://github.com/databrickslabs/dqx/blob/v0.14.0/app/DEPLOYMENT.md). ### Running integration tests and code coverage diff --git a/docs/dqx/docs/installation.mdx b/docs/dqx/docs/installation.mdx index 297e32254..8b6f12555 100644 --- a/docs/dqx/docs/installation.mdx +++ b/docs/dqx/docs/installation.mdx @@ -476,7 +476,7 @@ Databricks CLI will confirm a few options: trigger_pause_status: PAUSED ``` - See the [DQX Studio deployment guide](https://github.com/databrickslabs/dqx/blob/v0.13.0/app/DEPLOYMENT.md#step-4-configure-databricksyml) for the full reference of each variable, including security implications of `admin_group` and when to override per target. + See the [DQX Studio deployment guide](https://github.com/databrickslabs/dqx/blob/v0.14.0/app/DEPLOYMENT.md#step-4-configure-databricksyml) for the full reference of each variable, including security implications of `admin_group` and when to override per target. 4. (One-time, only on a workspace whose schemas / volume / Lakebase instance were created out-of-band before adopting this layout) adopt them into bundle management: ```commandline @@ -496,7 +496,7 @@ Databricks CLI will confirm a few options: The studio's schemas (`dqx_studio`, `dqx_studio_tmp`), wheels volume, Lakebase Postgres instance, and the logical Postgres database inside it are all declared as bundle resources with `lifecycle.prevent_destroy: true` (Databricks CLI 0.268+). `databricks bundle destroy` is blocked from dropping them, so production data survives accidental destroy/replace operations. To intentionally tear something down, remove the flag, `databricks bundle deployment unbind `, then destroy manually. 
-For the full walkthrough — including step-by-step commands, manual `GRANT` statements, troubleshooting, target-specific configuration, and the bind workflow for adopting existing resources — see the [DQX Studio deployment guide](https://github.com/databrickslabs/dqx/blob/v0.13.0/app/DEPLOYMENT.md). +For the full walkthrough — including step-by-step commands, manual `GRANT` statements, troubleshooting, target-specific configuration, and the bind workflow for adopting existing resources — see the [DQX Studio deployment guide](https://github.com/databrickslabs/dqx/blob/v0.14.0/app/DEPLOYMENT.md). DQX Studio splits its data across two physical backends: high-volume append-mostly tables (`dq_validation_runs`, `dq_profiling_results`, `dq_quarantine_records`, `dq_metrics`) live in **Delta Lake** because they're written by Spark; transactional tables (rules catalog, app settings, RBAC, comments, schedule configs) live in **Lakebase Postgres** for fast row-level reads/writes from the FastAPI request handlers. From 151a52dec32160d8514ec677da0f82cd77067e0e Mon Sep 17 00:00:00 2001 From: Tasha Date: Tue, 12 May 2026 21:41:36 +0200 Subject: [PATCH 07/12] fix pytest --- tests/integration/test_app_backend.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_app_backend.py b/tests/integration/test_app_backend.py index 5a5fb4c61..c3487efa5 100644 --- a/tests/integration/test_app_backend.py +++ b/tests/integration/test_app_backend.py @@ -107,8 +107,15 @@ async def override_get_user_role() -> UserRole: warehouse_id = os.environ.get("DATABRICKS_WAREHOUSE_ID", "") test_schema = make_schema(catalog_name=TEST_CATALOG) _sql = SqlExecutor(ws=ws, warehouse_id=warehouse_id, catalog=TEST_CATALOG, schema=test_schema.name) + + # Create the OLTP fallback tables (dq_app_settings, dq_quality_rules, ...) + # in the test schema. ``AppSettingsService.ensure_table()`` is a no-op + # since the Lakebase refactor — DDL now lives in MigrationRunner. 
+ from databricks_labs_dqx_app.backend.migrations import MigrationRunner + + MigrationRunner(_sql).run_all(include_oltp_fallback=True) + _settings_svc = AppSettingsService(sql=_sql) - _settings_svc.ensure_table() async def override_get_app_settings_service() -> AppSettingsService: return _settings_svc From bde6ab7012f2c63cd8e3d90a226a233958fa58ce Mon Sep 17 00:00:00 2001 From: Tasha Date: Wed, 13 May 2026 09:58:44 +0200 Subject: [PATCH 08/12] fix test --- tests/integration/test_app_backend.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_app_backend.py b/tests/integration/test_app_backend.py index c3487efa5..621206618 100644 --- a/tests/integration/test_app_backend.py +++ b/tests/integration/test_app_backend.py @@ -5,7 +5,7 @@ import pytest from fastapi.testclient import TestClient -from databricks_labs_dqx_app.backend.common.authorization import UserRole +from databricks_labs_dqx_app.backend.common.authorization import UserRole, get_user_email from databricks_labs_dqx_app.backend.dependencies import get_obo_ws, get_user_role, get_app_settings_service from databricks_labs_dqx_app.backend.models import InstallationSettings from databricks_labs_dqx_app.backend.services.app_settings_service import AppSettingsService @@ -120,9 +120,17 @@ async def override_get_user_role() -> UserRole: async def override_get_app_settings_service() -> AppSettingsService: return _settings_svc + # The production proxy injects ``X-Forwarded-Email``; ``TestClient`` does + # not, so without this override every write endpoint returns 401. 
+ test_user_email = ws.current_user.me().user_name or "test@example.com" + + def override_get_user_email() -> str: + return test_user_email + app.dependency_overrides[get_obo_ws] = override_get_obo_ws app.dependency_overrides[get_user_role] = override_get_user_role app.dependency_overrides[get_app_settings_service] = override_get_app_settings_service + app.dependency_overrides[get_user_email] = override_get_user_email client = TestClient(app) yield client From 06757c4a0631b1f159a6e67b39428a24b6972156 Mon Sep 17 00:00:00 2001 From: Tasha Date: Thu, 11 Jun 2026 07:44:07 +0200 Subject: [PATCH 09/12] feat(app): per-run review status, embedded Insights dashboard, and bring-your-own SQL warehouse MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per-run review status - New ReviewStatusService + /api/v1/runs/{run_id}/review-status endpoints (GET / PUT / DELETE + /history) with admin-managed catalogue via /api/v1/config/run-review-statuses (e.g. Pending review → Acknowledged). - Storage: dq_run_review_status (mutable current state) + dq_run_review_status_history (append-only audit). Default value surfaced virtually so dashboards/filters never see NULL. - UI: RunReviewStatusPanel on the Run detail page next to the comments thread; multi-select status filter on Runs History. - Task runner emits the review-status field on completion; dryrun and quarantine routes include effective status in their payloads. Insights page + starter dashboard - New /insights route embeds a Databricks AI/BI dashboard via iframe. - Admin-configurable dashboard ID through config.py with DQX_DEFAULT_DASHBOARD_ID env fallback so the bundle can ship a default without preventing customer overrides. - Ships dashboards/dqx_quality_overview.lvdash.json as the starter.
DAB: bring-your-own SQL warehouse - databricks.yml now supports two patterns per target: bundle-managed (declared under resources.sql_warehouses.dqx_sql_warehouse) and existing-warehouse-by-id (just set sql_warehouse_id). - post_deploy_grants.sh hardened: CAN_USE grants to app + task-runner SPs degrade to warnings when the deployer lacks CAN_MANAGE on a shared warehouse, instead of failing the whole script. Settings + plumbing - app_settings_service expanded for review-status catalogue and dashboard config; postgres migrations and models updated; DI wiring in dependencies.py + app.py to expose the new service. Docs - New CUSTOMER_QA.md (positioning, security, lifecycle, ops Q&A). - DEPLOYMENT.md / README.md refreshed for the new warehouse modes, Insights page, and review-status flow. --- SECURITY.md | 45 + app/CUSTOMER_QA.md | 437 + app/DEPLOYMENT.md | 143 +- app/README.md | 2 +- .../dqx_quality_overview.lvdash.json | 625 + app/databricks.yml | 337 +- app/scripts/post_deploy_grants.sh | 82 +- .../databricks_labs_dqx_app/backend/app.py | 13 + .../databricks_labs_dqx_app/backend/config.py | 37 +- .../backend/dependencies.py | 15 + .../backend/migrations/__init__.py | 41 + .../backend/migrations/postgres.py | 39 + .../databricks_labs_dqx_app/backend/models.py | 11 + .../backend/pg_executor.py | 12 +- .../backend/routes/v1/__init__.py | 2 + .../backend/routes/v1/config.py | 292 + .../backend/routes/v1/dryrun.py | 68 +- .../backend/routes/v1/quarantine.py | 16 +- .../backend/routes/v1/review_status.py | 208 + .../backend/services/app_settings_service.py | 216 + .../backend/services/review_status_service.py | 366 + .../ui/components/RunReviewStatusPanel.tsx | 301 + .../ui/lib/api-custom.ts | 355 + app/src/databricks_labs_dqx_app/ui/lib/api.ts | 10813 +++++++++------- .../ui/routes/_sidebar/config.tsx | 621 +- .../ui/routes/_sidebar/insights.tsx | 159 + .../ui/routes/_sidebar/route.tsx | 20 + .../ui/routes/_sidebar/runs-history.tsx | 213 +- .../ui/types/routeTree.gen.ts | 
21 + app/tasks/src/dqx_task_runner/runner.py | 36 +- app/tests/test_runner_helpers.py | 29 +- 31 files changed, 10944 insertions(+), 4631 deletions(-) create mode 100644 SECURITY.md create mode 100644 app/CUSTOMER_QA.md create mode 100644 app/dashboards/dqx_quality_overview.lvdash.json create mode 100644 app/src/databricks_labs_dqx_app/backend/routes/v1/review_status.py create mode 100644 app/src/databricks_labs_dqx_app/backend/services/review_status_service.py create mode 100644 app/src/databricks_labs_dqx_app/ui/components/RunReviewStatusPanel.tsx create mode 100644 app/src/databricks_labs_dqx_app/ui/routes/_sidebar/insights.tsx diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 000000000..166bf16ff --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,45 @@ +# Security Policy + +## Reporting a Vulnerability + +**Please do not report security vulnerabilities through public GitHub issues, discussions, or pull requests.** + +If you believe you've found a security vulnerability in DQX (the library, the CLI, or the DQX Studio web application), report it privately through one of the following channels: + +1. **GitHub Security Advisories (preferred)** — open a private advisory on the [DQX repository](https://github.com/databrickslabs/dqx/security/advisories/new). This keeps the report confidential until a fix is released. +2. **Email** — if you cannot use GitHub Security Advisories, contact the Databricks Labs maintainers at [labs-oss@databricks.com](mailto:labs-oss@databricks.com) with `DQX SECURITY` in the subject line. + +### What to include + +To help us triage quickly, please include as much of the following as you can: + +- A description of the issue and its potential impact. +- Steps to reproduce (proof-of-concept code, sample input, or a minimal repro repo). +- The affected version(s) of DQX and, if relevant, the Databricks Runtime version. +- Any mitigations or workarounds you have identified. 
+ +We will acknowledge receipt within **5 business days** and provide a more detailed response within **10 business days** indicating our next steps. We aim to patch confirmed vulnerabilities in a timely manner and will coordinate disclosure with the reporter. + +## Supported Versions + +Security fixes are only guaranteed for the **latest minor release** on the [`main` branch](https://github.com/databrickslabs/dqx) and published on [PyPI](https://pypi.org/project/databricks-labs-dqx/). Please upgrade to the latest release before reporting an issue when possible. + +## Scope + +The following are in scope: + +- The `databricks-labs-dqx` Python library and CLI. +- The DQX Studio web application under `app/` (FastAPI backend + React frontend). +- The task-runner Databricks Job under `app/tasks/`. +- Example code in `demos/` (informational only — reports welcome but low priority). + +Out of scope: + +- Vulnerabilities in third-party dependencies that are better reported upstream. +- Issues requiring a privileged user (e.g., workspace admin) to already be compromised. +- Denial-of-service resulting from reasonable use of Databricks platform limits. + +## Disclosure Policy + +- We follow a **coordinated disclosure** process and will credit reporters in release notes unless they prefer anonymity. +- We request that reporters give us a reasonable window (typically 90 days) to issue a fix before any public disclosure. diff --git a/app/CUSTOMER_QA.md b/app/CUSTOMER_QA.md new file mode 100644 index 000000000..225de506c --- /dev/null +++ b/app/CUSTOMER_QA.md @@ -0,0 +1,437 @@ +# DQX Studio — Customer Q&A + +A reference for the customer Q&A session. Questions are grouped by theme; answers are written to be **honest and specific** (with limitations called out) rather than marketing prose. Skip what you don't need. + +> "DQX" = the open-source [`databrickslabs/dqx`](https://github.com/databrickslabs/dqx) data-quality engine. 
"DQX Studio" = this app — a UI + control plane on top of DQX, deployed as a Databricks App in the customer's own workspace. + +--- + +## 1. Product positioning + +### 1.1 What is DQX Studio in one sentence? +A workspace-native UI for authoring, approving, scheduling, and reviewing DQX data-quality rules — running entirely inside the customer's Databricks account, using the customer's own compute and storage. + +### 1.2 How is it different from running DQX in a notebook? +DQX (the library) gives you the engine. DQX Studio gives you the workflow around it: + +- **Rule authoring UI** (form-based + YAML) instead of editing notebooks +- **Approval workflow** with `RULE_AUTHOR` / `RULE_APPROVER` separation +- **Schedules** that submit runs to a serverless job — no notebook orchestration to maintain +- **Run history** with status, sample bad rows, metrics, comments, and review status per run +- **Insights dashboard** — a Lakeview/AI-BI dashboard embedded as an iframe, customisable per workspace +- **Hybrid storage** (Lakebase OLTP for rules/settings, Delta for run history) so the UI feels like a real app, not a Spark query under every click + +### 1.3 Is this a Databricks product or open source? +It's an open-source companion to the `databrickslabs/dqx` library — both live under `databrickslabs/` (Databricks Labs is the field-maintained ecosystem, not the core product). You get the source, you deploy it into your own workspace via a Declarative Asset Bundle, and you own the operating model. + +### 1.4 What does it *not* do? +Stated up front so there are no surprises: + +- **No cross-workspace rules.** DQX Studio runs inside one workspace and authors rules for tables visible in that workspace's Unity Catalog metastore. If you have multiple metastores (regions, business units), you deploy one instance per metastore. +- **No vendor SaaS plane.** All metadata stays in your account. 
That's a feature (sovereignty) and a constraint (no cross-customer benchmarks, no external data-quality marketplace). +- **No anomaly detection / ML quality models.** Today it executes rules you (or the profiler) author. Anomaly-style "this column drifted" checks would be net-new work. +- **Not a catalog or lineage tool.** It reads from Unity Catalog and writes results to it; it does not replace Atlan, Collibra, or Purview. + +--- + +## 2. Architecture + +### 2.1 What does deployment look like? +One Databricks Asset Bundle (`databricks.yml`) provisions everything in one `make app-deploy`: + +- **Databricks App** (FastAPI + React, single process, served by the Apps runtime) +- **Serverless Job** for Spark work — the *task runner* — invoked for profiler, dry-run, and scheduled runs +- **Lakebase Postgres instance** (`database_instances.lakebase`) for OLTP state +- **Two UC schemas** (`dqx_studio`, `dqx_studio_tmp`) under a customer-supplied catalog +- **UC volume** (`wheels`) the app uses to ship DQX wheels into the job +- **SQL warehouse** — managed by the bundle, or BYO (bring-your-own) if you already have one +- **Lakeview dashboard** (`dashboards.dqx_quality_overview`) pinned to the app's *Insights* page + +All stateful resources carry `lifecycle.prevent_destroy: true`, so a stray `bundle destroy` cannot drop them and wipe state. + +### 2.2 Why a separate job for the Spark work? +Databricks Apps run in a container without Spark. We use the FastAPI process for the UI, REST API, and short OLTP queries — anything that needs Spark (profiling, dry runs, scheduled validation) is submitted to a serverless job we provision and pin to a wheels volume. The app polls job status and reads results back from Delta. 
+ +That gives you: + +- **Predictable autoscaling** — serverless handles the spiky workload +- **Tight cost control** — the app process stays small; nothing Spark-shaped is running when no one is validating +- **Identity isolation** — Spark work runs as the task-runner SP with the user's OBO token threaded through, so UC permissions still apply + +### 2.3 Why a hybrid storage model (Lakebase + Delta)? +Data quality has two very different workloads, and we picked the right tool for each: + +| Workload | Backend | Why | +|---|---|---| +| Rules CRUD, app settings, RBAC, comments, schedules | **Lakebase Postgres** | Sub-millisecond reads, real transactions, indexes on small tables. Delta's not made for "give me one row by primary key, fast." | +| Profiling results, validation runs, metrics, quarantine rows | **Delta on UC** | High-volume append, time-series scans, dashboard JOINs, AI/BI integration. Postgres would melt under quarantine volumes. | + +The split is invisible to service code — `SqlExecutor` (Delta) and `PgExecutor` (Lakebase) share the same surface. **If a workspace can't or won't enable Lakebase, the OLTP tables fall back to Delta** via the `oltp_fallback` migration. The app still works; latency on rule CRUD is just higher. + +### 2.4 How does authentication work? +Two identities, each with a deliberate scope: + +- **User identity (OBO)** — every request from the browser carries an `X-Forwarded-Access-Token`. UC browsing (catalogs/schemas/tables), temporary view creation for profiler/dry-runs, and user-initiated job submissions all run as the user. This is how we enforce "you can only validate tables you can already read." +- **App / task-runner SP** — owns the app's own state (rules catalog, settings, OLTP migrations, wheel uploads) and runs scheduled jobs that have no user attached. Critically, the app SP is **only granted what it needs inside the DQX schemas** — it never gets blanket UC access. 
+ +There are no admin-scoped REST calls made by the app itself. + +### 2.5 What's the data residency story? +Everything lives in the customer's Databricks account: + +- Source tables stay where they are — DQX Studio never moves them. +- Quarantine rows (potentially PII) land in Delta tables inside the customer's catalog. +- OLTP state (rules, comments, schedules) lives in the customer's Lakebase instance. +- Wheels and app code live in the customer's UC volume + workspace files. + +No data, metadata, or telemetry leaves the account. There is no DQX Studio SaaS plane to phone home to. + +--- + +## 3. Rule authoring & execution + +### 3.1 What kinds of checks does DQX support? +Three families, in increasing power: + +1. **Column-level row checks** — `is_not_null`, `is_in_range`, `regex_match`, `is_unique`, etc. Configured per column. +2. **Row-level dataset checks** — composite expressions on multiple columns of the same row. +3. **Cross-table SQL checks** — full SQL queries (referential integrity, aggregate reconciliation, business-logic joins). Whatever you can write as a `SELECT` returning the offending rows, DQX can run. + +All three are first-class in the engine, the UI, and the metrics pipeline. + +### 3.2 How do users author rules? +Two surfaces, same backing store: + +- **Form mode** — pick a table, pick columns, pick a check function, set parameters, optional labels. Good for the long tail. +- **YAML mode** — paste/edit DQX YAML directly. Good for power users and copy-paste from existing notebooks. + +Rules are versioned in `dq_quality_rules` + `dq_quality_rules_history` (audit log). They flow through statuses (`draft → proposed → approved`) gated by RBAC. + +### 3.3 What's the profiler? +A one-click way to bootstrap rules for a table. You point at a table; the profiler runs a serverless job that samples the data, infers candidates (null thresholds, value ranges, uniqueness), and writes them to `dq_profiling_results`. 
You then pick the ones you want and promote them to draft rules. It's not magic — it's a starting point. + +### 3.4 What's a "dry run"? +Validate a rule (or rule set) against the live table **before** approving or scheduling it. It returns: + +- Pass/fail counts (valid rows, error rows, warning rows, total rows) +- A 10-row sample of failing data inline +- A full quarantine table written to `dq_quarantine_records` (capped at 100k rows for SQL checks to bound storage) +- Per-check metrics + custom metrics into `dq_metrics` + +Dry runs use OBO, so the user can only validate tables they have `SELECT` on. + +### 3.5 How do schedules work? +Configure a per-rule-set schedule (cron or interval) in the UI. An in-process asyncio loop inside the FastAPI worker ticks every minute, picks up due schedules, and submits the same task-runner job that dry runs use — `task_type` discriminates. Scheduled runs use the **app SP**, not OBO, because there's no user at 3 AM. The schedule's *target rules* honour the rule-set the author selected. + +A `/tmp/.dqx_scheduler.lock` exclusive file lock guarantees the loop runs in exactly one app worker even if Databricks Apps autoscales to multiple replicas. + +### 3.6 What's "review status"? +A per-run label set by reviewers (the business owner, the on-call SA, whoever) after a run completes. Values are admin-configurable in **Configuration → Run review statuses**; the default catalogue ships with *Pending review*, *Acknowledged*, *Resolved*, *False positive*. The Runs History page filters on it as multi-select chips, so a steward can ask "what's still pending review?" in one click. Every change is audited in `dq_run_review_status_history`. + +It sits next to free-text comments on the run detail row — the dropdown is the structured action, the comment is the explanation. + +--- + +## 4. Permissions & governance + +### 4.1 What roles exist inside the app? 
+Four primary roles plus one orthogonal: + +- **`ADMIN`** — configure the app (review-status catalogue, custom metrics, labels, retention, embedded dashboard, role↔group mappings) +- **`RULE_APPROVER`** — move rules from `proposed` to `approved` +- **`RULE_AUTHOR`** — create/edit drafts and propose rules +- **`VIEWER`** — read runs, dashboards, results +- **`RUNNER`** (orthogonal) — can execute approved rules on-demand + +Roles are resolved from **Databricks workspace-group membership** via the `dq_role_mappings` table. Customers map their existing IAM groups onto DQX roles — we never invent a new identity system. + +### 4.2 Who can do what on a table they don't own? +- **Browse a table in the UI**: the user needs `USE CATALOG` + `USE SCHEMA` + `SELECT` on the table (standard UC). The UI uses OBO, so the catalog tree only shows what the user can already see. +- **Author a rule against a table**: same — OBO read access is required to author and to dry-run. +- **Approve a rule**: the `RULE_APPROVER` role inside DQX Studio, regardless of table ownership. This is governance-by-process; the data owner does *not* need to use the app. +- **See a run's quarantine rows**: catalog-filtered server-side. The runs list and the quarantine table both gate on the user's UC-accessible catalogs. + +### 4.3 What permissions does the app's service principal need? +Scoped tight: + +- `USE CATALOG` + `USE SCHEMA` + `ALL PRIVILEGES` on the two DQX schemas (`dqx_studio`, `dqx_studio_tmp`) only +- `READ VOLUME` + `WRITE VOLUME` on the wheels volume only +- `CAN USE` on the SQL warehouse the app is bound to +- `Service Principal: User` role on the task-runner SP (so the app can submit jobs as it) + +It is **not** a workspace admin and **not** a metastore admin. If you remove the app, those grants are the only blast radius. + +### 4.4 What's the deployer's permission burden?
+Documented as a table in `DEPLOYMENT.md` — about 10 line items, the bulk of which collapse if the deployer is added to a UC-admin group. The single most common failure on first deploy is missing `MANAGE` on the target catalog (the `post_deploy_grants.sh` script needs it to GRANT to the app SP). We surface that error explicitly with a fix. + +### 4.5 Is everything audited? +Yes, but the auditing is **distributed** across the platform rather than in one DQX log: + +- **Rule edits** → `dq_quality_rules_history` +- **Schedule edits** → `dq_schedule_configs_history` +- **Review-status changes** → `dq_run_review_status_history` +- **Job submissions** → Databricks Jobs UI / `system.lakeflow.jobs` +- **Table reads (OBO)** → UC audit logs (the standard ones) +- **App requests** → Databricks Apps logs + +We didn't invent a parallel audit store because UC + Jobs already audit everything we'd put in one. + +--- + +## 5. Operations + +### 5.1 What's the upgrade story? +`make app-deploy` is idempotent. Bundle changes go through `databricks bundle deploy`; schema changes go through versioned, idempotent migration runners (`MigrationRunner` for Delta, `PgMigrationRunner` for Lakebase). On every app start the wheel hash gates whether a re-upload + job-environment patch is needed. + +You re-run the same command. If you forgot to update a variable, the bundle complains; it doesn't silently mutate state. + +### 5.2 What happens if `bundle destroy` runs accidentally? +The three stateful resource keys — `schemas.main_schema`, `volumes.wheels`, `database_instances.lakebase` — carry `lifecycle.prevent_destroy: true`. The CLI refuses to drop them. To intentionally tear those down you have to edit `databricks.yml`, `unbind` the resource, and destroy it manually — a three-step opt-in. + +### 5.3 What about backup? +Three layers, all leveraging native Databricks: + +- **Delta tables** (runs, metrics, quarantine): UC's time travel + Delta retention. Standard backup story. 
+- **Lakebase Postgres**: managed by the Lakebase service (snapshots, PITR depending on the tier). The instance is the unit of backup. +- **Bundle source of truth**: `databricks.yml` lives in your VCS. The whole deployment is reproducible from source + a Postgres restore. + +### 5.4 What's the cost footprint? +Three line items: + +- **App compute** — Databricks Apps charges a flat hourly rate per running app. The FastAPI process is small (one or two replicas). +- **Lakebase instance** — runs continuously; size is small (the rules catalog is in the low-thousands of rows for most customers). +- **Task-runner job (serverless)** — only billed when validations run. Cost scales with row count and check complexity; profiler/dryrun is bounded, scheduled is what you make it. + +The app does **not** keep a SQL warehouse warm. The Lakeview dashboard auto-stops between uses. + +### 5.5 How do we monitor it? +- App-level logs in Databricks Apps UI +- Job-level logs in Jobs UI (each task-runner submission shows up as a run) +- UC audit logs for table access +- `dq_validation_runs` is itself a run history table — `SELECT … WHERE status = 'FAILED'` is your alert query if you want one +- The Insights dashboard ships KPIs for run health, error trends, top-failing tables + +--- + +## 6. Comparisons (FAQ-style) + +### 6.1 How does DQX Studio compare to Soda / Great Expectations? +Both are rule-driven DQ frameworks; Soda is the closer comparison (commercial product with a UI; Great Expectations is library-first). For Soda specifically, see [Section 11 — Soda deep dive](#11-soda-comparison--deep-dive) which is the answer this customer is actually here for. + +The short version: Soda is broader (multi-source, anomaly detection, alerts/incidents, SaaS UI); DQX Studio is **deeper inside Databricks** (PySpark-native, UC-aware OBO, Delta-resident quarantine, single workspace plane). Customers serving multiple warehouses often pick Soda; customers all-in on Databricks tend to land on DQX. 
+ +### 6.2 How does it compare to Monte Carlo / Anomalo (SaaS observability)? +Those tools shine at automatic anomaly detection across many sources by indexing metadata in a vendor cloud. DQX Studio sits at the opposite end of the spectrum: rule-driven, single-source-of-truth-is-UC, sovereign. Customers tend to use both — DQX for *explicit* rules they care about, MC/Anomalo for *implicit* drift detection across the catalog. We do not pretend to replace anomaly detection today. + +### 6.3 How does it compare to Collibra / Atlan? +Different layer. Collibra/Atlan are governance catalogs. DQX Studio is a quality control plane. They integrate cleanly: Atlan/Collibra reference UC tables; DQX Studio writes quality scores into UC; the catalog renders them. + +### 6.4 What about SDP / DLT Expectations? +Expectations are inline with pipelines. DQX rules sit outside the pipeline and validate the *result* — useful when: + +- You don't own the pipeline (it's a vendor / partner load) +- You want post-hoc validation independent of pipeline framework +- You want a UI for non-data-engineers to author rules + +The two coexist. DLT Expectations are great inside DLT pipelines; DQX Studio is the catch-all for everything else. + +--- + +## 7. Roadmap-ish questions + +### 7.1 What's the cross-metastore story? +Today: one DQX Studio per metastore. We're not solving multi-metastore authoring in this version — the trade-off would be moving metadata out of Databricks, which kills the sovereignty story. If a customer needs a federated view across metastores, they roll up `dq_metrics` from each instance into a central Delta share and dashboard on it. + +### 7.2 Anomaly / ML quality? +On the roadmap conceptually, not committed. DQX has the metric infrastructure (`dq_metrics` is a long-format event store with `rule_set_fingerprint` provenance), so a Z-score / forecast-residual layer on top is feasible. We'd rather let customer demand drive that work than ship a half-baked anomaly detector.
+ +### 7.3 API surface for external automation? +Already there. Everything the UI does is REST under `/api/v1/*`. The OpenAPI spec is published, the React frontend is generated from it (`orval`), so external scripts get the same typed surface. Customers have used this to scaffold rules from dbt manifests. + +### 7.4 Embedded dashboards beyond the default? +The bundle ships a starter Lakeview dashboard pinned to the *Insights* page. Customers customise it in **AI/BI Dashboards** (the iframe picks up changes immediately, no redeploy) or point Insights at a completely different dashboard ID via **Configuration → Insights dashboard**. + +--- + +## 8. Security & compliance + +### 8.1 Where's PII? +Two places, both Delta tables in the customer catalog: + +- `dq_quarantine_records` — failing rows, by definition contain the raw values that broke the rule +- `dq_profiling_results.sample_invalid_json` — a 10-row sample from profiler/dry-run for UI preview + +Both inherit UC's access control. The app filters quarantine reads by the user's UC-accessible catalogs server-side. To hide PII from the UI, restrict the `SELECT` grant on those tables; the app respects that automatically (OBO). + +### 8.2 Network egress? +None. The app's outbound calls are: + +- Databricks REST API (UC, Jobs, Apps, Secrets) over the workspace's hostname +- Lakebase Postgres (over the workspace VNet) +- That's it. There is no third-party CDN, telemetry endpoint, or vendor cloud. + +### 8.3 How are secrets managed? +The task-runner SP's OAuth client secret is the only one. It's stored in Databricks Secret Scopes, injected into the job's environment, never logged. The bundle's `make app-deploy` flow walks the deployer through minting it (or reuses an existing one). + +### 8.4 Compliance certifications? +DQX Studio inherits the workspace's certifications — there is no separate compliance scope because there is no separate plane. 
If your workspace is HIPAA / FedRAMP / etc., DQX Studio inherits those controls because every byte lives inside it. + +--- + +## 9. The customer's likely follow-ups + +Pre-empt these — we've heard them: + +- **"Can we run this in our own VPC?"** — Yes, automatically. It's a Databricks App in your workspace. +- **"Does it work with Unity Catalog Federation?"** — UC Federation tables (foreign tables to Snowflake/BigQuery/Postgres) show up in the catalog tree and can be authored against. Performance depends on the federation source, not on DQX. +- **"Can rules be promoted across environments (dev → prod)?"** — Today: export YAML from one deployment, commit, import to another. We don't have an automated promotion pipeline in the UI yet. +- **"What if Lakebase isn't enabled in our workspace?"** — Set `lakebase_enabled: false` (or leave `lakebase_instance_name` empty). Migrations fall back to Delta automatically. The app still works. +- **"Why a service principal instead of just our personal token?"** — Scheduled runs at 3 AM can't carry a user token. We need a stable identity. The OBO model still ensures *interactive* requests use the user. + +--- + +## 10. Things to demo (in order) + +If the customer wants a 20-minute walkthrough: + +1. **Profiler** — point at one of their actual tables, get suggestions in 30 seconds +2. **Promote → Author → Dry run** — go from suggestion to approved rule with one round-trip +3. **Schedule** — set it to run hourly; show the run hit immediately +4. **Run detail** — sample bad rows, comments, review status dropdown, label by team +5. **Insights dashboard** — the embedded AI/BI iframe, then jump out to AI/BI and edit a tile to show the live update path +6. **Configuration page** — show how every operational knob (labels, statuses, custom metrics, retention, role mappings, dashboard ID) is admin-configurable without redeploy + +--- + +## 11. 
Soda comparison — deep dive + +Most customers evaluating both have already piloted Soda Core / Soda Cloud and want to know: *what changes if we switch?* This section is structured around the questions Soda users actually ask. **It's written to be honest about where Soda wins**, because the customer has almost certainly already read Soda's marketing and will catch handwaving immediately. + +### 11.1 What is Soda, briefly, so we're talking about the same thing? +Three products under one brand: + +- **Soda Core** (OSS) — Python library + CLI; you author checks in `SodaCL` (their YAML DSL) and run `soda scan` against a data source. Results print or POST to Soda Cloud. +- **Soda Library** (commercial) — the same engine plus the connector to Soda Cloud and additional check types (reconciliation, anomaly detection, MLOps). +- **Soda Cloud** (SaaS) — the web UI: datasets, incidents, alerts, anomaly dashboards, Slack/PagerDuty integrations, collaboration features. + +Soda's compute model: the library translates `SodaCL` into SQL, runs it against your warehouse (Databricks via JDBC is one of ~15 connectors), and ships the **metric values + failing-row samples** back to Soda Cloud. Your raw data stays in the warehouse; the *results* live in their SaaS. + +### 11.2 What does the customer actually gain by picking Soda over DQX Studio? +Stated up front, because it's real: + +1. **Multi-source out of the box.** ~15 connectors (Snowflake, BigQuery, Redshift, Postgres, MSSQL, Spark, Databricks, …). If the customer has a heterogeneous stack — Snowflake + Databricks + Postgres — Soda monitors all three from one UI. DQX Studio is Databricks-only. +2. **Anomaly detection** (Sherlock algorithm). Soda Cloud auto-detects "row count dropped 30 % vs the last 14 days" without you writing a rule. DQX is rule-driven; you'd need to write the equivalent expectation manually (and tune it). +3. 
**Incident management.** Soda Cloud has first-class incidents — assign to a person, link to a Slack thread, mark resolved, see MTTR over time. DQX has comments + review status, which is enough for triage but isn't an incident workflow. +4. **Alerting.** Soda Cloud routes check failures to Slack / Teams / PagerDuty / email / webhooks. DQX has scheduled runs and a dashboard; **there is no built-in alerting today** — you'd build it from `dq_validation_runs` (e.g. a SQL alert on a Databricks dashboard, or a downstream Lakeflow job). Honest gap. +5. **Time-tested at scale.** Soda has been shipping since ~2020 with a dedicated team. DQX is younger and field-maintained (Databricks Labs). + +If the customer needs any of those five things, **Soda is the better choice** and we should say so. The next question (11.3) is where DQX usually wins back. + +### 11.3 What does the customer gain by picking DQX Studio? + +1. **No data ever leaves the workspace.** Soda Cloud is SaaS — even with the on-prem Soda Agent, metric values and failed-row samples travel to their cloud. DQX Studio is *entirely* in the customer's Databricks account; quarantine rows land in their UC, OLTP state in their Lakebase, no telemetry phones home. For regulated industries (banking, healthcare, public sector) this is often the decisive factor. +2. **OBO permission enforcement.** A user authoring or dry-running a rule against table X can only do it if **they** have `SELECT` on X. Soda runs every scan as the **connection identity** — typically one service account with broad SELECT. DQX is fine-grained; Soda is coarse. +3. **PySpark-native execution.** DQX checks run as Spark jobs in the customer's serverless compute, parallelised, with native Delta predicates. Soda Library translates checks into SQL strings shipped over JDBC — for a billion-row table this is slower and more expensive than a serverless Spark scan, especially for distinct-count / referential-integrity / cross-table checks. +4. 
**Quarantine is a Delta table.** When a DQX check fails, the offending rows are persisted to `dq_quarantine_records` in UC. Downstream teams can join `dq_quarantine_records` to the original source in plain SQL to investigate, build remediation pipelines, feed bug-tracker tickets — anything you'd do with a normal Delta table. Soda stores **samples** (default 100 rows) in Soda Cloud; the full failing set is not retained. +5. **Cost.** DQX Studio is open source — the customer pays for Databricks Apps + serverless job + Lakebase, all of which they already have on their bill. Soda Cloud is per-dataset / per-check / per-seat pricing on top of the warehouse compute that Soda's scans consume. For Databricks-heavy estates this difference is material. +6. **Integration with the surrounding stack.** DQX writes to UC; UC feeds AI/BI dashboards; rule metadata sits next to lineage; access is governed by the same UC permissions every other tool respects. Soda is an island that integrates *with* the warehouse, not *as* part of it. +7. **UC-native authoring UX.** Browsing catalogs/schemas/tables in DQX Studio uses live UC reads as the logged-in user, so authors see *exactly* what they have access to. Soda's UI builds against the connections an admin configured — there's a layer of translation between "what's in the warehouse" and "what's in Soda." 
+ +### 11.4 Direct feature-by-feature +The table the customer is going to ask for, side by side: + +| Capability | Soda Cloud + Library | DQX Studio | +|---|---|---| +| Rule DSL | SodaCL (YAML) | DQX YAML (compatible with [`databrickslabs/dqx`](https://github.com/databrickslabs/dqx)) | +| UI for rule authoring | Yes (Soda Cloud) | Yes (form + YAML modes in app) | +| Profiler / suggested rules | Yes (column profile) | Yes (`dq_profiling_results`) | +| Cross-table SQL rules | `failed_rows_query` | Native `sql` check type, persisted to `dq_quarantine_records` | +| Anomaly detection | Yes (Sherlock, on metric history) | **No** (rule-driven only today) | +| Reconciliation checks | Yes (Soda Library) | Yes via cross-table SQL | +| Schedules | Soda Agent + cron, or CI/CD | In-process scheduler (cron/interval) + serverless job | +| Approval workflow (author → approver) | No formal split | Yes (`RULE_AUTHOR` / `RULE_APPROVER` roles) | +| Run history | Yes (Soda Cloud retention) | Yes (Delta — retention is whatever you set, default unbounded) | +| Quarantine of failing rows | Samples (default 100 in Soda Cloud) | Full set in Delta UC table (capped at 100k for SQL checks) | +| Comments / review workflow | Incidents (first-class) | Comments + per-run review status with audit | +| Alerts / notifications | Slack / Teams / PagerDuty / email / webhook | **None built-in** (use Databricks SQL Alerts on `dq_validation_runs`) | +| Dashboards | Soda Cloud built-in | Embedded Lakeview/AI-BI; fully customisable | +| Multi-source | ~15 warehouses | Databricks (UC) only | +| Identity for scans | Single connection identity (typically a SP) | OBO per-user for interactive; SP for scheduled | +| Data residency | Soda Cloud SaaS (or Agent if licensed) | 100 % customer Databricks account | +| Audit trail | Soda Cloud audit | Distributed: `*_history` tables + UC + Jobs | +| Cost model | Per-dataset/check/seat (SaaS) + warehouse compute | Databricks compute only (open source app) | +| Open source 
| Soda Core only (limited) | Yes, all of it | +| Cross-workspace / cross-metastore | Yes (single Soda Cloud over many connections) | No — one instance per metastore | +| API / programmatic surface | Soda Cloud API + Python | OpenAPI under `/api/v1/*`, typed React client generated | + +### 11.5 "Can we migrate rules from Soda to DQX?" +**Mostly yes, with rewrites — there is no one-shot importer.** SodaCL and DQX YAML are structurally similar (both are check-list YAML rooted at a table) but the check function names and parameters differ. A typical migration path: + +1. Export the SodaCL checks for the Databricks-targeted datasets. +2. For each check, map it: `missing_count = 0` → DQX `is_not_null`, `invalid_count(col) using regex … = 0` → DQX `regex_match`, `failed_rows_query` → DQX `sql` check. Most map 1:1. +3. The non-mappable checks are exactly the ones DQX doesn't have today: anomaly detection, change-detection-vs-history. Those need a manual replacement (write the rule explicitly, or accept it as a gap). +4. Bulk-import the rewritten YAML through the DQX Studio rule API or the rules UI's YAML mode. + +A field engineer can usually rewrite ~50 SodaCL checks in an afternoon if they're standard shapes. For very large estates we'd consider scripting it, but there's no productised tool today. + +### 11.6 "Can we run both side-by-side during the migration?" +Yes, with no conflicts. Soda reads through JDBC; DQX runs Spark jobs. They don't share state or coordinate on rule definitions, so there's nothing to deconflict — the only consideration is double-billed warehouse / serverless compute during the overlap period. We recommend running both for one to two weeks against the same critical tables to verify equivalent rule outcomes before turning Soda off. + +### 11.7 "Soda has anomaly detection. Is the lack of it a blocker?" +Honest answer: **it depends on what they're using it for**. 
+ +- If they rely on anomaly detection as the *primary* DQ signal ("we don't write rules, we let Soda find weirdness"), DQX is not a like-for-like swap today. They should keep an anomaly tool — Soda, Monte Carlo, Anomalo — alongside DQX. +- If they use anomaly detection as a *safety net* on top of explicit rules ("our rules catch 80 %, Sherlock catches the 20 % we forgot"), they can get most of that 20 % through good rule discipline + the DQX profiler. The metric infrastructure in DQX (`dq_metrics` with `rule_set_fingerprint` provenance) is anomaly-detection-ready — a Z-score or forecast-residual layer on top is feasible as a customer-side notebook today. + +The honest roadmap answer: anomaly detection is conceptually on our roadmap, not committed. We'd rather not promise a date. + +### 11.8 "Soda has alerts to Slack. We need that." +Real gap. The pragmatic answer in Databricks today: + +- **`dq_validation_runs`** is a normal Delta table. Run a **Databricks SQL Alert** on `SELECT count(*) FROM dq_validation_runs WHERE status = 'FAILED' AND created_at > current_timestamp() - INTERVAL 1 HOUR` and route it to Slack/Teams/email via the standard SQL Alerts destination plumbing. +- Same for "run with > N error rows": `SELECT … WHERE error_rows > <N>`. +- For richer routing (per-table owner, per-severity), a Lakeflow job downstream of `dq_metrics` is a few hours of work. + +Native alerting *inside* DQX Studio is a fair future ask; right now it's "use the platform's alerting layer." + +### 11.9 "Soda Cloud has incidents — do you?" +DQX Studio has the two pieces that matter for triage: + +- **Comments** on every run (a `dq_comments` thread, just like comments on a rule) +- **Review status** per run with an audit trail (the new dropdown we added — see [§3.6](#36-whats-review-status)) + +That covers "acknowledge / under investigation / resolved / false positive" with attribution and history. 
What it does not cover is: + +- Auto-creating an external ticket (Jira / ServiceNow) — that's a webhook we don't have today. +- A separate incident object that aggregates multiple runs of the same failing rule into one open issue. + +If the customer's DQ model is incident-driven (an on-call rotation triages failures across many checks), Soda has a more polished surface today. If their model is rule-owner-driven ("the team that owns the rule fixes the rule"), DQX is sufficient. + +### 11.10 "What about Soda Agent — they say data doesn't leave our network either?" +The Soda Agent is a containerised executor a customer hosts in their own infrastructure to run scans without the data passing through Soda's cloud. **The check definitions, metric results, and failed-row samples still travel to Soda Cloud** — that's how the UI works. The Agent improves the data path; it does not eliminate the SaaS plane. + +DQX Studio has no SaaS plane at all. Every byte — definitions, results, samples, audit logs — lives in the customer's Databricks account. + +### 11.11 "Pricing — how should we model the comparison?" +Approximate, not a quote: + +- **Soda Cloud**: per-dataset, per-check, or per-seat depending on the contract. Plus the Databricks warehouse compute that runs the scans. +- **DQX Studio**: Databricks Apps (small flat hourly), one small Lakebase instance, serverless job billed only when validating. All on the customer's existing Databricks bill. + +The honest framing: for a small estate (~50 monitored tables), Soda Cloud is often cost-comparable to DQX. For a large estate (hundreds to thousands of tables), DQX scales sub-linearly because there's no per-dataset SaaS fee — only compute, which the customer already accepts. Get the customer to share approximate counts before quoting. + +### 11.12 "Is Soda integration with DQX possible?" +There's no first-party integration. 
If a customer really wants both — Soda's anomaly detection + alerts plus DQX's UC integration and OBO authoring — the practical pattern is: + +- Run DQX as the authoring + execution layer (its strength) +- Use Soda Cloud against the **same source tables** with the anomaly-detection check types only (its strength) +- Don't double-write the explicit rules — pick one tool per rule + +This is unusual but real for customers who can absorb both bills and want best-of-both. Most pick one. + +### 11.13 The two-line elevator answer +If the customer asks you to summarise the whole comparison in two sentences: + +> **Soda is the right answer if you need multi-source coverage, anomaly detection, or built-in alerting today — and you're comfortable with metadata in their SaaS plane.** **DQX Studio is the right answer if you're Databricks-centric, want every byte to stay in your account, want OBO-level permission enforcement, and prefer rule-driven over anomaly-driven DQ.** + +Most Databricks-all-in customers land on DQX. Most heterogeneous customers (Snowflake + Databricks + Postgres) land on Soda. Neither answer is wrong. diff --git a/app/DEPLOYMENT.md b/app/DEPLOYMENT.md index ef40a3a92..86b90e544 100644 --- a/app/DEPLOYMENT.md +++ b/app/DEPLOYMENT.md @@ -19,20 +19,21 @@ The deploying user (you) needs the permissions below. 
They are **all** consumed | # | Permission | Granted on | Used by | What fails without it | |---|---|---|---|---| | 1 | **Workspace access** entitlement | You, in the workspace | All CLI calls | `databricks` CLI can't reach the workspace | -| 2 | **Databricks SQL access** entitlement | You, in the workspace | `bundle deploy` (creates the X-Small SQL warehouse) | `Error: not authorized to create SQL Endpoint` | -| 3 | **Allow cluster create** entitlement | You, in the workspace | `bundle deploy` (warehouse + job clusters) | Warehouse / job creation rejected | +| 2 | **Databricks SQL access** entitlement | You, in the workspace | `bundle deploy` — **only** when the target uses the *managed* SQL warehouse pattern (see [SQL warehouse: managed vs. BYO](#sql-warehouse-managed-vs-byo)). Not needed for BYO. | `Error: not authorized to create SQL Endpoint` | +| 3 | **Allow cluster create** entitlement | You, in the workspace | `bundle deploy` — only for the managed warehouse + job clusters. Not needed for BYO warehouse. 
| Warehouse / job creation rejected | | 4 | **Databricks Apps: Can Manage** workspace permission | You, in the workspace | `bundle deploy` of the App resource | App creation rejected | | 5 | **Databricks Database (Lakebase): Manager** entitlement | You, in the workspace | `bundle deploy` of the `database_instances` resource | `Error: User does not have permission to create database instances` | | 6 | **USE CATALOG** + **CREATE SCHEMA** on `` | Your user or an admin group you're in | `bundle deploy` of the `schemas` and `volumes` resources | `Error: User does not have CREATE_SCHEMA on catalog ''` | | 7 | **MANAGE** on `` (or be the catalog owner) | Your user or an admin group you're in | `post_deploy_grants.sh` (issues `GRANT USE CATALOG / ALL PRIVILEGES … TO ` and `… TO account users`) | `Error: User does not have privilege MANAGE on catalog ''` | -| 8 | **Service Principal: User** role on the task-runner SP | Your user, on the SP you'll use as `dqx_service_principal_application_id` | `bundle deploy` of the `jobs.dqx_task_runner` resource (sets `run_as.service_principal_name`) | `Error: User is not authorized to use this service principal` | -| 9 | **Service Principal: Manager** role on the task-runner SP, *or* a pre-shared OAuth client secret | Your user, on the same SP | Only needed if you want to **mint a fresh OAuth secret yourself** for the task-runner (e.g. 
via `databricks service-principal-secrets-proxy create `) | `Error: User is not authorized to perform this operation` when minting a new secret | -| 10 | **Account admin** (one-time, post-deploy) | Account level | Updating the app's OAuth custom-app integration to include the `all-apis` scope (see [Expand OAuth Scopes](#optional-expand-oauth-scopes)) | Some app features (job submission, advanced SCIM lookups) return 403 | +| 8 | **CAN_MANAGE** on the SQL warehouse | Your user, on the warehouse bound to the app | `post_deploy_grants.sh` (PATCHes the warehouse permissions API to grant `CAN_USE` to the app SP and task-runner SP). For the *managed* pattern this is automatic — you become the warehouse owner. For the *BYO* pattern an admin must grant you CAN_MANAGE on the existing warehouse. | Logged as `WARNING: PERMISSION_DENIED` from the grants script; UC grants still succeed, but the SPs won't be able to use the warehouse until granted CAN_USE another way. | +| 9 | **Service Principal: User** role on the task-runner SP | Your user, on the SP you'll use as `dqx_service_principal_application_id` | `bundle deploy` of the `jobs.dqx_task_runner` resource (sets `run_as.service_principal_name`) | `Error: User is not authorized to use this service principal` | +| 10 | **Service Principal: Manager** role on the task-runner SP, *or* a pre-shared OAuth client secret | Your user, on the same SP | Only needed if you want to **mint a fresh OAuth secret yourself** for the task-runner (e.g. 
via `databricks service-principal-secrets-proxy create `) | `Error: User is not authorized to perform this operation` when minting a new secret | +| 11 | **Account admin** (one-time, post-deploy) | Account level | Updating the app's OAuth custom-app integration to include the `all-apis` scope (see [Expand OAuth Scopes](#optional-expand-oauth-scopes)) | Some app features (job submission, advanced SCIM lookups) return 403 | -**Two convenience patterns** that reduce the per-user grants in rows 6 and 7: +**Two convenience patterns** that reduce the per-user grants in rows 6, 7, and 8: - **Make the catalog ownership easy:** ask an admin to add you to an existing UC-admin group that already holds `MANAGE` (or `ALL PRIVILEGES`) on ``. This unlocks rows 6 and 7 in one membership change instead of two per-object grants. -- **Workspace admin shortcut:** if you become workspace admin, rows 1–5 + 8–9 collapse automatically. Rows 6 and 7 (UC) and 10 (account admin) still need to be granted explicitly — workspace admin does **not** confer Unity Catalog or account-level rights. +- **Workspace admin shortcut:** if you become workspace admin, rows 1–5 + 9–10 collapse automatically, and row 8 collapses on any **managed** SQL warehouse (you'll own it). Rows 6 and 7 (UC) and 11 (account admin) still need to be granted explicitly — workspace admin does **not** confer Unity Catalog or account-level rights. For a **BYO** warehouse, row 8 (CAN_MANAGE on that specific warehouse) must still be granted by its current owner. ### Workspace features that must be enabled @@ -102,7 +103,7 @@ make app-bind PROFILE= TARGET= ## Step 4: Configure `databricks.yml` -Update a deploy target. The minimum required is a `catalog_name` and `dqx_service_principal_application_id`; everything else has a sensible default and can be overridden per target. In `app/databricks.yml`: +Update a deploy target. 
The minimum required is a `catalog_name`, `dqx_service_principal_application_id`, and a SQL warehouse choice (see below); everything else has a sensible default and can be overridden per target. In `app/databricks.yml`: ```yaml targets: @@ -112,10 +113,43 @@ targets: variables: catalog_name: dqx_service_principal_application_id: + # Pick ONE of the two SQL warehouse patterns below + # — see "SQL warehouse: managed vs. BYO" right after this block. presets: trigger_pause_status: PAUSED ``` +### SQL warehouse: managed vs. BYO + +The app needs exactly one SQL warehouse to run UC queries against the Delta side. The bundle supports two patterns — pick whichever matches the permissions you actually have. Both patterns end up at the same place: the warehouse ID is set into the `sql_warehouse_id` variable, the app binds to it, and `post_deploy_grants.sh` grants `CAN_USE` to the app SP and the task-runner SP. + +**Pattern A — Managed warehouse (default for `dev`, `kaizen-dev`, `e2-demo`, `bdf-vo`)** + +The bundle declares an X-Small serverless warehouse as `resources.sql_warehouses.dqx_sql_warehouse` inside the target, and `sql_warehouse_id` is wired to `${resources.sql_warehouses.dqx_sql_warehouse.id}`. Nothing for you to fill in. + +Required permissions on top of the table above: rows 2 (Databricks SQL access) and 3 (Allow cluster create). The deploying user automatically becomes the warehouse owner, so row 8 (CAN_MANAGE) is satisfied for free. + +**Pattern B — Bring Your Own (BYO) warehouse (default for `kaizen-app`)** + +Use this when you don't have permission to create SQL warehouses, or when your platform team standardises on a single shared warehouse. The target keeps `sql_warehouse_id` set to an existing warehouse ID, and the base-level `sql_warehouses` resource is *not* declared. + +```yaml +targets: + kaizen-app: + workspace: + profile: + variables: + catalog_name: + dqx_service_principal_application_id: + sql_warehouse_id: # e.g. 
"abc123def" + presets: + trigger_pause_status: PAUSED +``` + +Required permissions on top of the table above: row 8 (CAN_MANAGE on that existing warehouse) so the post-deploy script can apply `CAN_USE` grants to both service principals. Rows 2 and 3 are not needed — the bundle doesn't try to create or refresh a warehouse. + +> **What happens if the deployer only has CAN_USE on the BYO warehouse?** The bundle deploy itself still succeeds (the app and job bind to the warehouse ID without touching its permissions). `post_deploy_grants.sh` logs one `WARNING: PERMISSION_DENIED` per missing grant and continues; UC grants still apply. The fix is to either ask the warehouse owner to grant you CAN_MANAGE and re-run `make app-grant-permissions`, or to have them grant `CAN_USE` directly to the app SP and the task-runner SP. Until both SPs hold CAN_USE on the warehouse, any feature in the app that issues SQL via that warehouse will fail with `not authorized to use or monitor this SQL Endpoint`. + ### Variable reference All target-level variables, their defaults, and what they control: @@ -126,7 +160,8 @@ All target-level variables, their defaults, and what they control: | `dqx_service_principal_application_id` | `00000000-…` | **Yes** | Application ID of the service principal that runs the task-runner job. Created in [Step 1](#step-1-create-a-service-principal). The placeholder default fails validation. | | `admin_group` | `admins` | No | Workspace group whose members get the in-app `ADMIN` role unconditionally (bootstrap admin path). The default `admins` is the built-in workspace admins group — every workspace admin becomes a DQX admin automatically. Override with a dedicated group (e.g. `dqx-admins-prod`) for narrower bootstrap access. Additional roles are assigned at runtime via the in-app Role Management UI. | | `app_name` | `dqx-studio` | No | Deployed Databricks App name. Override per target (e.g. 
`dqx-studio-dev`, `dqx-studio-prod`) when deploying multiple targets to the same workspace, or for personal sandboxes. | -| `sql_warehouse_name` | `dqx-studio-sql-warehouse` | No | Deployed SQL warehouse name (the bundle creates an X-Small serverless warehouse for app queries). Override per target to avoid duplicates in shared workspaces. | +| `sql_warehouse_id` | `${resources.sql_warehouses.dqx_sql_warehouse.id}` (in managed targets) | **Yes** for BYO targets, **No** for managed targets | The warehouse ID the app and task runner connect to. In managed targets this is wired to the bundle-created warehouse. In BYO targets (e.g. `kaizen-app`) set it to the existing warehouse ID you want to share. See [SQL warehouse: managed vs. BYO](#sql-warehouse-managed-vs-byo). | +| `sql_warehouse_name` | `dqx-studio-sql-warehouse` | No | Name of the bundle-created warehouse — **managed pattern only** (ignored under BYO). Override per target to avoid name clashes in shared workspaces. | | `schema_name` | `dqx_studio` | No | Main schema — holds run history, profiling, metrics, quarantine, and OLTP fallback tables. Declared as `resources.schemas.main_schema` in the bundle with `lifecycle.prevent_destroy: true`. | | `tmp_schema_name` | `dqx_studio_tmp` | No | Per-user temp-view schema. Declared as `resources.schemas.tmp_schema` with `lifecycle.prevent_destroy: true`. | | `wheels_volume_name` | `wheels` | No | UC volume under `.` for the DQX + task-runner wheels. Declared as `resources.volumes.wheels` with `lifecycle.prevent_destroy: true`. | @@ -151,8 +186,8 @@ make app-deploy PROFILE= TARGET= `make app-deploy` runs the following steps automatically: 1. `make app-build` — builds the frontend and wheels. -2. `databricks bundle deploy` — provisions or updates the schemas, wheels volume, Lakebase instance, SQL warehouse, task-runner job, and Databricks App in dependency order. 
Stateful resources carry `lifecycle.prevent_destroy: true` so a future destroy can't drop them — see [Step 3](#step-3-stateful-storage-and-destroy-protection). -3. `app/scripts/post_deploy_grants.sh` — discovers both service principals and executes the `GRANT` statements on the catalog, schemas, and volume (the auto-created app SP's UUID isn't known at bundle-write time, which is why grants live in a post-deploy script). Lakebase grants are handled by the bundle's `database` resource binding. +2. `databricks bundle deploy` — provisions or updates the schemas, wheels volume, Lakebase instance, task-runner job, and Databricks App in dependency order, plus the SQL warehouse if the target uses the managed pattern. Stateful resources carry `lifecycle.prevent_destroy: true` so a future destroy can't drop them — see [Step 3](#step-3-stateful-storage-and-destroy-protection). +3. `app/scripts/post_deploy_grants.sh` — discovers both service principals, then (a) executes `GRANT` statements on the catalog / schemas / volume, and (b) PATCHes the SQL warehouse permissions to add `CAN_USE` for both SPs. The grants script is idempotent and re-runnable. The auto-created app SP's UUID isn't known at bundle-write time, which is why all grants live in the post-deploy script. Lakebase grants are handled directly by the bundle's `database` resource binding. 4. `databricks bundle run` — starts the app. > **First start**: The app runs both Delta and Lakebase database migrations on startup, and uploads DQX wheels to the UC volume. If the task-runner job runs before the app has started at least once, it will fail to find its wheels. Wait for `"Uploaded databricks_labs_dqx-..."` in the logs before triggering runs. If Lakebase is enabled, also wait for `"Lakebase OLTP routing enabled"` before opening the UI — the app falls back to UC-only mode if Lakebase init fails (logged as `"Lakebase initialisation failed — falling back to Delta for OLTP tables"`). 
@@ -169,10 +204,12 @@ make app-build make app-bind PROFILE= TARGET= # Deploy the bundle (creates / updates schemas, volume, Lakebase -# instance, SQL warehouse, task-runner job, app) +# instance, task-runner job, app — plus the SQL warehouse in +# managed targets; BYO targets reuse the warehouse ID you set) cd app && databricks bundle deploy -p -t -# Grant permissions to the app SP (auto-discovered after deploy) +# Grant permissions to the app SP and task-runner SP (auto-discovered +# after deploy): UC catalog/schema/volume grants + warehouse CAN_USE. make app-grant-permissions PROFILE= TARGET= # Start the app @@ -181,7 +218,9 @@ cd app && databricks bundle run dqx-studio -p -t ### Manual grants (if the script doesn't work for your setup) -The grant script discovers both SPs automatically. If you need to run the SQL manually instead: +The grant script discovers both SPs automatically. If you need to apply the grants by hand instead, you need to do **two** things — the UC grants (SQL) and the warehouse `CAN_USE` grants (Permissions API). + +**1. Unity Catalog grants (SQL):** ```sql -- : the app's auto-created SP (find it in Apps → Settings → Service principal) @@ -203,6 +242,21 @@ GRANT ALL PRIVILEGES ON VOLUME .dqx_studio.wheels TO ``; GRANT USE CATALOG ON CATALOG TO `account users`; ``` +**2. SQL warehouse `CAN_USE` grants (Permissions API):** + +Both SPs need `CAN_USE` on the warehouse the app is bound to (the same warehouse ID set via `sql_warehouse_id`). In the workspace UI, open **SQL Warehouses → `` → Permissions** and add `CAN_USE` for each SP. Or via the CLI: + +```bash +databricks api patch /api/2.0/permissions/warehouses/ --json '{ + "access_control_list": [ + {"service_principal_name": "", "permission_level": "CAN_USE"}, + {"service_principal_name": "", "permission_level": "CAN_USE"} + ] +}' +``` + +The PATCH verb is additive (it does not replace existing ACLs), and the call is safe to re-run — this is the exact call `post_deploy_grants.sh` makes. 
+ > **Lakebase grants are handled differently.** When Lakebase is enabled, the bundle binds the database to the app via a `database` resource block (`permission: CAN_CONNECT_AND_CREATE`). DABs translates that into the equivalent Postgres role grants automatically — there is no separate SQL to run. The first time the app connects, `PgMigrationRunner` creates its own schema and tables inside the Lakebase database. To grant app access to end users, go to **Apps → `<app-name>` → Permissions** and assign `Can Use`. Replace `<app-name>` with the value of `app_name` configured for your target (default `dqx-studio`). @@ -307,6 +361,29 @@ databricks apps logs <app-name> -p <profile> # logs databricks apps stop <app-name> -p <profile> # stop ``` +## Insights dashboard + +The bundle ships a starter AI/BI dashboard (`dashboards/dqx_quality_overview.lvdash.json`) declared as `resources.dashboards.dqx_quality_overview` in `databricks.yml`. It's automatically created on deploy and pinned to the app's **Insights** page via the `DQX_DEFAULT_DASHBOARD_ID` env var, so the page works out-of-the-box. + +**What you get**: a four-row layout with KPI counters (total runs, monitored tables, total errors, pass rate), trend charts (runs over time by status; errors & warnings over time), drilldowns (top failing tables; quarantined rows over time), and a recent-runs table. + +**Customising the starter**: open it in **Databricks → AI/BI Dashboards**, add or change widgets, and save. The iframe inside DQX Studio picks up changes immediately — no redeploy needed. You can also point the Insights page at a completely different dashboard via **Configuration → Insights dashboard**; clearing that override reverts to the starter. + +**Query identity**: the dashboard is configured with `embed_credentials: true`, so queries run as the bundle deployer rather than the iframe viewer. 
This is deliberate — the bundle only grants `USE CATALOG` (not table-level `SELECT`) to `account users`, keeping `dq_quarantine_records` (potentially PII row payloads) off the workspace UC surface. The widgets in the starter only expose aggregated counts and run metadata, so deployer-credentialed queries don't leak anything a viewer couldn't already see in the Runs History page. To switch to viewer-credentials, flip `embed_credentials` to `false` and grant `SELECT` on the DQX tables to the audience you want to expose. + +`scripts/post_deploy_grants.sh` automatically grants the deployer `USE SCHEMA` + `SELECT ON SCHEMA .` after every `make app-deploy`, so the dashboard works end-to-end without manual UC plumbing. It resolves the deployer identity from `databricks current-user me` so it works for both human deploys (grants the email) and SP-based deploys (grants the application ID). + +**One operational caveat**: the "deployer" identity is whoever ran `databricks bundle deploy` (the human or service principal authenticated to the workspace at deploy time). If that identity later loses access to the DQX tables, dashboard tiles will fail to render until someone with access redeploys. For production, deploy the bundle as a stable service principal so the dashboard identity doesn't follow individual humans. + +## Run review status + +DQX Studio lets reviewers attach a per-run **review status** (e.g. *Pending review*, *Acknowledged*, *Resolved*, *False positive*) to each validation run from the expanded row on the **Runs History** page. The same value is filterable from the toolbar so a business owner can ask "what's still pending?" in one click. + +- **Configurable catalogue** — admins manage the list of allowed values (label, description, colour) under **Configuration → Run review statuses**. Exactly one entry must be marked **Default**; that value is what unreviewed runs surface virtually (no row is written until someone explicitly reviews). 
The backend enforces the single-default invariant on save. +- **Audit trail** — every change appends to `dq_run_review_status_history`, surfaced as an "Activity" timeline inside the review-status panel. The current value lives in `dq_run_review_status` (one row per reviewed run). +- **Storage** — both tables are OLTP-shaped (single-key lookups, frequent mutation), so they live in Lakebase when it's enabled and fall back to Delta otherwise via the same `oltp_fallback` plumbing used by comments and role mappings. No extra deployment configuration is needed. +- **Permissions** — any authenticated app user can set or change a review status, mirroring how comments work. Only admins can edit the catalogue itself. + ## Troubleshooting **"App with name X does not exist or is deleted":** @@ -350,8 +427,46 @@ See [Migrating an existing workspace](#migrating-an-existing-workspace). If the conflict is specifically `"Instance name is not unique"` for the Lakebase instance and the instance does NOT appear in `databricks database list-database-instances`, it's likely in the ~7-day soft-delete retention window (the name stays reserved). Edit your target in `databricks.yml` and override `lakebase_instance_name: `, then deploy. +**`databricks bundle deploy` fails with `not authorized to create SQL Endpoint`:** +The target is configured for the managed warehouse pattern but the deployer doesn't have the `Databricks SQL access` + `Allow cluster create` entitlements (rows 2 and 3). Either ask an admin to grant those, or switch the target to **BYO** by setting `sql_warehouse_id: ` in `databricks.yml` (and removing or commenting out the target's `resources.sql_warehouses.dqx_sql_warehouse` block). See [SQL warehouse: managed vs. BYO](#sql-warehouse-managed-vs-byo). + +**`post_deploy_grants.sh` logs `WARNING: PERMISSION_DENIED` on `/api/2.0/permissions/warehouses/...`:** +The deployer doesn't hold `CAN_MANAGE` on the warehouse bound to the target — row 8 in the permissions table. 
The script reports the warning and continues; the rest of the grants (UC catalog / schema / volume / account users) still apply. To fix: ask the warehouse owner to either (a) grant you CAN_MANAGE and re-run `make app-grant-permissions`, or (b) grant `CAN_USE` directly to the app SP and the task-runner SP shown in the script's preceding log lines. App features that rely on the warehouse will return `not authorized to use or monitor this SQL Endpoint` until both SPs have CAN_USE. + +**App returns `not authorized to use or monitor this SQL Endpoint`:** +The app SP doesn't have `CAN_USE` on the warehouse pointed to by `sql_warehouse_id`. Re-run `make app-grant-permissions PROFILE= TARGET=`; if you see `PERMISSION_DENIED` in that re-run, see the previous entry — the deployer needs CAN_MANAGE on the warehouse. The same applies symmetrically to the task-runner SP when the job hits it. + **`databricks bundle destroy` fails with `"cannot destroy resource: prevent_destroy is set"`:** This is the safety guard doing its job — see [Step 3](#step-3-stateful-storage-and-destroy-protection). To intentionally tear down a stateful resource, remove `lifecycle.prevent_destroy: true` from the relevant block in `databricks.yml`, run `databricks bundle deployment unbind -t ` to detach it from bundle state, then destroy it manually with `databricks schemas delete` / `databricks volumes delete` / `databricks database delete-database-instance`. **Lakebase queries time out / app logs show pool exhaustion:** Bump `lakebase_capacity` from `CU_1` to `CU_2` (or higher) in `databricks.yml` and redeploy. You can also raise `DQX_LAKEBASE_POOL_MAX_SIZE` (default 10) on the app's environment if many concurrent requests are hitting the OLTP path. + +**Insights page shows "No dashboard configured" right after deploy:** +The Insights page reads `DQX_DEFAULT_DASHBOARD_ID` (set by the bundle to `${resources.dashboards.dqx_quality_overview.id}`). 
If it's empty in the deployed app, the dashboards resource didn't materialise — usually because the Databricks CLI is older than `0.283.0` (required for `dataset_catalog` / `dataset_schema`). Upgrade the CLI, redeploy, and confirm: +```bash +databricks --version # expect ≥ 0.283.0 +databricks bundle validate -p <profile> -t <target> -o json | jq '.resources.dashboards' +databricks apps get <app-name> -p <profile> -o json | jq '.config.env[] | select(.name == "DQX_DEFAULT_DASHBOARD_ID")' +``` + +**Insights iframe is blank / shows a Databricks login screen:** +The viewer's session cookies aren't being passed through to the iframe. Usually one of: (a) the user opened DQX Studio in an incognito window where they aren't signed into Databricks, (b) the workspace is on a different host than the app expects, or (c) the workspace blocks cross-frame embedding for that path. Confirm the constructed embed URL works directly: +```bash +# In the running app, the iframe loads: +# https://<workspace-host>/embed/dashboardsv3/<dashboard-id> +# Test this URL directly in the same browser session. +``` +If it loads in a normal tab but not in the iframe, the workspace has a Content-Security-Policy that blocks iframe embedding — contact your workspace admin. + +**Insights tiles show `[INSUFFICIENT_PERMISSIONS] Principal '...' does not have SELECT on Table ...`:** +The deployer identity executing the dashboard queries (because `embed_credentials: true`) doesn't have SELECT on the DQX tables. `post_deploy_grants.sh` covers this by default, but it can fail silently if `databricks current-user me` doesn't return a `userName` / `applicationId`, or if the script wasn't re-run after switching deploy identities. 
Either re-run the script, or grant manually against any warehouse you can use: +```bash +databricks api post /api/2.0/sql/statements -p <profile> \ + --json '{ + "warehouse_id": "<warehouse-id>", + "statement": "GRANT USE SCHEMA, SELECT ON SCHEMA `<catalog>`.`<schema>` TO `<deployer-principal>`", + "wait_timeout": "30s" + }' +``` +`SELECT ON SCHEMA` propagates to every existing and future table in the schema, so you don't need to repeat it for new DQX tables added by migrations. diff --git a/app/README.md b/app/README.md index 6023dad85..48189a665 100644 --- a/app/README.md +++ b/app/README.md @@ -137,7 +137,7 @@ The app aligns with the [DQX Summary Metrics spec](https://github.com/databricks **Read path.** `GET /api/v1/metrics/{table_fqn}` joins `dq_metrics` to `dq_validation_runs` on `run_id` and pivots the long-format rows back into the wide-format `MetricSnapshotOut` the existing UI consumes — the chart and table components keep working unchanged. New optional fields (`check_metrics`, `custom_metrics`, `rule_set_fingerprint`, `error_row_count`, `warning_row_count`) are exposed for future UI surfaces. -**Quarantine for SQL / cross-table rules.** Cross-table SQL checks now persist their full violation set to `dq_quarantine_records` (with a `{: "SQL check violation"}` synthetic `errors` payload to match the shape DQX produces for column checks), capped at `_SQL_QUARANTINE_MAX_ROWS=100_000` to bound storage on runaway rules whose violation set is the entire joined dataset. The true violation count remains accurate in `dq_metrics.error_row_count` even when truncation kicks in. The runs UI's full CSV/Excel export (which reads from `dq_quarantine_records`) now works for SQL checks; previously they only had the 10-row `sample_invalid_json` fallback and 99 %+ of violations were lost. 
+**Quarantine for SQL / cross-table rules.** Cross-table SQL checks now persist their full violation set to `dq_quarantine_records` (with a `[{"name": "", "message": "SQL check violation"}]` synthetic `errors` payload that mirrors DQX's public `dq_result_item_schema` — list-of-structs, same shape row-level checks produce — so the Pydantic `QuarantineRecordOut.errors: list[Any]` validates cleanly and the UI's per-row Errors column renders without special-casing SQL checks), capped at `_SQL_QUARANTINE_MAX_ROWS=100_000` to bound storage on runaway rules whose violation set is the entire joined dataset. The true violation count remains accurate in `dq_metrics.error_row_count` even when truncation kicks in. The runs UI's full CSV/Excel export (which reads from `dq_quarantine_records`) now works for SQL checks; previously they only had the 10-row `sample_invalid_json` fallback and 99 %+ of violations were lost. Legacy rows written with the old `{: }` dict shape are coerced to the new list shape by `quarantine._row_to_record` so historical data continues to display. 
## Stack diff --git a/app/dashboards/dqx_quality_overview.lvdash.json b/app/dashboards/dqx_quality_overview.lvdash.json new file mode 100644 index 000000000..648de0c61 --- /dev/null +++ b/app/dashboards/dqx_quality_overview.lvdash.json @@ -0,0 +1,625 @@ +{ + "datasets": [ + { + "name": "ds_runs", + "displayName": "validation_runs (30 days)", + "queryLines": [ + "SELECT\n", + " run_id,\n", + " source_table_fqn,\n", + " status,\n", + " run_type,\n", + " total_rows,\n", + " valid_rows,\n", + " invalid_rows,\n", + " error_rows,\n", + " warning_rows,\n", + " requesting_user,\n", + " created_at\n", + "FROM dq_validation_runs\n", + "WHERE created_at >= current_date() - INTERVAL 30 DAYS" + ] + }, + { + "name": "ds_quarantine", + "displayName": "quarantine_records (30 days)", + "queryLines": [ + "SELECT\n", + " quarantine_id,\n", + " run_id,\n", + " source_table_fqn,\n", + " requesting_user,\n", + " created_at\n", + "FROM dq_quarantine_records\n", + "WHERE created_at >= current_date() - INTERVAL 30 DAYS" + ] + } + ], + "pages": [ + { + "name": "overview", + "displayName": "Overview", + "layout": [ + { + "widget": { + "name": "kpi_total_runs", + "queries": [ + { + "name": "main_query", + "query": { + "datasetName": "ds_runs", + "fields": [ + { + "name": "total_runs", + "expression": "COUNT(`run_id`)" + } + ], + "disaggregated": false + } + } + ], + "spec": { + "version": 2, + "widgetType": "counter", + "encodings": { + "value": { + "fieldName": "total_runs", + "displayName": "Total runs" + } + }, + "frame": { + "showTitle": true, + "title": "Total runs (30d)" + } + } + }, + "position": { + "x": 0, + "y": 0, + "width": 3, + "height": 3 + } + }, + { + "widget": { + "name": "kpi_monitored_tables", + "queries": [ + { + "name": "main_query", + "query": { + "datasetName": "ds_runs", + "fields": [ + { + "name": "monitored_tables", + "expression": "COUNT(DISTINCT `source_table_fqn`)" + } + ], + "disaggregated": false + } + } + ], + "spec": { + "version": 2, + "widgetType": 
"counter", + "encodings": { + "value": { + "fieldName": "monitored_tables", + "displayName": "Tables" + } + }, + "frame": { + "showTitle": true, + "title": "Tables under monitoring" + } + } + }, + "position": { + "x": 3, + "y": 0, + "width": 3, + "height": 3 + } + }, + { + "widget": { + "name": "kpi_total_errors", + "queries": [ + { + "name": "main_query", + "query": { + "datasetName": "ds_runs", + "fields": [ + { + "name": "total_errors", + "expression": "COALESCE(SUM(`error_rows`), 0)" + } + ], + "disaggregated": false + } + } + ], + "spec": { + "version": 2, + "widgetType": "counter", + "encodings": { + "value": { + "fieldName": "total_errors", + "displayName": "Error rows" + } + }, + "frame": { + "showTitle": true, + "title": "Total error rows (30d)" + } + } + }, + "position": { + "x": 6, + "y": 0, + "width": 3, + "height": 3 + } + }, + { + "widget": { + "name": "kpi_pass_rate", + "queries": [ + { + "name": "main_query", + "query": { + "datasetName": "ds_runs", + "fields": [ + { + "name": "pass_rate_pct", + "expression": "100.0 * SUM(`valid_rows`) / NULLIF(SUM(`total_rows`), 0)" + } + ], + "disaggregated": false + } + } + ], + "spec": { + "version": 2, + "widgetType": "counter", + "encodings": { + "value": { + "fieldName": "pass_rate_pct", + "displayName": "Pass rate", + "format": { + "type": "number-plain", + "decimalPlaces": { + "type": "exact", + "places": 1 + } + } + } + }, + "frame": { + "showTitle": true, + "title": "Average pass rate (%, 30d)" + } + } + }, + "position": { + "x": 9, + "y": 0, + "width": 3, + "height": 3 + } + }, + { + "widget": { + "name": "runs_over_time", + "queries": [ + { + "name": "main_query", + "query": { + "datasetName": "ds_runs", + "fields": [ + { + "name": "run_date", + "expression": "DATE_TRUNC('DAY', `created_at`)" + }, + { + "name": "status", + "expression": "`status`" + }, + { + "name": "run_count", + "expression": "COUNT(`run_id`)" + } + ], + "disaggregated": false + } + } + ], + "spec": { + "version": 3, + "widgetType": 
"bar", + "encodings": { + "x": { + "fieldName": "run_date", + "scale": { + "type": "temporal" + }, + "displayName": "Day", + "axis": { + "title": "Day" + } + }, + "y": { + "fieldName": "run_count", + "scale": { + "type": "quantitative" + }, + "displayName": "Runs", + "axis": { + "title": "Runs" + } + }, + "color": { + "fieldName": "status", + "scale": { + "type": "categorical" + }, + "displayName": "Status", + "legend": { + "title": "Status" + } + } + }, + "frame": { + "showTitle": true, + "title": "Runs over time by status" + } + } + }, + "position": { + "x": 0, + "y": 3, + "width": 6, + "height": 5 + } + }, + { + "widget": { + "name": "errors_warnings_over_time", + "queries": [ + { + "name": "main_query", + "query": { + "datasetName": "ds_runs", + "fields": [ + { + "name": "run_date", + "expression": "DATE_TRUNC('DAY', `created_at`)" + }, + { + "name": "error_rows_sum", + "expression": "COALESCE(SUM(`error_rows`), 0)" + }, + { + "name": "warning_rows_sum", + "expression": "COALESCE(SUM(`warning_rows`), 0)" + } + ], + "disaggregated": false + } + } + ], + "spec": { + "version": 3, + "widgetType": "line", + "encodings": { + "x": { + "fieldName": "run_date", + "scale": { + "type": "temporal" + }, + "displayName": "Day", + "axis": { + "title": "Day" + } + }, + "y": { + "fields": [ + { + "fieldName": "error_rows_sum", + "displayName": "Errors" + }, + { + "fieldName": "warning_rows_sum", + "displayName": "Warnings" + } + ], + "scale": { + "type": "quantitative" + }, + "axis": { + "title": "Rows" + } + } + }, + "frame": { + "showTitle": true, + "title": "Error & warning rows over time" + } + } + }, + "position": { + "x": 6, + "y": 3, + "width": 6, + "height": 5 + } + }, + { + "widget": { + "name": "top_failing_tables", + "queries": [ + { + "name": "main_query", + "query": { + "datasetName": "ds_runs", + "fields": [ + { + "name": "source_table_fqn", + "expression": "`source_table_fqn`" + }, + { + "name": "total_errors", + "expression": "COALESCE(SUM(`error_rows`), 0)" + 
} + ], + "disaggregated": false + } + } + ], + "spec": { + "version": 3, + "widgetType": "bar", + "encodings": { + "x": { + "fieldName": "total_errors", + "scale": { + "type": "quantitative" + }, + "axis": { + "title": "Error rows" + } + }, + "y": { + "fieldName": "source_table_fqn", + "scale": { + "type": "categorical", + "sort": { + "by": "x-reversed" + } + }, + "axis": { + "title": "Table" + } + } + }, + "frame": { + "showTitle": true, + "title": "Top failing tables (30d)" + } + } + }, + "position": { + "x": 0, + "y": 8, + "width": 6, + "height": 5 + } + }, + { + "widget": { + "name": "quarantine_volume", + "queries": [ + { + "name": "main_query", + "query": { + "datasetName": "ds_quarantine", + "fields": [ + { + "name": "quarantine_date", + "expression": "DATE_TRUNC('DAY', `created_at`)" + }, + { + "name": "quarantine_count", + "expression": "COUNT(`quarantine_id`)" + } + ], + "disaggregated": false + } + } + ], + "spec": { + "version": 3, + "widgetType": "area", + "encodings": { + "x": { + "fieldName": "quarantine_date", + "scale": { + "type": "temporal" + }, + "axis": { + "title": "Day" + } + }, + "y": { + "fieldName": "quarantine_count", + "scale": { + "type": "quantitative" + }, + "axis": { + "title": "Quarantined rows" + } + } + }, + "frame": { + "showTitle": true, + "title": "Quarantined rows over time" + } + } + }, + "position": { + "x": 6, + "y": 8, + "width": 6, + "height": 5 + } + }, + { + "widget": { + "name": "recent_runs_table", + "queries": [ + { + "name": "main_query", + "query": { + "datasetName": "ds_runs", + "fields": [ + { + "name": "created_at", + "expression": "`created_at`" + }, + { + "name": "source_table_fqn", + "expression": "`source_table_fqn`" + }, + { + "name": "status", + "expression": "`status`" + }, + { + "name": "run_type", + "expression": "`run_type`" + }, + { + "name": "total_rows", + "expression": "`total_rows`" + }, + { + "name": "error_rows", + "expression": "`error_rows`" + }, + { + "name": "warning_rows", + "expression": 
"`warning_rows`" + }, + { + "name": "requesting_user", + "expression": "`requesting_user`" + }, + { + "name": "run_id", + "expression": "`run_id`" + } + ], + "disaggregated": true + } + } + ], + "spec": { + "version": 1, + "widgetType": "table", + "encodings": { + "columns": [ + { + "fieldName": "created_at", + "displayName": "Time", + "type": "datetime", + "dateTimeFormat": "ll LTS (z)", + "order": 100 + }, + { + "fieldName": "source_table_fqn", + "displayName": "Table", + "order": 200 + }, + { + "fieldName": "status", + "displayName": "Status", + "order": 300 + }, + { + "fieldName": "run_type", + "displayName": "Type", + "order": 400 + }, + { + "fieldName": "total_rows", + "displayName": "Total", + "type": "integer", + "order": 500 + }, + { + "fieldName": "error_rows", + "displayName": "Errors", + "type": "integer", + "order": 600 + }, + { + "fieldName": "warning_rows", + "displayName": "Warnings", + "type": "integer", + "order": 700 + }, + { + "fieldName": "requesting_user", + "displayName": "Requested by", + "order": 800 + }, + { + "fieldName": "run_id", + "displayName": "Run ID", + "order": 900, + "visible": false + } + ] + }, + "invisibleColumns": [], + "allowHTMLByDefault": false, + "itemsPerPage": 25, + "paginationSize": "default", + "condensed": true, + "withRowNumber": false, + "frame": { + "showTitle": true, + "title": "Recent runs" + } + } + }, + "position": { + "x": 0, + "y": 13, + "width": 12, + "height": 6 + } + }, + { + "widget": { + "name": "footer_notes", + "spec": { + "version": 1, + "widgetType": "markdown", + "encodings": {}, + "frame": { + "showTitle": false + }, + "text": "### Customise this dashboard\n\nThis starter ships with DQX Studio and is fully editable. 
Open the dashboard in **Databricks → AI/BI Dashboards** to add widgets, change filters, or drill into specific tables — your changes appear here immediately on the **Insights** page.\n\n**Underlying tables**\n\n- `dq_validation_runs` — one row per validation run (status, counts, requester, timing)\n- `dq_quarantine_records` — per-row failures (full row payload, errors, warnings)\n- `dq_metrics` — long-format DQX observer metrics (pivot on `metric_name`)\n- `dq_profiling_results` — schema profiles for tables under monitoring\n\nAll four live in the schema configured at deploy time (`DQX_CATALOG` / `DQX_SCHEMA`)." + } + }, + "position": { + "x": 0, + "y": 19, + "width": 12, + "height": 3 + } + } + ] + } + ] +} diff --git a/app/databricks.yml b/app/databricks.yml index c17447c18..3d422babe 100644 --- a/app/databricks.yml +++ b/app/databricks.yml @@ -14,13 +14,41 @@ variables: default: "dqx_studio_tmp" admin_group: description: "Databricks workspace group name for bootstrap Admin access" - default: "admins" + default: "proj_dbw_dev_dg_admins-data_ug" app_name: description: "Name of the deployed Databricks App (must be unique per workspace)" default: "dqx-studio" sql_warehouse_name: - description: "Name of the deployed SQL warehouse for DQX Studio queries (must be unique per workspace)" + description: > + Name of the SQL warehouse to *create* when a target opts into a + bundle-managed warehouse (see ``sql_warehouse_id`` below). Unused + when the target points at an existing warehouse. default: "dqx-studio-sql-warehouse" + sql_warehouse_id: + description: > + ID of the SQL warehouse to use for DQX Studio queries. Set this + in every target. Two supported patterns: + + 1. **Bring your own** (recommended when the deployer lacks + CREATE permission on SQL warehouses, or when sharing a + centrally-managed warehouse): set this to the existing + warehouse ID (e.g. ``"abc123def"``). The bundle does not + try to create or refresh the warehouse — it just binds the + app to it. 
The deployer must hold **CAN_MANAGE** on the + warehouse, because the post-deploy grants script + (``scripts/post_deploy_grants.sh``) explicitly grants + CAN_USE to the app and task-runner service principals via + the permissions API. Without CAN_MANAGE those grants will + fail (recorded as a WARNING; the rest of the script + continues). + + 2. **Bundle-managed**: declare + ``resources.sql_warehouses.dqx_sql_warehouse`` inside the + target and set this variable to + ``${resources.sql_warehouses.dqx_sql_warehouse.id}``. The + bundle then creates (or refreshes) the warehouse. + + A target must do exactly one of the above. wheels_volume_name: description: "Name of the UC volume for storing DQX wheel files" default: "wheels" @@ -106,6 +134,13 @@ variables: value: "${var.lakebase_instance_name}" - name: "DQX_LAKEBASE_DATABASE_NAME" value: "${var.lakebase_database_name}" + # Starter dashboard for the Insights page (rendered as an iframe). + # ``resources.dashboards.dqx_quality_overview`` ships a minimal AI/BI + # dashboard against the DQX tables in this catalog/schema. Admins + # can override the pinned dashboard at runtime via Configuration → + # Insights dashboard; clearing the override reverts to this default. + - name: "DQX_DEFAULT_DASHBOARD_ID" + value: "${resources.dashboards.dqx_quality_overview.id}" sync: include: @@ -160,48 +195,43 @@ resources: - serving.serving-endpoints + # The ``dqx-lakebase`` binding is declared per-target rather than + # here at the base level, because the binding shape differs + # between the two supported Lakebase models: + # * Old (``database_instances``) — uses ``database:`` with + # ``database_name`` + ``instance_name``. + # * New (``postgres_projects`` + branches + endpoints) — uses + # ``postgres:`` with ``branch:`` (and optionally ``database:``). 
+ # DABs sequence merging on ``apps..resources`` is additive by + # ``name`` (see the alias overlays in the ``dev``/``kaizen-app`` + # targets below); each target appends exactly one ``dqx-lakebase`` + # entry of the right shape. resources: - name: "dqx-sql-warehouse" description: "SQL Warehouse for DQX Studio queries" sql_warehouse: - id: ${resources.sql_warehouses.dqx_sql_warehouse.id} + id: ${var.sql_warehouse_id} permission: "CAN_USE" - name: "dqx-task-runner-job" description: "Job for async profiler and dry-run tasks" job: id: ${resources.jobs.dqx_task_runner.id} permission: "CAN_MANAGE" - # Lakebase Postgres for OLTP state (rules catalog, app - # settings, RBAC, comments, schedule configs, scheduler - # bookkeeping). The instance is declared as a bundle resource - # below; the database defaults to the always-present - # ``databricks_postgres`` admin DB. This entry binds the app - # to the (instance, database) pair so DABs configures - # ``CAN_CONNECT_AND_CREATE`` automatically — that lets the app - # SP create its ``dqx_studio`` schema inside the bound DB on - # first connection. - - name: "dqx-lakebase" - description: "Lakebase Postgres backend for app OLTP state" - database: - database_name: ${var.lakebase_database_name} - instance_name: ${resources.database_instances.lakebase.name} - permission: "CAN_CONNECT_AND_CREATE" permissions: - group_name: "account users" level: "CAN_USE" - sql_warehouses: - dqx_sql_warehouse: - name: ${var.sql_warehouse_name} - cluster_size: "X-Small" - enable_serverless_compute: true - max_num_clusters: 1 - min_num_clusters: 1 - auto_stop_mins: 10 - permissions: - - group_name: "users" - level: "CAN_USE" + # ------------------------------------------------------------------ + # NB: SQL warehouse is *not* declared at the base level on purpose. 
+ # Each target chooses one of two patterns (see ``sql_warehouse_id`` + # variable above): bring-your-own (just set the variable to an + # existing ID) or bundle-managed (add a + # ``resources.sql_warehouses.dqx_sql_warehouse`` block inside the + # target's resources block and point the variable at it). This keeps + # the bundle deployable by users who don't hold CREATE permission on + # SQL warehouses. + # ------------------------------------------------------------------ # ------------------------------------------------------------------ # Stateful storage (schemas, volume, Lakebase instance + database). @@ -222,15 +252,15 @@ resources: catalog_name: ${var.catalog_name} name: ${var.schema_name} comment: "DQX Studio main schema (rules, run history, profiling, metrics, quarantine, OLTP fallback)" - lifecycle: - prevent_destroy: true + # lifecycle: + # prevent_destroy: true tmp_schema: catalog_name: ${var.catalog_name} name: ${var.tmp_schema_name} comment: "DQX Studio temp schema for per-user dry-run views" - lifecycle: - prevent_destroy: true + # lifecycle: + # prevent_destroy: true volumes: wheels: @@ -239,15 +269,71 @@ resources: name: ${var.wheels_volume_name} volume_type: MANAGED comment: "DQX Studio wheel storage for the task runner" - lifecycle: - prevent_destroy: true + # lifecycle: + # prevent_destroy: true + + # ------------------------------------------------------------------ + # Starter AI/BI dashboard surfaced on the Insights page (rendered as + # an iframe). The dashboard JSON references the DQX tables by + # unqualified name; ``dataset_catalog`` / ``dataset_schema`` (CLI + # ≥ 0.283.0) tell the runtime which catalog/schema to resolve them + # against, so the same JSON file works for every target. + # + # ``embed_credentials: true`` makes the dashboard run queries as the + # bundle deployer rather than the viewer. 
This is deliberate: + # + # - The bundle only grants ``USE CATALOG`` (not table-level + # ``SELECT``) to ``account users`` — DQX-table reads otherwise + # go through the FastAPI backend via OBO or the App SP, which + # keeps the quarantine table (potentially PII row payloads) off + # the workspace UC surface. + # - All widgets in the starter only expose aggregated counts and + # run metadata (no row payloads), so deployer-credentialed + # queries don't leak anything the viewer couldn't already see + # in the Runs History page. + # - Customers who want viewer-credentialed queries can flip this + # to ``false`` and grant ``SELECT`` on the DQX tables to whichever + # group they want to surface in the dashboard. + # ------------------------------------------------------------------ + dashboards: + dqx_quality_overview: + display_name: "[${bundle.target}] DQX Quality Overview" + file_path: ./dashboards/dqx_quality_overview.lvdash.json + warehouse_id: ${var.sql_warehouse_id} + dataset_catalog: ${var.catalog_name} + dataset_schema: ${var.schema_name} + embed_credentials: true + permissions: + - level: CAN_READ + group_name: "account users" - database_instances: - lakebase: - name: ${var.lakebase_instance_name} - capacity: ${var.lakebase_capacity} - lifecycle: - prevent_destroy: true + # ------------------------------------------------------------------ + # NB: Lakebase is *not* declared at the base level on purpose — + # ``bdf-vo`` opts out (see its target block below) and a base-level + # declaration would force a Lakebase instance on it. Every other + # target opts in by aliasing the two anchors defined on ``dev``: + # + # * ``&lakebase_instance`` — the ``database_instances`` + # resource that provisions the Lakebase project (autoscaled + # under the hood per + # https://docs.databricks.com/aws/en/oltp/projects/manage-with-bundles). 
+ # * ``&dqx_studio_lakebase_overlay`` — the app→Lakebase binding + # in the new-model ``postgres:`` shape, referencing the project + # paths auto-created by the resource above (every fresh + # ``database_instances`` ships with a ``production`` branch and + # a ``databricks-postgres`` database, so no side-channel + # database-create step is needed). + # + # We deliberately do NOT use the legacy ``database:`` binding shape + # (``database_name`` + ``instance_name``). It works on most + # workspaces but is rejected by some — e.g. ``kaizen-dev`` returns + # ``POST /api/2.0/apps``: "User does not have permission to grant + # permissions for added resource: dqx-lakebase" — even when the + # deployer holds ``CAN_MANAGE`` on the underlying instance. Every + # Lakebase-bound app on the same workspaces (e.g. ``bi-portal`` on + # kaizen-dev) uses the new ``postgres:`` shape successfully, so we + # standardise on it. + # ------------------------------------------------------------------ jobs: dqx_task_runner: @@ -313,10 +399,24 @@ targets: # variables: # catalog_name: # dqx_service_principal_application_id: + # + # # === SQL warehouse — pick ONE pattern === + # + # # Pattern A — BYO warehouse (no CREATE permission needed): + # sql_warehouse_id: "abc123def456" # existing warehouse ID + # + # # Pattern B — bundle-managed warehouse: + # # sql_warehouse_id: ${resources.sql_warehouses.dqx_sql_warehouse.id} + # # (also add this 3-line resources block to the target, reusing + # # the shared spec anchor defined on the ``dev`` target): + # # resources: + # # sql_warehouses: + # # dqx_sql_warehouse: *dqx_warehouse_spec + # # # Optional overrides: # # admin_group: # # app_name: - # # sql_warehouse_name: + # # sql_warehouse_name: # only used for Pattern B # # schema_name: # # tmp_schema_name: # # wheels_volume_name: @@ -325,3 +425,156 @@ targets: # # lakebase_capacity: CU_1 | CU_2 | CU_4 | CU_8 # presets: # trigger_pause_status: PAUSED + # The Pattern B (bundle-managed) warehouse spec is 
declared once here + # with a YAML anchor (``&dqx_warehouse_spec``) and reused by the other + # bundle-managed targets via ``*dqx_warehouse_spec``. Keep the anchor + # on the ``dev`` target — removing or renaming this target requires + # moving the ``&`` definition to whichever target is parsed first, or + # the alias references below will break at YAML parse time. + dev: + workspace: + profile: dqx-app-demo + variables: + catalog_name: tsh_dqx_app_demo_catalog + dqx_service_principal_application_id: "4174f306-d722-4cea-8745-85416017ee92" + lakebase_instance_name: "dqx-studio-lakebase-v3" + sql_warehouse_id: ${resources.sql_warehouses.dqx_sql_warehouse.id} + resources: + sql_warehouses: + dqx_sql_warehouse: &dqx_warehouse_spec + name: ${var.sql_warehouse_name} + cluster_size: "X-Small" + enable_serverless_compute: true + max_num_clusters: 1 + min_num_clusters: 1 + auto_stop_mins: 10 + permissions: + - group_name: "users" + level: "CAN_USE" + # Lakebase instance (provisioned via ``database_instances``). + # Anchor declared once here on ``dev``; every Lakebase-using + # target aliases it via ``*lakebase_instance``. + database_instances: + lakebase: &lakebase_instance + name: ${var.lakebase_instance_name} + capacity: ${var.lakebase_capacity} + # lifecycle: + # prevent_destroy: true + # App→Lakebase binding overlay (new-model ``postgres:`` shape). + # DABs appends each target's ``apps.<name>.resources`` list to the + # base-level list, so this single entry adds itself to the + # warehouse + job bindings declared at the top level. + apps: + # ``name`` is repeated here because the DABs YAML schema linter + # validates each ``apps.<name>`` block independently and doesn't + # know that target-level keys merge with the base-level + # ``resources.apps.dqx-studio`` block (which already sets + # ``name``). ``${var.app_name}`` matches the base value so the + # merge is a no-op at deploy time. 
+ # + # The ``branch`` and ``database`` fields are the resource paths + # auto-created by ``database_instances``: every fresh instance + # ships with a ``production`` branch and a + # ``databricks-postgres`` database (the Postgres database name + # is ``databricks_postgres`` underscored; the resource path + # uses the hyphenated form). ``CAN_CONNECT_AND_CREATE`` is the + # only valid permission for this shape (see DABs schema: + # ``apps.AppResourcePostgresPostgresPermission``). + dqx-studio: &dqx_studio_lakebase_overlay + name: ${var.app_name} + resources: + - name: "dqx-lakebase" + description: "Lakebase Postgres backend for app OLTP state" + postgres: + branch: "projects/${resources.database_instances.lakebase.name}/branches/production" + database: "projects/${resources.database_instances.lakebase.name}/branches/production/databases/databricks-postgres" + permission: "CAN_CONNECT_AND_CREATE" + presets: + trigger_pause_status: PAUSED + + kaizen-dev: + workspace: + profile: kaizen-dev + variables: + catalog_name: data_governance + dqx_service_principal_application_id: "9601694e-31b7-43d4-a664-68ba2b0abb21" + sql_warehouse_id: "f3afe45690d73052" + app_name: dqx-studio-dg + lakebase_instance_name: "dqx-studio-lakebase-v3" + resources: + # BYO warehouse — no ``sql_warehouses`` block. + database_instances: + lakebase: *lakebase_instance + apps: + dqx-studio: *dqx_studio_lakebase_overlay + presets: + trigger_pause_status: PAUSED + + # ``kaizen-app`` uses BYO warehouse because the deployer here lacks + # CREATE on SQL warehouses (see DEPLOYMENT.md → "SQL warehouse: + # managed vs. BYO"). + kaizen-app: + workspace: + profile: kaizen-app + variables: + catalog_name: dqx_studio_demo + dqx_service_principal_application_id: "cf65e56d-cd71-4c94-81c3-c39c1c78fab7" + # BYO warehouse — paste the ID of the existing warehouse you've + # been granted CAN_USE on. 
Find it in the workspace UI under SQL + # Warehouses, or via: databricks warehouses list -p kaizen-app + sql_warehouse_id: "ddafa92ede1ef3f4" + resources: + # BYO warehouse — no ``sql_warehouses`` block. + database_instances: + lakebase: *lakebase_instance + apps: + dqx-studio: *dqx_studio_lakebase_overlay + presets: + trigger_pause_status: PAUSED + + e2-demo: + workspace: + profile: e2-demo + variables: + catalog_name: dqx_studio + app_name: tsh-dqx-studio + dqx_service_principal_application_id: "9fafb846-67d4-47db-a8f3-6040884e2d55" + sql_warehouse_id: ${resources.sql_warehouses.dqx_sql_warehouse.id} + resources: + sql_warehouses: + dqx_sql_warehouse: *dqx_warehouse_spec + database_instances: + lakebase: *lakebase_instance + apps: + dqx-studio: *dqx_studio_lakebase_overlay + presets: + trigger_pause_status: PAUSED + + # Lakebase-less target. ``lakebase_instance_name: "-"`` is the + # sentinel ``AppConfig.lakebase_enabled`` checks for (alongside + # ``""``, ``disabled``, ``off``, ``none``); it disables Postgres + # init so every OLTP table is provisioned in Delta via the + # ``MigrationRunner`` fallback path. We can't use the empty string + # here even though the Python side would accept it, because the + # Databricks Apps API rejects env vars with ``value: ""`` + # ("Must specify environment variable source using either ``value`` + # or ``valueFrom``") and the failure surfaces at ``databricks bundle + # run`` time, after the app has already started. + bdf-vo: + workspace: + profile: bdf-vo + variables: + catalog_name: data_governance + app_name: dqx-studio + dqx_service_principal_application_id: "98cb321b-6ce2-4bd7-8b22-6418e45818d8" + sql_warehouse_id: ${resources.sql_warehouses.dqx_sql_warehouse.id} + lakebase_instance_name: "-" + resources: + sql_warehouses: + dqx_sql_warehouse: *dqx_warehouse_spec + # No ``database_instances.lakebase`` — Lakebase not provisioned. 
+ # No ``apps.dqx-studio`` overlay either — the base + # ``resources.apps.dqx-studio`` already binds the warehouse and + # the task-runner job, which is all the app needs in this mode. + presets: + trigger_pause_status: PAUSED diff --git a/app/scripts/post_deploy_grants.sh b/app/scripts/post_deploy_grants.sh index 3cab55e59..82adb3d26 100755 --- a/app/scripts/post_deploy_grants.sh +++ b/app/scripts/post_deploy_grants.sh @@ -125,23 +125,99 @@ run_sql() { fi } +# Grant CAN_USE on the bound SQL warehouse to a service principal. +# +# Idempotent: the permissions API PATCH endpoint is additive — granting +# the same permission twice is a no-op (returns 200). Works identically +# whether the warehouse is bundle-managed or brought-your-own, because +# we discover the ID from the deployed app's resource list, not from +# the bundle definition. +# +# Required deployer permission: CAN_MANAGE on the warehouse. Without +# it the call returns 403; we log a WARNING and continue so the rest +# of the grants still apply. +grant_warehouse_can_use() { + local principal="$1" + local label="$2" + echo " Warehouse CAN_USE: $label ($principal)" + local payload + payload=$(jq -n --arg sp "$principal" '{ + access_control_list: [ + {service_principal_name: $sp, permission_level: "CAN_USE"} + ] + }') + RESULT=$($CLI api patch "/api/2.0/permissions/warehouses/$WH_ID" \ + --json "$payload" 2>&1) || true + # The permissions API returns the full ACL on success, or an error + # object with ``error_code`` / ``message`` on failure. We treat anything + # with ``error_code`` as non-fatal and log it. 
+ if echo "$RESULT" | jq -e '.error_code' > /dev/null 2>&1; then + ERR=$(echo "$RESULT" | jq -r '.message // "unknown error"') + CODE=$(echo "$RESULT" | jq -r '.error_code // "UNKNOWN"') + echo " WARNING: $CODE — $ERR" + if [[ "$CODE" == "PERMISSION_DENIED" ]]; then + echo " (deployer needs CAN_MANAGE on warehouse $WH_ID to apply this grant)" + fi + fi +} + echo "" -echo "==> Granting permissions to App SP ($APP_SP_ID)..." +echo "==> Granting UC permissions to App SP ($APP_SP_ID)..." run_sql "GRANT USE CATALOG ON CATALOG \`$CATALOG\` TO \`$APP_SP_ID\`" run_sql "GRANT ALL PRIVILEGES ON SCHEMA \`$CATALOG\`.\`$SCHEMA\` TO \`$APP_SP_ID\`" run_sql "GRANT ALL PRIVILEGES ON SCHEMA \`$CATALOG\`.\`$TMP_SCHEMA\` TO \`$APP_SP_ID\`" run_sql "GRANT ALL PRIVILEGES ON VOLUME \`$CATALOG\`.\`$SCHEMA\`.\`$VOLUME\` TO \`$APP_SP_ID\`" echo "" -echo "==> Granting permissions to Job SP ($JOB_SP)..." +echo "==> Granting UC permissions to Job SP ($JOB_SP)..." run_sql "GRANT USE CATALOG ON CATALOG \`$CATALOG\` TO \`$JOB_SP\`" run_sql "GRANT ALL PRIVILEGES ON SCHEMA \`$CATALOG\`.\`$SCHEMA\` TO \`$JOB_SP\`" run_sql "GRANT ALL PRIVILEGES ON SCHEMA \`$CATALOG\`.\`$TMP_SCHEMA\` TO \`$JOB_SP\`" run_sql "GRANT ALL PRIVILEGES ON VOLUME \`$CATALOG\`.\`$SCHEMA\`.\`$VOLUME\` TO \`$JOB_SP\`" +# Warehouse CAN_USE — needed regardless of whether the warehouse is +# bundle-managed or BYO. The Apps binding (``permission: "CAN_USE"``) +# only covers the app SP, and only when the deployer has CAN_MANAGE on +# the warehouse at bundle-deploy time. The task-runner SP needs CAN_USE +# explicitly for ``ws.statement_execution.execute_statement`` calls +# (e.g. the temp-view cleanup in ``runner.py``). We grant both here so +# the script is the single source of truth. +echo "" +echo "==> Granting CAN_USE on SQL warehouse ($WH_ID)..." +grant_warehouse_can_use "$APP_SP_ID" "App SP" +grant_warehouse_can_use "$JOB_SP" "Job SP" + echo "" echo "==> Granting USE CATALOG to account users (for end-user tmp view creation)..." 
run_sql "GRANT USE CATALOG ON CATALOG \`$CATALOG\` TO \`account users\`" +# The starter Insights dashboard is configured with ``embed_credentials: true``, +# so AI/BI runs every widget query under the bundle's deployer identity (the +# principal authenticated to the CLI profile that ran ``databricks bundle deploy``). +# That principal doesn't automatically inherit SELECT on the DQX tables: DABs +# makes them schema-owner, but UC table-level reads need an explicit grant. +# Without it the Insights page renders, then every tile shows +# INSUFFICIENT_PERMISSIONS. Granting at the schema level covers existing and +# future tables created by the migration runner. +# +# For SP-based deployers (CI/CD) ``current-user me`` returns the SP and +# ``userName`` is the application ID — which is what UC expects in a GRANT. +# For human deployers it's the email. Either way the GRANT line is identical. +echo "" +echo "==> Granting SELECT to deployer (for Insights dashboard queries)..." +DEPLOYER_JSON=$($CLI current-user me -o json 2>/dev/null || echo "") +DEPLOYER=$(echo "$DEPLOYER_JSON" | jq -r '.userName // .applicationId // empty' 2>/dev/null || echo "") +if [[ -n "$DEPLOYER" ]]; then + echo " Deployer: $DEPLOYER" + run_sql "GRANT USE SCHEMA ON SCHEMA \`$CATALOG\`.\`$SCHEMA\` TO \`$DEPLOYER\`" + run_sql "GRANT SELECT ON SCHEMA \`$CATALOG\`.\`$SCHEMA\` TO \`$DEPLOYER\`" +else + echo " WARNING: Could not resolve deployer identity from profile '$PROFILE'." + echo " The Insights dashboard will show INSUFFICIENT_PERMISSIONS" + echo " until you GRANT USE SCHEMA + SELECT ON SCHEMA" + echo " \`$CATALOG\`.\`$SCHEMA\` to the bundle deployer." +fi + echo "" -echo "==> Done. All grants applied." +echo "==> Done. All grants applied. Re-running this script is safe — every" +echo " grant above is idempotent." 
diff --git a/app/src/databricks_labs_dqx_app/backend/app.py b/app/src/databricks_labs_dqx_app/backend/app.py index 81e2d54aa..6f28237c6 100644 --- a/app/src/databricks_labs_dqx_app/backend/app.py +++ b/app/src/databricks_labs_dqx_app/backend/app.py @@ -218,6 +218,19 @@ async def lifespan(app: FastAPI): "Lakebase initialisation failed — falling back to Delta for OLTP tables. " "Verify the database_instance is provisioned and the app SP has CAN_CONNECT_AND_CREATE." ) + # Hard-to-spot data divergence: previously-written rules / + # settings / RBAC / schedules in Lakebase remain on Postgres + # but every read now hits the (empty or stale) Delta fallback. + # Surface this loudly so operators don't conclude data was + # *lost* when really it's just unreachable until Lakebase + # comes back. Restart once Lakebase is healthy to reattach. + logger.warning( + "Previously-written Lakebase data (rules, app settings, RBAC, " + "comments, schedule configs, schedule runs) will be INACCESSIBLE " + "until Lakebase recovers and the app is restarted. Any writes " + "performed in this Delta-fallback session will diverge from the " + "Lakebase state until reconciled manually." + ) pg_executor = None set_oltp_executor(None) else: diff --git a/app/src/databricks_labs_dqx_app/backend/config.py b/app/src/databricks_labs_dqx_app/backend/config.py index b0b9eee64..9e9db6a03 100644 --- a/app/src/databricks_labs_dqx_app/backend/config.py +++ b/app/src/databricks_labs_dqx_app/backend/config.py @@ -42,6 +42,20 @@ class AppConfig(BaseSettings): dryrun_max_sample_size: int = Field(default=10_000) dryrun_default_sample_size: int = Field(default=1_000) + # ------------------------------------------------------------------ + # Embedded dashboard (Insights page) + # ------------------------------------------------------------------ + # The Insights page renders a Databricks AI/BI dashboard inside an + # iframe. 
Admins set the dashboard ID via the Configuration page, + # which writes to ``dq_app_settings`` and overrides this default. + # When unset, this env var lets the bundle ship a starter + # dashboard ID so the page works out-of-the-box. + default_dashboard_id: str = Field( + default="", + validation_alias="DQX_DEFAULT_DASHBOARD_ID", + description="Fallback dashboard ID for the Insights page when no admin override is set.", + ) + # ------------------------------------------------------------------ # Lakebase (Postgres) backend # ------------------------------------------------------------------ @@ -58,7 +72,15 @@ class AppConfig(BaseSettings): lakebase_instance_name: str = Field( default="", validation_alias="DQX_LAKEBASE_INSTANCE_NAME", - description="Lakebase instance name. Empty disables Lakebase routing.", + description=( + "Lakebase instance name. Empty — or any of the sentinel " + "values ``-`` / ``disabled`` / ``off`` / ``none`` " + "(case-insensitive) — disables Lakebase routing. The " + "sentinel form exists because Databricks Apps rejects " + "env vars with an empty ``value`` string, so deployments " + "that want to disable Lakebase must pass a non-empty " + "placeholder." + ), ) lakebase_database_name: str = Field( default="dqx_studio", @@ -83,15 +105,22 @@ class AppConfig(BaseSettings): def static_assets_path(self) -> Path: return Path(str(resources.files(app_slug))).joinpath("__dist__") + # Sentinel values that explicitly disable Lakebase routing even + # though the env var has to be non-empty (Databricks Apps rejects + # ``value: ""``). Comparison is case-insensitive after stripping. + _LAKEBASE_DISABLED_SENTINELS = frozenset({"", "-", "disabled", "off", "none"}) + @property def lakebase_enabled(self) -> bool: """``True`` when the deployment was provisioned with Lakebase. Falls back to ``False`` (legacy UC-only mode) when the - instance name is empty so existing tests and dev setups keep - working with no Postgres dependency. 
+ instance name is empty or set to a recognised "disabled" + sentinel so existing tests, dev setups, and Lakebase-less + Databricks Apps deployments keep working with no Postgres + dependency. """ - return bool(self.lakebase_instance_name.strip()) + return self.lakebase_instance_name.strip().lower() not in self._LAKEBASE_DISABLED_SENTINELS conf = AppConfig() diff --git a/app/src/databricks_labs_dqx_app/backend/dependencies.py b/app/src/databricks_labs_dqx_app/backend/dependencies.py index a275a8983..b8d99a3e5 100644 --- a/app/src/databricks_labs_dqx_app/backend/dependencies.py +++ b/app/src/databricks_labs_dqx_app/backend/dependencies.py @@ -27,6 +27,7 @@ from .services.role_service import RoleService from .services.rules_catalog_service import RulesCatalogService from .services.comments_service import CommentsService +from .services.review_status_service import ReviewStatusService from .services.schedule_config_service import ScheduleConfigService from .services.view_service import ViewService from .sql_executor import SqlExecutor @@ -255,6 +256,19 @@ async def get_comments_service( return CommentsService(sql=sql) +async def get_review_status_service( + sql: Annotated[SqlExecutor, Depends(get_sp_oltp_executor)], + settings: Annotated[AppSettingsService, Depends(get_app_settings_service)], +) -> ReviewStatusService: + """Create a ReviewStatusService routed at the OLTP executor. + + Takes the same ``AppSettingsService`` we use everywhere else as a + transitive dep so the catalogue of allowed status values comes from + the same singleton (and same cache) as the Configuration page reads. 
+ """ + return ReviewStatusService(sql=sql, settings=settings) + + async def get_schedule_config_service( sql: Annotated[SqlExecutor, Depends(get_sp_oltp_executor)], ) -> ScheduleConfigService: @@ -453,6 +467,7 @@ async def get_user_catalog_names( "get_sql_connector", "get_user_role", "get_comments_service", + "get_review_status_service", "get_schedule_config_service", "require_role", "require_runner", diff --git a/app/src/databricks_labs_dqx_app/backend/migrations/__init__.py b/app/src/databricks_labs_dqx_app/backend/migrations/__init__.py index 03e914daf..1dc8dd5f4 100644 --- a/app/src/databricks_labs_dqx_app/backend/migrations/__init__.py +++ b/app/src/databricks_labs_dqx_app/backend/migrations/__init__.py @@ -364,6 +364,41 @@ class Migration: _V5_VALIDATION_RUNS_ERROR_ROWS = f"ALTER TABLE {_PLACEHOLDER}.dq_validation_runs " f" ADD COLUMN error_rows INT" +# Run review status — per-run review label set by business / SA reviewers +# from the Runs detail page. The allowed value list is admin-managed in +# ``dq_app_settings.run_review_statuses`` so there's no CHECK constraint +# on ``status``; the service validates against the live list before INSERT. +# +# Two tables intentionally: +# - ``dq_run_review_status`` is mutable (one row per run that has been +# reviewed; absent rows surface the configured default virtually). +# - ``dq_run_review_status_history`` is append-only so we can show +# "X changed status from Pending to Acknowledged on Tue" on the run +# detail page and answer compliance questions. Same shape as +# ``dq_quality_rules_history`` — no PK column on Delta (rows are +# ordered by ``changed_at`` for display). +# +# Marked ``oltp_fallback=True`` because both tables are OLTP-shaped +# (single-key lookup, frequent mutation) and live in Lakebase when +# enabled; this migration only runs against Delta when Lakebase is off. 
+_V6_RUN_REVIEW_STATUS = ( + f"CREATE TABLE IF NOT EXISTS {_PLACEHOLDER}.dq_run_review_status (" + " run_id STRING NOT NULL," + " status STRING NOT NULL," + " updated_by STRING," + " updated_at TIMESTAMP," + " CONSTRAINT pk_dq_run_review_status PRIMARY KEY (run_id) RELY" + ") CLUSTER BY (run_id);" + f"CREATE TABLE IF NOT EXISTS {_PLACEHOLDER}.dq_run_review_status_history (" + " run_id STRING NOT NULL," + " status STRING NOT NULL," + " previous_status STRING," + " changed_by STRING NOT NULL," + " changed_at TIMESTAMP NOT NULL" + ") CLUSTER BY (run_id, changed_at)" +) + + # OLTP fallback migration is identified by ``oltp_fallback=True`` so # the runner can skip it when Lakebase is enabled. Keeping the flag on # the migration itself (rather than e.g. a hard-coded version number) @@ -412,6 +447,12 @@ class DeltaMigration(Migration): sql_template=_V5_VALIDATION_RUNS_ERROR_ROWS, oltp_fallback=False, ), + DeltaMigration( + version=6, + description="Run review status (per-run review label + audit history) — used only when Lakebase is disabled", + sql_template=_V6_RUN_REVIEW_STATUS, + oltp_fallback=True, + ), ] # --------------------------------------------------------------------------- diff --git a/app/src/databricks_labs_dqx_app/backend/migrations/postgres.py b/app/src/databricks_labs_dqx_app/backend/migrations/postgres.py index c2a1a2d24..3551c7601 100644 --- a/app/src/databricks_labs_dqx_app/backend/migrations/postgres.py +++ b/app/src/databricks_labs_dqx_app/backend/migrations/postgres.py @@ -215,6 +215,45 @@ class PgMigration: f" ON {_S}.dq_schedule_configs_history (schedule_name, changed_at DESC);" ), ), + PgMigration( + version=2, + description="Run review status (per-run review label + audit history)", + sql=( + # ---------------------------------------------------------- + # dq_run_review_status — one mutable row per run that has + # been explicitly reviewed. 
Runs without a row surface the + # configured default virtually at read-time (see + # ReviewStatusService.get_effective). + # ---------------------------------------------------------- + f"CREATE TABLE IF NOT EXISTS {_S}.dq_run_review_status (" + " run_id TEXT PRIMARY KEY," + " status TEXT NOT NULL," + " updated_by TEXT," + " updated_at TIMESTAMPTZ" + ");" + # The Runs History page filters by status across the whole + # list, so an index on status keeps that scan cheap as the + # review-status table grows alongside the run history. + f"CREATE INDEX IF NOT EXISTS idx_dq_run_review_status_status " + f" ON {_S}.dq_run_review_status (status);" + # ---------------------------------------------------------- + # dq_run_review_status_history — append-only audit log. + # BIGSERIAL gives us a stable display order even if two + # changes land on the same TIMESTAMPTZ (rare but possible + # with millisecond resolution + bulk admin tooling). + # ---------------------------------------------------------- + f"CREATE TABLE IF NOT EXISTS {_S}.dq_run_review_status_history (" + " history_id BIGSERIAL PRIMARY KEY," + " run_id TEXT NOT NULL," + " status TEXT NOT NULL," + " previous_status TEXT," + " changed_by TEXT NOT NULL," + " changed_at TIMESTAMPTZ NOT NULL" + ");" + f"CREATE INDEX IF NOT EXISTS idx_dq_run_review_status_history_run_changed_at " + f" ON {_S}.dq_run_review_status_history (run_id, changed_at DESC);" + ), + ), ] diff --git a/app/src/databricks_labs_dqx_app/backend/models.py b/app/src/databricks_labs_dqx_app/backend/models.py index 8b4d02920..0943f9c97 100644 --- a/app/src/databricks_labs_dqx_app/backend/models.py +++ b/app/src/databricks_labs_dqx_app/backend/models.py @@ -290,6 +290,17 @@ class ValidationRunSummaryOut(BaseModel): created_at: str | None = None error_message: str | None = None checks: list[dict[str, Any]] = Field(default_factory=list) + # Per-run review status — set by reviewers on the Runs detail page, + # filterable on the Runs History page. 
``review_status`` is the + # effective value (catalogue default for unreviewed runs, persisted + # value otherwise); ``review_status_is_default`` lets the History + # table render unreviewed rows distinctly (e.g. lighter badge) so + # they're not visually indistinguishable from rows where someone + # explicitly selected "Pending review". + review_status: str | None = None + review_status_is_default: bool = False + review_status_updated_by: str | None = None + review_status_updated_at: str | None = None # --------------------------------------------------------------------------- diff --git a/app/src/databricks_labs_dqx_app/backend/pg_executor.py b/app/src/databricks_labs_dqx_app/backend/pg_executor.py index e94224eb5..037f74be3 100644 --- a/app/src/databricks_labs_dqx_app/backend/pg_executor.py +++ b/app/src/databricks_labs_dqx_app/backend/pg_executor.py @@ -116,11 +116,13 @@ def _pg_render_value(value: Any) -> str: Behaves identically to :func:`backend.sql_executor._render_value` except that: - - :class:`RawSql("current_timestamp()")` is rewritten to - ``CURRENT_TIMESTAMP`` because Postgres rejects the parenthesised - Spark SQL form. Other ``RawSql`` payloads pass through verbatim - so callers can still inject Postgres-specific helpers like - ``now()`` or ``::jsonb`` casts. + - :class:`RawSql` payloads matching the Spark / SQL standard "now" + idiom — ``current_timestamp()`` or ``now()`` — are rewritten to + ``CURRENT_TIMESTAMP``. The parenthesised Spark form does not + parse on Postgres, and rewriting bare ``now()`` keeps service + code dialect-agnostic. Any other ``RawSql`` payload (e.g. + ``::jsonb`` casts, ``gen_random_uuid()``) passes through + verbatim. - ``bool`` renders as ``TRUE``/``FALSE`` which Postgres accepts. 
""" if isinstance(value, RawSql): diff --git a/app/src/databricks_labs_dqx_app/backend/routes/v1/__init__.py b/app/src/databricks_labs_dqx_app/backend/routes/v1/__init__.py index 48832e9ba..ad79e258f 100644 --- a/app/src/databricks_labs_dqx_app/backend/routes/v1/__init__.py +++ b/app/src/databricks_labs_dqx_app/backend/routes/v1/__init__.py @@ -14,6 +14,7 @@ from .comments import router as comments_router from .quarantine import router as quarantine_router from .metrics import router as metrics_router +from .review_status import router as review_status_router from .schedules import router as schedules_router v1_router = APIRouter() @@ -32,3 +33,4 @@ v1_router.include_router(comments_router, prefix="/comments", tags=["comments"]) v1_router.include_router(quarantine_router, prefix="/quarantine", tags=["quarantine"]) v1_router.include_router(metrics_router, prefix="/metrics", tags=["metrics"]) +v1_router.include_router(review_status_router, prefix="/runs", tags=["review-status"]) diff --git a/app/src/databricks_labs_dqx_app/backend/routes/v1/config.py b/app/src/databricks_labs_dqx_app/backend/routes/v1/config.py index 17b5fa0c8..9f8ab8d79 100644 --- a/app/src/databricks_labs_dqx_app/backend/routes/v1/config.py +++ b/app/src/databricks_labs_dqx_app/backend/routes/v1/config.py @@ -1,10 +1,12 @@ import json +import os import re from typing import Annotated from fastapi import APIRouter, Depends, HTTPException from databricks_labs_dqx_app.backend.common.authorization import UserRole, get_user_email +from databricks_labs_dqx_app.backend.config import conf from databricks_labs_dqx_app.backend.dependencies import get_app_settings_service, require_role from databricks_labs_dqx_app.backend.logger import logger from pydantic import BaseModel, Field @@ -526,3 +528,293 @@ def save_custom_metrics( saved = svc.save_custom_metrics(cleaned, user_email=email) logger.info("Saved %d custom metric expression(s)", len(saved)) return CustomMetricsOut(metrics=saved) + + +# 
---------------------------------------------------------------------- +# Embedded dashboard — the Insights page renders a Databricks AI/BI +# dashboard inside an iframe. Admins set the dashboard ID (and an +# optional display title) here; the GET endpoint falls back to the env +# default (``conf.default_dashboard_id`` from ``DQX_DEFAULT_DASHBOARD_ID``) +# so the bundle can ship a starter dashboard without preventing +# customer overrides. The workspace host is read from +# ``DATABRICKS_HOST`` (always set inside a Databricks App container) +# and included in the response so the frontend can build the embed +# URL without a second roundtrip. +# ---------------------------------------------------------------------- + +# Conservative ID validation: Databricks AI/BI dashboard IDs are +# UUIDs or shorter slugs, so we accept letters, digits, hyphens, and +# underscores. We deliberately reject anything that could be a URL +# fragment or path traversal so admins can't accidentally paste a full +# URL and break iframe rendering downstream. +_DASHBOARD_ID_RE = re.compile(r"^[A-Za-z0-9_-]{1,128}$") + + +class EmbeddedDashboardOut(BaseModel): + """Current embedded-dashboard configuration + the bits the UI needs to render the iframe.""" + + dashboard_id: str = Field( + default="", + description="Effective dashboard ID. Empty string means 'nothing configured'.", + ) + title: str | None = Field( + default=None, + description="Optional admin-provided display title. The UI falls back to a generic label when null.", + ) + workspace_host: str = Field( + default="", + description="Workspace host (e.g. 
'https://e2-...cloud.databricks.com') used to build the iframe URL.", + ) + is_set: bool = Field( + default=False, + description="True when the admin has saved an explicit setting (independent of the env default).", + ) + is_default: bool = Field( + default=False, + description="True when the response is serving the env-provided default rather than an admin override.", + ) + + +class EmbeddedDashboardIn(BaseModel): + """Update payload — admins write the dashboard ID and optionally a display title.""" + + dashboard_id: str + title: str | None = None + + +def _workspace_host() -> str: + """Read the workspace host from the env Databricks Apps populates at runtime. + + Returns an empty string if unset (e.g. local dev without DATABRICKS_HOST); + the UI handles this by showing a config-required message rather than a + broken iframe. + """ + host = (os.environ.get("DATABRICKS_HOST") or "").strip() + if host and not host.startswith(("http://", "https://")): + host = f"https://{host}" + return host.rstrip("/") + + +@router.get( + "/embedded-dashboard", + response_model=EmbeddedDashboardOut, + operation_id="getEmbeddedDashboard", +) +def get_embedded_dashboard( + svc: Annotated[AppSettingsService, Depends(get_app_settings_service)], +) -> EmbeddedDashboardOut: + """Return the current embedded-dashboard config. + + Available to any authenticated user — the Insights page is read-only + and the underlying dashboard enforces UC permissions on the data, + so we don't gate visibility here. 
+ """ + saved = svc.get_embedded_dashboard() + workspace_host = _workspace_host() + if saved: + return EmbeddedDashboardOut( + dashboard_id=saved["dashboard_id"], + title=saved.get("title"), + workspace_host=workspace_host, + is_set=True, + is_default=False, + ) + env_default = (conf.default_dashboard_id or "").strip() + return EmbeddedDashboardOut( + dashboard_id=env_default, + title=None, + workspace_host=workspace_host, + is_set=False, + is_default=bool(env_default), + ) + + +@router.put( + "/embedded-dashboard", + response_model=EmbeddedDashboardOut, + operation_id="saveEmbeddedDashboard", + dependencies=[require_role(UserRole.ADMIN)], +) +def save_embedded_dashboard( + body: EmbeddedDashboardIn, + svc: Annotated[AppSettingsService, Depends(get_app_settings_service)], + email: Annotated[str, Depends(get_user_email)], +) -> EmbeddedDashboardOut: + """Save the embedded-dashboard configuration (admin only).""" + dashboard_id = (body.dashboard_id or "").strip() + if not dashboard_id: + raise HTTPException(status_code=400, detail="dashboard_id is required.") + if not _DASHBOARD_ID_RE.match(dashboard_id): + raise HTTPException( + status_code=400, + detail=( + "Invalid dashboard_id. Paste the ID portion only " + "(letters, digits, hyphens, underscores; up to 128 chars) — " + "not a full dashboard URL." 
+ ), + ) + title = (body.title or "").strip() or None + if title and len(title) > 200: + raise HTTPException(status_code=400, detail="title must be 200 characters or fewer.") + + svc.save_embedded_dashboard(dashboard_id, title, user_email=email) + logger.info("Saved embedded dashboard id=%s title=%r (by=%s)", dashboard_id, title, email) + return EmbeddedDashboardOut( + dashboard_id=dashboard_id, + title=title, + workspace_host=_workspace_host(), + is_set=True, + is_default=False, + ) + + +@router.delete( + "/embedded-dashboard", + response_model=EmbeddedDashboardOut, + operation_id="deleteEmbeddedDashboard", + dependencies=[require_role(UserRole.ADMIN)], +) +def delete_embedded_dashboard( + svc: Annotated[AppSettingsService, Depends(get_app_settings_service)], + email: Annotated[str, Depends(get_user_email)], +) -> EmbeddedDashboardOut: + """Clear the admin override (admin only). + + The env-provided default — if any — takes over again. Useful when + the bundle ships a starter dashboard and the admin wants to revert + to it after a botched custom ID. + """ + svc.delete_embedded_dashboard(user_email=email) + logger.info("Cleared embedded dashboard override (by=%s)", email) + return get_embedded_dashboard(svc) + + +# ---------------------------------------------------------------------- +# Run review statuses — admin-managed catalogue of values for the per-run +# review label shown on the Runs detail page and filterable on the Runs +# History page. Stored as a JSON list under ``run_review_statuses_v1`` +# (see :meth:`AppSettingsService.get_run_review_statuses`). The catalogue +# always includes exactly one entry marked ``is_default``; the service +# enforces that invariant on save so the Runs detail dropdown is never +# empty and listing endpoints always have something to surface for +# unreviewed runs. 
+# ---------------------------------------------------------------------- + +# Conservative value validation: the catalogue values become filter +# chips on the History page and audit-log strings, so we want them +# printable and short. Letters/digits/spaces/hyphens/underscores covers +# "Pending review", "Acknowledged", "False positive", custom domain +# terms, while rejecting newlines, control chars, and quote characters +# that would break our raw-SQL escape path on the history table. +_REVIEW_STATUS_VALUE_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9 _\-/.]{0,79}$") + + +class RunReviewStatusOption(BaseModel): + """One catalogue entry. ``is_default`` flags the value auto-surfaced for unreviewed runs.""" + + value: str + description: str = "" + color: str = "gray" + is_default: bool = False + + +class RunReviewStatusesOut(BaseModel): + statuses: list[RunReviewStatusOption] + + +class RunReviewStatusesIn(BaseModel): + statuses: list[RunReviewStatusOption] + + +def _statuses_to_out(entries: list[dict]) -> RunReviewStatusesOut: + """Coerce the service's dict-shaped entries into the pydantic out model.""" + return RunReviewStatusesOut( + statuses=[ + RunReviewStatusOption( + value=e.get("value") or "", + description=e.get("description") or "", + color=e.get("color") or "gray", + is_default=bool(e.get("is_default")), + ) + for e in entries + ] + ) + + +@router.get( + "/run-review-statuses", + response_model=RunReviewStatusesOut, + operation_id="getRunReviewStatuses", +) +def get_run_review_statuses( + svc: Annotated[AppSettingsService, Depends(get_app_settings_service)], +) -> RunReviewStatusesOut: + """Return the admin-managed list of run review status values. + + Visible to any authenticated user — both the Runs detail dropdown + and the Runs History filter need this list, and neither is + admin-gated. The list is seeded on first read so the UI never + sees an empty dropdown on a fresh deploy. 
+ """ + return _statuses_to_out(svc.get_run_review_statuses()) + + +@router.put( + "/run-review-statuses", + response_model=RunReviewStatusesOut, + operation_id="saveRunReviewStatuses", + dependencies=[require_role(UserRole.ADMIN)], +) +def save_run_review_statuses( + body: RunReviewStatusesIn, + svc: Annotated[AppSettingsService, Depends(get_app_settings_service)], + email: Annotated[str, Depends(get_user_email)], +) -> RunReviewStatusesOut: + """Replace the full catalogue (admin only). + + Each value is validated against ``_REVIEW_STATUS_VALUE_RE`` (printable + short strings, no quotes/control chars), descriptions are trimmed, + duplicates are rejected, and exactly one ``is_default`` is enforced + by :meth:`AppSettingsService.save_run_review_statuses`. + + NOTE: renaming an existing value does *not* update historical + references in ``dq_run_review_status`` / ``dq_run_review_status_history``; + those rows keep the original string so the audit trail stays + accurate. The UI surfaces orphaned historical values as-is. If a + workspace operator wants to retire a value cleanly they should + either keep it in the list (without ``is_default``) until the + affected runs age out, or do a one-off UPDATE through the SQL + warehouse. + """ + cleaned_payload: list[dict] = [] + for option in body.statuses or []: + value = (option.value or "").strip() + if not value: + raise HTTPException(status_code=400, detail="Run review status 'value' cannot be blank.") + if not _REVIEW_STATUS_VALUE_RE.match(value): + raise HTTPException( + status_code=400, + detail=( + f"Invalid review status value {value!r}. Use printable ASCII " + "(letters, digits, spaces, hyphens, underscores, dots, slashes), " + "1–80 characters, starting with a letter or digit." 
+ ), + ) + cleaned_payload.append( + { + "value": value, + "description": (option.description or "").strip(), + "color": (option.color or "gray").strip() or "gray", + "is_default": bool(option.is_default), + } + ) + + try: + saved = svc.save_run_review_statuses(cleaned_payload, user_email=email) + except ValueError as e: + # The service's invariants (at-least-one entry, unique values, + # exactly one default) surface as ValueError. Route surface + # them as 400 so the UI can show the human-readable message. + raise HTTPException(status_code=400, detail=str(e)) + logger.info("Saved %d run review status(es)", len(saved)) + return _statuses_to_out(saved) diff --git a/app/src/databricks_labs_dqx_app/backend/routes/v1/dryrun.py b/app/src/databricks_labs_dqx_app/backend/routes/v1/dryrun.py index f98ce3856..0f7e717af 100644 --- a/app/src/databricks_labs_dqx_app/backend/routes/v1/dryrun.py +++ b/app/src/databricks_labs_dqx_app/backend/routes/v1/dryrun.py @@ -19,6 +19,7 @@ get_conf, get_job_service, get_obo_ws, + get_review_status_service, get_rules_catalog_service, get_sp_sql_executor, get_user_catalog_names, @@ -40,6 +41,7 @@ ) from databricks_labs_dqx_app.backend.services.job_service import JobService from databricks_labs_dqx_app.backend.run_status_manager import get_run_metadata, has_terminal_result, update_run_status +from databricks_labs_dqx_app.backend.services.review_status_service import ReviewStatusService from databricks_labs_dqx_app.backend.services.rules_catalog_service import RulesCatalogService from databricks_labs_dqx_app.backend.services.view_service import ViewService @@ -76,18 +78,76 @@ def _catalog_of(fqn: str) -> str: ) async def list_validation_runs( job_svc: Annotated[JobService, Depends(get_job_service)], + review_svc: Annotated[ReviewStatusService, Depends(get_review_status_service)], app_conf: Annotated[AppConfig, Depends(get_conf)], user_catalogs: Annotated[frozenset[str], Depends(get_user_catalog_names)], + review_status: Annotated[ + list[str] | 
None, + Query( + description=( + "Filter to runs whose effective review status matches one of " + "the supplied values. Repeat the param for multi-select " + "(e.g. ?review_status=Acknowledged&review_status=Resolved). " + "Match is on the effective value, so passing the catalogue " + "default also catches unreviewed runs." + ), + ), + ] = None, ) -> list[ValidationRunSummaryOut]: """Return validation (dry-run) history filtered to user-accessible catalogs.""" try: table = f"{app_conf.catalog}.{app_conf.schema_name}.dq_validation_runs" rows = job_svc.list_dryrun_rows(table) - results: list[ValidationRunSummaryOut] = [] + + # First-pass filter on UC visibility — we don't want to bulk-fetch + # review statuses for runs the caller can't see anyway. Build the + # candidate list in the same order so the final response stays + # sorted by ``created_at DESC`` (already applied in the SQL). + candidates: list[dict[str, str | None]] = [] for row in rows: fqn = row.get("source_table_fqn") or "" if not fqn.startswith(_SQL_CHECK_PREFIX) and _catalog_of(fqn) not in user_catalogs: continue + candidates.append(row) + + # Bulk-fetch review statuses for every visible run in one query. + # Lakebase/Delta-OLTP and dq_validation_runs may live on different + # backends, so we can't JOIN at the SQL layer — merging in Python + # is what keeps this dialect-portable. + candidate_run_ids = [row.get("run_id") or "" for row in candidates if row.get("run_id")] + try: + review_map = review_svc.bulk_get_effective(candidate_run_ids) + except Exception as exc: # noqa: BLE001 - degrade gracefully if OLTP is hiccuping + logger.warning( + "Failed to bulk-fetch review statuses (rendering without): %s", + exc, + exc_info=True, + ) + review_map = {} + + # Normalise the optional multi-select filter into a set for O(1) + # checks. Empty strings (e.g. trailing ``?review_status=``) are + # dropped so a forgotten chip never accidentally filters + # everything out. 
+ review_filter: set[str] | None + if review_status: + review_filter = {s.strip() for s in review_status if s and s.strip()} + if not review_filter: + review_filter = None + else: + review_filter = None + + results: list[ValidationRunSummaryOut] = [] + for row in candidates: + fqn = row.get("source_table_fqn") or "" + run_id = row.get("run_id") or "" + review = review_map.get(run_id) + review_value = review.status if review else None + + if review_filter is not None: + if not review_value or review_value not in review_filter: + continue + checks: list[dict[str, Any]] = [] raw = row.get("checks_json") if raw: @@ -99,7 +159,7 @@ async def list_validation_runs( pass results.append( ValidationRunSummaryOut( - run_id=row.get("run_id") or "", + run_id=run_id, source_table_fqn=fqn, status=row.get("status"), requesting_user=row.get("requesting_user"), @@ -117,6 +177,10 @@ async def list_validation_runs( run_type=row.get("run_type"), error_message=row.get("error_message"), checks=checks, + review_status=review_value, + review_status_is_default=bool(review.is_default) if review else False, + review_status_updated_by=review.updated_by if review else None, + review_status_updated_at=review.updated_at if review else None, ) ) return results diff --git a/app/src/databricks_labs_dqx_app/backend/routes/v1/quarantine.py b/app/src/databricks_labs_dqx_app/backend/routes/v1/quarantine.py index ca0aab44c..7badce020 100644 --- a/app/src/databricks_labs_dqx_app/backend/routes/v1/quarantine.py +++ b/app/src/databricks_labs_dqx_app/backend/routes/v1/quarantine.py @@ -62,9 +62,23 @@ def _row_to_record(row: dict[str, Any]) -> QuarantineRecordOut: errors = None if row.get("errors"): try: - errors = json.loads(row["errors"]) + parsed_errors = json.loads(row["errors"]) except (json.JSONDecodeError, TypeError): errors = [row["errors"]] + else: + # Normalise to a list. 
DQX's ``dq_result_item_schema`` (and + # the current SQL-check writer) emit a list of ``{name, + # message, ...}`` structs, but legacy SQL-check rows written + # before that fix used a single ``{check_name: message}`` + # dict. Coerce the legacy shape so those rows still display + # — and so Pydantic's ``list[Any]`` validation doesn't 500 + # the whole endpoint when one historical row is wrong. + if isinstance(parsed_errors, list): + errors = parsed_errors + elif isinstance(parsed_errors, dict): + errors = [{"name": k, "message": v} for k, v in parsed_errors.items()] + elif parsed_errors is not None: + errors = [parsed_errors] # ``warnings`` is missing on rows written before migration v4; the # column is ``null`` for SQL-check quarantines. diff --git a/app/src/databricks_labs_dqx_app/backend/routes/v1/review_status.py b/app/src/databricks_labs_dqx_app/backend/routes/v1/review_status.py new file mode 100644 index 000000000..6bbae302e --- /dev/null +++ b/app/src/databricks_labs_dqx_app/backend/routes/v1/review_status.py @@ -0,0 +1,208 @@ +"""Per-run review status endpoints. + +Surface for the dropdown that lives next to the comments thread on the +Runs detail page, plus the audit-history list shown below it. Any +authenticated user can change a run's status (matches the comments +behaviour — business reviewers don't have a special role); the +catalogue of allowed values is admin-managed in +``/api/v1/config/run-review-statuses`` (see ``config.py``). + +Route shape +----------- +``GET /api/v1/runs/{run_id}/review-status`` — current effective value +(persisted row or virtual default) plus ``updated_by`` / ``updated_at`` +metadata. + +``PUT /api/v1/runs/{run_id}/review-status`` — set the status. Validated +against the live catalogue server-side. + +``DELETE /api/v1/runs/{run_id}/review-status`` — revert to the +catalogue default (drops the explicit row, appends a history entry). 
+ +``GET /api/v1/runs/{run_id}/review-status/history`` — last ≤200 audit +rows, newest first. Surfaced as a collapsible list on the run detail +page. +""" + +from __future__ import annotations + +from typing import Annotated + +from databricks.sdk import WorkspaceClient +from fastapi import APIRouter, Depends, HTTPException + +from databricks_labs_dqx_app.backend.common.authorization import UserRole +from databricks_labs_dqx_app.backend.dependencies import ( + get_obo_ws, + get_review_status_service, + require_role, +) +from databricks_labs_dqx_app.backend.logger import logger +from databricks_labs_dqx_app.backend.services.review_status_service import ( + ReviewStatusHistoryEntry, + ReviewStatusRecord, + ReviewStatusService, +) +from pydantic import BaseModel + +router = APIRouter() + +# Visible to every authenticated principal. Matches the comments +# routes — review actions are deliberately broad-reach so business +# users can self-serve without being assigned a role. +_ALL_ROLES = [UserRole.ADMIN, UserRole.RULE_APPROVER, UserRole.RULE_AUTHOR, UserRole.VIEWER] + + +class ReviewStatusOut(BaseModel): + """Current effective review status for a run.""" + + run_id: str + status: str + updated_by: str | None = None + updated_at: str | None = None + is_default: bool = False + + +class SetReviewStatusIn(BaseModel): + status: str + + +class ReviewStatusHistoryEntryOut(BaseModel): + run_id: str + status: str + previous_status: str | None = None + changed_by: str + changed_at: str | None = None + + +class ReviewStatusHistoryOut(BaseModel): + history: list[ReviewStatusHistoryEntryOut] + + +def _record_to_out(record: ReviewStatusRecord) -> ReviewStatusOut: + return ReviewStatusOut( + run_id=record.run_id, + status=record.status, + updated_by=record.updated_by, + updated_at=record.updated_at, + is_default=record.is_default, + ) + + +def _history_entry_to_out(entry: ReviewStatusHistoryEntry) -> ReviewStatusHistoryEntryOut: + return ReviewStatusHistoryEntryOut( + 
run_id=entry.run_id, + status=entry.status, + previous_status=entry.previous_status, + changed_by=entry.changed_by, + changed_at=entry.changed_at, + ) + + +@router.get( + "/{run_id}/review-status", + response_model=ReviewStatusOut, + operation_id="getRunReviewStatus", + dependencies=[require_role(*_ALL_ROLES)], +) +def get_run_review_status( + run_id: str, + svc: Annotated[ReviewStatusService, Depends(get_review_status_service)], +) -> ReviewStatusOut: + """Return the effective status for the run. + + Falls back to the catalogue default when no explicit row exists — + the UI uses ``is_default`` to render an "(auto)" hint and skip the + ``updated_by`` / ``updated_at`` metadata that would otherwise be + misleading for an unreviewed run. + """ + try: + return _record_to_out(svc.get_effective(run_id)) + except Exception as e: + logger.error("Failed to get review status for run %s: %s", run_id, e, exc_info=True) + raise HTTPException(status_code=500, detail=f"Failed to get review status: {e}") + + +@router.put( + "/{run_id}/review-status", + response_model=ReviewStatusOut, + operation_id="setRunReviewStatus", + dependencies=[require_role(*_ALL_ROLES)], +) +def set_run_review_status( + run_id: str, + body: SetReviewStatusIn, + svc: Annotated[ReviewStatusService, Depends(get_review_status_service)], + obo_ws: Annotated[WorkspaceClient, Depends(get_obo_ws)], +) -> ReviewStatusOut: + """Set the review status for a run. + + Records the change in the audit history with the previous effective + value (virtual default included) so the run detail page can render + "Pending review → Acknowledged" naturally. + """ + try: + user = obo_ws.current_user.me() + user_email = user.user_name or "unknown" + record = svc.set_status(run_id, body.status, user_email=user_email) + return _record_to_out(record) + except ValueError as e: + # Service raises ValueError for unknown status / empty inputs. 
+ raise HTTPException(status_code=400, detail=str(e)) + except Exception as e: + logger.error("Failed to set review status for run %s: %s", run_id, e, exc_info=True) + raise HTTPException(status_code=500, detail=f"Failed to set review status: {e}") + + +@router.delete( + "/{run_id}/review-status", + response_model=ReviewStatusOut, + operation_id="clearRunReviewStatus", + dependencies=[require_role(*_ALL_ROLES)], +) +def clear_run_review_status( + run_id: str, + svc: Annotated[ReviewStatusService, Depends(get_review_status_service)], + obo_ws: Annotated[WorkspaceClient, Depends(get_obo_ws)], +) -> ReviewStatusOut: + """Revert the run to the catalogue default. + + Drops the explicit row and appends a history entry. Useful when a + reviewer wants to "unacknowledge" something without picking another + explicit value (e.g. the run was re-classified and should go back + into the unreviewed queue). + """ + try: + user = obo_ws.current_user.me() + user_email = user.user_name or "unknown" + record = svc.clear_status(run_id, user_email=user_email) + return _record_to_out(record) + except ValueError as e: + raise HTTPException(status_code=400, detail=str(e)) + except Exception as e: + logger.error("Failed to clear review status for run %s: %s", run_id, e, exc_info=True) + raise HTTPException(status_code=500, detail=f"Failed to clear review status: {e}") + + +@router.get( + "/{run_id}/review-status/history", + response_model=ReviewStatusHistoryOut, + operation_id="getRunReviewStatusHistory", + dependencies=[require_role(*_ALL_ROLES)], +) +def get_run_review_status_history( + run_id: str, + svc: Annotated[ReviewStatusService, Depends(get_review_status_service)], +) -> ReviewStatusHistoryOut: + """Return up to 200 most-recent audit rows, newest first.""" + try: + history = svc.get_history(run_id) + return ReviewStatusHistoryOut(history=[_history_entry_to_out(h) for h in history]) + except Exception as e: + logger.error( + "Failed to load review status history for run %s: %s", + 
run_id, + e, + exc_info=True, + ) + raise HTTPException(status_code=500, detail=f"Failed to load review status history: {e}") diff --git a/app/src/databricks_labs_dqx_app/backend/services/app_settings_service.py b/app/src/databricks_labs_dqx_app/backend/services/app_settings_service.py index 2333e075a..a81367de9 100644 --- a/app/src/databricks_labs_dqx_app/backend/services/app_settings_service.py +++ b/app/src/databricks_labs_dqx_app/backend/services/app_settings_service.py @@ -163,3 +163,219 @@ def _get_int_setting(self, key: str) -> int | None: except (TypeError, ValueError): logger.warning("Setting %s is not parseable as int (%r); ignoring", key, raw) return None + + # ------------------------------------------------------------------ + # Embedded dashboard — Insights page renders a Databricks AI/BI + # dashboard inside an iframe. Admins set the dashboard ID + an + # optional display title via the Configuration page; the GET + # endpoint falls back to ``conf.default_dashboard_id`` (env) when + # this setting is unset, so a bundle can ship a starter dashboard + # ID without preventing customer overrides. 
+ # ------------------------------------------------------------------ + + _EMBEDDED_DASHBOARD_KEY = "embedded_dashboard_v1" + + def get_embedded_dashboard(self) -> dict | None: + """Return ``{"dashboard_id": str, "title": str | None}`` or ``None`` if unset.""" + raw = self.get_setting(self._EMBEDDED_DASHBOARD_KEY) + if not raw: + return None + try: + parsed = json.loads(raw) + except (TypeError, json.JSONDecodeError): + logger.warning("embedded_dashboard_v1 setting is not valid JSON; ignoring") + return None + if not isinstance(parsed, dict): + logger.warning("embedded_dashboard_v1 setting is not a dict; ignoring") + return None + dashboard_id = parsed.get("dashboard_id") + if not isinstance(dashboard_id, str) or not dashboard_id.strip(): + return None + title = parsed.get("title") + return { + "dashboard_id": dashboard_id.strip(), + "title": title.strip() if isinstance(title, str) and title.strip() else None, + } + + def save_embedded_dashboard( + self, + dashboard_id: str, + title: str | None = None, + *, + user_email: str | None = None, + ) -> dict: + """Persist the embedded dashboard ID + optional title. Returns the saved payload.""" + cleaned_id = (dashboard_id or "").strip() + cleaned_title = (title or "").strip() or None + payload = {"dashboard_id": cleaned_id, "title": cleaned_title} + self.save_setting(self._EMBEDDED_DASHBOARD_KEY, json.dumps(payload), user_email=user_email) + return payload + + def delete_embedded_dashboard(self, *, user_email: str | None = None) -> None: + """Clear the embedded dashboard setting so the env default takes over again.""" + self.save_setting(self._EMBEDDED_DASHBOARD_KEY, "", user_email=user_email) + + # ------------------------------------------------------------------ + # Run review statuses — admin-managed list of labels surfaced on the + # Runs detail page (next to comments) and as a Runs History filter. + # Stored as a JSON array under ``run_review_statuses_v1``. 
One entry + # MUST be flagged ``is_default``; that value is what + # ``ReviewStatusService`` returns virtually for runs that have never + # been explicitly reviewed. + # + # We seed a sensible default list on first read (rather than at + # migration time) so the feature works out-of-the-box even on + # already-deployed workspaces, and so the seed list can evolve in + # code without needing a fresh migration. + # ------------------------------------------------------------------ + + _RUN_REVIEW_STATUSES_KEY = "run_review_statuses_v1" + + # Default catalogue shipped on first read. The colors are token + # names the UI maps to its design-system palette so we can rebrand + # without touching backend data. + _RUN_REVIEW_STATUSES_SEED: list[dict] = [ + { + "value": "Pending review", + "description": "Awaiting business review", + "color": "gray", + "is_default": True, + }, + { + "value": "Acknowledged", + "description": "Known issue, accepted by owners", + "color": "amber", + "is_default": False, + }, + { + "value": "Resolved", + "description": "Fixed upstream", + "color": "green", + "is_default": False, + }, + { + "value": "False positive", + "description": "Rule is wrong, not a real issue", + "color": "blue", + "is_default": False, + }, + ] + + def get_run_review_statuses(self) -> list[dict]: + """Return the admin-managed catalogue of run review status values. + + Falls back to (and persists) the seed list on first read so the + Runs detail dropdown is never empty after a fresh deploy. + Returned entries are always normalised — ``value`` trimmed, + ``description`` defaulted to empty string, ``color`` defaulted + to ``"gray"``, ``is_default`` coerced to bool — so call sites + can index by field without defensive lookups. 
+ """ + raw = self.get_setting(self._RUN_REVIEW_STATUSES_KEY) + if not raw: + logger.info("Seeding default run_review_statuses on first read") + self._persist_run_review_statuses(self._RUN_REVIEW_STATUSES_SEED, user_email=None) + return [self._normalise_status_entry(e) for e in self._RUN_REVIEW_STATUSES_SEED] + + try: + parsed = json.loads(raw) + except (TypeError, json.JSONDecodeError): + logger.warning("run_review_statuses_v1 is not valid JSON; falling back to seed") + return [self._normalise_status_entry(e) for e in self._RUN_REVIEW_STATUSES_SEED] + if not isinstance(parsed, list): + logger.warning("run_review_statuses_v1 is not a list; falling back to seed") + return [self._normalise_status_entry(e) for e in self._RUN_REVIEW_STATUSES_SEED] + + out: list[dict] = [] + for item in parsed: + if not isinstance(item, dict): + continue + normalised = self._normalise_status_entry(item) + if normalised["value"]: + out.append(normalised) + # Defensive: if the persisted list is malformed and we end up + # with nothing, surface the seed rather than an empty dropdown. + return out or [self._normalise_status_entry(e) for e in self._RUN_REVIEW_STATUSES_SEED] + + def save_run_review_statuses( + self, + statuses: list[dict], + *, + user_email: str | None = None, + ) -> list[dict]: + """Replace the admin-managed catalogue, enforcing exactly one default. + + Validation rules (raise ``ValueError`` on violation so the route + can turn them into a 400): + - At least one entry is required (callers always need a default + to surface for unreviewed runs). + - ``value`` must be non-empty, trimmed, and unique within the list. + - Exactly one entry must have ``is_default=True``. 
+ """ + cleaned: list[dict] = [] + seen: set[str] = set() + for item in statuses or []: + if not isinstance(item, dict): + continue + normalised = self._normalise_status_entry(item) + value = normalised["value"] + if not value: + raise ValueError("Run review status 'value' must be non-empty.") + if value in seen: + raise ValueError(f"Duplicate run review status value: {value!r}.") + seen.add(value) + cleaned.append(normalised) + + if not cleaned: + raise ValueError("At least one run review status is required.") + + defaults = [e for e in cleaned if e["is_default"]] + if len(defaults) != 1: + raise ValueError(f"Exactly one run review status must be marked as default; got {len(defaults)}.") + + self._persist_run_review_statuses(cleaned, user_email=user_email) + return cleaned + + def get_default_run_review_status(self) -> str: + """Return the ``value`` of the catalogue entry flagged ``is_default``. + + Called by ``ReviewStatusService`` to surface an effective status + for runs that have no explicit row. Guaranteed non-empty because + :meth:`save_run_review_statuses` enforces the invariant. + """ + for entry in self.get_run_review_statuses(): + if entry["is_default"]: + return entry["value"] + # Should be unreachable thanks to the save-side invariant, but + # we fall back to the seed default rather than raise so a buggy + # write can't take down the whole listings endpoint. 
+ return self._RUN_REVIEW_STATUSES_SEED[0]["value"] + + def _persist_run_review_statuses( + self, + entries: list[dict], + *, + user_email: str | None, + ) -> None: + self.save_setting( + self._RUN_REVIEW_STATUSES_KEY, + json.dumps([self._normalise_status_entry(e) for e in entries]), + user_email=user_email, + ) + logger.info("Saved %d run review status(es) (by=%s)", len(entries), user_email or "system") + + @staticmethod + def _normalise_status_entry(item: dict) -> dict: + value = (item.get("value") or "").strip() if isinstance(item.get("value"), str) else "" + description = item.get("description") or "" + if not isinstance(description, str): + description = "" + color = item.get("color") or "gray" + if not isinstance(color, str) or not color.strip(): + color = "gray" + return { + "value": value, + "description": description.strip(), + "color": color.strip(), + "is_default": bool(item.get("is_default")), + } diff --git a/app/src/databricks_labs_dqx_app/backend/services/review_status_service.py b/app/src/databricks_labs_dqx_app/backend/services/review_status_service.py new file mode 100644 index 000000000..4b4ae05ea --- /dev/null +++ b/app/src/databricks_labs_dqx_app/backend/services/review_status_service.py @@ -0,0 +1,366 @@ +"""Per-run review-status tracking for DQX validation runs. + +A reviewer (any authenticated user) tags a validation run with one of the +admin-configured values from ``dq_app_settings.run_review_statuses_v1`` — +e.g. ``Pending review`` (auto-default) → ``Acknowledged`` once business +owners have triaged the failures. The Runs detail page renders the +current status next to the comments thread, and the Runs History page +exposes a multi-select filter on it. + +Storage layout +-------------- +- ``dq_run_review_status`` — one mutable row per run that has been + explicitly reviewed. 
Runs without a row are *not* unreviewed — they + surface the configured default ``value`` from ``AppSettingsService`` + virtually, so dashboards and filters never see a NULL state. +- ``dq_run_review_status_history`` — append-only audit log. One row per + explicit change (including the very first time someone moves a run + off the virtual default). ``previous_status`` carries the *effective* + value before the change, which is what reviewers care about reading + even though it may have been virtual. + +Both tables live in the OLTP executor — Lakebase Postgres when enabled, +Delta fallback otherwise — and share the same service surface as +``CommentsService`` / ``AppSettingsService``. The validation-run rows +themselves stay in Delta because they're append-only Spark output; +listing routes bulk-fetch effective statuses from this service and merge +in Python (see :meth:`bulk_get_effective`). +""" + +from __future__ import annotations + +import logging +from datetime import datetime, timezone + +from databricks_labs_dqx_app.backend.services.app_settings_service import AppSettingsService +from databricks_labs_dqx_app.backend.sql_executor import RawSql, SqlExecutor +from databricks_labs_dqx_app.backend.sql_utils import escape_sql_string + +logger = logging.getLogger(__name__) + + +class ReviewStatusRecord: + """A persisted ``dq_run_review_status`` row + a flag telling the UI + whether the value came from the table or from the virtual default.""" + + __slots__ = ("run_id", "status", "updated_by", "updated_at", "is_default") + + def __init__( + self, + run_id: str, + status: str, + updated_by: str | None, + updated_at: str | None, + is_default: bool, + ) -> None: + self.run_id = run_id + self.status = status + self.updated_by = updated_by + self.updated_at = updated_at + # True iff this record reflects the catalogue default rather + # than an explicit row in dq_run_review_status. 
The UI uses + # this to render an "(auto)" hint and skip showing meaningless + # ``updated_by`` / ``updated_at`` metadata. + self.is_default = is_default + + +class ReviewStatusHistoryEntry: + """One row from ``dq_run_review_status_history``.""" + + __slots__ = ("run_id", "status", "previous_status", "changed_by", "changed_at") + + def __init__( + self, + run_id: str, + status: str, + previous_status: str | None, + changed_by: str, + changed_at: str | None, + ) -> None: + self.run_id = run_id + self.status = status + self.previous_status = previous_status + self.changed_by = changed_by + self.changed_at = changed_at + + +class ReviewStatusService: + """CRUD + audit log for ``dq_run_review_status``. + + Constructed against the OLTP executor so the data co-locates with + ``dq_app_settings`` (status catalogue) and ``dq_comments`` (the + sibling reviewer-action table). All writes go through the app's + service principal, mirroring :class:`CommentsService`. + """ + + # Capped to keep the detail-page request payload predictable. Most + # reviews have <10 changes; the cap protects against pathological + # cases where a script churns the status thousands of times. + _HISTORY_LIMIT = 200 + + def __init__(self, sql: SqlExecutor, settings: AppSettingsService) -> None: + self._sql = sql + self._settings = settings + self._table = sql.fqn("dq_run_review_status") + self._history_table = sql.fqn("dq_run_review_status_history") + + # ------------------------------------------------------------------ + # Reads + # ------------------------------------------------------------------ + + def get_explicit(self, run_id: str) -> ReviewStatusRecord | None: + """Return the persisted row for *run_id* or ``None`` if unset. + + Callers that want a value (even the catalogue default) for runs + without a row should use :meth:`get_effective` instead. 
+ """ + er = escape_sql_string(run_id) + sql = ( + f"SELECT run_id, status, updated_by, {self._sql.ts_text('updated_at')} " + f"FROM {self._table} WHERE run_id = '{er}' LIMIT 1" + ) + rows = self._sql.query(sql) + if not rows: + return None + return ReviewStatusRecord( + run_id=rows[0][0] or "", + status=rows[0][1] or "", + updated_by=rows[0][2], + updated_at=rows[0][3], + is_default=False, + ) + + def get_effective(self, run_id: str) -> ReviewStatusRecord: + """Return the explicit row, or a virtual default record if unset. + + The default value comes from + :meth:`AppSettingsService.get_default_run_review_status`. + """ + explicit = self.get_explicit(run_id) + if explicit is not None: + return explicit + return ReviewStatusRecord( + run_id=run_id, + status=self._settings.get_default_run_review_status(), + updated_by=None, + updated_at=None, + is_default=True, + ) + + def bulk_get_effective(self, run_ids: list[str]) -> dict[str, ReviewStatusRecord]: + """Return ``{run_id: ReviewStatusRecord}`` for every input run_id. + + Runs without an explicit row get a virtual-default record so the + listing route can render *something* for every row. Uses a single + ``WHERE run_id IN (...)`` query — at the existing listing LIMIT + of 500 this is well inside any reasonable IN-list ceiling. + """ + if not run_ids: + return {} + + # Deduplicate before formatting so the ``IN`` list stays + # compact even if the caller passes the same run twice. 
+ unique_ids = list(dict.fromkeys(run_ids)) + in_list = ", ".join(f"'{escape_sql_string(r)}'" for r in unique_ids) + sql = ( + f"SELECT run_id, status, updated_by, {self._sql.ts_text('updated_at')} " + f"FROM {self._table} WHERE run_id IN ({in_list})" + ) + rows = self._sql.query(sql) + + explicit: dict[str, ReviewStatusRecord] = { + (row[0] or ""): ReviewStatusRecord( + run_id=row[0] or "", + status=row[1] or "", + updated_by=row[2], + updated_at=row[3], + is_default=False, + ) + for row in rows + if row and row[0] + } + + default_value = self._settings.get_default_run_review_status() + out: dict[str, ReviewStatusRecord] = {} + for run_id in unique_ids: + record = explicit.get(run_id) + if record is not None: + out[run_id] = record + else: + out[run_id] = ReviewStatusRecord( + run_id=run_id, + status=default_value, + updated_by=None, + updated_at=None, + is_default=True, + ) + return out + + def get_history(self, run_id: str) -> list[ReviewStatusHistoryEntry]: + """Return up to ``_HISTORY_LIMIT`` recent history rows, newest first.""" + er = escape_sql_string(run_id) + sql = ( + f"SELECT run_id, status, previous_status, changed_by, " + f"{self._sql.ts_text('changed_at')} " + f"FROM {self._history_table} " + f"WHERE run_id = '{er}' " + f"ORDER BY changed_at DESC LIMIT {self._HISTORY_LIMIT}" + ) + rows = self._sql.query(sql) + return [ + ReviewStatusHistoryEntry( + run_id=row[0] or "", + status=row[1] or "", + previous_status=row[2], + changed_by=row[3] or "", + changed_at=row[4], + ) + for row in rows + ] + + # ------------------------------------------------------------------ + # Writes + # ------------------------------------------------------------------ + + def set_status( + self, + run_id: str, + status: str, + *, + user_email: str, + ) -> ReviewStatusRecord: + """Upsert the explicit row + append an audit row. 
+ + ``status`` is validated against the live catalogue from + :class:`AppSettingsService`; passing an unknown value raises + ``ValueError`` so the route can return a 400. The audit row + carries the effective previous status (virtual default included) + so the history reads naturally on the UI. + """ + if not run_id or not run_id.strip(): + raise ValueError("run_id must be a non-empty string.") + if not status or not status.strip(): + raise ValueError("status must be a non-empty string.") + if not user_email or not user_email.strip(): + raise ValueError("user_email must be a non-empty string.") + + cleaned_status = status.strip() + allowed = {entry["value"] for entry in self._settings.get_run_review_statuses()} + if cleaned_status not in allowed: + raise ValueError(f"Unknown review status {cleaned_status!r}. " f"Allowed values: {sorted(allowed)}") + + previous_effective = self.get_effective(run_id).status + + # No-op writes still go through so the history captures the + # human action (e.g. the reviewer re-confirmed the existing + # status). UI shows the no-op rows as "Confirmed" if we want + # that polish later; for now we just record the duplicate. + self._sql.upsert( + self._table, + key_cols={"run_id": run_id}, + value_cols={ + "status": cleaned_status, + "updated_by": user_email, + "updated_at": RawSql("current_timestamp()"), + }, + ) + + self._append_history( + run_id=run_id, + status=cleaned_status, + previous_status=previous_effective, + changed_by=user_email, + ) + + logger.info( + "Run %s review status set to %r (prev=%r) by %s", + run_id, + cleaned_status, + previous_effective, + user_email, + ) + return ReviewStatusRecord( + run_id=run_id, + status=cleaned_status, + updated_by=user_email, + updated_at=datetime.now(timezone.utc).isoformat(), + is_default=False, + ) + + def clear_status(self, run_id: str, *, user_email: str) -> ReviewStatusRecord: + """Delete the explicit row + append an audit row for the revert. 
+ + The run reverts to the catalogue default. The audit row records + the default value as the new status so the history reads + "Acknowledged → Pending review" rather than "Acknowledged → + (none)" which would be ambiguous. + """ + if not run_id or not run_id.strip(): + raise ValueError("run_id must be a non-empty string.") + if not user_email or not user_email.strip(): + raise ValueError("user_email must be a non-empty string.") + + previous_effective = self.get_effective(run_id).status + default_value = self._settings.get_default_run_review_status() + + er = escape_sql_string(run_id) + self._sql.execute(f"DELETE FROM {self._table} WHERE run_id = '{er}'") + + self._append_history( + run_id=run_id, + status=default_value, + previous_status=previous_effective, + changed_by=user_email, + ) + + logger.info( + "Run %s review status cleared (was %r) by %s", + run_id, + previous_effective, + user_email, + ) + return ReviewStatusRecord( + run_id=run_id, + status=default_value, + updated_by=None, + updated_at=None, + is_default=True, + ) + + # ------------------------------------------------------------------ + # Internals + # ------------------------------------------------------------------ + + def _append_history( + self, + *, + run_id: str, + status: str, + previous_status: str | None, + changed_by: str, + ) -> None: + """Insert one row into ``dq_run_review_status_history``. + + Hand-rolled INSERT (no ``upsert``) because the table is + append-only with no natural key. We can't piggyback on the + executor's ``_render_value`` helpers either — the Delta one + passes ``current_timestamp()`` through verbatim (Postgres + rejects the parenthesised form), and the Postgres one is not + exposed publicly. ``now()`` is portable across both dialects + (Postgres ships it as a built-in synonym for + ``CURRENT_TIMESTAMP``; Delta SQL accepts it too — same idiom + as :class:`CommentsService`), so we inline it directly here. 
+ """ + run_id_lit = f"'{escape_sql_string(run_id)}'" + status_lit = f"'{escape_sql_string(status)}'" + if previous_status is None: + prev_lit = "NULL" + else: + prev_lit = f"'{escape_sql_string(previous_status)}'" + changed_by_lit = f"'{escape_sql_string(changed_by)}'" + + self._sql.execute( + f"INSERT INTO {self._history_table} " + f"(run_id, status, previous_status, changed_by, changed_at) " + f"VALUES ({run_id_lit}, {status_lit}, {prev_lit}, {changed_by_lit}, now())" + ) diff --git a/app/src/databricks_labs_dqx_app/ui/components/RunReviewStatusPanel.tsx b/app/src/databricks_labs_dqx_app/ui/components/RunReviewStatusPanel.tsx new file mode 100644 index 000000000..0e6d20f41 --- /dev/null +++ b/app/src/databricks_labs_dqx_app/ui/components/RunReviewStatusPanel.tsx @@ -0,0 +1,301 @@ +/** + * Per-run review status panel. Surfaced inside the expanded row on the + * Runs History page, immediately above the existing comments thread. + * + * UX outline: + * - A coloured badge shows the effective review status. Unreviewed runs + * carry the catalogue default (e.g. "Pending review") with an "(auto)" + * hint so the row is visually distinct from one where someone + * explicitly picked the same value. + * - A dropdown lets any authenticated user move the run to another + * value from the admin-managed catalogue. + * - A small "Last changed by X at Y" line records who moved the run + * most recently — only shown when the value is explicit (matching the + * ``is_default`` flag returned by the API). + * - "Revert to default" appears only when the status is explicit; it + * sends an HTTP DELETE so the run goes back to the catalogue default. + * - A collapsible history list shows the audit trail. We deliberately + * keep this collapsed by default because the dropdown + recent-change + * line answers 95% of questions and the panel sits inside an already + * expanded run row.
+ */ +import { useState } from "react"; +import { useQueryClient } from "@tanstack/react-query"; +import { toast } from "sonner"; +import { + AlertCircle, + Check, + ChevronDown, + History, + Loader2, + RotateCcw, + ShieldCheck, +} from "lucide-react"; +import type { AxiosError } from "axios"; + +import { Badge } from "@/components/ui/badge"; +import { Button } from "@/components/ui/button"; +import { + Popover, + PopoverContent, + PopoverTrigger, +} from "@/components/ui/popover"; +import { formatDateTime } from "@/lib/format-utils"; +import { cn } from "@/lib/utils"; +import { + useRunReviewStatus, + useRunReviewStatusHistory, + useRunReviewStatuses, + useSetRunReviewStatus, + useClearRunReviewStatus, + getRunReviewStatusQueryKey, + getRunReviewStatusHistoryQueryKey, +} from "@/lib/api-custom"; +// Re-use the colour token table from the Configuration page so the +// badge here is visually identical to the swatch admins picked there. +import { reviewStatusBadgeClasses } from "@/routes/_sidebar/config"; + +interface RunReviewStatusPanelProps { + runId: string; +} + +export function RunReviewStatusPanel({ runId }: RunReviewStatusPanelProps) { + const queryClient = useQueryClient(); + const [historyOpen, setHistoryOpen] = useState(false); + + const { data: current, isLoading: currentLoading } = useRunReviewStatus(runId); + const { data: catalogue, isLoading: catalogueLoading } = useRunReviewStatuses(); + const { data: history, isLoading: historyLoading } = useRunReviewStatusHistory(runId, { + // Don't pay for the audit-trail roundtrip until the user opens it. + // Audit views are a minority of interactions and the data is rarely + // useful at-a-glance. 
+ query: { enabled: historyOpen }, + }); + + const setMutation = useSetRunReviewStatus(); + const clearMutation = useClearRunReviewStatus(); + + const refresh = () => { + queryClient.invalidateQueries({ queryKey: getRunReviewStatusQueryKey(runId) }); + queryClient.invalidateQueries({ queryKey: getRunReviewStatusHistoryQueryKey(runId) }); + // Bust the listing cache too so the History page row reflects the + // change immediately when the user collapses the row. + queryClient.invalidateQueries({ queryKey: ["listValidationRuns"] }); + queryClient.invalidateQueries({ queryKey: ["/listValidationRuns"] }); + }; + + const handleSelect = (value: string) => { + if (!value || value === current?.status) return; + setMutation.mutate( + { runId, data: { status: value } }, + { + onSuccess: () => { + refresh(); + toast.success(`Marked as "${value}"`); + }, + onError: (err: unknown) => { + const axErr = err as AxiosError<{ detail?: string }>; + toast.error(axErr?.response?.data?.detail ?? "Failed to update review status"); + }, + }, + ); + }; + + const handleClear = () => { + clearMutation.mutate( + { runId }, + { + onSuccess: () => { + refresh(); + toast.success("Reverted to default"); + }, + onError: () => toast.error("Failed to revert review status"), + }, + ); + }; + + if (currentLoading || catalogueLoading || !current || !catalogue) { + return ( +
+ + Loading review status… +
+ ); + } + + const options = catalogue.statuses; + // The current status might point to a value that's no longer in the + // catalogue (e.g. admin renamed/removed it after the row was written). + // We still want to render the badge and keep the dropdown open with a + // disabled "orphan" hint so the operator can pick a replacement. + const matchingOption = options.find((o) => o.value === current.status); + const badgeColor = matchingOption?.color ?? "gray"; + const isOrphan = !matchingOption && !current.is_default; + + const busy = setMutation.isPending || clearMutation.isPending; + + return ( +
+
+ + + Review status + + + + + + + +
+ {isOrphan && ( +
+ Current value {current.status} is no + longer in the catalogue. Pick a replacement. +
+ )} + {options.map((opt) => ( + + ))} +
+
+
+ + {!current.is_default && ( + + )} + + {/* Inline "who & when" line — only meaningful for an explicit + value. The default is virtual so updated_by/updated_at are + null and showing them would be misleading. */} + {!current.is_default && current.updated_by && ( + + by {current.updated_by} + {current.updated_at && <> · {formatDateTime(current.updated_at)}} + + )} + + {current.is_default && ( + + (default for unreviewed runs) + + )} +
+ + + + {historyOpen && ( +
+ {historyLoading && ( +
+ + Loading history… +
+ )} + {!historyLoading && (history?.history.length ?? 0) === 0 && ( +

+ No explicit changes yet. The run is at the catalogue default. +

+ )} + {history?.history.map((entry, i) => ( +
+ + {formatDateTime(entry.changed_at)} + + · + {entry.changed_by} + changed status + {entry.previous_status ? ( + <> + from + + {entry.previous_status} + + + ) : null} + to + {entry.status} +
+ ))} +
+ )} + + {isOrphan && ( +

+ + This run carries a value not in the current catalogue. Choose a + replacement above to keep the audit trail clean. +

+ )} +
+ ); +} diff --git a/app/src/databricks_labs_dqx_app/ui/lib/api-custom.ts b/app/src/databricks_labs_dqx_app/ui/lib/api-custom.ts index 0130fe9d8..ebb4a23f5 100644 --- a/app/src/databricks_labs_dqx_app/ui/lib/api-custom.ts +++ b/app/src/databricks_labs_dqx_app/ui/lib/api-custom.ts @@ -974,3 +974,358 @@ export const useSaveRetentionSettings = < ...mutationOptions, }); }; + +// --------------------------------------------------------------------------- +// Embedded dashboard (Insights page). The dashboard ID can be set by an +// admin via the Configuration page; when unset, the backend falls back to +// the env-provided DQX_DEFAULT_DASHBOARD_ID (so the bundle can ship a +// starter dashboard). ``is_set`` distinguishes admin override from env +// default in the UI. +// --------------------------------------------------------------------------- + +export interface EmbeddedDashboardOut { + dashboard_id: string; + title: string | null; + workspace_host: string; + is_set: boolean; + is_default: boolean; +} + +export interface EmbeddedDashboardIn { + dashboard_id: string; + title?: string | null; +} + +export const getEmbeddedDashboard = ( + options?: AxiosRequestConfig, +): Promise> => + axios.default.get("/api/v1/config/embedded-dashboard", options); + +export const getEmbeddedDashboardQueryKey = () => ["embedded-dashboard"] as const; + +export const useEmbeddedDashboard = < + TData = Awaited>["data"], + TError = AxiosError, +>( + options?: { + query?: Partial>, TError, TData>>; + axios?: AxiosRequestConfig; + }, +): UseQueryResult => { + const { query: queryOptions, axios: axiosOptions } = options ?? {}; + return useQuery({ + queryKey: queryOptions?.queryKey ?? 
getEmbeddedDashboardQueryKey(), + queryFn: () => getEmbeddedDashboard(axiosOptions), + select: ((resp: Awaited>) => resp.data) as never, + staleTime: 60 * 1000, + ...queryOptions, + }) as UseQueryResult; +}; + +export const saveEmbeddedDashboard = ( + body: EmbeddedDashboardIn, + options?: AxiosRequestConfig, +): Promise> => + axios.default.put("/api/v1/config/embedded-dashboard", body, options); + +export const useSaveEmbeddedDashboard = < + TError = AxiosError, + TContext = unknown, +>( + options?: { + mutation?: UseMutationOptions< + Awaited>, + TError, + { data: EmbeddedDashboardIn }, + TContext + >; + axios?: AxiosRequestConfig; + }, +): UseMutationResult< + Awaited>, + TError, + { data: EmbeddedDashboardIn }, + TContext +> => { + const { mutation: mutationOptions, axios: axiosOptions } = options ?? {}; + return useMutation({ + mutationFn: ({ data }: { data: EmbeddedDashboardIn }) => saveEmbeddedDashboard(data, axiosOptions), + ...mutationOptions, + }); +}; + +export const deleteEmbeddedDashboard = ( + options?: AxiosRequestConfig, +): Promise> => + axios.default.delete("/api/v1/config/embedded-dashboard", options); + +export const useDeleteEmbeddedDashboard = < + TError = AxiosError, + TContext = unknown, +>( + options?: { + mutation?: UseMutationOptions< + Awaited>, + TError, + void, + TContext + >; + axios?: AxiosRequestConfig; + }, +): UseMutationResult< + Awaited>, + TError, + void, + TContext +> => { + const { mutation: mutationOptions, axios: axiosOptions } = options ?? {}; + return useMutation({ + mutationFn: () => deleteEmbeddedDashboard(axiosOptions), + ...mutationOptions, + }); +}; + +// --------------------------------------------------------------------------- +// Run review statuses — admin-managed catalogue of values surfaced as the +// per-run review dropdown on the Runs detail page and as a multi-select +// filter on the Runs History page. 
Exactly one entry is flagged +// ``is_default`` (backend invariant); that value is what the listing +// endpoint returns virtually for unreviewed runs. +// +// The ``color`` field carries a design-system token name (gray, amber, +// green, blue, red, purple, ...) that the UI maps to a tailwind palette +// so we can rebrand without touching backend data. +// --------------------------------------------------------------------------- + +export interface RunReviewStatusOption { + value: string; + description: string; + color: string; + is_default: boolean; +} + +export interface RunReviewStatusesOut { + statuses: RunReviewStatusOption[]; +} + +export interface RunReviewStatusesIn { + statuses: RunReviewStatusOption[]; +} + +export const getRunReviewStatuses = ( + options?: AxiosRequestConfig, +): Promise> => + axios.default.get("/api/v1/config/run-review-statuses", options); + +export const getRunReviewStatusesQueryKey = () => ["run-review-statuses"] as const; + +export const useRunReviewStatuses = < + TData = Awaited>["data"], + TError = AxiosError, +>( + options?: { + query?: Partial>, TError, TData>>; + axios?: AxiosRequestConfig; + }, +): UseQueryResult => { + const { query: queryOptions, axios: axiosOptions } = options ?? {}; + return useQuery({ + queryKey: queryOptions?.queryKey ?? getRunReviewStatusesQueryKey(), + queryFn: () => getRunReviewStatuses(axiosOptions), + select: ((resp: Awaited>) => resp.data) as never, + // The dropdown is rendered in three places (Config card, Runs + // detail, Runs History filter) — a 5-min stale window keeps the + // navigation fast. The save hook below does not invalidate this + // query key itself, so callers should invalidate it on success.
+ staleTime: 5 * 60 * 1000, + ...queryOptions, + }) as UseQueryResult; +}; + +export const saveRunReviewStatuses = ( + body: RunReviewStatusesIn, + options?: AxiosRequestConfig, +): Promise> => + axios.default.put("/api/v1/config/run-review-statuses", body, options); + +export const useSaveRunReviewStatuses = < + TError = AxiosError, + TContext = unknown, +>( + options?: { + mutation?: UseMutationOptions< + Awaited>, + TError, + { data: RunReviewStatusesIn }, + TContext + >; + axios?: AxiosRequestConfig; + }, +): UseMutationResult< + Awaited>, + TError, + { data: RunReviewStatusesIn }, + TContext +> => { + const { mutation: mutationOptions, axios: axiosOptions } = options ?? {}; + return useMutation({ + mutationFn: ({ data }: { data: RunReviewStatusesIn }) => saveRunReviewStatuses(data, axiosOptions), + ...mutationOptions, + }); +}; + +// --------------------------------------------------------------------------- +// Per-run review status — set/clear/get/history endpoints for the dropdown +// + audit list on the Runs detail page. ``is_default`` distinguishes the +// virtual catalogue default (no row in dq_run_review_status) from an +// explicit value — the UI uses it to render "(auto)" hints and skip +// meaningless updated_by/updated_at metadata. 
+// --------------------------------------------------------------------------- + +export interface RunReviewStatusOut { + run_id: string; + status: string; + updated_by: string | null; + updated_at: string | null; + is_default: boolean; +} + +export interface SetRunReviewStatusIn { + status: string; +} + +export interface RunReviewStatusHistoryEntry { + run_id: string; + status: string; + previous_status: string | null; + changed_by: string; + changed_at: string | null; +} + +export interface RunReviewStatusHistoryOut { + history: RunReviewStatusHistoryEntry[]; +} + +export const getRunReviewStatus = ( + runId: string, + options?: AxiosRequestConfig, +): Promise> => + axios.default.get(`/api/v1/runs/${encodeURIComponent(runId)}/review-status`, options); + +export const getRunReviewStatusQueryKey = (runId: string) => + ["run-review-status", runId] as const; + +export const useRunReviewStatus = < + TData = Awaited>["data"], + TError = AxiosError, +>( + runId: string, + options?: { + query?: Partial>, TError, TData>>; + axios?: AxiosRequestConfig; + }, +): UseQueryResult => { + const { query: queryOptions, axios: axiosOptions } = options ?? {}; + return useQuery({ + queryKey: queryOptions?.queryKey ?? 
getRunReviewStatusQueryKey(runId), + queryFn: () => getRunReviewStatus(runId, axiosOptions), + select: ((resp: Awaited>) => resp.data) as never, + enabled: Boolean(runId), + ...queryOptions, + }) as UseQueryResult; +}; + +export const setRunReviewStatus = ( + runId: string, + body: SetRunReviewStatusIn, + options?: AxiosRequestConfig, +): Promise> => + axios.default.put(`/api/v1/runs/${encodeURIComponent(runId)}/review-status`, body, options); + +export const useSetRunReviewStatus = < + TError = AxiosError, + TContext = unknown, +>( + options?: { + mutation?: UseMutationOptions< + Awaited>, + TError, + { runId: string; data: SetRunReviewStatusIn }, + TContext + >; + axios?: AxiosRequestConfig; + }, +): UseMutationResult< + Awaited>, + TError, + { runId: string; data: SetRunReviewStatusIn }, + TContext +> => { + const { mutation: mutationOptions, axios: axiosOptions } = options ?? {}; + return useMutation({ + mutationFn: ({ runId, data }: { runId: string; data: SetRunReviewStatusIn }) => + setRunReviewStatus(runId, data, axiosOptions), + ...mutationOptions, + }); +}; + +export const clearRunReviewStatus = ( + runId: string, + options?: AxiosRequestConfig, +): Promise> => + axios.default.delete(`/api/v1/runs/${encodeURIComponent(runId)}/review-status`, options); + +export const useClearRunReviewStatus = < + TError = AxiosError, + TContext = unknown, +>( + options?: { + mutation?: UseMutationOptions< + Awaited>, + TError, + { runId: string }, + TContext + >; + axios?: AxiosRequestConfig; + }, +): UseMutationResult< + Awaited>, + TError, + { runId: string }, + TContext +> => { + const { mutation: mutationOptions, axios: axiosOptions } = options ?? 
{}; + return useMutation({ + mutationFn: ({ runId }: { runId: string }) => clearRunReviewStatus(runId, axiosOptions), + ...mutationOptions, + }); +}; + +export const getRunReviewStatusHistory = ( + runId: string, + options?: AxiosRequestConfig, +): Promise> => + axios.default.get(`/api/v1/runs/${encodeURIComponent(runId)}/review-status/history`, options); + +export const getRunReviewStatusHistoryQueryKey = (runId: string) => + ["run-review-status-history", runId] as const; + +export const useRunReviewStatusHistory = < + TData = Awaited>["data"], + TError = AxiosError, +>( + runId: string, + options?: { + query?: Partial>, TError, TData>>; + axios?: AxiosRequestConfig; + }, +): UseQueryResult => { + const { query: queryOptions, axios: axiosOptions } = options ?? {}; + return useQuery({ + queryKey: queryOptions?.queryKey ?? getRunReviewStatusHistoryQueryKey(runId), + queryFn: () => getRunReviewStatusHistory(runId, axiosOptions), + select: ((resp: Awaited>) => resp.data) as never, + enabled: Boolean(runId), + ...queryOptions, + }) as UseQueryResult; +}; diff --git a/app/src/databricks_labs_dqx_app/ui/lib/api.ts b/app/src/databricks_labs_dqx_app/ui/lib/api.ts index ea29b040f..663c2667d 100644 --- a/app/src/databricks_labs_dqx_app/ui/lib/api.ts +++ b/app/src/databricks_labs_dqx_app/ui/lib/api.ts @@ -336,6 +336,37 @@ export interface DryRunSubmitOut { view_fqn: string; } +export type EmbeddedDashboardInTitle = string | null; + +/** + * Update payload — admins write the dashboard ID and optionally a display title. + */ +export interface EmbeddedDashboardIn { + dashboard_id: string; + title?: EmbeddedDashboardInTitle; +} + +/** + * Optional admin-provided display title. The UI falls back to a generic label when null. + */ +export type EmbeddedDashboardOutTitle = string | null; + +/** + * Current embedded-dashboard configuration + the bits the UI needs to render the iframe. + */ +export interface EmbeddedDashboardOut { + /** Effective dashboard ID. 
Empty string means 'nothing configured'. */ + dashboard_id?: string; + /** Optional admin-provided display title. The UI falls back to a generic label when null. */ + title?: EmbeddedDashboardOutTitle; + /** Workspace host (e.g. 'https://e2-...cloud.databricks.com') used to build the iframe URL. */ + workspace_host?: string; + /** True when the admin has saved an explicit setting (independent of the env default). */ + is_set?: boolean; + /** True when the response is serving the env-provided default rather than an admin override. */ + is_default?: boolean; +} + export type ExtraParamsResultColumnNames = { [key: string]: string }; export type ExtraParamsUserMetadata = { [key: string]: string }; @@ -742,6 +773,37 @@ export interface RetentionSettingsOut { quarantine_retention_days_set: boolean; } +export type ReviewStatusHistoryEntryOutPreviousStatus = string | null; + +export type ReviewStatusHistoryEntryOutChangedAt = string | null; + +export interface ReviewStatusHistoryEntryOut { + run_id: string; + status: string; + previous_status?: ReviewStatusHistoryEntryOutPreviousStatus; + changed_by: string; + changed_at?: ReviewStatusHistoryEntryOutChangedAt; +} + +export interface ReviewStatusHistoryOut { + history: ReviewStatusHistoryEntryOut[]; +} + +export type ReviewStatusOutUpdatedBy = string | null; + +export type ReviewStatusOutUpdatedAt = string | null; + +/** + * Current effective review status for a run. + */ +export interface ReviewStatusOut { + run_id: string; + status: string; + updated_by?: ReviewStatusOutUpdatedBy; + updated_at?: ReviewStatusOutUpdatedAt; + is_default?: boolean; +} + export type RoleMappingOutCreatedBy = string | null; export type RoleMappingOutCreatedAt = string | null; @@ -837,6 +899,24 @@ export interface RunConfigOut { config: RunConfig; } +/** + * One catalogue entry. ``is_default`` flags the value auto-surfaced for unreviewed runs. 
+ */ +export interface RunReviewStatusOption { + value: string; + description?: string; + color?: string; + is_default?: boolean; +} + +export interface RunReviewStatusesIn { + statuses: RunReviewStatusOption[]; +} + +export interface RunReviewStatusesOut { + statuses: RunReviewStatusOption[]; +} + export type RunStatusOutResultState = string | null; export type RunStatusOutMessage = string | null; @@ -926,6 +1006,10 @@ export interface SchemaOut { comment?: SchemaOutComment; } +export interface SetReviewStatusIn { + status: string; +} + /** * If provided, the update is rejected when the current version does not match (optimistic concurrency). */ @@ -1077,6 +1161,12 @@ export type ValidationRunSummaryOutErrorMessage = string | null; export type ValidationRunSummaryOutChecksItem = { [key: string]: unknown }; +export type ValidationRunSummaryOutReviewStatus = string | null; + +export type ValidationRunSummaryOutReviewStatusUpdatedBy = string | null; + +export type ValidationRunSummaryOutReviewStatusUpdatedAt = string | null; + export interface ValidationRunSummaryOut { run_id: string; source_table_fqn: string; @@ -1094,6 +1184,10 @@ export interface ValidationRunSummaryOut { created_at?: ValidationRunSummaryOutCreatedAt; error_message?: ValidationRunSummaryOutErrorMessage; checks?: ValidationRunSummaryOutChecksItem[]; + review_status?: ValidationRunSummaryOutReviewStatus; + review_status_is_default?: boolean; + review_status_updated_by?: ValidationRunSummaryOutReviewStatusUpdatedBy; + review_status_updated_at?: ValidationRunSummaryOutReviewStatusUpdatedAt; } export interface VersionOut { @@ -1297,6 +1391,13 @@ export type BackfillRuleIds200 = { [key: string]: number }; export type RejectRuleBody = SetStatusIn | null; +export type ListValidationRunsParams = { + /** + * Filter to runs whose effective review status matches one of the supplied values. Repeat the param for multi-select (e.g. ?review_status=Acknowledged&review_status=Resolved). 
Match is on the effective value, so passing the catalogue default also catches unreviewed runs. + */ + review_status?: string[] | null; +}; + export type GetDryRunStatusParams = { job_run_id?: number | null; view_fqn?: string | null; @@ -4319,61 +4420,73 @@ export const useSaveCustomMetrics = < }; /** - * List all schedule configurations. - * @summary List Schedules + * Return the current embedded-dashboard config. + +Available to any authenticated user — the Insights page is read-only +and the underlying dashboard enforces UC permissions on the data, +so we don't gate visibility here. + * @summary Get Embedded Dashboard */ -export const listSchedules = ( +export const getEmbeddedDashboard = ( options?: AxiosRequestConfig, -): Promise> => { - return axios.default.get(`/api/v1/schedules`, options); +): Promise> => { + return axios.default.get(`/api/v1/config/embedded-dashboard`, options); }; -export const getListSchedulesQueryKey = () => { - return [`/api/v1/schedules`] as const; +export const getGetEmbeddedDashboardQueryKey = () => { + return [`/api/v1/config/embedded-dashboard`] as const; }; -export const getListSchedulesQueryOptions = < - TData = Awaited>, - TError = AxiosError, +export const getGetEmbeddedDashboardQueryOptions = < + TData = Awaited>, + TError = AxiosError, >(options?: { query?: Partial< - UseQueryOptions>, TError, TData> + UseQueryOptions< + Awaited>, + TError, + TData + > >; axios?: AxiosRequestConfig; }) => { const { query: queryOptions, axios: axiosOptions } = options ?? {}; - const queryKey = queryOptions?.queryKey ?? getListSchedulesQueryKey(); + const queryKey = queryOptions?.queryKey ?? 
getGetEmbeddedDashboardQueryKey(); - const queryFn: QueryFunction>> = ({ - signal, - }) => listSchedules({ signal, ...axiosOptions }); + const queryFn: QueryFunction< + Awaited> + > = ({ signal }) => getEmbeddedDashboard({ signal, ...axiosOptions }); return { queryKey, queryFn, ...queryOptions } as UseQueryOptions< - Awaited>, + Awaited>, TError, TData > & { queryKey: DataTag }; }; -export type ListSchedulesQueryResult = NonNullable< - Awaited> +export type GetEmbeddedDashboardQueryResult = NonNullable< + Awaited> >; -export type ListSchedulesQueryError = AxiosError; +export type GetEmbeddedDashboardQueryError = AxiosError; -export function useListSchedules< - TData = Awaited>, - TError = AxiosError, +export function useGetEmbeddedDashboard< + TData = Awaited>, + TError = AxiosError, >( options: { query: Partial< - UseQueryOptions>, TError, TData> + UseQueryOptions< + Awaited>, + TError, + TData + > > & Pick< DefinedInitialDataOptions< - Awaited>, + Awaited>, TError, - Awaited> + Awaited> >, "initialData" >; @@ -4383,19 +4496,23 @@ export function useListSchedules< ): DefinedUseQueryResult & { queryKey: DataTag; }; -export function useListSchedules< - TData = Awaited>, - TError = AxiosError, +export function useGetEmbeddedDashboard< + TData = Awaited>, + TError = AxiosError, >( options?: { query?: Partial< - UseQueryOptions>, TError, TData> + UseQueryOptions< + Awaited>, + TError, + TData + > > & Pick< UndefinedInitialDataOptions< - Awaited>, + Awaited>, TError, - Awaited> + Awaited> >, "initialData" >; @@ -4405,13 +4522,17 @@ export function useListSchedules< ): UseQueryResult & { queryKey: DataTag; }; -export function useListSchedules< - TData = Awaited>, - TError = AxiosError, +export function useGetEmbeddedDashboard< + TData = Awaited>, + TError = AxiosError, >( options?: { query?: Partial< - UseQueryOptions>, TError, TData> + UseQueryOptions< + Awaited>, + TError, + TData + > >; axios?: AxiosRequestConfig; }, @@ -4420,16 +4541,20 @@ export function 
useListSchedules< queryKey: DataTag; }; /** - * @summary List Schedules + * @summary Get Embedded Dashboard */ -export function useListSchedules< - TData = Awaited>, - TError = AxiosError, +export function useGetEmbeddedDashboard< + TData = Awaited>, + TError = AxiosError, >( options?: { query?: Partial< - UseQueryOptions>, TError, TData> + UseQueryOptions< + Awaited>, + TError, + TData + > >; axios?: AxiosRequestConfig; }, @@ -4437,7 +4562,7 @@ export function useListSchedules< ): UseQueryResult & { queryKey: DataTag; } { - const queryOptions = getListSchedulesQueryOptions(options); + const queryOptions = getGetEmbeddedDashboardQueryOptions(options); const query = useQuery(queryOptions, queryClient) as UseQueryResult< TData, @@ -4449,13 +4574,13 @@ export function useListSchedules< return query; } -export const getListSchedulesSuspenseQueryOptions = < - TData = Awaited>, - TError = AxiosError, +export const getGetEmbeddedDashboardSuspenseQueryOptions = < + TData = Awaited>, + TError = AxiosError, >(options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -4464,32 +4589,32 @@ export const getListSchedulesSuspenseQueryOptions = < }) => { const { query: queryOptions, axios: axiosOptions } = options ?? {}; - const queryKey = queryOptions?.queryKey ?? getListSchedulesQueryKey(); + const queryKey = queryOptions?.queryKey ?? 
getGetEmbeddedDashboardQueryKey(); - const queryFn: QueryFunction>> = ({ - signal, - }) => listSchedules({ signal, ...axiosOptions }); + const queryFn: QueryFunction< + Awaited> + > = ({ signal }) => getEmbeddedDashboard({ signal, ...axiosOptions }); return { queryKey, queryFn, ...queryOptions } as UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > & { queryKey: DataTag }; }; -export type ListSchedulesSuspenseQueryResult = NonNullable< - Awaited> +export type GetEmbeddedDashboardSuspenseQueryResult = NonNullable< + Awaited> >; -export type ListSchedulesSuspenseQueryError = AxiosError; +export type GetEmbeddedDashboardSuspenseQueryError = AxiosError; -export function useListSchedulesSuspense< - TData = Awaited>, - TError = AxiosError, +export function useGetEmbeddedDashboardSuspense< + TData = Awaited>, + TError = AxiosError, >( options: { query: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -4500,14 +4625,14 @@ export function useListSchedulesSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; }; -export function useListSchedulesSuspense< - TData = Awaited>, - TError = AxiosError, +export function useGetEmbeddedDashboardSuspense< + TData = Awaited>, + TError = AxiosError, >( options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -4518,14 +4643,14 @@ export function useListSchedulesSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; }; -export function useListSchedulesSuspense< - TData = Awaited>, - TError = AxiosError, +export function useGetEmbeddedDashboardSuspense< + TData = Awaited>, + TError = AxiosError, >( options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -4537,17 +4662,17 @@ export function useListSchedulesSuspense< queryKey: DataTag; }; /** - * @summary List Schedules + * @summary Get Embedded Dashboard */ -export function useListSchedulesSuspense< - TData = Awaited>, - TError = AxiosError, +export function 
useGetEmbeddedDashboardSuspense< + TData = Awaited>, + TError = AxiosError, >( options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -4558,7 +4683,7 @@ export function useListSchedulesSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; } { - const queryOptions = getListSchedulesSuspenseQueryOptions(options); + const queryOptions = getGetEmbeddedDashboardSuspenseQueryOptions(options); const query = useSuspenseQuery( queryOptions, @@ -4573,34 +4698,38 @@ export function useListSchedulesSuspense< } /** - * Create or update a schedule configuration. - * @summary Save Schedule + * Save the embedded-dashboard configuration (admin only). + * @summary Save Embedded Dashboard */ -export const saveSchedule = ( - scheduleConfigIn: ScheduleConfigIn, +export const saveEmbeddedDashboard = ( + embeddedDashboardIn: EmbeddedDashboardIn, options?: AxiosRequestConfig, -): Promise> => { - return axios.default.post(`/api/v1/schedules`, scheduleConfigIn, options); +): Promise> => { + return axios.default.put( + `/api/v1/config/embedded-dashboard`, + embeddedDashboardIn, + options, + ); }; -export const getSaveScheduleMutationOptions = < +export const getSaveEmbeddedDashboardMutationOptions = < TError = AxiosError, TContext = unknown, >(options?: { mutation?: UseMutationOptions< - Awaited>, + Awaited>, TError, - { data: ScheduleConfigIn }, + { data: EmbeddedDashboardIn }, TContext >; axios?: AxiosRequestConfig; }): UseMutationOptions< - Awaited>, + Awaited>, TError, - { data: ScheduleConfigIn }, + { data: EmbeddedDashboardIn }, TContext > => { - const mutationKey = ["saveSchedule"]; + const mutationKey = ["saveEmbeddedDashboard"]; const { mutation: mutationOptions, axios: axiosOptions } = options ? 
options.mutation && "mutationKey" in options.mutation && @@ -4610,117 +4739,206 @@ export const getSaveScheduleMutationOptions = < : { mutation: { mutationKey }, axios: undefined }; const mutationFn: MutationFunction< - Awaited>, - { data: ScheduleConfigIn } + Awaited>, + { data: EmbeddedDashboardIn } > = (props) => { const { data } = props ?? {}; - return saveSchedule(data, axiosOptions); + return saveEmbeddedDashboard(data, axiosOptions); }; return { mutationFn, ...mutationOptions }; }; -export type SaveScheduleMutationResult = NonNullable< - Awaited> +export type SaveEmbeddedDashboardMutationResult = NonNullable< + Awaited> >; -export type SaveScheduleMutationBody = ScheduleConfigIn; -export type SaveScheduleMutationError = AxiosError; +export type SaveEmbeddedDashboardMutationBody = EmbeddedDashboardIn; +export type SaveEmbeddedDashboardMutationError = + AxiosError; /** - * @summary Save Schedule + * @summary Save Embedded Dashboard */ -export const useSaveSchedule = < +export const useSaveEmbeddedDashboard = < TError = AxiosError, TContext = unknown, >( options?: { mutation?: UseMutationOptions< - Awaited>, + Awaited>, TError, - { data: ScheduleConfigIn }, + { data: EmbeddedDashboardIn }, TContext >; axios?: AxiosRequestConfig; }, queryClient?: QueryClient, ): UseMutationResult< - Awaited>, + Awaited>, TError, - { data: ScheduleConfigIn }, + { data: EmbeddedDashboardIn }, TContext > => { - const mutationOptions = getSaveScheduleMutationOptions(options); + const mutationOptions = getSaveEmbeddedDashboardMutationOptions(options); return useMutation(mutationOptions, queryClient); }; /** - * Get a single schedule configuration by name. - * @summary Get Schedule + * Clear the admin override (admin only). + +The env-provided default — if any — takes over again. Useful when +the bundle ships a starter dashboard and the admin wants to revert +to it after a botched custom ID. 
+ * @summary Delete Embedded Dashboard */ -export const getSchedule = ( - name: string, +export const deleteEmbeddedDashboard = ( options?: AxiosRequestConfig, -): Promise> => { - return axios.default.get(`/api/v1/schedules/${name}`, options); +): Promise> => { + return axios.default.delete(`/api/v1/config/embedded-dashboard`, options); }; -export const getGetScheduleQueryKey = (name?: string) => { - return [`/api/v1/schedules/${name}`] as const; +export const getDeleteEmbeddedDashboardMutationOptions = < + TError = AxiosError, + TContext = unknown, +>(options?: { + mutation?: UseMutationOptions< + Awaited>, + TError, + void, + TContext + >; + axios?: AxiosRequestConfig; +}): UseMutationOptions< + Awaited>, + TError, + void, + TContext +> => { + const mutationKey = ["deleteEmbeddedDashboard"]; + const { mutation: mutationOptions, axios: axiosOptions } = options + ? options.mutation && + "mutationKey" in options.mutation && + options.mutation.mutationKey + ? options + : { ...options, mutation: { ...options.mutation, mutationKey } } + : { mutation: { mutationKey }, axios: undefined }; + + const mutationFn: MutationFunction< + Awaited>, + void + > = () => { + return deleteEmbeddedDashboard(axiosOptions); + }; + + return { mutationFn, ...mutationOptions }; }; -export const getGetScheduleQueryOptions = < - TData = Awaited>, +export type DeleteEmbeddedDashboardMutationResult = NonNullable< + Awaited> +>; + +export type DeleteEmbeddedDashboardMutationError = + AxiosError; + +/** + * @summary Delete Embedded Dashboard + */ +export const useDeleteEmbeddedDashboard = < TError = AxiosError, + TContext = unknown, >( - name: string, options?: { - query?: Partial< - UseQueryOptions>, TError, TData> + mutation?: UseMutationOptions< + Awaited>, + TError, + void, + TContext >; axios?: AxiosRequestConfig; }, -) => { - const { query: queryOptions, axios: axiosOptions } = options ?? 
{}; + queryClient?: QueryClient, +): UseMutationResult< + Awaited>, + TError, + void, + TContext +> => { + const mutationOptions = getDeleteEmbeddedDashboardMutationOptions(options); - const queryKey = queryOptions?.queryKey ?? getGetScheduleQueryKey(name); + return useMutation(mutationOptions, queryClient); +}; - const queryFn: QueryFunction>> = ({ - signal, - }) => getSchedule(name, { signal, ...axiosOptions }); +/** + * Return the admin-managed list of run review status values. - return { - queryKey, - queryFn, - enabled: !!name, - ...queryOptions, - } as UseQueryOptions< - Awaited>, - TError, +Visible to any authenticated user — both the Runs detail dropdown +and the Runs History filter need this list, and neither is +admin-gated. The list is seeded on first read so the UI never +sees an empty dropdown on a fresh deploy. + * @summary Get Run Review Statuses + */ +export const getRunReviewStatuses = ( + options?: AxiosRequestConfig, +): Promise> => { + return axios.default.get(`/api/v1/config/run-review-statuses`, options); +}; + +export const getGetRunReviewStatusesQueryKey = () => { + return [`/api/v1/config/run-review-statuses`] as const; +}; + +export const getGetRunReviewStatusesQueryOptions = < + TData = Awaited>, + TError = AxiosError, +>(options?: { + query?: Partial< + UseQueryOptions< + Awaited>, + TError, + TData + > + >; + axios?: AxiosRequestConfig; +}) => { + const { query: queryOptions, axios: axiosOptions } = options ?? {}; + + const queryKey = queryOptions?.queryKey ?? 
getGetRunReviewStatusesQueryKey(); + + const queryFn: QueryFunction< + Awaited> + > = ({ signal }) => getRunReviewStatuses({ signal, ...axiosOptions }); + + return { queryKey, queryFn, ...queryOptions } as UseQueryOptions< + Awaited>, + TError, TData > & { queryKey: DataTag }; }; -export type GetScheduleQueryResult = NonNullable< - Awaited> +export type GetRunReviewStatusesQueryResult = NonNullable< + Awaited> >; -export type GetScheduleQueryError = AxiosError; +export type GetRunReviewStatusesQueryError = AxiosError; -export function useGetSchedule< - TData = Awaited>, - TError = AxiosError, +export function useGetRunReviewStatuses< + TData = Awaited>, + TError = AxiosError, >( - name: string, options: { query: Partial< - UseQueryOptions>, TError, TData> + UseQueryOptions< + Awaited>, + TError, + TData + > > & Pick< DefinedInitialDataOptions< - Awaited>, + Awaited>, TError, - Awaited> + Awaited> >, "initialData" >; @@ -4730,20 +4948,23 @@ export function useGetSchedule< ): DefinedUseQueryResult & { queryKey: DataTag; }; -export function useGetSchedule< - TData = Awaited>, - TError = AxiosError, +export function useGetRunReviewStatuses< + TData = Awaited>, + TError = AxiosError, >( - name: string, options?: { query?: Partial< - UseQueryOptions>, TError, TData> + UseQueryOptions< + Awaited>, + TError, + TData + > > & Pick< UndefinedInitialDataOptions< - Awaited>, + Awaited>, TError, - Awaited> + Awaited> >, "initialData" >; @@ -4753,14 +4974,17 @@ export function useGetSchedule< ): UseQueryResult & { queryKey: DataTag; }; -export function useGetSchedule< - TData = Awaited>, - TError = AxiosError, +export function useGetRunReviewStatuses< + TData = Awaited>, + TError = AxiosError, >( - name: string, options?: { query?: Partial< - UseQueryOptions>, TError, TData> + UseQueryOptions< + Awaited>, + TError, + TData + > >; axios?: AxiosRequestConfig; }, @@ -4769,17 +4993,20 @@ export function useGetSchedule< queryKey: DataTag; }; /** - * @summary Get Schedule + * @summary 
Get Run Review Statuses */ -export function useGetSchedule< - TData = Awaited>, - TError = AxiosError, +export function useGetRunReviewStatuses< + TData = Awaited>, + TError = AxiosError, >( - name: string, options?: { query?: Partial< - UseQueryOptions>, TError, TData> + UseQueryOptions< + Awaited>, + TError, + TData + > >; axios?: AxiosRequestConfig; }, @@ -4787,7 +5014,7 @@ export function useGetSchedule< ): UseQueryResult & { queryKey: DataTag; } { - const queryOptions = getGetScheduleQueryOptions(name, options); + const queryOptions = getGetRunReviewStatusesQueryOptions(options); const query = useQuery(queryOptions, queryClient) as UseQueryResult< TData, @@ -4799,51 +5026,47 @@ export function useGetSchedule< return query; } -export const getGetScheduleSuspenseQueryOptions = < - TData = Awaited>, - TError = AxiosError, ->( - name: string, - options?: { - query?: Partial< - UseSuspenseQueryOptions< - Awaited>, - TError, - TData - > - >; - axios?: AxiosRequestConfig; - }, -) => { +export const getGetRunReviewStatusesSuspenseQueryOptions = < + TData = Awaited>, + TError = AxiosError, +>(options?: { + query?: Partial< + UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > + >; + axios?: AxiosRequestConfig; +}) => { const { query: queryOptions, axios: axiosOptions } = options ?? {}; - const queryKey = queryOptions?.queryKey ?? getGetScheduleQueryKey(name); + const queryKey = queryOptions?.queryKey ?? 
getGetRunReviewStatusesQueryKey(); - const queryFn: QueryFunction>> = ({ - signal, - }) => getSchedule(name, { signal, ...axiosOptions }); + const queryFn: QueryFunction< + Awaited> + > = ({ signal }) => getRunReviewStatuses({ signal, ...axiosOptions }); return { queryKey, queryFn, ...queryOptions } as UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > & { queryKey: DataTag }; }; -export type GetScheduleSuspenseQueryResult = NonNullable< - Awaited> +export type GetRunReviewStatusesSuspenseQueryResult = NonNullable< + Awaited> >; -export type GetScheduleSuspenseQueryError = AxiosError; +export type GetRunReviewStatusesSuspenseQueryError = AxiosError; -export function useGetScheduleSuspense< - TData = Awaited>, - TError = AxiosError, +export function useGetRunReviewStatusesSuspense< + TData = Awaited>, + TError = AxiosError, >( - name: string, options: { query: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -4854,15 +5077,14 @@ export function useGetScheduleSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; }; -export function useGetScheduleSuspense< - TData = Awaited>, - TError = AxiosError, +export function useGetRunReviewStatusesSuspense< + TData = Awaited>, + TError = AxiosError, >( - name: string, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -4873,15 +5095,14 @@ export function useGetScheduleSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; }; -export function useGetScheduleSuspense< - TData = Awaited>, - TError = AxiosError, +export function useGetRunReviewStatusesSuspense< + TData = Awaited>, + TError = AxiosError, >( - name: string, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -4893,18 +5114,17 @@ export function useGetScheduleSuspense< queryKey: DataTag; }; /** - * @summary Get Schedule + * @summary Get Run Review Statuses */ -export function useGetScheduleSuspense< - TData = Awaited>, - TError = 
AxiosError, +export function useGetRunReviewStatusesSuspense< + TData = Awaited>, + TError = AxiosError, >( - name: string, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -4915,7 +5135,7 @@ export function useGetScheduleSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; } { - const queryOptions = getGetScheduleSuspenseQueryOptions(name, options); + const queryOptions = getGetRunReviewStatusesSuspenseQueryOptions(options); const query = useSuspenseQuery( queryOptions, @@ -4930,34 +5150,52 @@ export function useGetScheduleSuspense< } /** - * Delete a schedule configuration by name. - * @summary Delete Schedule + * Replace the full catalogue (admin only). + +Each value is validated against ``_REVIEW_STATUS_VALUE_RE`` (printable +short strings, no quotes/control chars), descriptions are trimmed, +duplicates are rejected, and exactly one ``is_default`` is enforced +by :meth:`AppSettingsService.save_run_review_statuses`. + +NOTE: renaming an existing value does *not* update historical +references in ``dq_run_review_status`` / ``dq_run_review_status_history``; +those rows keep the original string so the audit trail stays +accurate. The UI surfaces orphaned historical values as-is. If a +workspace operator wants to retire a value cleanly they should +either keep it in the list (without ``is_default``) until the +affected runs age out, or do a one-off UPDATE through the SQL +warehouse. 
+ * @summary Save Run Review Statuses */ -export const deleteSchedule = ( - name: string, +export const saveRunReviewStatuses = ( + runReviewStatusesIn: RunReviewStatusesIn, options?: AxiosRequestConfig, -): Promise> => { - return axios.default.delete(`/api/v1/schedules/${name}`, options); +): Promise> => { + return axios.default.put( + `/api/v1/config/run-review-statuses`, + runReviewStatusesIn, + options, + ); }; -export const getDeleteScheduleMutationOptions = < +export const getSaveRunReviewStatusesMutationOptions = < TError = AxiosError, TContext = unknown, >(options?: { mutation?: UseMutationOptions< - Awaited>, + Awaited>, TError, - { name: string }, + { data: RunReviewStatusesIn }, TContext >; axios?: AxiosRequestConfig; }): UseMutationOptions< - Awaited>, + Awaited>, TError, - { name: string }, + { data: RunReviewStatusesIn }, TContext > => { - const mutationKey = ["deleteSchedule"]; + const mutationKey = ["saveRunReviewStatuses"]; const { mutation: mutationOptions, axios: axiosOptions } = options ? options.mutation && "mutationKey" in options.mutation && @@ -4967,126 +5205,108 @@ export const getDeleteScheduleMutationOptions = < : { mutation: { mutationKey }, axios: undefined }; const mutationFn: MutationFunction< - Awaited>, - { name: string } + Awaited>, + { data: RunReviewStatusesIn } > = (props) => { - const { name } = props ?? {}; + const { data } = props ?? 
{}; - return deleteSchedule(name, axiosOptions); + return saveRunReviewStatuses(data, axiosOptions); }; return { mutationFn, ...mutationOptions }; }; -export type DeleteScheduleMutationResult = NonNullable< - Awaited> +export type SaveRunReviewStatusesMutationResult = NonNullable< + Awaited> >; - -export type DeleteScheduleMutationError = AxiosError; +export type SaveRunReviewStatusesMutationBody = RunReviewStatusesIn; +export type SaveRunReviewStatusesMutationError = + AxiosError; /** - * @summary Delete Schedule + * @summary Save Run Review Statuses */ -export const useDeleteSchedule = < +export const useSaveRunReviewStatuses = < TError = AxiosError, TContext = unknown, >( options?: { mutation?: UseMutationOptions< - Awaited>, + Awaited>, TError, - { name: string }, + { data: RunReviewStatusesIn }, TContext >; axios?: AxiosRequestConfig; }, queryClient?: QueryClient, ): UseMutationResult< - Awaited>, + Awaited>, TError, - { name: string }, + { data: RunReviewStatusesIn }, TContext > => { - const mutationOptions = getDeleteScheduleMutationOptions(options); + const mutationOptions = getSaveRunReviewStatusesMutationOptions(options); return useMutation(mutationOptions, queryClient); }; /** - * Get the change history for a schedule configuration. - * @summary Get Schedule History + * List all schedule configurations. 
+ * @summary List Schedules */ -export const getScheduleHistory = ( - name: string, +export const listSchedules = ( options?: AxiosRequestConfig, -): Promise> => { - return axios.default.get(`/api/v1/schedules/${name}/history`, options); +): Promise> => { + return axios.default.get(`/api/v1/schedules`, options); }; -export const getGetScheduleHistoryQueryKey = (name?: string) => { - return [`/api/v1/schedules/${name}/history`] as const; +export const getListSchedulesQueryKey = () => { + return [`/api/v1/schedules`] as const; }; -export const getGetScheduleHistoryQueryOptions = < - TData = Awaited>, +export const getListSchedulesQueryOptions = < + TData = Awaited>, TError = AxiosError, ->( - name: string, - options?: { - query?: Partial< - UseQueryOptions< - Awaited>, - TError, - TData - > - >; - axios?: AxiosRequestConfig; - }, -) => { +>(options?: { + query?: Partial< + UseQueryOptions>, TError, TData> + >; + axios?: AxiosRequestConfig; +}) => { const { query: queryOptions, axios: axiosOptions } = options ?? {}; - const queryKey = - queryOptions?.queryKey ?? getGetScheduleHistoryQueryKey(name); + const queryKey = queryOptions?.queryKey ?? 
getListSchedulesQueryKey(); - const queryFn: QueryFunction< - Awaited> - > = ({ signal }) => getScheduleHistory(name, { signal, ...axiosOptions }); + const queryFn: QueryFunction>> = ({ + signal, + }) => listSchedules({ signal, ...axiosOptions }); - return { - queryKey, - queryFn, - enabled: !!name, - ...queryOptions, - } as UseQueryOptions< - Awaited>, + return { queryKey, queryFn, ...queryOptions } as UseQueryOptions< + Awaited>, TError, TData > & { queryKey: DataTag }; }; -export type GetScheduleHistoryQueryResult = NonNullable< - Awaited> +export type ListSchedulesQueryResult = NonNullable< + Awaited> >; -export type GetScheduleHistoryQueryError = AxiosError; +export type ListSchedulesQueryError = AxiosError; -export function useGetScheduleHistory< - TData = Awaited>, +export function useListSchedules< + TData = Awaited>, TError = AxiosError, >( - name: string, options: { query: Partial< - UseQueryOptions< - Awaited>, - TError, - TData - > + UseQueryOptions>, TError, TData> > & Pick< DefinedInitialDataOptions< - Awaited>, + Awaited>, TError, - Awaited> + Awaited> >, "initialData" >; @@ -5096,24 +5316,19 @@ export function useGetScheduleHistory< ): DefinedUseQueryResult & { queryKey: DataTag; }; -export function useGetScheduleHistory< - TData = Awaited>, +export function useListSchedules< + TData = Awaited>, TError = AxiosError, >( - name: string, options?: { query?: Partial< - UseQueryOptions< - Awaited>, - TError, - TData - > + UseQueryOptions>, TError, TData> > & Pick< UndefinedInitialDataOptions< - Awaited>, + Awaited>, TError, - Awaited> + Awaited> >, "initialData" >; @@ -5123,18 +5338,13 @@ export function useGetScheduleHistory< ): UseQueryResult & { queryKey: DataTag; }; -export function useGetScheduleHistory< - TData = Awaited>, +export function useListSchedules< + TData = Awaited>, TError = AxiosError, >( - name: string, options?: { query?: Partial< - UseQueryOptions< - Awaited>, - TError, - TData - > + UseQueryOptions>, TError, TData> >; axios?: 
AxiosRequestConfig; }, @@ -5143,21 +5353,16 @@ export function useGetScheduleHistory< queryKey: DataTag; }; /** - * @summary Get Schedule History + * @summary List Schedules */ -export function useGetScheduleHistory< - TData = Awaited>, +export function useListSchedules< + TData = Awaited>, TError = AxiosError, >( - name: string, options?: { query?: Partial< - UseQueryOptions< - Awaited>, - TError, - TData - > + UseQueryOptions>, TError, TData> >; axios?: AxiosRequestConfig; }, @@ -5165,7 +5370,7 @@ export function useGetScheduleHistory< ): UseQueryResult & { queryKey: DataTag; } { - const queryOptions = getGetScheduleHistoryQueryOptions(name, options); + const queryOptions = getListSchedulesQueryOptions(options); const query = useQuery(queryOptions, queryClient) as UseQueryResult< TData, @@ -5177,53 +5382,47 @@ export function useGetScheduleHistory< return query; } -export const getGetScheduleHistorySuspenseQueryOptions = < - TData = Awaited>, +export const getListSchedulesSuspenseQueryOptions = < + TData = Awaited>, TError = AxiosError, ->( - name: string, - options?: { - query?: Partial< - UseSuspenseQueryOptions< - Awaited>, - TError, - TData - > - >; - axios?: AxiosRequestConfig; - }, -) => { +>(options?: { + query?: Partial< + UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > + >; + axios?: AxiosRequestConfig; +}) => { const { query: queryOptions, axios: axiosOptions } = options ?? {}; - const queryKey = - queryOptions?.queryKey ?? getGetScheduleHistoryQueryKey(name); + const queryKey = queryOptions?.queryKey ?? 
getListSchedulesQueryKey(); - const queryFn: QueryFunction< - Awaited> - > = ({ signal }) => getScheduleHistory(name, { signal, ...axiosOptions }); + const queryFn: QueryFunction>> = ({ + signal, + }) => listSchedules({ signal, ...axiosOptions }); return { queryKey, queryFn, ...queryOptions } as UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > & { queryKey: DataTag }; }; -export type GetScheduleHistorySuspenseQueryResult = NonNullable< - Awaited> +export type ListSchedulesSuspenseQueryResult = NonNullable< + Awaited> >; -export type GetScheduleHistorySuspenseQueryError = - AxiosError; +export type ListSchedulesSuspenseQueryError = AxiosError; -export function useGetScheduleHistorySuspense< - TData = Awaited>, +export function useListSchedulesSuspense< + TData = Awaited>, TError = AxiosError, >( - name: string, options: { query: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -5234,15 +5433,14 @@ export function useGetScheduleHistorySuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; }; -export function useGetScheduleHistorySuspense< - TData = Awaited>, +export function useListSchedulesSuspense< + TData = Awaited>, TError = AxiosError, >( - name: string, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -5253,15 +5451,14 @@ export function useGetScheduleHistorySuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; }; -export function useGetScheduleHistorySuspense< - TData = Awaited>, +export function useListSchedulesSuspense< + TData = Awaited>, TError = AxiosError, >( - name: string, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -5273,18 +5470,17 @@ export function useGetScheduleHistorySuspense< queryKey: DataTag; }; /** - * @summary Get Schedule History + * @summary List Schedules */ -export function useGetScheduleHistorySuspense< - TData = Awaited>, +export function useListSchedulesSuspense< + TData = Awaited>, 
TError = AxiosError, >( - name: string, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -5295,7 +5491,7 @@ export function useGetScheduleHistorySuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; } { - const queryOptions = getGetScheduleHistorySuspenseQueryOptions(name, options); + const queryOptions = getListSchedulesSuspenseQueryOptions(options); const query = useSuspenseQuery( queryOptions, @@ -5310,111 +5506,194 @@ export function useGetScheduleHistorySuspense< } /** - * List all role-to-group mappings (Admin only). - * @summary List Role Mappings + * Create or update a schedule configuration. + * @summary Save Schedule */ -export const listRoleMappings = ( +export const saveSchedule = ( + scheduleConfigIn: ScheduleConfigIn, options?: AxiosRequestConfig, -): Promise> => { - return axios.default.get(`/api/v1/roles`, options); -}; - -export const getListRoleMappingsQueryKey = () => { - return [`/api/v1/roles`] as const; +): Promise> => { + return axios.default.post(`/api/v1/schedules`, scheduleConfigIn, options); }; -export const getListRoleMappingsQueryOptions = < - TData = Awaited>, +export const getSaveScheduleMutationOptions = < TError = AxiosError, + TContext = unknown, >(options?: { - query?: Partial< - UseQueryOptions>, TError, TData> + mutation?: UseMutationOptions< + Awaited>, + TError, + { data: ScheduleConfigIn }, + TContext >; axios?: AxiosRequestConfig; -}) => { - const { query: queryOptions, axios: axiosOptions } = options ?? {}; +}): UseMutationOptions< + Awaited>, + TError, + { data: ScheduleConfigIn }, + TContext +> => { + const mutationKey = ["saveSchedule"]; + const { mutation: mutationOptions, axios: axiosOptions } = options + ? options.mutation && + "mutationKey" in options.mutation && + options.mutation.mutationKey + ? 
options + : { ...options, mutation: { ...options.mutation, mutationKey } } + : { mutation: { mutationKey }, axios: undefined }; - const queryKey = queryOptions?.queryKey ?? getListRoleMappingsQueryKey(); + const mutationFn: MutationFunction< + Awaited>, + { data: ScheduleConfigIn } + > = (props) => { + const { data } = props ?? {}; - const queryFn: QueryFunction< - Awaited> - > = ({ signal }) => listRoleMappings({ signal, ...axiosOptions }); + return saveSchedule(data, axiosOptions); + }; - return { queryKey, queryFn, ...queryOptions } as UseQueryOptions< - Awaited>, - TError, - TData - > & { queryKey: DataTag }; + return { mutationFn, ...mutationOptions }; }; -export type ListRoleMappingsQueryResult = NonNullable< - Awaited> +export type SaveScheduleMutationResult = NonNullable< + Awaited> >; -export type ListRoleMappingsQueryError = AxiosError; +export type SaveScheduleMutationBody = ScheduleConfigIn; +export type SaveScheduleMutationError = AxiosError; -export function useListRoleMappings< - TData = Awaited>, +/** + * @summary Save Schedule + */ +export const useSaveSchedule = < TError = AxiosError, + TContext = unknown, >( - options: { - query: Partial< - UseQueryOptions< - Awaited>, - TError, - TData - > - > & - Pick< - DefinedInitialDataOptions< - Awaited>, - TError, - Awaited> - >, - "initialData" - >; + options?: { + mutation?: UseMutationOptions< + Awaited>, + TError, + { data: ScheduleConfigIn }, + TContext + >; axios?: AxiosRequestConfig; }, queryClient?: QueryClient, -): DefinedUseQueryResult & { - queryKey: DataTag; +): UseMutationResult< + Awaited>, + TError, + { data: ScheduleConfigIn }, + TContext +> => { + const mutationOptions = getSaveScheduleMutationOptions(options); + + return useMutation(mutationOptions, queryClient); }; -export function useListRoleMappings< - TData = Awaited>, + +/** + * Get a single schedule configuration by name. 
+ * @summary Get Schedule + */ +export const getSchedule = ( + name: string, + options?: AxiosRequestConfig, +): Promise> => { + return axios.default.get(`/api/v1/schedules/${name}`, options); +}; + +export const getGetScheduleQueryKey = (name?: string) => { + return [`/api/v1/schedules/${name}`] as const; +}; + +export const getGetScheduleQueryOptions = < + TData = Awaited>, TError = AxiosError, >( + name: string, options?: { query?: Partial< - UseQueryOptions< - Awaited>, - TError, - TData - > - > & - Pick< - UndefinedInitialDataOptions< - Awaited>, - TError, - Awaited> - >, - "initialData" - >; + UseQueryOptions>, TError, TData> + >; axios?: AxiosRequestConfig; }, - queryClient?: QueryClient, -): UseQueryResult & { - queryKey: DataTag; -}; -export function useListRoleMappings< - TData = Awaited>, - TError = AxiosError, ->( +) => { + const { query: queryOptions, axios: axiosOptions } = options ?? {}; + + const queryKey = queryOptions?.queryKey ?? getGetScheduleQueryKey(name); + + const queryFn: QueryFunction>> = ({ + signal, + }) => getSchedule(name, { signal, ...axiosOptions }); + + return { + queryKey, + queryFn, + enabled: !!name, + ...queryOptions, + } as UseQueryOptions< + Awaited>, + TError, + TData + > & { queryKey: DataTag }; +}; + +export type GetScheduleQueryResult = NonNullable< + Awaited> +>; +export type GetScheduleQueryError = AxiosError; + +export function useGetSchedule< + TData = Awaited>, + TError = AxiosError, +>( + name: string, + options: { + query: Partial< + UseQueryOptions>, TError, TData> + > & + Pick< + DefinedInitialDataOptions< + Awaited>, + TError, + Awaited> + >, + "initialData" + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): DefinedUseQueryResult & { + queryKey: DataTag; +}; +export function useGetSchedule< + TData = Awaited>, + TError = AxiosError, +>( + name: string, options?: { query?: Partial< - UseQueryOptions< - Awaited>, - TError, - TData - > + UseQueryOptions>, TError, TData> + > & + Pick< + 
UndefinedInitialDataOptions< + Awaited>, + TError, + Awaited> + >, + "initialData" + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): UseQueryResult & { + queryKey: DataTag; +}; +export function useGetSchedule< + TData = Awaited>, + TError = AxiosError, +>( + name: string, + options?: { + query?: Partial< + UseQueryOptions>, TError, TData> >; axios?: AxiosRequestConfig; }, @@ -5423,20 +5702,17 @@ export function useListRoleMappings< queryKey: DataTag; }; /** - * @summary List Role Mappings + * @summary Get Schedule */ -export function useListRoleMappings< - TData = Awaited>, +export function useGetSchedule< + TData = Awaited>, TError = AxiosError, >( + name: string, options?: { query?: Partial< - UseQueryOptions< - Awaited>, - TError, - TData - > + UseQueryOptions>, TError, TData> >; axios?: AxiosRequestConfig; }, @@ -5444,7 +5720,7 @@ export function useListRoleMappings< ): UseQueryResult & { queryKey: DataTag; } { - const queryOptions = getListRoleMappingsQueryOptions(options); + const queryOptions = getGetScheduleQueryOptions(name, options); const query = useQuery(queryOptions, queryClient) as UseQueryResult< TData, @@ -5456,48 +5732,51 @@ export function useListRoleMappings< return query; } -export const getListRoleMappingsSuspenseQueryOptions = < - TData = Awaited>, +export const getGetScheduleSuspenseQueryOptions = < + TData = Awaited>, TError = AxiosError, ->(options?: { - query?: Partial< - UseSuspenseQueryOptions< - Awaited>, - TError, - TData - > - >; - axios?: AxiosRequestConfig; -}) => { +>( + name: string, + options?: { + query?: Partial< + UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > + >; + axios?: AxiosRequestConfig; + }, +) => { const { query: queryOptions, axios: axiosOptions } = options ?? {}; - const queryKey = queryOptions?.queryKey ?? getListRoleMappingsQueryKey(); + const queryKey = queryOptions?.queryKey ?? 
getGetScheduleQueryKey(name); - const queryFn: QueryFunction< - Awaited> - > = ({ signal }) => listRoleMappings({ signal, ...axiosOptions }); + const queryFn: QueryFunction>> = ({ + signal, + }) => getSchedule(name, { signal, ...axiosOptions }); return { queryKey, queryFn, ...queryOptions } as UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > & { queryKey: DataTag }; }; -export type ListRoleMappingsSuspenseQueryResult = NonNullable< - Awaited> +export type GetScheduleSuspenseQueryResult = NonNullable< + Awaited> >; -export type ListRoleMappingsSuspenseQueryError = - AxiosError; +export type GetScheduleSuspenseQueryError = AxiosError; -export function useListRoleMappingsSuspense< - TData = Awaited>, +export function useGetScheduleSuspense< + TData = Awaited>, TError = AxiosError, >( + name: string, options: { query: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -5508,14 +5787,15 @@ export function useListRoleMappingsSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; }; -export function useListRoleMappingsSuspense< - TData = Awaited>, +export function useGetScheduleSuspense< + TData = Awaited>, TError = AxiosError, >( + name: string, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -5526,14 +5806,15 @@ export function useListRoleMappingsSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; }; -export function useListRoleMappingsSuspense< - TData = Awaited>, +export function useGetScheduleSuspense< + TData = Awaited>, TError = AxiosError, >( + name: string, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -5545,17 +5826,18 @@ export function useListRoleMappingsSuspense< queryKey: DataTag; }; /** - * @summary List Role Mappings + * @summary Get Schedule */ -export function useListRoleMappingsSuspense< - TData = Awaited>, +export function useGetScheduleSuspense< + TData = Awaited>, TError = AxiosError, >( + name: 
string, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -5566,7 +5848,7 @@ export function useListRoleMappingsSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; } { - const queryOptions = getListRoleMappingsSuspenseQueryOptions(options); + const queryOptions = getGetScheduleSuspenseQueryOptions(name, options); const query = useSuspenseQuery( queryOptions, @@ -5581,34 +5863,34 @@ export function useListRoleMappingsSuspense< } /** - * Create or update a role-to-group mapping (Admin only). - * @summary Create Role Mapping + * Delete a schedule configuration by name. + * @summary Delete Schedule */ -export const createRoleMapping = ( - createRoleMappingIn: CreateRoleMappingIn, +export const deleteSchedule = ( + name: string, options?: AxiosRequestConfig, -): Promise> => { - return axios.default.post(`/api/v1/roles`, createRoleMappingIn, options); +): Promise> => { + return axios.default.delete(`/api/v1/schedules/${name}`, options); }; -export const getCreateRoleMappingMutationOptions = < +export const getDeleteScheduleMutationOptions = < TError = AxiosError, TContext = unknown, >(options?: { mutation?: UseMutationOptions< - Awaited>, + Awaited>, TError, - { data: CreateRoleMappingIn }, + { name: string }, TContext >; axios?: AxiosRequestConfig; }): UseMutationOptions< - Awaited>, + Awaited>, TError, - { data: CreateRoleMappingIn }, + { name: string }, TContext > => { - const mutationKey = ["createRoleMapping"]; + const mutationKey = ["deleteSchedule"]; const { mutation: mutationOptions, axios: axiosOptions } = options ? options.mutation && "mutationKey" in options.mutation && @@ -5618,181 +5900,75 @@ export const getCreateRoleMappingMutationOptions = < : { mutation: { mutationKey }, axios: undefined }; const mutationFn: MutationFunction< - Awaited>, - { data: CreateRoleMappingIn } + Awaited>, + { name: string } > = (props) => { - const { data } = props ?? {}; + const { name } = props ?? 
{}; - return createRoleMapping(data, axiosOptions); + return deleteSchedule(name, axiosOptions); }; return { mutationFn, ...mutationOptions }; }; -export type CreateRoleMappingMutationResult = NonNullable< - Awaited> +export type DeleteScheduleMutationResult = NonNullable< + Awaited> >; -export type CreateRoleMappingMutationBody = CreateRoleMappingIn; -export type CreateRoleMappingMutationError = AxiosError; + +export type DeleteScheduleMutationError = AxiosError; /** - * @summary Create Role Mapping + * @summary Delete Schedule */ -export const useCreateRoleMapping = < +export const useDeleteSchedule = < TError = AxiosError, TContext = unknown, >( options?: { mutation?: UseMutationOptions< - Awaited>, + Awaited>, TError, - { data: CreateRoleMappingIn }, + { name: string }, TContext >; axios?: AxiosRequestConfig; }, queryClient?: QueryClient, ): UseMutationResult< - Awaited>, + Awaited>, TError, - { data: CreateRoleMappingIn }, + { name: string }, TContext > => { - const mutationOptions = getCreateRoleMappingMutationOptions(options); + const mutationOptions = getDeleteScheduleMutationOptions(options); return useMutation(mutationOptions, queryClient); }; /** - * Delete a role-to-group mapping (Admin only). - * @summary Delete Role Mapping + * Get the change history for a schedule configuration. 
+ * @summary Get Schedule History */ -export const deleteRoleMapping = ( - role: string, - groupName: string, +export const getScheduleHistory = ( + name: string, options?: AxiosRequestConfig, -): Promise> => { - return axios.default.delete(`/api/v1/roles/${role}/${groupName}`, options); +): Promise> => { + return axios.default.get(`/api/v1/schedules/${name}/history`, options); }; -export const getDeleteRoleMappingMutationOptions = < - TError = AxiosError, - TContext = unknown, ->(options?: { - mutation?: UseMutationOptions< - Awaited>, - TError, - { role: string; groupName: string }, - TContext - >; - axios?: AxiosRequestConfig; -}): UseMutationOptions< - Awaited>, - TError, - { role: string; groupName: string }, - TContext -> => { - const mutationKey = ["deleteRoleMapping"]; - const { mutation: mutationOptions, axios: axiosOptions } = options - ? options.mutation && - "mutationKey" in options.mutation && - options.mutation.mutationKey - ? options - : { ...options, mutation: { ...options.mutation, mutationKey } } - : { mutation: { mutationKey }, axios: undefined }; - - const mutationFn: MutationFunction< - Awaited>, - { role: string; groupName: string } - > = (props) => { - const { role, groupName } = props ?? 
{}; - - return deleteRoleMapping(role, groupName, axiosOptions); - }; - - return { mutationFn, ...mutationOptions }; -}; - -export type DeleteRoleMappingMutationResult = NonNullable< - Awaited> ->; - -export type DeleteRoleMappingMutationError = AxiosError; - -/** - * @summary Delete Role Mapping - */ -export const useDeleteRoleMapping = < - TError = AxiosError, - TContext = unknown, ->( - options?: { - mutation?: UseMutationOptions< - Awaited>, - TError, - { role: string; groupName: string }, - TContext - >; - axios?: AxiosRequestConfig; - }, - queryClient?: QueryClient, -): UseMutationResult< - Awaited>, - TError, - { role: string; groupName: string }, - TContext -> => { - const mutationOptions = getDeleteRoleMappingMutationOptions(options); - - return useMutation(mutationOptions, queryClient); -}; - -/** - * List available Databricks workspace groups (Admin only). - -Optimised for large workspaces: - -- Requests only ``id,displayName`` from SCIM via the ``attributes`` - parameter. By default SCIM returns the full member roster for every - group, which on a workspace with thousands of groups (each holding - hundreds-to-thousands of members) can balloon the response into the - hundreds of MB and take many seconds to fetch + deserialise. Group - members are not needed for role mapping. -- Server-side search via ``?search=`` maps to SCIM - ``filter=displayName co "..."``, so the dropdown can be a typeahead - that pulls the top matches for whatever the user types instead of - shipping every group in the workspace. -- Hard ``?limit=`` cap (default 200, max 1000) so we never enumerate - every page of groups even without a search term. - -Uses the SP client which has full SCIM access without user-scope restrictions. 
- * @summary List Workspace Groups - */ -export const listWorkspaceGroups = ( - params?: ListWorkspaceGroupsParams, - options?: AxiosRequestConfig, -): Promise> => { - return axios.default.get(`/api/v1/roles/groups`, { - ...options, - params: { ...params, ...options?.params }, - }); -}; - -export const getListWorkspaceGroupsQueryKey = ( - params?: ListWorkspaceGroupsParams, -) => { - return [`/api/v1/roles/groups`, ...(params ? [params] : [])] as const; -}; - -export const getListWorkspaceGroupsQueryOptions = < - TData = Awaited>, +export const getGetScheduleHistoryQueryKey = (name?: string) => { + return [`/api/v1/schedules/${name}/history`] as const; +}; + +export const getGetScheduleHistoryQueryOptions = < + TData = Awaited>, TError = AxiosError, >( - params?: ListWorkspaceGroupsParams, + name: string, options?: { query?: Partial< UseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -5803,42 +5979,47 @@ export const getListWorkspaceGroupsQueryOptions = < const { query: queryOptions, axios: axiosOptions } = options ?? {}; const queryKey = - queryOptions?.queryKey ?? getListWorkspaceGroupsQueryKey(params); + queryOptions?.queryKey ?? 
getGetScheduleHistoryQueryKey(name); const queryFn: QueryFunction< - Awaited> - > = ({ signal }) => listWorkspaceGroups(params, { signal, ...axiosOptions }); + Awaited> + > = ({ signal }) => getScheduleHistory(name, { signal, ...axiosOptions }); - return { queryKey, queryFn, ...queryOptions } as UseQueryOptions< - Awaited>, + return { + queryKey, + queryFn, + enabled: !!name, + ...queryOptions, + } as UseQueryOptions< + Awaited>, TError, TData > & { queryKey: DataTag }; }; -export type ListWorkspaceGroupsQueryResult = NonNullable< - Awaited> +export type GetScheduleHistoryQueryResult = NonNullable< + Awaited> >; -export type ListWorkspaceGroupsQueryError = AxiosError; +export type GetScheduleHistoryQueryError = AxiosError; -export function useListWorkspaceGroups< - TData = Awaited>, +export function useGetScheduleHistory< + TData = Awaited>, TError = AxiosError, >( - params: undefined | ListWorkspaceGroupsParams, + name: string, options: { query: Partial< UseQueryOptions< - Awaited>, + Awaited>, TError, TData > > & Pick< DefinedInitialDataOptions< - Awaited>, + Awaited>, TError, - Awaited> + Awaited> >, "initialData" >; @@ -5848,24 +6029,24 @@ export function useListWorkspaceGroups< ): DefinedUseQueryResult & { queryKey: DataTag; }; -export function useListWorkspaceGroups< - TData = Awaited>, +export function useGetScheduleHistory< + TData = Awaited>, TError = AxiosError, >( - params?: ListWorkspaceGroupsParams, + name: string, options?: { query?: Partial< UseQueryOptions< - Awaited>, + Awaited>, TError, TData > > & Pick< UndefinedInitialDataOptions< - Awaited>, + Awaited>, TError, - Awaited> + Awaited> >, "initialData" >; @@ -5875,15 +6056,15 @@ export function useListWorkspaceGroups< ): UseQueryResult & { queryKey: DataTag; }; -export function useListWorkspaceGroups< - TData = Awaited>, +export function useGetScheduleHistory< + TData = Awaited>, TError = AxiosError, >( - params?: ListWorkspaceGroupsParams, + name: string, options?: { query?: Partial< 
UseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -5895,18 +6076,18 @@ export function useListWorkspaceGroups< queryKey: DataTag; }; /** - * @summary List Workspace Groups + * @summary Get Schedule History */ -export function useListWorkspaceGroups< - TData = Awaited>, +export function useGetScheduleHistory< + TData = Awaited>, TError = AxiosError, >( - params?: ListWorkspaceGroupsParams, + name: string, options?: { query?: Partial< UseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -5917,7 +6098,7 @@ export function useListWorkspaceGroups< ): UseQueryResult & { queryKey: DataTag; } { - const queryOptions = getListWorkspaceGroupsQueryOptions(params, options); + const queryOptions = getGetScheduleHistoryQueryOptions(name, options); const query = useQuery(queryOptions, queryClient) as UseQueryResult< TData, @@ -5929,15 +6110,15 @@ export function useListWorkspaceGroups< return query; } -export const getListWorkspaceGroupsSuspenseQueryOptions = < - TData = Awaited>, +export const getGetScheduleHistorySuspenseQueryOptions = < + TData = Awaited>, TError = AxiosError, >( - params?: ListWorkspaceGroupsParams, + name: string, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -5948,34 +6129,34 @@ export const getListWorkspaceGroupsSuspenseQueryOptions = < const { query: queryOptions, axios: axiosOptions } = options ?? {}; const queryKey = - queryOptions?.queryKey ?? getListWorkspaceGroupsQueryKey(params); + queryOptions?.queryKey ?? 
getGetScheduleHistoryQueryKey(name); const queryFn: QueryFunction< - Awaited> - > = ({ signal }) => listWorkspaceGroups(params, { signal, ...axiosOptions }); + Awaited> + > = ({ signal }) => getScheduleHistory(name, { signal, ...axiosOptions }); return { queryKey, queryFn, ...queryOptions } as UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > & { queryKey: DataTag }; }; -export type ListWorkspaceGroupsSuspenseQueryResult = NonNullable< - Awaited> +export type GetScheduleHistorySuspenseQueryResult = NonNullable< + Awaited> >; -export type ListWorkspaceGroupsSuspenseQueryError = +export type GetScheduleHistorySuspenseQueryError = AxiosError; -export function useListWorkspaceGroupsSuspense< - TData = Awaited>, +export function useGetScheduleHistorySuspense< + TData = Awaited>, TError = AxiosError, >( - params: undefined | ListWorkspaceGroupsParams, + name: string, options: { query: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -5986,15 +6167,15 @@ export function useListWorkspaceGroupsSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; }; -export function useListWorkspaceGroupsSuspense< - TData = Awaited>, +export function useGetScheduleHistorySuspense< + TData = Awaited>, TError = AxiosError, >( - params?: ListWorkspaceGroupsParams, + name: string, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -6005,15 +6186,15 @@ export function useListWorkspaceGroupsSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; }; -export function useListWorkspaceGroupsSuspense< - TData = Awaited>, +export function useGetScheduleHistorySuspense< + TData = Awaited>, TError = AxiosError, >( - params?: ListWorkspaceGroupsParams, + name: string, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -6025,18 +6206,18 @@ export function useListWorkspaceGroupsSuspense< queryKey: DataTag; }; /** - * @summary List Workspace Groups + * @summary Get 
Schedule History */ -export function useListWorkspaceGroupsSuspense< - TData = Awaited>, +export function useGetScheduleHistorySuspense< + TData = Awaited>, TError = AxiosError, >( - params?: ListWorkspaceGroupsParams, + name: string, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -6047,10 +6228,7 @@ export function useListWorkspaceGroupsSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; } { - const queryOptions = getListWorkspaceGroupsSuspenseQueryOptions( - params, - options, - ); + const queryOptions = getGetScheduleHistorySuspenseQueryOptions(name, options); const query = useSuspenseQuery( queryOptions, @@ -6065,69 +6243,65 @@ export function useListWorkspaceGroupsSuspense< } /** - * List all available role names that can be assigned (Admin only). - * @summary List Available Roles + * List all role-to-group mappings (Admin only). + * @summary List Role Mappings */ -export const listAvailableRoles = ( +export const listRoleMappings = ( options?: AxiosRequestConfig, -): Promise> => { - return axios.default.get(`/api/v1/roles/available-roles`, options); +): Promise> => { + return axios.default.get(`/api/v1/roles`, options); }; -export const getListAvailableRolesQueryKey = () => { - return [`/api/v1/roles/available-roles`] as const; +export const getListRoleMappingsQueryKey = () => { + return [`/api/v1/roles`] as const; }; -export const getListAvailableRolesQueryOptions = < - TData = Awaited>, +export const getListRoleMappingsQueryOptions = < + TData = Awaited>, TError = AxiosError, >(options?: { query?: Partial< - UseQueryOptions< - Awaited>, - TError, - TData - > + UseQueryOptions>, TError, TData> >; axios?: AxiosRequestConfig; }) => { const { query: queryOptions, axios: axiosOptions } = options ?? {}; - const queryKey = queryOptions?.queryKey ?? getListAvailableRolesQueryKey(); + const queryKey = queryOptions?.queryKey ?? 
getListRoleMappingsQueryKey(); const queryFn: QueryFunction< - Awaited> - > = ({ signal }) => listAvailableRoles({ signal, ...axiosOptions }); + Awaited> + > = ({ signal }) => listRoleMappings({ signal, ...axiosOptions }); return { queryKey, queryFn, ...queryOptions } as UseQueryOptions< - Awaited>, + Awaited>, TError, TData > & { queryKey: DataTag }; }; -export type ListAvailableRolesQueryResult = NonNullable< - Awaited> +export type ListRoleMappingsQueryResult = NonNullable< + Awaited> >; -export type ListAvailableRolesQueryError = AxiosError; +export type ListRoleMappingsQueryError = AxiosError; -export function useListAvailableRoles< - TData = Awaited>, +export function useListRoleMappings< + TData = Awaited>, TError = AxiosError, >( options: { query: Partial< UseQueryOptions< - Awaited>, + Awaited>, TError, TData > > & Pick< DefinedInitialDataOptions< - Awaited>, + Awaited>, TError, - Awaited> + Awaited> >, "initialData" >; @@ -6137,23 +6311,23 @@ export function useListAvailableRoles< ): DefinedUseQueryResult & { queryKey: DataTag; }; -export function useListAvailableRoles< - TData = Awaited>, +export function useListRoleMappings< + TData = Awaited>, TError = AxiosError, >( options?: { query?: Partial< UseQueryOptions< - Awaited>, + Awaited>, TError, TData > > & Pick< UndefinedInitialDataOptions< - Awaited>, + Awaited>, TError, - Awaited> + Awaited> >, "initialData" >; @@ -6163,14 +6337,14 @@ export function useListAvailableRoles< ): UseQueryResult & { queryKey: DataTag; }; -export function useListAvailableRoles< - TData = Awaited>, +export function useListRoleMappings< + TData = Awaited>, TError = AxiosError, >( options?: { query?: Partial< UseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -6182,17 +6356,17 @@ export function useListAvailableRoles< queryKey: DataTag; }; /** - * @summary List Available Roles + * @summary List Role Mappings */ -export function useListAvailableRoles< - TData = Awaited>, +export function useListRoleMappings< + TData = 
Awaited>, TError = AxiosError, >( options?: { query?: Partial< UseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -6203,7 +6377,7 @@ export function useListAvailableRoles< ): UseQueryResult & { queryKey: DataTag; } { - const queryOptions = getListAvailableRolesQueryOptions(options); + const queryOptions = getListRoleMappingsQueryOptions(options); const query = useQuery(queryOptions, queryClient) as UseQueryResult< TData, @@ -6215,13 +6389,13 @@ export function useListAvailableRoles< return query; } -export const getListAvailableRolesSuspenseQueryOptions = < - TData = Awaited>, +export const getListRoleMappingsSuspenseQueryOptions = < + TData = Awaited>, TError = AxiosError, >(options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -6230,33 +6404,33 @@ export const getListAvailableRolesSuspenseQueryOptions = < }) => { const { query: queryOptions, axios: axiosOptions } = options ?? {}; - const queryKey = queryOptions?.queryKey ?? getListAvailableRolesQueryKey(); + const queryKey = queryOptions?.queryKey ?? 
getListRoleMappingsQueryKey(); const queryFn: QueryFunction< - Awaited> - > = ({ signal }) => listAvailableRoles({ signal, ...axiosOptions }); + Awaited> + > = ({ signal }) => listRoleMappings({ signal, ...axiosOptions }); return { queryKey, queryFn, ...queryOptions } as UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > & { queryKey: DataTag }; }; -export type ListAvailableRolesSuspenseQueryResult = NonNullable< - Awaited> +export type ListRoleMappingsSuspenseQueryResult = NonNullable< + Awaited> >; -export type ListAvailableRolesSuspenseQueryError = +export type ListRoleMappingsSuspenseQueryError = AxiosError; -export function useListAvailableRolesSuspense< - TData = Awaited>, +export function useListRoleMappingsSuspense< + TData = Awaited>, TError = AxiosError, >( options: { query: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -6267,14 +6441,14 @@ export function useListAvailableRolesSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; }; -export function useListAvailableRolesSuspense< - TData = Awaited>, +export function useListRoleMappingsSuspense< + TData = Awaited>, TError = AxiosError, >( options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -6285,14 +6459,14 @@ export function useListAvailableRolesSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; }; -export function useListAvailableRolesSuspense< - TData = Awaited>, +export function useListRoleMappingsSuspense< + TData = Awaited>, TError = AxiosError, >( options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -6304,17 +6478,17 @@ export function useListAvailableRolesSuspense< queryKey: DataTag; }; /** - * @summary List Available Roles + * @summary List Role Mappings */ -export function useListAvailableRolesSuspense< - TData = Awaited>, +export function useListRoleMappingsSuspense< + TData = Awaited>, TError = AxiosError, >( options?: { query?: Partial< 
UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -6325,7 +6499,7 @@ export function useListAvailableRolesSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; } { - const queryOptions = getListAvailableRolesSuspenseQueryOptions(options); + const queryOptions = getListRoleMappingsSuspenseQueryOptions(options); const query = useSuspenseQuery( queryOptions, @@ -6340,212 +6514,309 @@ export function useListAvailableRolesSuspense< } /** - * @summary List Catalogs + * Create or update a role-to-group mapping (Admin only). + * @summary Create Role Mapping */ -export const listCatalogs = ( +export const createRoleMapping = ( + createRoleMappingIn: CreateRoleMappingIn, options?: AxiosRequestConfig, -): Promise> => { - return axios.default.get(`/api/v1/discovery/catalogs`, options); -}; - -export const getListCatalogsQueryKey = () => { - return [`/api/v1/discovery/catalogs`] as const; +): Promise> => { + return axios.default.post(`/api/v1/roles`, createRoleMappingIn, options); }; -export const getListCatalogsQueryOptions = < - TData = Awaited>, +export const getCreateRoleMappingMutationOptions = < TError = AxiosError, + TContext = unknown, >(options?: { - query?: Partial< - UseQueryOptions>, TError, TData> + mutation?: UseMutationOptions< + Awaited>, + TError, + { data: CreateRoleMappingIn }, + TContext >; axios?: AxiosRequestConfig; -}) => { - const { query: queryOptions, axios: axiosOptions } = options ?? {}; +}): UseMutationOptions< + Awaited>, + TError, + { data: CreateRoleMappingIn }, + TContext +> => { + const mutationKey = ["createRoleMapping"]; + const { mutation: mutationOptions, axios: axiosOptions } = options + ? options.mutation && + "mutationKey" in options.mutation && + options.mutation.mutationKey + ? options + : { ...options, mutation: { ...options.mutation, mutationKey } } + : { mutation: { mutationKey }, axios: undefined }; - const queryKey = queryOptions?.queryKey ?? 
getListCatalogsQueryKey(); + const mutationFn: MutationFunction< + Awaited>, + { data: CreateRoleMappingIn } + > = (props) => { + const { data } = props ?? {}; - const queryFn: QueryFunction>> = ({ - signal, - }) => listCatalogs({ signal, ...axiosOptions }); + return createRoleMapping(data, axiosOptions); + }; - return { queryKey, queryFn, ...queryOptions } as UseQueryOptions< - Awaited>, - TError, - TData - > & { queryKey: DataTag }; + return { mutationFn, ...mutationOptions }; }; -export type ListCatalogsQueryResult = NonNullable< - Awaited> +export type CreateRoleMappingMutationResult = NonNullable< + Awaited> >; -export type ListCatalogsQueryError = AxiosError; +export type CreateRoleMappingMutationBody = CreateRoleMappingIn; +export type CreateRoleMappingMutationError = AxiosError; -export function useListCatalogs< - TData = Awaited>, - TError = AxiosError, ->( - options: { - query: Partial< - UseQueryOptions>, TError, TData> - > & - Pick< - DefinedInitialDataOptions< - Awaited>, - TError, - Awaited> - >, - "initialData" - >; - axios?: AxiosRequestConfig; - }, - queryClient?: QueryClient, -): DefinedUseQueryResult & { - queryKey: DataTag; -}; -export function useListCatalogs< - TData = Awaited>, - TError = AxiosError, ->( - options?: { - query?: Partial< - UseQueryOptions>, TError, TData> - > & - Pick< - UndefinedInitialDataOptions< - Awaited>, - TError, - Awaited> - >, - "initialData" - >; - axios?: AxiosRequestConfig; - }, - queryClient?: QueryClient, -): UseQueryResult & { - queryKey: DataTag; -}; -export function useListCatalogs< - TData = Awaited>, +/** + * @summary Create Role Mapping + */ +export const useCreateRoleMapping = < TError = AxiosError, + TContext = unknown, >( options?: { - query?: Partial< - UseQueryOptions>, TError, TData> + mutation?: UseMutationOptions< + Awaited>, + TError, + { data: CreateRoleMappingIn }, + TContext >; axios?: AxiosRequestConfig; }, queryClient?: QueryClient, -): UseQueryResult & { - queryKey: DataTag; +): 
UseMutationResult< + Awaited>, + TError, + { data: CreateRoleMappingIn }, + TContext +> => { + const mutationOptions = getCreateRoleMappingMutationOptions(options); + + return useMutation(mutationOptions, queryClient); }; + /** - * @summary List Catalogs + * Delete a role-to-group mapping (Admin only). + * @summary Delete Role Mapping */ +export const deleteRoleMapping = ( + role: string, + groupName: string, + options?: AxiosRequestConfig, +): Promise> => { + return axios.default.delete(`/api/v1/roles/${role}/${groupName}`, options); +}; -export function useListCatalogs< - TData = Awaited>, +export const getDeleteRoleMappingMutationOptions = < TError = AxiosError, ->( - options?: { - query?: Partial< - UseQueryOptions>, TError, TData> - >; - axios?: AxiosRequestConfig; - }, - queryClient?: QueryClient, -): UseQueryResult & { - queryKey: DataTag; -} { - const queryOptions = getListCatalogsQueryOptions(options); + TContext = unknown, +>(options?: { + mutation?: UseMutationOptions< + Awaited>, + TError, + { role: string; groupName: string }, + TContext + >; + axios?: AxiosRequestConfig; +}): UseMutationOptions< + Awaited>, + TError, + { role: string; groupName: string }, + TContext +> => { + const mutationKey = ["deleteRoleMapping"]; + const { mutation: mutationOptions, axios: axiosOptions } = options + ? options.mutation && + "mutationKey" in options.mutation && + options.mutation.mutationKey + ? options + : { ...options, mutation: { ...options.mutation, mutationKey } } + : { mutation: { mutationKey }, axios: undefined }; - const query = useQuery(queryOptions, queryClient) as UseQueryResult< - TData, - TError - > & { queryKey: DataTag }; + const mutationFn: MutationFunction< + Awaited>, + { role: string; groupName: string } + > = (props) => { + const { role, groupName } = props ?? 
{}; - query.queryKey = queryOptions.queryKey; + return deleteRoleMapping(role, groupName, axiosOptions); + }; - return query; -} + return { mutationFn, ...mutationOptions }; +}; -export const getListCatalogsSuspenseQueryOptions = < - TData = Awaited>, +export type DeleteRoleMappingMutationResult = NonNullable< + Awaited> +>; + +export type DeleteRoleMappingMutationError = AxiosError; + +/** + * @summary Delete Role Mapping + */ +export const useDeleteRoleMapping = < TError = AxiosError, ->(options?: { - query?: Partial< - UseSuspenseQueryOptions< - Awaited>, + TContext = unknown, +>( + options?: { + mutation?: UseMutationOptions< + Awaited>, TError, - TData - > - >; - axios?: AxiosRequestConfig; -}) => { + { role: string; groupName: string }, + TContext + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): UseMutationResult< + Awaited>, + TError, + { role: string; groupName: string }, + TContext +> => { + const mutationOptions = getDeleteRoleMappingMutationOptions(options); + + return useMutation(mutationOptions, queryClient); +}; + +/** + * List available Databricks workspace groups (Admin only). + +Optimised for large workspaces: + +- Requests only ``id,displayName`` from SCIM via the ``attributes`` + parameter. By default SCIM returns the full member roster for every + group, which on a workspace with thousands of groups (each holding + hundreds-to-thousands of members) can balloon the response into the + hundreds of MB and take many seconds to fetch + deserialise. Group + members are not needed for role mapping. +- Server-side search via ``?search=`` maps to SCIM + ``filter=displayName co "..."``, so the dropdown can be a typeahead + that pulls the top matches for whatever the user types instead of + shipping every group in the workspace. +- Hard ``?limit=`` cap (default 200, max 1000) so we never enumerate + every page of groups even without a search term. + +Uses the SP client which has full SCIM access without user-scope restrictions. 
+ * @summary List Workspace Groups + */ +export const listWorkspaceGroups = ( + params?: ListWorkspaceGroupsParams, + options?: AxiosRequestConfig, +): Promise> => { + return axios.default.get(`/api/v1/roles/groups`, { + ...options, + params: { ...params, ...options?.params }, + }); +}; + +export const getListWorkspaceGroupsQueryKey = ( + params?: ListWorkspaceGroupsParams, +) => { + return [`/api/v1/roles/groups`, ...(params ? [params] : [])] as const; +}; + +export const getListWorkspaceGroupsQueryOptions = < + TData = Awaited>, + TError = AxiosError, +>( + params?: ListWorkspaceGroupsParams, + options?: { + query?: Partial< + UseQueryOptions< + Awaited>, + TError, + TData + > + >; + axios?: AxiosRequestConfig; + }, +) => { const { query: queryOptions, axios: axiosOptions } = options ?? {}; - const queryKey = queryOptions?.queryKey ?? getListCatalogsQueryKey(); + const queryKey = + queryOptions?.queryKey ?? getListWorkspaceGroupsQueryKey(params); - const queryFn: QueryFunction>> = ({ - signal, - }) => listCatalogs({ signal, ...axiosOptions }); + const queryFn: QueryFunction< + Awaited> + > = ({ signal }) => listWorkspaceGroups(params, { signal, ...axiosOptions }); - return { queryKey, queryFn, ...queryOptions } as UseSuspenseQueryOptions< - Awaited>, + return { queryKey, queryFn, ...queryOptions } as UseQueryOptions< + Awaited>, TError, TData > & { queryKey: DataTag }; }; -export type ListCatalogsSuspenseQueryResult = NonNullable< - Awaited> +export type ListWorkspaceGroupsQueryResult = NonNullable< + Awaited> >; -export type ListCatalogsSuspenseQueryError = AxiosError; +export type ListWorkspaceGroupsQueryError = AxiosError; -export function useListCatalogsSuspense< - TData = Awaited>, +export function useListWorkspaceGroups< + TData = Awaited>, TError = AxiosError, >( + params: undefined | ListWorkspaceGroupsParams, options: { query: Partial< - UseSuspenseQueryOptions< - Awaited>, + UseQueryOptions< + Awaited>, TError, TData > - >; + > & + Pick< + 
DefinedInitialDataOptions< + Awaited>, + TError, + Awaited> + >, + "initialData" + >; axios?: AxiosRequestConfig; }, queryClient?: QueryClient, -): UseSuspenseQueryResult & { +): DefinedUseQueryResult & { queryKey: DataTag; }; -export function useListCatalogsSuspense< - TData = Awaited>, +export function useListWorkspaceGroups< + TData = Awaited>, TError = AxiosError, >( + params?: ListWorkspaceGroupsParams, options?: { query?: Partial< - UseSuspenseQueryOptions< - Awaited>, + UseQueryOptions< + Awaited>, TError, TData > - >; + > & + Pick< + UndefinedInitialDataOptions< + Awaited>, + TError, + Awaited> + >, + "initialData" + >; axios?: AxiosRequestConfig; }, queryClient?: QueryClient, -): UseSuspenseQueryResult & { +): UseQueryResult & { queryKey: DataTag; }; -export function useListCatalogsSuspense< - TData = Awaited>, +export function useListWorkspaceGroups< + TData = Awaited>, TError = AxiosError, >( + params?: ListWorkspaceGroupsParams, options?: { query?: Partial< - UseSuspenseQueryOptions< - Awaited>, + UseQueryOptions< + Awaited>, TError, TData > @@ -6553,21 +6824,22 @@ export function useListCatalogsSuspense< axios?: AxiosRequestConfig; }, queryClient?: QueryClient, -): UseSuspenseQueryResult & { +): UseQueryResult & { queryKey: DataTag; }; /** - * @summary List Catalogs + * @summary List Workspace Groups */ -export function useListCatalogsSuspense< - TData = Awaited>, +export function useListWorkspaceGroups< + TData = Awaited>, TError = AxiosError, >( + params?: ListWorkspaceGroupsParams, options?: { query?: Partial< - UseSuspenseQueryOptions< - Awaited>, + UseQueryOptions< + Awaited>, TError, TData > @@ -6575,252 +6847,263 @@ export function useListCatalogsSuspense< axios?: AxiosRequestConfig; }, queryClient?: QueryClient, -): UseSuspenseQueryResult & { +): UseQueryResult & { queryKey: DataTag; } { - const queryOptions = getListCatalogsSuspenseQueryOptions(options); + const queryOptions = getListWorkspaceGroupsQueryOptions(params, options); - const query 
= useSuspenseQuery( - queryOptions, - queryClient, - ) as UseSuspenseQueryResult & { - queryKey: DataTag; - }; + const query = useQuery(queryOptions, queryClient) as UseQueryResult< + TData, + TError + > & { queryKey: DataTag }; query.queryKey = queryOptions.queryKey; return query; } -/** - * @summary List Schemas - */ -export const listSchemas = ( - catalog: string, - options?: AxiosRequestConfig, -): Promise> => { - return axios.default.get( - `/api/v1/discovery/catalogs/${catalog}/schemas`, - options, - ); -}; - -export const getListSchemasQueryKey = (catalog?: string) => { - return [`/api/v1/discovery/catalogs/${catalog}/schemas`] as const; -}; - -export const getListSchemasQueryOptions = < - TData = Awaited>, +export const getListWorkspaceGroupsSuspenseQueryOptions = < + TData = Awaited>, TError = AxiosError, >( - catalog: string, + params?: ListWorkspaceGroupsParams, options?: { query?: Partial< - UseQueryOptions>, TError, TData> + UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > >; axios?: AxiosRequestConfig; }, ) => { const { query: queryOptions, axios: axiosOptions } = options ?? {}; - const queryKey = queryOptions?.queryKey ?? getListSchemasQueryKey(catalog); + const queryKey = + queryOptions?.queryKey ?? 
getListWorkspaceGroupsQueryKey(params); - const queryFn: QueryFunction>> = ({ - signal, - }) => listSchemas(catalog, { signal, ...axiosOptions }); + const queryFn: QueryFunction< + Awaited> + > = ({ signal }) => listWorkspaceGroups(params, { signal, ...axiosOptions }); - return { - queryKey, - queryFn, - enabled: !!catalog, - ...queryOptions, - } as UseQueryOptions< - Awaited>, + return { queryKey, queryFn, ...queryOptions } as UseSuspenseQueryOptions< + Awaited>, TError, TData > & { queryKey: DataTag }; }; -export type ListSchemasQueryResult = NonNullable< - Awaited> +export type ListWorkspaceGroupsSuspenseQueryResult = NonNullable< + Awaited> >; -export type ListSchemasQueryError = AxiosError; +export type ListWorkspaceGroupsSuspenseQueryError = + AxiosError; -export function useListSchemas< - TData = Awaited>, +export function useListWorkspaceGroupsSuspense< + TData = Awaited>, TError = AxiosError, >( - catalog: string, + params: undefined | ListWorkspaceGroupsParams, options: { query: Partial< - UseQueryOptions>, TError, TData> - > & - Pick< - DefinedInitialDataOptions< - Awaited>, - TError, - Awaited> - >, - "initialData" - >; + UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > + >; axios?: AxiosRequestConfig; }, queryClient?: QueryClient, -): DefinedUseQueryResult & { +): UseSuspenseQueryResult & { queryKey: DataTag; }; -export function useListSchemas< - TData = Awaited>, +export function useListWorkspaceGroupsSuspense< + TData = Awaited>, TError = AxiosError, >( - catalog: string, + params?: ListWorkspaceGroupsParams, options?: { query?: Partial< - UseQueryOptions>, TError, TData> - > & - Pick< - UndefinedInitialDataOptions< - Awaited>, - TError, - Awaited> - >, - "initialData" - >; + UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > + >; axios?: AxiosRequestConfig; }, queryClient?: QueryClient, -): UseQueryResult & { +): UseSuspenseQueryResult & { queryKey: DataTag; }; -export function useListSchemas< - TData = Awaited>, +export function 
useListWorkspaceGroupsSuspense< + TData = Awaited>, TError = AxiosError, >( - catalog: string, + params?: ListWorkspaceGroupsParams, options?: { query?: Partial< - UseQueryOptions>, TError, TData> + UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > >; axios?: AxiosRequestConfig; }, queryClient?: QueryClient, -): UseQueryResult & { +): UseSuspenseQueryResult & { queryKey: DataTag; }; /** - * @summary List Schemas + * @summary List Workspace Groups */ -export function useListSchemas< - TData = Awaited>, +export function useListWorkspaceGroupsSuspense< + TData = Awaited>, TError = AxiosError, >( - catalog: string, + params?: ListWorkspaceGroupsParams, options?: { query?: Partial< - UseQueryOptions>, TError, TData> + UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > >; axios?: AxiosRequestConfig; }, queryClient?: QueryClient, -): UseQueryResult & { +): UseSuspenseQueryResult & { queryKey: DataTag; } { - const queryOptions = getListSchemasQueryOptions(catalog, options); + const queryOptions = getListWorkspaceGroupsSuspenseQueryOptions( + params, + options, + ); - const query = useQuery(queryOptions, queryClient) as UseQueryResult< - TData, - TError - > & { queryKey: DataTag }; + const query = useSuspenseQuery( + queryOptions, + queryClient, + ) as UseSuspenseQueryResult & { + queryKey: DataTag; + }; query.queryKey = queryOptions.queryKey; return query; } -export const getListSchemasSuspenseQueryOptions = < - TData = Awaited>, +/** + * List all available role names that can be assigned (Admin only). 
+ * @summary List Available Roles + */ +export const listAvailableRoles = ( + options?: AxiosRequestConfig, +): Promise> => { + return axios.default.get(`/api/v1/roles/available-roles`, options); +}; + +export const getListAvailableRolesQueryKey = () => { + return [`/api/v1/roles/available-roles`] as const; +}; + +export const getListAvailableRolesQueryOptions = < + TData = Awaited>, TError = AxiosError, ->( - catalog: string, - options?: { - query?: Partial< - UseSuspenseQueryOptions< - Awaited>, - TError, - TData - > - >; - axios?: AxiosRequestConfig; - }, -) => { +>(options?: { + query?: Partial< + UseQueryOptions< + Awaited>, + TError, + TData + > + >; + axios?: AxiosRequestConfig; +}) => { const { query: queryOptions, axios: axiosOptions } = options ?? {}; - const queryKey = queryOptions?.queryKey ?? getListSchemasQueryKey(catalog); + const queryKey = queryOptions?.queryKey ?? getListAvailableRolesQueryKey(); - const queryFn: QueryFunction>> = ({ - signal, - }) => listSchemas(catalog, { signal, ...axiosOptions }); + const queryFn: QueryFunction< + Awaited> + > = ({ signal }) => listAvailableRoles({ signal, ...axiosOptions }); - return { queryKey, queryFn, ...queryOptions } as UseSuspenseQueryOptions< - Awaited>, + return { queryKey, queryFn, ...queryOptions } as UseQueryOptions< + Awaited>, TError, TData > & { queryKey: DataTag }; }; -export type ListSchemasSuspenseQueryResult = NonNullable< - Awaited> +export type ListAvailableRolesQueryResult = NonNullable< + Awaited> >; -export type ListSchemasSuspenseQueryError = AxiosError; +export type ListAvailableRolesQueryError = AxiosError; -export function useListSchemasSuspense< - TData = Awaited>, +export function useListAvailableRoles< + TData = Awaited>, TError = AxiosError, >( - catalog: string, options: { query: Partial< - UseSuspenseQueryOptions< - Awaited>, + UseQueryOptions< + Awaited>, TError, TData > - >; + > & + Pick< + DefinedInitialDataOptions< + Awaited>, + TError, + Awaited> + >, + "initialData" + >; 
axios?: AxiosRequestConfig; }, queryClient?: QueryClient, -): UseSuspenseQueryResult & { +): DefinedUseQueryResult & { queryKey: DataTag; }; -export function useListSchemasSuspense< - TData = Awaited>, +export function useListAvailableRoles< + TData = Awaited>, TError = AxiosError, >( - catalog: string, options?: { query?: Partial< - UseSuspenseQueryOptions< - Awaited>, + UseQueryOptions< + Awaited>, TError, TData > - >; + > & + Pick< + UndefinedInitialDataOptions< + Awaited>, + TError, + Awaited> + >, + "initialData" + >; axios?: AxiosRequestConfig; }, queryClient?: QueryClient, -): UseSuspenseQueryResult & { +): UseQueryResult & { queryKey: DataTag; }; -export function useListSchemasSuspense< - TData = Awaited>, +export function useListAvailableRoles< + TData = Awaited>, TError = AxiosError, >( - catalog: string, options?: { query?: Partial< - UseSuspenseQueryOptions< - Awaited>, + UseQueryOptions< + Awaited>, TError, TData > @@ -6828,22 +7111,21 @@ export function useListSchemasSuspense< axios?: AxiosRequestConfig; }, queryClient?: QueryClient, -): UseSuspenseQueryResult & { +): UseQueryResult & { queryKey: DataTag; }; /** - * @summary List Schemas + * @summary List Available Roles */ -export function useListSchemasSuspense< - TData = Awaited>, +export function useListAvailableRoles< + TData = Awaited>, TError = AxiosError, >( - catalog: string, options?: { query?: Partial< - UseSuspenseQueryOptions< - Awaited>, + UseQueryOptions< + Awaited>, TError, TData > @@ -6851,167 +7133,10 @@ export function useListSchemasSuspense< axios?: AxiosRequestConfig; }, queryClient?: QueryClient, -): UseSuspenseQueryResult & { +): UseQueryResult & { queryKey: DataTag; } { - const queryOptions = getListSchemasSuspenseQueryOptions(catalog, options); - - const query = useSuspenseQuery( - queryOptions, - queryClient, - ) as UseSuspenseQueryResult & { - queryKey: DataTag; - }; - - query.queryKey = queryOptions.queryKey; - - return query; -} - -/** - * @summary List Tables - */ -export 
const listTables = ( - catalog: string, - schema: string, - options?: AxiosRequestConfig, -): Promise> => { - return axios.default.get( - `/api/v1/discovery/catalogs/${catalog}/schemas/${schema}/tables`, - options, - ); -}; - -export const getListTablesQueryKey = (catalog?: string, schema?: string) => { - return [ - `/api/v1/discovery/catalogs/${catalog}/schemas/${schema}/tables`, - ] as const; -}; - -export const getListTablesQueryOptions = < - TData = Awaited>, - TError = AxiosError, ->( - catalog: string, - schema: string, - options?: { - query?: Partial< - UseQueryOptions>, TError, TData> - >; - axios?: AxiosRequestConfig; - }, -) => { - const { query: queryOptions, axios: axiosOptions } = options ?? {}; - - const queryKey = - queryOptions?.queryKey ?? getListTablesQueryKey(catalog, schema); - - const queryFn: QueryFunction>> = ({ - signal, - }) => listTables(catalog, schema, { signal, ...axiosOptions }); - - return { - queryKey, - queryFn, - enabled: !!(catalog && schema), - ...queryOptions, - } as UseQueryOptions< - Awaited>, - TError, - TData - > & { queryKey: DataTag }; -}; - -export type ListTablesQueryResult = NonNullable< - Awaited> ->; -export type ListTablesQueryError = AxiosError; - -export function useListTables< - TData = Awaited>, - TError = AxiosError, ->( - catalog: string, - schema: string, - options: { - query: Partial< - UseQueryOptions>, TError, TData> - > & - Pick< - DefinedInitialDataOptions< - Awaited>, - TError, - Awaited> - >, - "initialData" - >; - axios?: AxiosRequestConfig; - }, - queryClient?: QueryClient, -): DefinedUseQueryResult & { - queryKey: DataTag; -}; -export function useListTables< - TData = Awaited>, - TError = AxiosError, ->( - catalog: string, - schema: string, - options?: { - query?: Partial< - UseQueryOptions>, TError, TData> - > & - Pick< - UndefinedInitialDataOptions< - Awaited>, - TError, - Awaited> - >, - "initialData" - >; - axios?: AxiosRequestConfig; - }, - queryClient?: QueryClient, -): UseQueryResult & { - 
queryKey: DataTag; -}; -export function useListTables< - TData = Awaited>, - TError = AxiosError, ->( - catalog: string, - schema: string, - options?: { - query?: Partial< - UseQueryOptions>, TError, TData> - >; - axios?: AxiosRequestConfig; - }, - queryClient?: QueryClient, -): UseQueryResult & { - queryKey: DataTag; -}; -/** - * @summary List Tables - */ - -export function useListTables< - TData = Awaited>, - TError = AxiosError, ->( - catalog: string, - schema: string, - options?: { - query?: Partial< - UseQueryOptions>, TError, TData> - >; - axios?: AxiosRequestConfig; - }, - queryClient?: QueryClient, -): UseQueryResult & { - queryKey: DataTag; -} { - const queryOptions = getListTablesQueryOptions(catalog, schema, options); + const queryOptions = getListAvailableRolesQueryOptions(options); const query = useQuery(queryOptions, queryClient) as UseQueryResult< TData, @@ -7023,54 +7148,48 @@ export function useListTables< return query; } -export const getListTablesSuspenseQueryOptions = < - TData = Awaited>, +export const getListAvailableRolesSuspenseQueryOptions = < + TData = Awaited>, TError = AxiosError, ->( - catalog: string, - schema: string, - options?: { - query?: Partial< - UseSuspenseQueryOptions< - Awaited>, - TError, - TData - > - >; - axios?: AxiosRequestConfig; - }, -) => { +>(options?: { + query?: Partial< + UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > + >; + axios?: AxiosRequestConfig; +}) => { const { query: queryOptions, axios: axiosOptions } = options ?? {}; - const queryKey = - queryOptions?.queryKey ?? getListTablesQueryKey(catalog, schema); + const queryKey = queryOptions?.queryKey ?? 
getListAvailableRolesQueryKey(); - const queryFn: QueryFunction>> = ({ - signal, - }) => listTables(catalog, schema, { signal, ...axiosOptions }); + const queryFn: QueryFunction< + Awaited> + > = ({ signal }) => listAvailableRoles({ signal, ...axiosOptions }); return { queryKey, queryFn, ...queryOptions } as UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > & { queryKey: DataTag }; }; -export type ListTablesSuspenseQueryResult = NonNullable< - Awaited> +export type ListAvailableRolesSuspenseQueryResult = NonNullable< + Awaited> >; -export type ListTablesSuspenseQueryError = AxiosError; +export type ListAvailableRolesSuspenseQueryError = + AxiosError; -export function useListTablesSuspense< - TData = Awaited>, +export function useListAvailableRolesSuspense< + TData = Awaited>, TError = AxiosError, >( - catalog: string, - schema: string, options: { query: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -7081,16 +7200,14 @@ export function useListTablesSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; }; -export function useListTablesSuspense< - TData = Awaited>, +export function useListAvailableRolesSuspense< + TData = Awaited>, TError = AxiosError, >( - catalog: string, - schema: string, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -7101,16 +7218,14 @@ export function useListTablesSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; }; -export function useListTablesSuspense< - TData = Awaited>, +export function useListAvailableRolesSuspense< + TData = Awaited>, TError = AxiosError, >( - catalog: string, - schema: string, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -7122,19 +7237,17 @@ export function useListTablesSuspense< queryKey: DataTag; }; /** - * @summary List Tables + * @summary List Available Roles */ -export function useListTablesSuspense< - TData = Awaited>, +export function 
useListAvailableRolesSuspense< + TData = Awaited>, TError = AxiosError, >( - catalog: string, - schema: string, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -7145,11 +7258,7 @@ export function useListTablesSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; } { - const queryOptions = getListTablesSuspenseQueryOptions( - catalog, - schema, - options, - ); + const queryOptions = getListAvailableRolesSuspenseQueryOptions(options); const query = useSuspenseQuery( queryOptions, @@ -7164,92 +7273,60 @@ export function useListTablesSuspense< } /** - * Return fully qualified names for all tables in a schema (for batch profiling). - * @summary List All Table Fqns + * @summary List Catalogs */ -export const listAllTableFqns = ( - catalog: string, - schema: string, +export const listCatalogs = ( options?: AxiosRequestConfig, -): Promise> => { - return axios.default.get( - `/api/v1/discovery/catalogs/${catalog}/schemas/${schema}/all-table-fqns`, - options, - ); +): Promise> => { + return axios.default.get(`/api/v1/discovery/catalogs`, options); }; -export const getListAllTableFqnsQueryKey = ( - catalog?: string, - schema?: string, -) => { - return [ - `/api/v1/discovery/catalogs/${catalog}/schemas/${schema}/all-table-fqns`, - ] as const; +export const getListCatalogsQueryKey = () => { + return [`/api/v1/discovery/catalogs`] as const; }; -export const getListAllTableFqnsQueryOptions = < - TData = Awaited>, +export const getListCatalogsQueryOptions = < + TData = Awaited>, TError = AxiosError, ->( - catalog: string, - schema: string, - options?: { - query?: Partial< - UseQueryOptions< - Awaited>, - TError, - TData - > - >; - axios?: AxiosRequestConfig; - }, -) => { +>(options?: { + query?: Partial< + UseQueryOptions>, TError, TData> + >; + axios?: AxiosRequestConfig; +}) => { const { query: queryOptions, axios: axiosOptions } = options ?? {}; - const queryKey = - queryOptions?.queryKey ?? 
getListAllTableFqnsQueryKey(catalog, schema); + const queryKey = queryOptions?.queryKey ?? getListCatalogsQueryKey(); - const queryFn: QueryFunction< - Awaited> - > = ({ signal }) => - listAllTableFqns(catalog, schema, { signal, ...axiosOptions }); + const queryFn: QueryFunction>> = ({ + signal, + }) => listCatalogs({ signal, ...axiosOptions }); - return { - queryKey, - queryFn, - enabled: !!(catalog && schema), - ...queryOptions, - } as UseQueryOptions< - Awaited>, + return { queryKey, queryFn, ...queryOptions } as UseQueryOptions< + Awaited>, TError, TData > & { queryKey: DataTag }; }; -export type ListAllTableFqnsQueryResult = NonNullable< - Awaited> +export type ListCatalogsQueryResult = NonNullable< + Awaited> >; -export type ListAllTableFqnsQueryError = AxiosError; +export type ListCatalogsQueryError = AxiosError; -export function useListAllTableFqns< - TData = Awaited>, +export function useListCatalogs< + TData = Awaited>, TError = AxiosError, >( - catalog: string, - schema: string, options: { query: Partial< - UseQueryOptions< - Awaited>, - TError, - TData - > + UseQueryOptions>, TError, TData> > & Pick< DefinedInitialDataOptions< - Awaited>, + Awaited>, TError, - Awaited> + Awaited> >, "initialData" >; @@ -7259,25 +7336,19 @@ export function useListAllTableFqns< ): DefinedUseQueryResult & { queryKey: DataTag; }; -export function useListAllTableFqns< - TData = Awaited>, +export function useListCatalogs< + TData = Awaited>, TError = AxiosError, >( - catalog: string, - schema: string, options?: { query?: Partial< - UseQueryOptions< - Awaited>, - TError, - TData - > + UseQueryOptions>, TError, TData> > & Pick< UndefinedInitialDataOptions< - Awaited>, + Awaited>, TError, - Awaited> + Awaited> >, "initialData" >; @@ -7287,19 +7358,13 @@ export function useListAllTableFqns< ): UseQueryResult & { queryKey: DataTag; }; -export function useListAllTableFqns< - TData = Awaited>, +export function useListCatalogs< + TData = Awaited>, TError = AxiosError, >( - catalog: 
string, - schema: string, options?: { query?: Partial< - UseQueryOptions< - Awaited>, - TError, - TData - > + UseQueryOptions>, TError, TData> >; axios?: AxiosRequestConfig; }, @@ -7308,22 +7373,16 @@ export function useListAllTableFqns< queryKey: DataTag; }; /** - * @summary List All Table Fqns + * @summary List Catalogs */ -export function useListAllTableFqns< - TData = Awaited>, +export function useListCatalogs< + TData = Awaited>, TError = AxiosError, >( - catalog: string, - schema: string, options?: { query?: Partial< - UseQueryOptions< - Awaited>, - TError, - TData - > + UseQueryOptions>, TError, TData> >; axios?: AxiosRequestConfig; }, @@ -7331,11 +7390,7 @@ export function useListAllTableFqns< ): UseQueryResult & { queryKey: DataTag; } { - const queryOptions = getListAllTableFqnsQueryOptions( - catalog, - schema, - options, - ); + const queryOptions = getListCatalogsQueryOptions(options); const query = useQuery(queryOptions, queryClient) as UseQueryResult< TData, @@ -7347,56 +7402,47 @@ export function useListAllTableFqns< return query; } -export const getListAllTableFqnsSuspenseQueryOptions = < - TData = Awaited>, +export const getListCatalogsSuspenseQueryOptions = < + TData = Awaited>, TError = AxiosError, ->( - catalog: string, - schema: string, - options?: { - query?: Partial< - UseSuspenseQueryOptions< - Awaited>, - TError, - TData - > - >; - axios?: AxiosRequestConfig; - }, -) => { +>(options?: { + query?: Partial< + UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > + >; + axios?: AxiosRequestConfig; +}) => { const { query: queryOptions, axios: axiosOptions } = options ?? {}; - const queryKey = - queryOptions?.queryKey ?? getListAllTableFqnsQueryKey(catalog, schema); + const queryKey = queryOptions?.queryKey ?? 
getListCatalogsQueryKey(); - const queryFn: QueryFunction< - Awaited> - > = ({ signal }) => - listAllTableFqns(catalog, schema, { signal, ...axiosOptions }); + const queryFn: QueryFunction>> = ({ + signal, + }) => listCatalogs({ signal, ...axiosOptions }); return { queryKey, queryFn, ...queryOptions } as UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > & { queryKey: DataTag }; }; -export type ListAllTableFqnsSuspenseQueryResult = NonNullable< - Awaited> +export type ListCatalogsSuspenseQueryResult = NonNullable< + Awaited> >; -export type ListAllTableFqnsSuspenseQueryError = - AxiosError; +export type ListCatalogsSuspenseQueryError = AxiosError; -export function useListAllTableFqnsSuspense< - TData = Awaited>, +export function useListCatalogsSuspense< + TData = Awaited>, TError = AxiosError, >( - catalog: string, - schema: string, options: { query: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -7407,16 +7453,14 @@ export function useListAllTableFqnsSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; }; -export function useListAllTableFqnsSuspense< - TData = Awaited>, +export function useListCatalogsSuspense< + TData = Awaited>, TError = AxiosError, >( - catalog: string, - schema: string, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -7427,16 +7471,14 @@ export function useListAllTableFqnsSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; }; -export function useListAllTableFqnsSuspense< - TData = Awaited>, +export function useListCatalogsSuspense< + TData = Awaited>, TError = AxiosError, >( - catalog: string, - schema: string, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -7448,19 +7490,17 @@ export function useListAllTableFqnsSuspense< queryKey: DataTag; }; /** - * @summary List All Table Fqns + * @summary List Catalogs */ -export function useListAllTableFqnsSuspense< - TData = Awaited>, +export function 
useListCatalogsSuspense< + TData = Awaited>, TError = AxiosError, >( - catalog: string, - schema: string, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -7471,11 +7511,7 @@ export function useListAllTableFqnsSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; } { - const queryOptions = getListAllTableFqnsSuspenseQueryOptions( - catalog, - schema, - options, - ); + const queryOptions = getListCatalogsSuspenseQueryOptions(options); const query = useSuspenseQuery( queryOptions, @@ -7490,95 +7526,73 @@ export function useListAllTableFqnsSuspense< } /** - * @summary Get Table Columns + * @summary List Schemas */ -export const getTableColumns = ( +export const listSchemas = ( catalog: string, - schema: string, - table: string, options?: AxiosRequestConfig, -): Promise> => { +): Promise> => { return axios.default.get( - `/api/v1/discovery/catalogs/${catalog}/schemas/${schema}/tables/${table}/columns`, + `/api/v1/discovery/catalogs/${catalog}/schemas`, options, ); }; -export const getGetTableColumnsQueryKey = ( - catalog?: string, - schema?: string, - table?: string, -) => { - return [ - `/api/v1/discovery/catalogs/${catalog}/schemas/${schema}/tables/${table}/columns`, - ] as const; +export const getListSchemasQueryKey = (catalog?: string) => { + return [`/api/v1/discovery/catalogs/${catalog}/schemas`] as const; }; -export const getGetTableColumnsQueryOptions = < - TData = Awaited>, +export const getListSchemasQueryOptions = < + TData = Awaited>, TError = AxiosError, >( catalog: string, - schema: string, - table: string, options?: { query?: Partial< - UseQueryOptions< - Awaited>, - TError, - TData - > + UseQueryOptions>, TError, TData> >; axios?: AxiosRequestConfig; }, ) => { const { query: queryOptions, axios: axiosOptions } = options ?? {}; - const queryKey = - queryOptions?.queryKey ?? - getGetTableColumnsQueryKey(catalog, schema, table); + const queryKey = queryOptions?.queryKey ?? 
getListSchemasQueryKey(catalog); - const queryFn: QueryFunction>> = ({ + const queryFn: QueryFunction>> = ({ signal, - }) => getTableColumns(catalog, schema, table, { signal, ...axiosOptions }); + }) => listSchemas(catalog, { signal, ...axiosOptions }); return { queryKey, queryFn, - enabled: !!(catalog && schema && table), + enabled: !!catalog, ...queryOptions, } as UseQueryOptions< - Awaited>, + Awaited>, TError, TData > & { queryKey: DataTag }; }; -export type GetTableColumnsQueryResult = NonNullable< - Awaited> +export type ListSchemasQueryResult = NonNullable< + Awaited> >; -export type GetTableColumnsQueryError = AxiosError; +export type ListSchemasQueryError = AxiosError; -export function useGetTableColumns< - TData = Awaited>, +export function useListSchemas< + TData = Awaited>, TError = AxiosError, >( catalog: string, - schema: string, - table: string, options: { query: Partial< - UseQueryOptions< - Awaited>, - TError, - TData - > + UseQueryOptions>, TError, TData> > & Pick< DefinedInitialDataOptions< - Awaited>, + Awaited>, TError, - Awaited> + Awaited> >, "initialData" >; @@ -7588,26 +7602,20 @@ export function useGetTableColumns< ): DefinedUseQueryResult & { queryKey: DataTag; }; -export function useGetTableColumns< - TData = Awaited>, +export function useListSchemas< + TData = Awaited>, TError = AxiosError, >( catalog: string, - schema: string, - table: string, options?: { query?: Partial< - UseQueryOptions< - Awaited>, - TError, - TData - > + UseQueryOptions>, TError, TData> > & Pick< UndefinedInitialDataOptions< - Awaited>, + Awaited>, TError, - Awaited> + Awaited> >, "initialData" >; @@ -7617,20 +7625,14 @@ export function useGetTableColumns< ): UseQueryResult & { queryKey: DataTag; }; -export function useGetTableColumns< - TData = Awaited>, +export function useListSchemas< + TData = Awaited>, TError = AxiosError, >( catalog: string, - schema: string, - table: string, options?: { query?: Partial< - UseQueryOptions< - Awaited>, - TError, - TData - > + 
UseQueryOptions>, TError, TData> >; axios?: AxiosRequestConfig; }, @@ -7639,23 +7641,17 @@ export function useGetTableColumns< queryKey: DataTag; }; /** - * @summary Get Table Columns + * @summary List Schemas */ -export function useGetTableColumns< - TData = Awaited>, +export function useListSchemas< + TData = Awaited>, TError = AxiosError, >( catalog: string, - schema: string, - table: string, options?: { query?: Partial< - UseQueryOptions< - Awaited>, - TError, - TData - > + UseQueryOptions>, TError, TData> >; axios?: AxiosRequestConfig; }, @@ -7663,12 +7659,7 @@ export function useGetTableColumns< ): UseQueryResult & { queryKey: DataTag; } { - const queryOptions = getGetTableColumnsQueryOptions( - catalog, - schema, - table, - options, - ); + const queryOptions = getListSchemasQueryOptions(catalog, options); const query = useQuery(queryOptions, queryClient) as UseQueryResult< TData, @@ -7680,17 +7671,15 @@ export function useGetTableColumns< return query; } -export const getGetTableColumnsSuspenseQueryOptions = < - TData = Awaited>, +export const getListSchemasSuspenseQueryOptions = < + TData = Awaited>, TError = AxiosError, >( catalog: string, - schema: string, - table: string, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -7700,37 +7689,33 @@ export const getGetTableColumnsSuspenseQueryOptions = < ) => { const { query: queryOptions, axios: axiosOptions } = options ?? {}; - const queryKey = - queryOptions?.queryKey ?? - getGetTableColumnsQueryKey(catalog, schema, table); + const queryKey = queryOptions?.queryKey ?? 
getListSchemasQueryKey(catalog); - const queryFn: QueryFunction>> = ({ + const queryFn: QueryFunction>> = ({ signal, - }) => getTableColumns(catalog, schema, table, { signal, ...axiosOptions }); + }) => listSchemas(catalog, { signal, ...axiosOptions }); return { queryKey, queryFn, ...queryOptions } as UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > & { queryKey: DataTag }; }; -export type GetTableColumnsSuspenseQueryResult = NonNullable< - Awaited> +export type ListSchemasSuspenseQueryResult = NonNullable< + Awaited> >; -export type GetTableColumnsSuspenseQueryError = AxiosError; +export type ListSchemasSuspenseQueryError = AxiosError; -export function useGetTableColumnsSuspense< - TData = Awaited>, +export function useListSchemasSuspense< + TData = Awaited>, TError = AxiosError, >( catalog: string, - schema: string, - table: string, options: { query: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -7741,17 +7726,15 @@ export function useGetTableColumnsSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; }; -export function useGetTableColumnsSuspense< - TData = Awaited>, +export function useListSchemasSuspense< + TData = Awaited>, TError = AxiosError, >( catalog: string, - schema: string, - table: string, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -7762,17 +7745,15 @@ export function useGetTableColumnsSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; }; -export function useGetTableColumnsSuspense< - TData = Awaited>, +export function useListSchemasSuspense< + TData = Awaited>, TError = AxiosError, >( catalog: string, - schema: string, - table: string, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -7784,20 +7765,18 @@ export function useGetTableColumnsSuspense< queryKey: DataTag; }; /** - * @summary Get Table Columns + * @summary List Schemas */ -export function useGetTableColumnsSuspense< - TData = 
Awaited>, +export function useListSchemasSuspense< + TData = Awaited>, TError = AxiosError, >( catalog: string, - schema: string, - table: string, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -7808,12 +7787,7 @@ export function useGetTableColumnsSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; } { - const queryOptions = getGetTableColumnsSuspenseQueryOptions( - catalog, - schema, - table, - options, - ); + const queryOptions = getListSchemasSuspenseQueryOptions(catalog, options); const query = useSuspenseQuery( queryOptions, @@ -7828,41 +7802,34 @@ export function useGetTableColumnsSuspense< } /** - * Get Unity Catalog tags for a table and its columns. - * @summary Get Table Tags + * @summary List Tables */ -export const getTableTags = ( +export const listTables = ( catalog: string, schema: string, - table: string, options?: AxiosRequestConfig, -): Promise> => { +): Promise> => { return axios.default.get( - `/api/v1/discovery/catalogs/${catalog}/schemas/${schema}/tables/${table}/tags`, + `/api/v1/discovery/catalogs/${catalog}/schemas/${schema}/tables`, options, ); }; -export const getGetTableTagsQueryKey = ( - catalog?: string, - schema?: string, - table?: string, -) => { +export const getListTablesQueryKey = (catalog?: string, schema?: string) => { return [ - `/api/v1/discovery/catalogs/${catalog}/schemas/${schema}/tables/${table}/tags`, + `/api/v1/discovery/catalogs/${catalog}/schemas/${schema}/tables`, ] as const; }; -export const getGetTableTagsQueryOptions = < - TData = Awaited>, +export const getListTablesQueryOptions = < + TData = Awaited>, TError = AxiosError, >( catalog: string, schema: string, - table: string, options?: { query?: Partial< - UseQueryOptions>, TError, TData> + UseQueryOptions>, TError, TData> >; axios?: AxiosRequestConfig; }, @@ -7870,45 +7837,44 @@ export const getGetTableTagsQueryOptions = < const { query: queryOptions, axios: axiosOptions } = options ?? 
{}; const queryKey = - queryOptions?.queryKey ?? getGetTableTagsQueryKey(catalog, schema, table); + queryOptions?.queryKey ?? getListTablesQueryKey(catalog, schema); - const queryFn: QueryFunction>> = ({ + const queryFn: QueryFunction>> = ({ signal, - }) => getTableTags(catalog, schema, table, { signal, ...axiosOptions }); + }) => listTables(catalog, schema, { signal, ...axiosOptions }); return { queryKey, queryFn, - enabled: !!(catalog && schema && table), + enabled: !!(catalog && schema), ...queryOptions, } as UseQueryOptions< - Awaited>, + Awaited>, TError, TData > & { queryKey: DataTag }; }; -export type GetTableTagsQueryResult = NonNullable< - Awaited> +export type ListTablesQueryResult = NonNullable< + Awaited> >; -export type GetTableTagsQueryError = AxiosError; +export type ListTablesQueryError = AxiosError; -export function useGetTableTags< - TData = Awaited>, +export function useListTables< + TData = Awaited>, TError = AxiosError, >( catalog: string, schema: string, - table: string, options: { query: Partial< - UseQueryOptions>, TError, TData> + UseQueryOptions>, TError, TData> > & Pick< DefinedInitialDataOptions< - Awaited>, + Awaited>, TError, - Awaited> + Awaited> >, "initialData" >; @@ -7918,22 +7884,21 @@ export function useGetTableTags< ): DefinedUseQueryResult & { queryKey: DataTag; }; -export function useGetTableTags< - TData = Awaited>, +export function useListTables< + TData = Awaited>, TError = AxiosError, >( catalog: string, schema: string, - table: string, options?: { query?: Partial< - UseQueryOptions>, TError, TData> + UseQueryOptions>, TError, TData> > & Pick< UndefinedInitialDataOptions< - Awaited>, + Awaited>, TError, - Awaited> + Awaited> >, "initialData" >; @@ -7943,16 +7908,15 @@ export function useGetTableTags< ): UseQueryResult & { queryKey: DataTag; }; -export function useGetTableTags< - TData = Awaited>, +export function useListTables< + TData = Awaited>, TError = AxiosError, >( catalog: string, schema: string, - table: string, 
options?: { query?: Partial< - UseQueryOptions>, TError, TData> + UseQueryOptions>, TError, TData> >; axios?: AxiosRequestConfig; }, @@ -7961,19 +7925,18 @@ export function useGetTableTags< queryKey: DataTag; }; /** - * @summary Get Table Tags + * @summary List Tables */ -export function useGetTableTags< - TData = Awaited>, +export function useListTables< + TData = Awaited>, TError = AxiosError, >( catalog: string, schema: string, - table: string, options?: { query?: Partial< - UseQueryOptions>, TError, TData> + UseQueryOptions>, TError, TData> >; axios?: AxiosRequestConfig; }, @@ -7981,12 +7944,7 @@ export function useGetTableTags< ): UseQueryResult & { queryKey: DataTag; } { - const queryOptions = getGetTableTagsQueryOptions( - catalog, - schema, - table, - options, - ); + const queryOptions = getListTablesQueryOptions(catalog, schema, options); const query = useQuery(queryOptions, queryClient) as UseQueryResult< TData, @@ -7998,17 +7956,16 @@ export function useGetTableTags< return query; } -export const getGetTableTagsSuspenseQueryOptions = < - TData = Awaited>, +export const getListTablesSuspenseQueryOptions = < + TData = Awaited>, TError = AxiosError, >( catalog: string, schema: string, - table: string, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -8019,35 +7976,34 @@ export const getGetTableTagsSuspenseQueryOptions = < const { query: queryOptions, axios: axiosOptions } = options ?? {}; const queryKey = - queryOptions?.queryKey ?? getGetTableTagsQueryKey(catalog, schema, table); + queryOptions?.queryKey ?? 
getListTablesQueryKey(catalog, schema); - const queryFn: QueryFunction>> = ({ + const queryFn: QueryFunction>> = ({ signal, - }) => getTableTags(catalog, schema, table, { signal, ...axiosOptions }); + }) => listTables(catalog, schema, { signal, ...axiosOptions }); return { queryKey, queryFn, ...queryOptions } as UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > & { queryKey: DataTag }; }; -export type GetTableTagsSuspenseQueryResult = NonNullable< - Awaited> +export type ListTablesSuspenseQueryResult = NonNullable< + Awaited> >; -export type GetTableTagsSuspenseQueryError = AxiosError; +export type ListTablesSuspenseQueryError = AxiosError; -export function useGetTableTagsSuspense< - TData = Awaited>, +export function useListTablesSuspense< + TData = Awaited>, TError = AxiosError, >( catalog: string, schema: string, - table: string, options: { query: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -8058,17 +8014,16 @@ export function useGetTableTagsSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; }; -export function useGetTableTagsSuspense< - TData = Awaited>, +export function useListTablesSuspense< + TData = Awaited>, TError = AxiosError, >( catalog: string, schema: string, - table: string, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -8079,17 +8034,16 @@ export function useGetTableTagsSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; }; -export function useGetTableTagsSuspense< - TData = Awaited>, +export function useListTablesSuspense< + TData = Awaited>, TError = AxiosError, >( catalog: string, schema: string, - table: string, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -8101,20 +8055,19 @@ export function useGetTableTagsSuspense< queryKey: DataTag; }; /** - * @summary Get Table Tags + * @summary List Tables */ -export function useGetTableTagsSuspense< - TData = Awaited>, +export function 
useListTablesSuspense< + TData = Awaited>, TError = AxiosError, >( catalog: string, schema: string, - table: string, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -8125,10 +8078,9 @@ export function useGetTableTagsSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; } { - const queryOptions = getGetTableTagsSuspenseQueryOptions( + const queryOptions = getListTablesSuspenseQueryOptions( catalog, schema, - table, options, ); @@ -8145,268 +8097,120 @@ export function useGetTableTagsSuspense< } /** - * Return only the tables that contain ALL of the required columns. - * @summary Filter Tables By Columns + * Return fully qualified names for all tables in a schema (for batch profiling). + * @summary List All Table Fqns */ -export const filterTablesByColumns = ( - filterTablesByColumnsIn: FilterTablesByColumnsIn, +export const listAllTableFqns = ( + catalog: string, + schema: string, options?: AxiosRequestConfig, -): Promise> => { - return axios.default.post( - `/api/v1/discovery/filter-tables-by-columns`, - filterTablesByColumnsIn, +): Promise> => { + return axios.default.get( + `/api/v1/discovery/catalogs/${catalog}/schemas/${schema}/all-table-fqns`, options, ); }; -export const getFilterTablesByColumnsMutationOptions = < - TError = AxiosError, - TContext = unknown, ->(options?: { - mutation?: UseMutationOptions< - Awaited>, - TError, - { data: FilterTablesByColumnsIn }, - TContext - >; - axios?: AxiosRequestConfig; -}): UseMutationOptions< - Awaited>, - TError, - { data: FilterTablesByColumnsIn }, - TContext -> => { - const mutationKey = ["filterTablesByColumns"]; - const { mutation: mutationOptions, axios: axiosOptions } = options - ? options.mutation && - "mutationKey" in options.mutation && - options.mutation.mutationKey - ? 
options - : { ...options, mutation: { ...options.mutation, mutationKey } } - : { mutation: { mutationKey }, axios: undefined }; - - const mutationFn: MutationFunction< - Awaited>, - { data: FilterTablesByColumnsIn } - > = (props) => { - const { data } = props ?? {}; - - return filterTablesByColumns(data, axiosOptions); - }; - - return { mutationFn, ...mutationOptions }; +export const getListAllTableFqnsQueryKey = ( + catalog?: string, + schema?: string, +) => { + return [ + `/api/v1/discovery/catalogs/${catalog}/schemas/${schema}/all-table-fqns`, + ] as const; }; -export type FilterTablesByColumnsMutationResult = NonNullable< - Awaited> ->; -export type FilterTablesByColumnsMutationBody = FilterTablesByColumnsIn; -export type FilterTablesByColumnsMutationError = - AxiosError; - -/** - * @summary Filter Tables By Columns - */ -export const useFilterTablesByColumns = < +export const getListAllTableFqnsQueryOptions = < + TData = Awaited>, TError = AxiosError, - TContext = unknown, >( + catalog: string, + schema: string, options?: { - mutation?: UseMutationOptions< - Awaited>, - TError, - { data: FilterTablesByColumnsIn }, - TContext + query?: Partial< + UseQueryOptions< + Awaited>, + TError, + TData + > >; axios?: AxiosRequestConfig; }, - queryClient?: QueryClient, -): UseMutationResult< - Awaited>, - TError, - { data: FilterTablesByColumnsIn }, - TContext -> => { - const mutationOptions = getFilterTablesByColumnsMutationOptions(options); +) => { + const { query: queryOptions, axios: axiosOptions } = options ?? {}; - return useMutation(mutationOptions, queryClient); -}; + const queryKey = + queryOptions?.queryKey ?? getListAllTableFqnsQueryKey(catalog, schema); -/** - * Generate data quality checks from natural language using AI-assisted generation. 
- * @summary Ai Generate Checks - */ -export const aiAssistedChecksGeneration = ( - generateChecksIn: GenerateChecksIn, - options?: AxiosRequestConfig, -): Promise> => { - return axios.default.post( - `/api/v1/ai/generate-checks`, - generateChecksIn, - options, - ); -}; + const queryFn: QueryFunction< + Awaited> + > = ({ signal }) => + listAllTableFqns(catalog, schema, { signal, ...axiosOptions }); -export const getAiAssistedChecksGenerationMutationOptions = < - TError = AxiosError, - TContext = unknown, ->(options?: { - mutation?: UseMutationOptions< - Awaited>, + return { + queryKey, + queryFn, + enabled: !!(catalog && schema), + ...queryOptions, + } as UseQueryOptions< + Awaited>, TError, - { data: GenerateChecksIn }, - TContext - >; - axios?: AxiosRequestConfig; -}): UseMutationOptions< - Awaited>, - TError, - { data: GenerateChecksIn }, - TContext -> => { - const mutationKey = ["aiAssistedChecksGeneration"]; - const { mutation: mutationOptions, axios: axiosOptions } = options - ? options.mutation && - "mutationKey" in options.mutation && - options.mutation.mutationKey - ? options - : { ...options, mutation: { ...options.mutation, mutationKey } } - : { mutation: { mutationKey }, axios: undefined }; - - const mutationFn: MutationFunction< - Awaited>, - { data: GenerateChecksIn } - > = (props) => { - const { data } = props ?? 
{}; - - return aiAssistedChecksGeneration(data, axiosOptions); - }; - - return { mutationFn, ...mutationOptions }; + TData + > & { queryKey: DataTag }; }; -export type AiAssistedChecksGenerationMutationResult = NonNullable< - Awaited> +export type ListAllTableFqnsQueryResult = NonNullable< + Awaited> >; -export type AiAssistedChecksGenerationMutationBody = GenerateChecksIn; -export type AiAssistedChecksGenerationMutationError = - AxiosError; +export type ListAllTableFqnsQueryError = AxiosError; -/** - * @summary Ai Generate Checks - */ -export const useAiAssistedChecksGeneration = < +export function useListAllTableFqns< + TData = Awaited>, TError = AxiosError, - TContext = unknown, >( - options?: { - mutation?: UseMutationOptions< - Awaited>, - TError, - { data: GenerateChecksIn }, - TContext - >; + catalog: string, + schema: string, + options: { + query: Partial< + UseQueryOptions< + Awaited>, + TError, + TData + > + > & + Pick< + DefinedInitialDataOptions< + Awaited>, + TError, + Awaited> + >, + "initialData" + >; axios?: AxiosRequestConfig; }, queryClient?: QueryClient, -): UseMutationResult< - Awaited>, - TError, - { data: GenerateChecksIn }, - TContext -> => { - const mutationOptions = getAiAssistedChecksGenerationMutationOptions(options); - - return useMutation(mutationOptions, queryClient); -}; - -/** - * List rules filtered to catalogs the current user can access. - * @summary List Rules - */ -export const listRules = ( - params?: ListRulesParams, - options?: AxiosRequestConfig, -): Promise> => { - return axios.default.get(`/api/v1/rules`, { - ...options, - params: { ...params, ...options?.params }, - }); -}; - -export const getListRulesQueryKey = (params?: ListRulesParams) => { - return [`/api/v1/rules`, ...(params ? 
[params] : [])] as const; +): DefinedUseQueryResult & { + queryKey: DataTag; }; - -export const getListRulesQueryOptions = < - TData = Awaited>, +export function useListAllTableFqns< + TData = Awaited>, TError = AxiosError, >( - params?: ListRulesParams, + catalog: string, + schema: string, options?: { query?: Partial< - UseQueryOptions>, TError, TData> - >; - axios?: AxiosRequestConfig; - }, -) => { - const { query: queryOptions, axios: axiosOptions } = options ?? {}; - - const queryKey = queryOptions?.queryKey ?? getListRulesQueryKey(params); - - const queryFn: QueryFunction>> = ({ - signal, - }) => listRules(params, { signal, ...axiosOptions }); - - return { queryKey, queryFn, ...queryOptions } as UseQueryOptions< - Awaited>, - TError, - TData - > & { queryKey: DataTag }; -}; - -export type ListRulesQueryResult = NonNullable< - Awaited> ->; -export type ListRulesQueryError = AxiosError; - -export function useListRules< - TData = Awaited>, - TError = AxiosError, ->( - params: undefined | ListRulesParams, - options: { - query: Partial< - UseQueryOptions>, TError, TData> - > & - Pick< - DefinedInitialDataOptions< - Awaited>, - TError, - Awaited> - >, - "initialData" - >; - axios?: AxiosRequestConfig; - }, - queryClient?: QueryClient, -): DefinedUseQueryResult & { - queryKey: DataTag; -}; -export function useListRules< - TData = Awaited>, - TError = AxiosError, ->( - params?: ListRulesParams, - options?: { - query?: Partial< - UseQueryOptions>, TError, TData> + UseQueryOptions< + Awaited>, + TError, + TData + > > & Pick< UndefinedInitialDataOptions< - Awaited>, + Awaited>, TError, - Awaited> + Awaited> >, "initialData" >; @@ -8416,14 +8220,19 @@ export function useListRules< ): UseQueryResult & { queryKey: DataTag; }; -export function useListRules< - TData = Awaited>, +export function useListAllTableFqns< + TData = Awaited>, TError = AxiosError, >( - params?: ListRulesParams, + catalog: string, + schema: string, options?: { query?: Partial< - UseQueryOptions>, 
TError, TData> + UseQueryOptions< + Awaited>, + TError, + TData + > >; axios?: AxiosRequestConfig; }, @@ -8432,17 +8241,22 @@ export function useListRules< queryKey: DataTag; }; /** - * @summary List Rules + * @summary List All Table Fqns */ -export function useListRules< - TData = Awaited>, +export function useListAllTableFqns< + TData = Awaited>, TError = AxiosError, >( - params?: ListRulesParams, + catalog: string, + schema: string, options?: { query?: Partial< - UseQueryOptions>, TError, TData> + UseQueryOptions< + Awaited>, + TError, + TData + > >; axios?: AxiosRequestConfig; }, @@ -8450,7 +8264,11 @@ export function useListRules< ): UseQueryResult & { queryKey: DataTag; } { - const queryOptions = getListRulesQueryOptions(params, options); + const queryOptions = getListAllTableFqnsQueryOptions( + catalog, + schema, + options, + ); const query = useQuery(queryOptions, queryClient) as UseQueryResult< TData, @@ -8462,15 +8280,16 @@ export function useListRules< return query; } -export const getListRulesSuspenseQueryOptions = < - TData = Awaited>, +export const getListAllTableFqnsSuspenseQueryOptions = < + TData = Awaited>, TError = AxiosError, >( - params?: ListRulesParams, + catalog: string, + schema: string, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -8480,33 +8299,37 @@ export const getListRulesSuspenseQueryOptions = < ) => { const { query: queryOptions, axios: axiosOptions } = options ?? {}; - const queryKey = queryOptions?.queryKey ?? getListRulesQueryKey(params); + const queryKey = + queryOptions?.queryKey ?? 
getListAllTableFqnsQueryKey(catalog, schema); - const queryFn: QueryFunction>> = ({ - signal, - }) => listRules(params, { signal, ...axiosOptions }); + const queryFn: QueryFunction< + Awaited> + > = ({ signal }) => + listAllTableFqns(catalog, schema, { signal, ...axiosOptions }); return { queryKey, queryFn, ...queryOptions } as UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > & { queryKey: DataTag }; }; -export type ListRulesSuspenseQueryResult = NonNullable< - Awaited> +export type ListAllTableFqnsSuspenseQueryResult = NonNullable< + Awaited> >; -export type ListRulesSuspenseQueryError = AxiosError; +export type ListAllTableFqnsSuspenseQueryError = + AxiosError; -export function useListRulesSuspense< - TData = Awaited>, +export function useListAllTableFqnsSuspense< + TData = Awaited>, TError = AxiosError, >( - params: undefined | ListRulesParams, + catalog: string, + schema: string, options: { query: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -8517,15 +8340,16 @@ export function useListRulesSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; }; -export function useListRulesSuspense< - TData = Awaited>, +export function useListAllTableFqnsSuspense< + TData = Awaited>, TError = AxiosError, >( - params?: ListRulesParams, + catalog: string, + schema: string, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -8536,15 +8360,16 @@ export function useListRulesSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; }; -export function useListRulesSuspense< - TData = Awaited>, +export function useListAllTableFqnsSuspense< + TData = Awaited>, TError = AxiosError, >( - params?: ListRulesParams, + catalog: string, + schema: string, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -8556,18 +8381,19 @@ export function useListRulesSuspense< queryKey: DataTag; }; /** - * @summary List Rules + * @summary List All Table Fqns */ 
-export function useListRulesSuspense< - TData = Awaited>, +export function useListAllTableFqnsSuspense< + TData = Awaited>, TError = AxiosError, >( - params?: ListRulesParams, + catalog: string, + schema: string, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -8578,7 +8404,11 @@ export function useListRulesSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; } { - const queryOptions = getListRulesSuspenseQueryOptions(params, options); + const queryOptions = getListAllTableFqnsSuspenseQueryOptions( + catalog, + schema, + options, + ); const query = useSuspenseQuery( queryOptions, @@ -8593,158 +8423,95 @@ export function useListRulesSuspense< } /** - * Save rules. Each check becomes an individual rule row. - -If ``rule_id`` is set, this updates an existing rule. Authors can only -update rules they themselves authored — otherwise they could silently -overwrite the contents of another user's rule (and then chain it with -submit/delete, which our other gates would reject only by accident). -Admins and approvers can update any rule. 
- * @summary Save Rules + * @summary Get Table Columns */ -export const saveRules = ( - saveRulesIn: SaveRulesIn, +export const getTableColumns = ( + catalog: string, + schema: string, + table: string, options?: AxiosRequestConfig, -): Promise> => { - return axios.default.post(`/api/v1/rules`, saveRulesIn, options); +): Promise> => { + return axios.default.get( + `/api/v1/discovery/catalogs/${catalog}/schemas/${schema}/tables/${table}/columns`, + options, + ); }; -export const getSaveRulesMutationOptions = < - TError = AxiosError, - TContext = unknown, ->(options?: { - mutation?: UseMutationOptions< - Awaited>, - TError, - { data: SaveRulesIn }, - TContext - >; - axios?: AxiosRequestConfig; -}): UseMutationOptions< - Awaited>, - TError, - { data: SaveRulesIn }, - TContext -> => { - const mutationKey = ["saveRules"]; - const { mutation: mutationOptions, axios: axiosOptions } = options - ? options.mutation && - "mutationKey" in options.mutation && - options.mutation.mutationKey - ? options - : { ...options, mutation: { ...options.mutation, mutationKey } } - : { mutation: { mutationKey }, axios: undefined }; - - const mutationFn: MutationFunction< - Awaited>, - { data: SaveRulesIn } - > = (props) => { - const { data } = props ?? 
{}; - - return saveRules(data, axiosOptions); - }; - - return { mutationFn, ...mutationOptions }; +export const getGetTableColumnsQueryKey = ( + catalog?: string, + schema?: string, + table?: string, +) => { + return [ + `/api/v1/discovery/catalogs/${catalog}/schemas/${schema}/tables/${table}/columns`, + ] as const; }; -export type SaveRulesMutationResult = NonNullable< - Awaited> ->; -export type SaveRulesMutationBody = SaveRulesIn; -export type SaveRulesMutationError = AxiosError; - -/** - * @summary Save Rules - */ -export const useSaveRules = < +export const getGetTableColumnsQueryOptions = < + TData = Awaited>, TError = AxiosError, - TContext = unknown, >( - options?: { - mutation?: UseMutationOptions< - Awaited>, - TError, - { data: SaveRulesIn }, - TContext - >; - axios?: AxiosRequestConfig; - }, - queryClient?: QueryClient, -): UseMutationResult< - Awaited>, - TError, - { data: SaveRulesIn }, - TContext -> => { - const mutationOptions = getSaveRulesMutationOptions(options); - - return useMutation(mutationOptions, queryClient); -}; - -/** - * Get all individual rules for a specific table. - * @summary Get Rules - */ -export const getRules = ( - tableFqn: string, - options?: AxiosRequestConfig, -): Promise> => { - return axios.default.get(`/api/v1/rules/${tableFqn}`, options); -}; - -export const getGetRulesQueryKey = (tableFqn?: string) => { - return [`/api/v1/rules/${tableFqn}`] as const; -}; - -export const getGetRulesQueryOptions = < - TData = Awaited>, - TError = AxiosError, ->( - tableFqn: string, + catalog: string, + schema: string, + table: string, options?: { query?: Partial< - UseQueryOptions>, TError, TData> + UseQueryOptions< + Awaited>, + TError, + TData + > >; axios?: AxiosRequestConfig; }, ) => { const { query: queryOptions, axios: axiosOptions } = options ?? {}; - const queryKey = queryOptions?.queryKey ?? getGetRulesQueryKey(tableFqn); + const queryKey = + queryOptions?.queryKey ?? 
+ getGetTableColumnsQueryKey(catalog, schema, table); - const queryFn: QueryFunction>> = ({ + const queryFn: QueryFunction>> = ({ signal, - }) => getRules(tableFqn, { signal, ...axiosOptions }); + }) => getTableColumns(catalog, schema, table, { signal, ...axiosOptions }); return { queryKey, queryFn, - enabled: !!tableFqn, + enabled: !!(catalog && schema && table), ...queryOptions, - } as UseQueryOptions>, TError, TData> & { - queryKey: DataTag; - }; + } as UseQueryOptions< + Awaited>, + TError, + TData + > & { queryKey: DataTag }; }; -export type GetRulesQueryResult = NonNullable< - Awaited> +export type GetTableColumnsQueryResult = NonNullable< + Awaited> >; -export type GetRulesQueryError = AxiosError; +export type GetTableColumnsQueryError = AxiosError; -export function useGetRules< - TData = Awaited>, +export function useGetTableColumns< + TData = Awaited>, TError = AxiosError, >( - tableFqn: string, + catalog: string, + schema: string, + table: string, options: { query: Partial< - UseQueryOptions>, TError, TData> + UseQueryOptions< + Awaited>, + TError, + TData + > > & Pick< DefinedInitialDataOptions< - Awaited>, + Awaited>, TError, - Awaited> + Awaited> >, "initialData" >; @@ -8754,20 +8521,26 @@ export function useGetRules< ): DefinedUseQueryResult & { queryKey: DataTag; }; -export function useGetRules< - TData = Awaited>, +export function useGetTableColumns< + TData = Awaited>, TError = AxiosError, >( - tableFqn: string, + catalog: string, + schema: string, + table: string, options?: { query?: Partial< - UseQueryOptions>, TError, TData> + UseQueryOptions< + Awaited>, + TError, + TData + > > & Pick< UndefinedInitialDataOptions< - Awaited>, + Awaited>, TError, - Awaited> + Awaited> >, "initialData" >; @@ -8777,14 +8550,20 @@ export function useGetRules< ): UseQueryResult & { queryKey: DataTag; }; -export function useGetRules< - TData = Awaited>, +export function useGetTableColumns< + TData = Awaited>, TError = AxiosError, >( - tableFqn: string, + catalog: 
string, + schema: string, + table: string, options?: { query?: Partial< - UseQueryOptions>, TError, TData> + UseQueryOptions< + Awaited>, + TError, + TData + > >; axios?: AxiosRequestConfig; }, @@ -8793,17 +8572,23 @@ export function useGetRules< queryKey: DataTag; }; /** - * @summary Get Rules + * @summary Get Table Columns */ -export function useGetRules< - TData = Awaited>, +export function useGetTableColumns< + TData = Awaited>, TError = AxiosError, >( - tableFqn: string, + catalog: string, + schema: string, + table: string, options?: { query?: Partial< - UseQueryOptions>, TError, TData> + UseQueryOptions< + Awaited>, + TError, + TData + > >; axios?: AxiosRequestConfig; }, @@ -8811,7 +8596,12 @@ export function useGetRules< ): UseQueryResult & { queryKey: DataTag; } { - const queryOptions = getGetRulesQueryOptions(tableFqn, options); + const queryOptions = getGetTableColumnsQueryOptions( + catalog, + schema, + table, + options, + ); const query = useQuery(queryOptions, queryClient) as UseQueryResult< TData, @@ -8823,15 +8613,17 @@ export function useGetRules< return query; } -export const getGetRulesSuspenseQueryOptions = < - TData = Awaited>, +export const getGetTableColumnsSuspenseQueryOptions = < + TData = Awaited>, TError = AxiosError, >( - tableFqn: string, + catalog: string, + schema: string, + table: string, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -8841,33 +8633,37 @@ export const getGetRulesSuspenseQueryOptions = < ) => { const { query: queryOptions, axios: axiosOptions } = options ?? {}; - const queryKey = queryOptions?.queryKey ?? getGetRulesQueryKey(tableFqn); + const queryKey = + queryOptions?.queryKey ?? 
+ getGetTableColumnsQueryKey(catalog, schema, table); - const queryFn: QueryFunction>> = ({ + const queryFn: QueryFunction>> = ({ signal, - }) => getRules(tableFqn, { signal, ...axiosOptions }); + }) => getTableColumns(catalog, schema, table, { signal, ...axiosOptions }); return { queryKey, queryFn, ...queryOptions } as UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > & { queryKey: DataTag }; }; -export type GetRulesSuspenseQueryResult = NonNullable< - Awaited> +export type GetTableColumnsSuspenseQueryResult = NonNullable< + Awaited> >; -export type GetRulesSuspenseQueryError = AxiosError; +export type GetTableColumnsSuspenseQueryError = AxiosError; -export function useGetRulesSuspense< - TData = Awaited>, +export function useGetTableColumnsSuspense< + TData = Awaited>, TError = AxiosError, >( - tableFqn: string, + catalog: string, + schema: string, + table: string, options: { query: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -8878,15 +8674,17 @@ export function useGetRulesSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; }; -export function useGetRulesSuspense< - TData = Awaited>, +export function useGetTableColumnsSuspense< + TData = Awaited>, TError = AxiosError, >( - tableFqn: string, + catalog: string, + schema: string, + table: string, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -8897,15 +8695,17 @@ export function useGetRulesSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; }; -export function useGetRulesSuspense< - TData = Awaited>, +export function useGetTableColumnsSuspense< + TData = Awaited>, TError = AxiosError, >( - tableFqn: string, + catalog: string, + schema: string, + table: string, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -8917,18 +8717,20 @@ export function useGetRulesSuspense< queryKey: DataTag; }; /** - * @summary Get Rules + * @summary Get Table Columns */ -export 
function useGetRulesSuspense< - TData = Awaited>, +export function useGetTableColumnsSuspense< + TData = Awaited>, TError = AxiosError, >( - tableFqn: string, + catalog: string, + schema: string, + table: string, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -8939,7 +8741,12 @@ export function useGetRulesSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; } { - const queryOptions = getGetRulesSuspenseQueryOptions(tableFqn, options); + const queryOptions = getGetTableColumnsSuspenseQueryOptions( + catalog, + schema, + table, + options, + ); const query = useSuspenseQuery( queryOptions, @@ -8954,207 +8761,355 @@ export function useGetRulesSuspense< } /** - * Save the same set of checks to multiple tables (reusable rules). - * @summary Batch Save Rules + * Get Unity Catalog tags for a table and its columns. + * @summary Get Table Tags */ -export const batchSaveRules = ( - batchSaveRulesIn: BatchSaveRulesIn, +export const getTableTags = ( + catalog: string, + schema: string, + table: string, options?: AxiosRequestConfig, -): Promise> => { - return axios.default.post(`/api/v1/rules/batch`, batchSaveRulesIn, options); +): Promise> => { + return axios.default.get( + `/api/v1/discovery/catalogs/${catalog}/schemas/${schema}/tables/${table}/tags`, + options, + ); }; -export const getBatchSaveRulesMutationOptions = < +export const getGetTableTagsQueryKey = ( + catalog?: string, + schema?: string, + table?: string, +) => { + return [ + `/api/v1/discovery/catalogs/${catalog}/schemas/${schema}/tables/${table}/tags`, + ] as const; +}; + +export const getGetTableTagsQueryOptions = < + TData = Awaited>, TError = AxiosError, - TContext = unknown, ->(options?: { - mutation?: UseMutationOptions< - Awaited>, - TError, - { data: BatchSaveRulesIn }, - TContext - >; - axios?: AxiosRequestConfig; -}): UseMutationOptions< - Awaited>, - TError, - { data: BatchSaveRulesIn }, - TContext -> => { - const mutationKey = ["batchSaveRules"]; 
- const { mutation: mutationOptions, axios: axiosOptions } = options - ? options.mutation && - "mutationKey" in options.mutation && - options.mutation.mutationKey - ? options - : { ...options, mutation: { ...options.mutation, mutationKey } } - : { mutation: { mutationKey }, axios: undefined }; +>( + catalog: string, + schema: string, + table: string, + options?: { + query?: Partial< + UseQueryOptions>, TError, TData> + >; + axios?: AxiosRequestConfig; + }, +) => { + const { query: queryOptions, axios: axiosOptions } = options ?? {}; - const mutationFn: MutationFunction< - Awaited>, - { data: BatchSaveRulesIn } - > = (props) => { - const { data } = props ?? {}; + const queryKey = + queryOptions?.queryKey ?? getGetTableTagsQueryKey(catalog, schema, table); - return batchSaveRules(data, axiosOptions); - }; + const queryFn: QueryFunction>> = ({ + signal, + }) => getTableTags(catalog, schema, table, { signal, ...axiosOptions }); - return { mutationFn, ...mutationOptions }; + return { + queryKey, + queryFn, + enabled: !!(catalog && schema && table), + ...queryOptions, + } as UseQueryOptions< + Awaited>, + TError, + TData + > & { queryKey: DataTag }; }; -export type BatchSaveRulesMutationResult = NonNullable< - Awaited> +export type GetTableTagsQueryResult = NonNullable< + Awaited> >; -export type BatchSaveRulesMutationBody = BatchSaveRulesIn; -export type BatchSaveRulesMutationError = AxiosError; +export type GetTableTagsQueryError = AxiosError; -/** - * @summary Batch Save Rules - */ -export const useBatchSaveRules = < +export function useGetTableTags< + TData = Awaited>, + TError = AxiosError, +>( + catalog: string, + schema: string, + table: string, + options: { + query: Partial< + UseQueryOptions>, TError, TData> + > & + Pick< + DefinedInitialDataOptions< + Awaited>, + TError, + Awaited> + >, + "initialData" + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): DefinedUseQueryResult & { + queryKey: DataTag; +}; +export function useGetTableTags< + 
TData = Awaited>, TError = AxiosError, - TContext = unknown, >( + catalog: string, + schema: string, + table: string, options?: { - mutation?: UseMutationOptions< - Awaited>, - TError, - { data: BatchSaveRulesIn }, - TContext + query?: Partial< + UseQueryOptions>, TError, TData> + > & + Pick< + UndefinedInitialDataOptions< + Awaited>, + TError, + Awaited> + >, + "initialData" + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): UseQueryResult & { + queryKey: DataTag; +}; +export function useGetTableTags< + TData = Awaited>, + TError = AxiosError, +>( + catalog: string, + schema: string, + table: string, + options?: { + query?: Partial< + UseQueryOptions>, TError, TData> >; axios?: AxiosRequestConfig; }, queryClient?: QueryClient, -): UseMutationResult< - Awaited>, - TError, - { data: BatchSaveRulesIn }, - TContext -> => { - const mutationOptions = getBatchSaveRulesMutationOptions(options); - - return useMutation(mutationOptions, queryClient); +): UseQueryResult & { + queryKey: DataTag; }; - /** - * Check if any of the provided checks already exist for the given table. - * @summary Check Duplicates + * @summary Get Table Tags */ -export const checkDuplicates = ( - checkDuplicatesIn: CheckDuplicatesIn, - options?: AxiosRequestConfig, -): Promise> => { - return axios.default.post( - `/api/v1/rules/check-duplicates`, - checkDuplicatesIn, - options, - ); -}; -export const getCheckDuplicatesMutationOptions = < +export function useGetTableTags< + TData = Awaited>, TError = AxiosError, - TContext = unknown, ->(options?: { - mutation?: UseMutationOptions< - Awaited>, - TError, - { data: CheckDuplicatesIn }, - TContext - >; - axios?: AxiosRequestConfig; -}): UseMutationOptions< - Awaited>, - TError, - { data: CheckDuplicatesIn }, - TContext -> => { - const mutationKey = ["checkDuplicates"]; - const { mutation: mutationOptions, axios: axiosOptions } = options - ? options.mutation && - "mutationKey" in options.mutation && - options.mutation.mutationKey - ? 
options - : { ...options, mutation: { ...options.mutation, mutationKey } } - : { mutation: { mutationKey }, axios: undefined }; - - const mutationFn: MutationFunction< - Awaited>, - { data: CheckDuplicatesIn } - > = (props) => { - const { data } = props ?? {}; +>( + catalog: string, + schema: string, + table: string, + options?: { + query?: Partial< + UseQueryOptions>, TError, TData> + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): UseQueryResult & { + queryKey: DataTag; +} { + const queryOptions = getGetTableTagsQueryOptions( + catalog, + schema, + table, + options, + ); - return checkDuplicates(data, axiosOptions); - }; + const query = useQuery(queryOptions, queryClient) as UseQueryResult< + TData, + TError + > & { queryKey: DataTag }; - return { mutationFn, ...mutationOptions }; -}; + query.queryKey = queryOptions.queryKey; -export type CheckDuplicatesMutationResult = NonNullable< - Awaited> ->; -export type CheckDuplicatesMutationBody = CheckDuplicatesIn; -export type CheckDuplicatesMutationError = AxiosError; + return query; +} -/** - * @summary Check Duplicates - */ -export const useCheckDuplicates = < +export const getGetTableTagsSuspenseQueryOptions = < + TData = Awaited>, TError = AxiosError, - TContext = unknown, >( + catalog: string, + schema: string, + table: string, options?: { - mutation?: UseMutationOptions< - Awaited>, - TError, - { data: CheckDuplicatesIn }, - TContext + query?: Partial< + UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > >; axios?: AxiosRequestConfig; }, - queryClient?: QueryClient, -): UseMutationResult< - Awaited>, - TError, - { data: CheckDuplicatesIn }, - TContext -> => { - const mutationOptions = getCheckDuplicatesMutationOptions(options); +) => { + const { query: queryOptions, axios: axiosOptions } = options ?? {}; - return useMutation(mutationOptions, queryClient); -}; + const queryKey = + queryOptions?.queryKey ?? 
getGetTableTagsQueryKey(catalog, schema, table); -/** - * Delete a single rule by rule_id. + const queryFn: QueryFunction>> = ({ + signal, + }) => getTableTags(catalog, schema, table, { signal, ...axiosOptions }); -Authors can only delete rules they themselves created. Admins and -approvers may delete any rule. - * @summary Delete Rule - */ -export const deleteRule = ( - ruleId: string, + return { queryKey, queryFn, ...queryOptions } as UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > & { queryKey: DataTag }; +}; + +export type GetTableTagsSuspenseQueryResult = NonNullable< + Awaited> +>; +export type GetTableTagsSuspenseQueryError = AxiosError; + +export function useGetTableTagsSuspense< + TData = Awaited>, + TError = AxiosError, +>( + catalog: string, + schema: string, + table: string, + options: { + query: Partial< + UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): UseSuspenseQueryResult & { + queryKey: DataTag; +}; +export function useGetTableTagsSuspense< + TData = Awaited>, + TError = AxiosError, +>( + catalog: string, + schema: string, + table: string, + options?: { + query?: Partial< + UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): UseSuspenseQueryResult & { + queryKey: DataTag; +}; +export function useGetTableTagsSuspense< + TData = Awaited>, + TError = AxiosError, +>( + catalog: string, + schema: string, + table: string, + options?: { + query?: Partial< + UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): UseSuspenseQueryResult & { + queryKey: DataTag; +}; +/** + * @summary Get Table Tags + */ + +export function useGetTableTagsSuspense< + TData = Awaited>, + TError = AxiosError, +>( + catalog: string, + schema: string, + table: string, + options?: { + query?: Partial< + UseSuspenseQueryOptions< + 
Awaited>, + TError, + TData + > + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): UseSuspenseQueryResult & { + queryKey: DataTag; +} { + const queryOptions = getGetTableTagsSuspenseQueryOptions( + catalog, + schema, + table, + options, + ); + + const query = useSuspenseQuery( + queryOptions, + queryClient, + ) as UseSuspenseQueryResult & { + queryKey: DataTag; + }; + + query.queryKey = queryOptions.queryKey; + + return query; +} + +/** + * Return only the tables that contain ALL of the required columns. + * @summary Filter Tables By Columns + */ +export const filterTablesByColumns = ( + filterTablesByColumnsIn: FilterTablesByColumnsIn, options?: AxiosRequestConfig, -): Promise> => { - return axios.default.delete(`/api/v1/rules/${ruleId}`, options); +): Promise> => { + return axios.default.post( + `/api/v1/discovery/filter-tables-by-columns`, + filterTablesByColumnsIn, + options, + ); }; -export const getDeleteRuleMutationOptions = < +export const getFilterTablesByColumnsMutationOptions = < TError = AxiosError, TContext = unknown, >(options?: { mutation?: UseMutationOptions< - Awaited>, + Awaited>, TError, - { ruleId: string }, + { data: FilterTablesByColumnsIn }, TContext >; axios?: AxiosRequestConfig; }): UseMutationOptions< - Awaited>, + Awaited>, TError, - { ruleId: string }, + { data: FilterTablesByColumnsIn }, TContext > => { - const mutationKey = ["deleteRule"]; + const mutationKey = ["filterTablesByColumns"]; const { mutation: mutationOptions, axios: axiosOptions } = options ? options.mutation && "mutationKey" in options.mutation && @@ -9164,88 +9119,85 @@ export const getDeleteRuleMutationOptions = < : { mutation: { mutationKey }, axios: undefined }; const mutationFn: MutationFunction< - Awaited>, - { ruleId: string } + Awaited>, + { data: FilterTablesByColumnsIn } > = (props) => { - const { ruleId } = props ?? {}; + const { data } = props ?? 
{}; - return deleteRule(ruleId, axiosOptions); + return filterTablesByColumns(data, axiosOptions); }; return { mutationFn, ...mutationOptions }; }; -export type DeleteRuleMutationResult = NonNullable< - Awaited> +export type FilterTablesByColumnsMutationResult = NonNullable< + Awaited> >; - -export type DeleteRuleMutationError = AxiosError; +export type FilterTablesByColumnsMutationBody = FilterTablesByColumnsIn; +export type FilterTablesByColumnsMutationError = + AxiosError; /** - * @summary Delete Rule + * @summary Filter Tables By Columns */ -export const useDeleteRule = < +export const useFilterTablesByColumns = < TError = AxiosError, TContext = unknown, >( options?: { mutation?: UseMutationOptions< - Awaited>, + Awaited>, TError, - { ruleId: string }, + { data: FilterTablesByColumnsIn }, TContext >; axios?: AxiosRequestConfig; }, queryClient?: QueryClient, ): UseMutationResult< - Awaited>, + Awaited>, TError, - { ruleId: string }, + { data: FilterTablesByColumnsIn }, TContext > => { - const mutationOptions = getDeleteRuleMutationOptions(options); + const mutationOptions = getFilterTablesByColumnsMutationOptions(options); return useMutation(mutationOptions, queryClient); }; /** - * Submit an individual rule for approval. - -Authors can only submit rules they themselves drafted. Admins and -approvers may submit any rule. - * @summary Submit For Approval + * Generate data quality checks from natural language using AI-assisted generation. 
+ * @summary Ai Generate Checks */ -export const submitRuleForApproval = ( - ruleId: string, - submitRuleForApprovalBody: SubmitRuleForApprovalBody, +export const aiAssistedChecksGeneration = ( + generateChecksIn: GenerateChecksIn, options?: AxiosRequestConfig, -): Promise> => { +): Promise> => { return axios.default.post( - `/api/v1/rules/${ruleId}/submit`, - submitRuleForApprovalBody, + `/api/v1/ai/generate-checks`, + generateChecksIn, options, ); }; -export const getSubmitRuleForApprovalMutationOptions = < +export const getAiAssistedChecksGenerationMutationOptions = < TError = AxiosError, TContext = unknown, >(options?: { mutation?: UseMutationOptions< - Awaited>, + Awaited>, TError, - { ruleId: string; data: SubmitRuleForApprovalBody }, + { data: GenerateChecksIn }, TContext >; axios?: AxiosRequestConfig; }): UseMutationOptions< - Awaited>, + Awaited>, TError, - { ruleId: string; data: SubmitRuleForApprovalBody }, + { data: GenerateChecksIn }, TContext > => { - const mutationKey = ["submitRuleForApproval"]; + const mutationKey = ["aiAssistedChecksGeneration"]; const { mutation: mutationOptions, axios: axiosOptions } = options ? options.mutation && "mutationKey" in options.mutation && @@ -9255,347 +9207,2081 @@ export const getSubmitRuleForApprovalMutationOptions = < : { mutation: { mutationKey }, axios: undefined }; const mutationFn: MutationFunction< - Awaited>, - { ruleId: string; data: SubmitRuleForApprovalBody } + Awaited>, + { data: GenerateChecksIn } > = (props) => { - const { ruleId, data } = props ?? {}; + const { data } = props ?? 
{}; - return submitRuleForApproval(ruleId, data, axiosOptions); + return aiAssistedChecksGeneration(data, axiosOptions); }; return { mutationFn, ...mutationOptions }; }; -export type SubmitRuleForApprovalMutationResult = NonNullable< - Awaited> +export type AiAssistedChecksGenerationMutationResult = NonNullable< + Awaited> >; -export type SubmitRuleForApprovalMutationBody = SubmitRuleForApprovalBody; -export type SubmitRuleForApprovalMutationError = +export type AiAssistedChecksGenerationMutationBody = GenerateChecksIn; +export type AiAssistedChecksGenerationMutationError = AxiosError; /** - * @summary Submit For Approval + * @summary Ai Generate Checks */ -export const useSubmitRuleForApproval = < +export const useAiAssistedChecksGeneration = < TError = AxiosError, TContext = unknown, >( options?: { mutation?: UseMutationOptions< - Awaited>, + Awaited>, TError, - { ruleId: string; data: SubmitRuleForApprovalBody }, + { data: GenerateChecksIn }, TContext >; axios?: AxiosRequestConfig; }, queryClient?: QueryClient, ): UseMutationResult< - Awaited>, + Awaited>, TError, - { ruleId: string; data: SubmitRuleForApprovalBody }, + { data: GenerateChecksIn }, TContext > => { - const mutationOptions = getSubmitRuleForApprovalMutationOptions(options); + const mutationOptions = getAiAssistedChecksGenerationMutationOptions(options); return useMutation(mutationOptions, queryClient); }; /** - * Revoke a pending submission back to draft. - -Authors can only revoke their own submissions. Admins and approvers may -revoke any submission (paired with the existing ownership check on -``submit_for_approval`` so an author can't reach in and revoke someone -else's submission they didn't make). - * @summary Revoke Submission + * List rules filtered to catalogs the current user can access. 
+ * @summary List Rules */ -export const revokeSubmission = ( - ruleId: string, - revokeSubmissionBody: RevokeSubmissionBody, +export const listRules = ( + params?: ListRulesParams, options?: AxiosRequestConfig, -): Promise> => { - return axios.default.post( - `/api/v1/rules/${ruleId}/revoke`, - revokeSubmissionBody, - options, - ); +): Promise> => { + return axios.default.get(`/api/v1/rules`, { + ...options, + params: { ...params, ...options?.params }, + }); }; -export const getRevokeSubmissionMutationOptions = < +export const getListRulesQueryKey = (params?: ListRulesParams) => { + return [`/api/v1/rules`, ...(params ? [params] : [])] as const; +}; + +export const getListRulesQueryOptions = < + TData = Awaited>, TError = AxiosError, - TContext = unknown, ->(options?: { - mutation?: UseMutationOptions< - Awaited>, - TError, - { ruleId: string; data: RevokeSubmissionBody }, - TContext - >; - axios?: AxiosRequestConfig; -}): UseMutationOptions< - Awaited>, - TError, - { ruleId: string; data: RevokeSubmissionBody }, - TContext -> => { - const mutationKey = ["revokeSubmission"]; - const { mutation: mutationOptions, axios: axiosOptions } = options - ? options.mutation && - "mutationKey" in options.mutation && - options.mutation.mutationKey - ? options - : { ...options, mutation: { ...options.mutation, mutationKey } } - : { mutation: { mutationKey }, axios: undefined }; +>( + params?: ListRulesParams, + options?: { + query?: Partial< + UseQueryOptions>, TError, TData> + >; + axios?: AxiosRequestConfig; + }, +) => { + const { query: queryOptions, axios: axiosOptions } = options ?? {}; - const mutationFn: MutationFunction< - Awaited>, - { ruleId: string; data: RevokeSubmissionBody } - > = (props) => { - const { ruleId, data } = props ?? {}; + const queryKey = queryOptions?.queryKey ?? 
getListRulesQueryKey(params); - return revokeSubmission(ruleId, data, axiosOptions); - }; + const queryFn: QueryFunction>> = ({ + signal, + }) => listRules(params, { signal, ...axiosOptions }); - return { mutationFn, ...mutationOptions }; + return { queryKey, queryFn, ...queryOptions } as UseQueryOptions< + Awaited>, + TError, + TData + > & { queryKey: DataTag }; }; -export type RevokeSubmissionMutationResult = NonNullable< - Awaited> +export type ListRulesQueryResult = NonNullable< + Awaited> >; -export type RevokeSubmissionMutationBody = RevokeSubmissionBody; -export type RevokeSubmissionMutationError = AxiosError; +export type ListRulesQueryError = AxiosError; +export function useListRules< + TData = Awaited>, + TError = AxiosError, +>( + params: undefined | ListRulesParams, + options: { + query: Partial< + UseQueryOptions>, TError, TData> + > & + Pick< + DefinedInitialDataOptions< + Awaited>, + TError, + Awaited> + >, + "initialData" + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): DefinedUseQueryResult & { + queryKey: DataTag; +}; +export function useListRules< + TData = Awaited>, + TError = AxiosError, +>( + params?: ListRulesParams, + options?: { + query?: Partial< + UseQueryOptions>, TError, TData> + > & + Pick< + UndefinedInitialDataOptions< + Awaited>, + TError, + Awaited> + >, + "initialData" + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): UseQueryResult & { + queryKey: DataTag; +}; +export function useListRules< + TData = Awaited>, + TError = AxiosError, +>( + params?: ListRulesParams, + options?: { + query?: Partial< + UseQueryOptions>, TError, TData> + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): UseQueryResult & { + queryKey: DataTag; +}; /** - * @summary Revoke Submission + * @summary List Rules */ -export const useRevokeSubmission = < + +export function useListRules< + TData = Awaited>, TError = AxiosError, - TContext = unknown, >( + params?: ListRulesParams, options?: { - 
mutation?: UseMutationOptions< - Awaited>, - TError, - { ruleId: string; data: RevokeSubmissionBody }, - TContext + query?: Partial< + UseQueryOptions>, TError, TData> >; axios?: AxiosRequestConfig; }, queryClient?: QueryClient, -): UseMutationResult< - Awaited>, - TError, - { ruleId: string; data: RevokeSubmissionBody }, - TContext -> => { - const mutationOptions = getRevokeSubmissionMutationOptions(options); +): UseQueryResult & { + queryKey: DataTag; +} { + const queryOptions = getListRulesQueryOptions(params, options); - return useMutation(mutationOptions, queryClient); -}; + const query = useQuery(queryOptions, queryClient) as UseQueryResult< + TData, + TError + > & { queryKey: DataTag }; -/** - * Approve an individual rule. - * @summary Approve Rules - */ -export const approveRule = ( - ruleId: string, - approveRuleBody: ApproveRuleBody, - options?: AxiosRequestConfig, -): Promise> => { - return axios.default.post( - `/api/v1/rules/${ruleId}/approve`, - approveRuleBody, - options, - ); -}; + query.queryKey = queryOptions.queryKey; -export const getApproveRuleMutationOptions = < + return query; +} + +export const getListRulesSuspenseQueryOptions = < + TData = Awaited>, TError = AxiosError, - TContext = unknown, ->(options?: { - mutation?: UseMutationOptions< - Awaited>, - TError, - { ruleId: string; data: ApproveRuleBody }, - TContext - >; - axios?: AxiosRequestConfig; -}): UseMutationOptions< - Awaited>, - TError, - { ruleId: string; data: ApproveRuleBody }, - TContext -> => { - const mutationKey = ["approveRule"]; - const { mutation: mutationOptions, axios: axiosOptions } = options - ? options.mutation && - "mutationKey" in options.mutation && - options.mutation.mutationKey - ? 
options - : { ...options, mutation: { ...options.mutation, mutationKey } } - : { mutation: { mutationKey }, axios: undefined }; +>( + params?: ListRulesParams, + options?: { + query?: Partial< + UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > + >; + axios?: AxiosRequestConfig; + }, +) => { + const { query: queryOptions, axios: axiosOptions } = options ?? {}; - const mutationFn: MutationFunction< - Awaited>, - { ruleId: string; data: ApproveRuleBody } - > = (props) => { - const { ruleId, data } = props ?? {}; + const queryKey = queryOptions?.queryKey ?? getListRulesQueryKey(params); - return approveRule(ruleId, data, axiosOptions); - }; + const queryFn: QueryFunction>> = ({ + signal, + }) => listRules(params, { signal, ...axiosOptions }); - return { mutationFn, ...mutationOptions }; + return { queryKey, queryFn, ...queryOptions } as UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > & { queryKey: DataTag }; }; -export type ApproveRuleMutationResult = NonNullable< - Awaited> +export type ListRulesSuspenseQueryResult = NonNullable< + Awaited> >; -export type ApproveRuleMutationBody = ApproveRuleBody; -export type ApproveRuleMutationError = AxiosError; +export type ListRulesSuspenseQueryError = AxiosError; -/** - * @summary Approve Rules +export function useListRulesSuspense< + TData = Awaited>, + TError = AxiosError, +>( + params: undefined | ListRulesParams, + options: { + query: Partial< + UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): UseSuspenseQueryResult & { + queryKey: DataTag; +}; +export function useListRulesSuspense< + TData = Awaited>, + TError = AxiosError, +>( + params?: ListRulesParams, + options?: { + query?: Partial< + UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): UseSuspenseQueryResult & { + queryKey: DataTag; +}; +export function useListRulesSuspense< + TData = 
Awaited>, + TError = AxiosError, +>( + params?: ListRulesParams, + options?: { + query?: Partial< + UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): UseSuspenseQueryResult & { + queryKey: DataTag; +}; +/** + * @summary List Rules + */ + +export function useListRulesSuspense< + TData = Awaited>, + TError = AxiosError, +>( + params?: ListRulesParams, + options?: { + query?: Partial< + UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): UseSuspenseQueryResult & { + queryKey: DataTag; +} { + const queryOptions = getListRulesSuspenseQueryOptions(params, options); + + const query = useSuspenseQuery( + queryOptions, + queryClient, + ) as UseSuspenseQueryResult & { + queryKey: DataTag; + }; + + query.queryKey = queryOptions.queryKey; + + return query; +} + +/** + * Save rules. Each check becomes an individual rule row. + +If ``rule_id`` is set, this updates an existing rule. Authors can only +update rules they themselves authored — otherwise they could silently +overwrite the contents of another user's rule (and then chain it with +submit/delete, which our other gates would reject only by accident). +Admins and approvers can update any rule. + * @summary Save Rules + */ +export const saveRules = ( + saveRulesIn: SaveRulesIn, + options?: AxiosRequestConfig, +): Promise> => { + return axios.default.post(`/api/v1/rules`, saveRulesIn, options); +}; + +export const getSaveRulesMutationOptions = < + TError = AxiosError, + TContext = unknown, +>(options?: { + mutation?: UseMutationOptions< + Awaited>, + TError, + { data: SaveRulesIn }, + TContext + >; + axios?: AxiosRequestConfig; +}): UseMutationOptions< + Awaited>, + TError, + { data: SaveRulesIn }, + TContext +> => { + const mutationKey = ["saveRules"]; + const { mutation: mutationOptions, axios: axiosOptions } = options + ? 
options.mutation && + "mutationKey" in options.mutation && + options.mutation.mutationKey + ? options + : { ...options, mutation: { ...options.mutation, mutationKey } } + : { mutation: { mutationKey }, axios: undefined }; + + const mutationFn: MutationFunction< + Awaited>, + { data: SaveRulesIn } + > = (props) => { + const { data } = props ?? {}; + + return saveRules(data, axiosOptions); + }; + + return { mutationFn, ...mutationOptions }; +}; + +export type SaveRulesMutationResult = NonNullable< + Awaited> +>; +export type SaveRulesMutationBody = SaveRulesIn; +export type SaveRulesMutationError = AxiosError; + +/** + * @summary Save Rules + */ +export const useSaveRules = < + TError = AxiosError, + TContext = unknown, +>( + options?: { + mutation?: UseMutationOptions< + Awaited>, + TError, + { data: SaveRulesIn }, + TContext + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): UseMutationResult< + Awaited>, + TError, + { data: SaveRulesIn }, + TContext +> => { + const mutationOptions = getSaveRulesMutationOptions(options); + + return useMutation(mutationOptions, queryClient); +}; + +/** + * Get all individual rules for a specific table. + * @summary Get Rules + */ +export const getRules = ( + tableFqn: string, + options?: AxiosRequestConfig, +): Promise> => { + return axios.default.get(`/api/v1/rules/${tableFqn}`, options); +}; + +export const getGetRulesQueryKey = (tableFqn?: string) => { + return [`/api/v1/rules/${tableFqn}`] as const; +}; + +export const getGetRulesQueryOptions = < + TData = Awaited>, + TError = AxiosError, +>( + tableFqn: string, + options?: { + query?: Partial< + UseQueryOptions>, TError, TData> + >; + axios?: AxiosRequestConfig; + }, +) => { + const { query: queryOptions, axios: axiosOptions } = options ?? {}; + + const queryKey = queryOptions?.queryKey ?? 
getGetRulesQueryKey(tableFqn); + + const queryFn: QueryFunction>> = ({ + signal, + }) => getRules(tableFqn, { signal, ...axiosOptions }); + + return { + queryKey, + queryFn, + enabled: !!tableFqn, + ...queryOptions, + } as UseQueryOptions>, TError, TData> & { + queryKey: DataTag; + }; +}; + +export type GetRulesQueryResult = NonNullable< + Awaited> +>; +export type GetRulesQueryError = AxiosError; + +export function useGetRules< + TData = Awaited>, + TError = AxiosError, +>( + tableFqn: string, + options: { + query: Partial< + UseQueryOptions>, TError, TData> + > & + Pick< + DefinedInitialDataOptions< + Awaited>, + TError, + Awaited> + >, + "initialData" + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): DefinedUseQueryResult & { + queryKey: DataTag; +}; +export function useGetRules< + TData = Awaited>, + TError = AxiosError, +>( + tableFqn: string, + options?: { + query?: Partial< + UseQueryOptions>, TError, TData> + > & + Pick< + UndefinedInitialDataOptions< + Awaited>, + TError, + Awaited> + >, + "initialData" + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): UseQueryResult & { + queryKey: DataTag; +}; +export function useGetRules< + TData = Awaited>, + TError = AxiosError, +>( + tableFqn: string, + options?: { + query?: Partial< + UseQueryOptions>, TError, TData> + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): UseQueryResult & { + queryKey: DataTag; +}; +/** + * @summary Get Rules + */ + +export function useGetRules< + TData = Awaited>, + TError = AxiosError, +>( + tableFqn: string, + options?: { + query?: Partial< + UseQueryOptions>, TError, TData> + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): UseQueryResult & { + queryKey: DataTag; +} { + const queryOptions = getGetRulesQueryOptions(tableFqn, options); + + const query = useQuery(queryOptions, queryClient) as UseQueryResult< + TData, + TError + > & { queryKey: DataTag }; + + query.queryKey = queryOptions.queryKey; 
+ + return query; +} + +export const getGetRulesSuspenseQueryOptions = < + TData = Awaited>, + TError = AxiosError, +>( + tableFqn: string, + options?: { + query?: Partial< + UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > + >; + axios?: AxiosRequestConfig; + }, +) => { + const { query: queryOptions, axios: axiosOptions } = options ?? {}; + + const queryKey = queryOptions?.queryKey ?? getGetRulesQueryKey(tableFqn); + + const queryFn: QueryFunction>> = ({ + signal, + }) => getRules(tableFqn, { signal, ...axiosOptions }); + + return { queryKey, queryFn, ...queryOptions } as UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > & { queryKey: DataTag }; +}; + +export type GetRulesSuspenseQueryResult = NonNullable< + Awaited> +>; +export type GetRulesSuspenseQueryError = AxiosError; + +export function useGetRulesSuspense< + TData = Awaited>, + TError = AxiosError, +>( + tableFqn: string, + options: { + query: Partial< + UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): UseSuspenseQueryResult & { + queryKey: DataTag; +}; +export function useGetRulesSuspense< + TData = Awaited>, + TError = AxiosError, +>( + tableFqn: string, + options?: { + query?: Partial< + UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): UseSuspenseQueryResult & { + queryKey: DataTag; +}; +export function useGetRulesSuspense< + TData = Awaited>, + TError = AxiosError, +>( + tableFqn: string, + options?: { + query?: Partial< + UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): UseSuspenseQueryResult & { + queryKey: DataTag; +}; +/** + * @summary Get Rules + */ + +export function useGetRulesSuspense< + TData = Awaited>, + TError = AxiosError, +>( + tableFqn: string, + options?: { + query?: Partial< + UseSuspenseQueryOptions< + Awaited>, + TError, 
+ TData + > + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): UseSuspenseQueryResult & { + queryKey: DataTag; +} { + const queryOptions = getGetRulesSuspenseQueryOptions(tableFqn, options); + + const query = useSuspenseQuery( + queryOptions, + queryClient, + ) as UseSuspenseQueryResult & { + queryKey: DataTag; + }; + + query.queryKey = queryOptions.queryKey; + + return query; +} + +/** + * Save the same set of checks to multiple tables (reusable rules). + * @summary Batch Save Rules + */ +export const batchSaveRules = ( + batchSaveRulesIn: BatchSaveRulesIn, + options?: AxiosRequestConfig, +): Promise> => { + return axios.default.post(`/api/v1/rules/batch`, batchSaveRulesIn, options); +}; + +export const getBatchSaveRulesMutationOptions = < + TError = AxiosError, + TContext = unknown, +>(options?: { + mutation?: UseMutationOptions< + Awaited>, + TError, + { data: BatchSaveRulesIn }, + TContext + >; + axios?: AxiosRequestConfig; +}): UseMutationOptions< + Awaited>, + TError, + { data: BatchSaveRulesIn }, + TContext +> => { + const mutationKey = ["batchSaveRules"]; + const { mutation: mutationOptions, axios: axiosOptions } = options + ? options.mutation && + "mutationKey" in options.mutation && + options.mutation.mutationKey + ? options + : { ...options, mutation: { ...options.mutation, mutationKey } } + : { mutation: { mutationKey }, axios: undefined }; + + const mutationFn: MutationFunction< + Awaited>, + { data: BatchSaveRulesIn } + > = (props) => { + const { data } = props ?? 
{}; + + return batchSaveRules(data, axiosOptions); + }; + + return { mutationFn, ...mutationOptions }; +}; + +export type BatchSaveRulesMutationResult = NonNullable< + Awaited> +>; +export type BatchSaveRulesMutationBody = BatchSaveRulesIn; +export type BatchSaveRulesMutationError = AxiosError; + +/** + * @summary Batch Save Rules + */ +export const useBatchSaveRules = < + TError = AxiosError, + TContext = unknown, +>( + options?: { + mutation?: UseMutationOptions< + Awaited>, + TError, + { data: BatchSaveRulesIn }, + TContext + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): UseMutationResult< + Awaited>, + TError, + { data: BatchSaveRulesIn }, + TContext +> => { + const mutationOptions = getBatchSaveRulesMutationOptions(options); + + return useMutation(mutationOptions, queryClient); +}; + +/** + * Check if any of the provided checks already exist for the given table. + * @summary Check Duplicates + */ +export const checkDuplicates = ( + checkDuplicatesIn: CheckDuplicatesIn, + options?: AxiosRequestConfig, +): Promise> => { + return axios.default.post( + `/api/v1/rules/check-duplicates`, + checkDuplicatesIn, + options, + ); +}; + +export const getCheckDuplicatesMutationOptions = < + TError = AxiosError, + TContext = unknown, +>(options?: { + mutation?: UseMutationOptions< + Awaited>, + TError, + { data: CheckDuplicatesIn }, + TContext + >; + axios?: AxiosRequestConfig; +}): UseMutationOptions< + Awaited>, + TError, + { data: CheckDuplicatesIn }, + TContext +> => { + const mutationKey = ["checkDuplicates"]; + const { mutation: mutationOptions, axios: axiosOptions } = options + ? options.mutation && + "mutationKey" in options.mutation && + options.mutation.mutationKey + ? options + : { ...options, mutation: { ...options.mutation, mutationKey } } + : { mutation: { mutationKey }, axios: undefined }; + + const mutationFn: MutationFunction< + Awaited>, + { data: CheckDuplicatesIn } + > = (props) => { + const { data } = props ?? 
{}; + + return checkDuplicates(data, axiosOptions); + }; + + return { mutationFn, ...mutationOptions }; +}; + +export type CheckDuplicatesMutationResult = NonNullable< + Awaited> +>; +export type CheckDuplicatesMutationBody = CheckDuplicatesIn; +export type CheckDuplicatesMutationError = AxiosError; + +/** + * @summary Check Duplicates + */ +export const useCheckDuplicates = < + TError = AxiosError, + TContext = unknown, +>( + options?: { + mutation?: UseMutationOptions< + Awaited>, + TError, + { data: CheckDuplicatesIn }, + TContext + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): UseMutationResult< + Awaited>, + TError, + { data: CheckDuplicatesIn }, + TContext +> => { + const mutationOptions = getCheckDuplicatesMutationOptions(options); + + return useMutation(mutationOptions, queryClient); +}; + +/** + * Delete a single rule by rule_id. + +Authors can only delete rules they themselves created. Admins and +approvers may delete any rule. + * @summary Delete Rule + */ +export const deleteRule = ( + ruleId: string, + options?: AxiosRequestConfig, +): Promise> => { + return axios.default.delete(`/api/v1/rules/${ruleId}`, options); +}; + +export const getDeleteRuleMutationOptions = < + TError = AxiosError, + TContext = unknown, +>(options?: { + mutation?: UseMutationOptions< + Awaited>, + TError, + { ruleId: string }, + TContext + >; + axios?: AxiosRequestConfig; +}): UseMutationOptions< + Awaited>, + TError, + { ruleId: string }, + TContext +> => { + const mutationKey = ["deleteRule"]; + const { mutation: mutationOptions, axios: axiosOptions } = options + ? options.mutation && + "mutationKey" in options.mutation && + options.mutation.mutationKey + ? options + : { ...options, mutation: { ...options.mutation, mutationKey } } + : { mutation: { mutationKey }, axios: undefined }; + + const mutationFn: MutationFunction< + Awaited>, + { ruleId: string } + > = (props) => { + const { ruleId } = props ?? 
{}; + + return deleteRule(ruleId, axiosOptions); + }; + + return { mutationFn, ...mutationOptions }; +}; + +export type DeleteRuleMutationResult = NonNullable< + Awaited> +>; + +export type DeleteRuleMutationError = AxiosError; + +/** + * @summary Delete Rule + */ +export const useDeleteRule = < + TError = AxiosError, + TContext = unknown, +>( + options?: { + mutation?: UseMutationOptions< + Awaited>, + TError, + { ruleId: string }, + TContext + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): UseMutationResult< + Awaited>, + TError, + { ruleId: string }, + TContext +> => { + const mutationOptions = getDeleteRuleMutationOptions(options); + + return useMutation(mutationOptions, queryClient); +}; + +/** + * Submit an individual rule for approval. + +Authors can only submit rules they themselves drafted. Admins and +approvers may submit any rule. + * @summary Submit For Approval + */ +export const submitRuleForApproval = ( + ruleId: string, + submitRuleForApprovalBody: SubmitRuleForApprovalBody, + options?: AxiosRequestConfig, +): Promise> => { + return axios.default.post( + `/api/v1/rules/${ruleId}/submit`, + submitRuleForApprovalBody, + options, + ); +}; + +export const getSubmitRuleForApprovalMutationOptions = < + TError = AxiosError, + TContext = unknown, +>(options?: { + mutation?: UseMutationOptions< + Awaited>, + TError, + { ruleId: string; data: SubmitRuleForApprovalBody }, + TContext + >; + axios?: AxiosRequestConfig; +}): UseMutationOptions< + Awaited>, + TError, + { ruleId: string; data: SubmitRuleForApprovalBody }, + TContext +> => { + const mutationKey = ["submitRuleForApproval"]; + const { mutation: mutationOptions, axios: axiosOptions } = options + ? options.mutation && + "mutationKey" in options.mutation && + options.mutation.mutationKey + ? 
options + : { ...options, mutation: { ...options.mutation, mutationKey } } + : { mutation: { mutationKey }, axios: undefined }; + + const mutationFn: MutationFunction< + Awaited>, + { ruleId: string; data: SubmitRuleForApprovalBody } + > = (props) => { + const { ruleId, data } = props ?? {}; + + return submitRuleForApproval(ruleId, data, axiosOptions); + }; + + return { mutationFn, ...mutationOptions }; +}; + +export type SubmitRuleForApprovalMutationResult = NonNullable< + Awaited> +>; +export type SubmitRuleForApprovalMutationBody = SubmitRuleForApprovalBody; +export type SubmitRuleForApprovalMutationError = + AxiosError; + +/** + * @summary Submit For Approval + */ +export const useSubmitRuleForApproval = < + TError = AxiosError, + TContext = unknown, +>( + options?: { + mutation?: UseMutationOptions< + Awaited>, + TError, + { ruleId: string; data: SubmitRuleForApprovalBody }, + TContext + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): UseMutationResult< + Awaited>, + TError, + { ruleId: string; data: SubmitRuleForApprovalBody }, + TContext +> => { + const mutationOptions = getSubmitRuleForApprovalMutationOptions(options); + + return useMutation(mutationOptions, queryClient); +}; + +/** + * Revoke a pending submission back to draft. + +Authors can only revoke their own submissions. Admins and approvers may +revoke any submission (paired with the existing ownership check on +``submit_for_approval`` so an author can't reach in and revoke someone +else's submission they didn't make). 
+ * @summary Revoke Submission + */ +export const revokeSubmission = ( + ruleId: string, + revokeSubmissionBody: RevokeSubmissionBody, + options?: AxiosRequestConfig, +): Promise> => { + return axios.default.post( + `/api/v1/rules/${ruleId}/revoke`, + revokeSubmissionBody, + options, + ); +}; + +export const getRevokeSubmissionMutationOptions = < + TError = AxiosError, + TContext = unknown, +>(options?: { + mutation?: UseMutationOptions< + Awaited>, + TError, + { ruleId: string; data: RevokeSubmissionBody }, + TContext + >; + axios?: AxiosRequestConfig; +}): UseMutationOptions< + Awaited>, + TError, + { ruleId: string; data: RevokeSubmissionBody }, + TContext +> => { + const mutationKey = ["revokeSubmission"]; + const { mutation: mutationOptions, axios: axiosOptions } = options + ? options.mutation && + "mutationKey" in options.mutation && + options.mutation.mutationKey + ? options + : { ...options, mutation: { ...options.mutation, mutationKey } } + : { mutation: { mutationKey }, axios: undefined }; + + const mutationFn: MutationFunction< + Awaited>, + { ruleId: string; data: RevokeSubmissionBody } + > = (props) => { + const { ruleId, data } = props ?? 
{}; + + return revokeSubmission(ruleId, data, axiosOptions); + }; + + return { mutationFn, ...mutationOptions }; +}; + +export type RevokeSubmissionMutationResult = NonNullable< + Awaited> +>; +export type RevokeSubmissionMutationBody = RevokeSubmissionBody; +export type RevokeSubmissionMutationError = AxiosError; + +/** + * @summary Revoke Submission + */ +export const useRevokeSubmission = < + TError = AxiosError, + TContext = unknown, +>( + options?: { + mutation?: UseMutationOptions< + Awaited>, + TError, + { ruleId: string; data: RevokeSubmissionBody }, + TContext + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): UseMutationResult< + Awaited>, + TError, + { ruleId: string; data: RevokeSubmissionBody }, + TContext +> => { + const mutationOptions = getRevokeSubmissionMutationOptions(options); + + return useMutation(mutationOptions, queryClient); +}; + +/** + * Approve an individual rule. + * @summary Approve Rules + */ +export const approveRule = ( + ruleId: string, + approveRuleBody: ApproveRuleBody, + options?: AxiosRequestConfig, +): Promise> => { + return axios.default.post( + `/api/v1/rules/${ruleId}/approve`, + approveRuleBody, + options, + ); +}; + +export const getApproveRuleMutationOptions = < + TError = AxiosError, + TContext = unknown, +>(options?: { + mutation?: UseMutationOptions< + Awaited>, + TError, + { ruleId: string; data: ApproveRuleBody }, + TContext + >; + axios?: AxiosRequestConfig; +}): UseMutationOptions< + Awaited>, + TError, + { ruleId: string; data: ApproveRuleBody }, + TContext +> => { + const mutationKey = ["approveRule"]; + const { mutation: mutationOptions, axios: axiosOptions } = options + ? options.mutation && + "mutationKey" in options.mutation && + options.mutation.mutationKey + ? 
options + : { ...options, mutation: { ...options.mutation, mutationKey } } + : { mutation: { mutationKey }, axios: undefined }; + + const mutationFn: MutationFunction< + Awaited>, + { ruleId: string; data: ApproveRuleBody } + > = (props) => { + const { ruleId, data } = props ?? {}; + + return approveRule(ruleId, data, axiosOptions); + }; + + return { mutationFn, ...mutationOptions }; +}; + +export type ApproveRuleMutationResult = NonNullable< + Awaited> +>; +export type ApproveRuleMutationBody = ApproveRuleBody; +export type ApproveRuleMutationError = AxiosError; + +/** + * @summary Approve Rules + */ +export const useApproveRule = < + TError = AxiosError, + TContext = unknown, +>( + options?: { + mutation?: UseMutationOptions< + Awaited>, + TError, + { ruleId: string; data: ApproveRuleBody }, + TContext + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): UseMutationResult< + Awaited>, + TError, + { ruleId: string; data: ApproveRuleBody }, + TContext +> => { + const mutationOptions = getApproveRuleMutationOptions(options); + + return useMutation(mutationOptions, queryClient); +}; + +/** + * Assign UUIDs to rules that are missing a rule_id (legacy rows). + * @summary Backfill Rule Ids + */ +export const backfillRuleIds = ( + options?: AxiosRequestConfig, +): Promise> => { + return axios.default.post(`/api/v1/rules/backfill-ids`, undefined, options); +}; + +export const getBackfillRuleIdsMutationOptions = < + TError = AxiosError, + TContext = unknown, +>(options?: { + mutation?: UseMutationOptions< + Awaited>, + TError, + void, + TContext + >; + axios?: AxiosRequestConfig; +}): UseMutationOptions< + Awaited>, + TError, + void, + TContext +> => { + const mutationKey = ["backfillRuleIds"]; + const { mutation: mutationOptions, axios: axiosOptions } = options + ? options.mutation && + "mutationKey" in options.mutation && + options.mutation.mutationKey + ? 
options + : { ...options, mutation: { ...options.mutation, mutationKey } } + : { mutation: { mutationKey }, axios: undefined }; + + const mutationFn: MutationFunction< + Awaited>, + void + > = () => { + return backfillRuleIds(axiosOptions); + }; + + return { mutationFn, ...mutationOptions }; +}; + +export type BackfillRuleIdsMutationResult = NonNullable< + Awaited> +>; + +export type BackfillRuleIdsMutationError = AxiosError; + +/** + * @summary Backfill Rule Ids + */ +export const useBackfillRuleIds = < + TError = AxiosError, + TContext = unknown, +>( + options?: { + mutation?: UseMutationOptions< + Awaited>, + TError, + void, + TContext + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): UseMutationResult< + Awaited>, + TError, + void, + TContext +> => { + const mutationOptions = getBackfillRuleIdsMutationOptions(options); + + return useMutation(mutationOptions, queryClient); +}; + +/** + * Reject an individual rule. + * @summary Reject Rules + */ +export const rejectRule = ( + ruleId: string, + rejectRuleBody: RejectRuleBody, + options?: AxiosRequestConfig, +): Promise> => { + return axios.default.post( + `/api/v1/rules/${ruleId}/reject`, + rejectRuleBody, + options, + ); +}; + +export const getRejectRuleMutationOptions = < + TError = AxiosError, + TContext = unknown, +>(options?: { + mutation?: UseMutationOptions< + Awaited>, + TError, + { ruleId: string; data: RejectRuleBody }, + TContext + >; + axios?: AxiosRequestConfig; +}): UseMutationOptions< + Awaited>, + TError, + { ruleId: string; data: RejectRuleBody }, + TContext +> => { + const mutationKey = ["rejectRule"]; + const { mutation: mutationOptions, axios: axiosOptions } = options + ? options.mutation && + "mutationKey" in options.mutation && + options.mutation.mutationKey + ? 
options + : { ...options, mutation: { ...options.mutation, mutationKey } } + : { mutation: { mutationKey }, axios: undefined }; + + const mutationFn: MutationFunction< + Awaited>, + { ruleId: string; data: RejectRuleBody } + > = (props) => { + const { ruleId, data } = props ?? {}; + + return rejectRule(ruleId, data, axiosOptions); + }; + + return { mutationFn, ...mutationOptions }; +}; + +export type RejectRuleMutationResult = NonNullable< + Awaited> +>; +export type RejectRuleMutationBody = RejectRuleBody; +export type RejectRuleMutationError = AxiosError; + +/** + * @summary Reject Rules + */ +export const useRejectRule = < + TError = AxiosError, + TContext = unknown, +>( + options?: { + mutation?: UseMutationOptions< + Awaited>, + TError, + { ruleId: string; data: RejectRuleBody }, + TContext + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): UseMutationResult< + Awaited>, + TError, + { ruleId: string; data: RejectRuleBody }, + TContext +> => { + const mutationOptions = getRejectRuleMutationOptions(options); + + return useMutation(mutationOptions, queryClient); +}; + +/** + * Validate a list of check definitions without saving them. + * @summary Validate Checks + */ +export const validateChecks = ( + validateChecksIn: ValidateChecksIn, + options?: AxiosRequestConfig, +): Promise> => { + return axios.default.post( + `/api/v1/rules/validate-checks`, + validateChecksIn, + options, + ); +}; + +export const getValidateChecksMutationOptions = < + TError = AxiosError, + TContext = unknown, +>(options?: { + mutation?: UseMutationOptions< + Awaited>, + TError, + { data: ValidateChecksIn }, + TContext + >; + axios?: AxiosRequestConfig; +}): UseMutationOptions< + Awaited>, + TError, + { data: ValidateChecksIn }, + TContext +> => { + const mutationKey = ["validateChecks"]; + const { mutation: mutationOptions, axios: axiosOptions } = options + ? options.mutation && + "mutationKey" in options.mutation && + options.mutation.mutationKey + ? 
options + : { ...options, mutation: { ...options.mutation, mutationKey } } + : { mutation: { mutationKey }, axios: undefined }; + + const mutationFn: MutationFunction< + Awaited>, + { data: ValidateChecksIn } + > = (props) => { + const { data } = props ?? {}; + + return validateChecks(data, axiosOptions); + }; + + return { mutationFn, ...mutationOptions }; +}; + +export type ValidateChecksMutationResult = NonNullable< + Awaited> +>; +export type ValidateChecksMutationBody = ValidateChecksIn; +export type ValidateChecksMutationError = AxiosError; + +/** + * @summary Validate Checks + */ +export const useValidateChecks = < + TError = AxiosError, + TContext = unknown, +>( + options?: { + mutation?: UseMutationOptions< + Awaited>, + TError, + { data: ValidateChecksIn }, + TContext + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): UseMutationResult< + Awaited>, + TError, + { data: ValidateChecksIn }, + TContext +> => { + const mutationOptions = getValidateChecksMutationOptions(options); + + return useMutation(mutationOptions, queryClient); +}; + +/** + * Return every DQX check function the single-table editor should offer. + +The response is built by introspecting DQX's own registry, so adding +a new ``@register_rule("row")`` to ``check_funcs.py`` is enough to +surface it in the UI on the next backend deploy — no frontend change +required. + * @summary List Check Functions + */ +export const listCheckFunctions = ( + options?: AxiosRequestConfig, +): Promise> => { + return axios.default.get(`/api/v1/check-functions`, options); +}; + +export const getListCheckFunctionsQueryKey = () => { + return [`/api/v1/check-functions`] as const; +}; + +export const getListCheckFunctionsQueryOptions = < + TData = Awaited>, + TError = AxiosError, +>(options?: { + query?: Partial< + UseQueryOptions< + Awaited>, + TError, + TData + > + >; + axios?: AxiosRequestConfig; +}) => { + const { query: queryOptions, axios: axiosOptions } = options ?? 
{}; + + const queryKey = queryOptions?.queryKey ?? getListCheckFunctionsQueryKey(); + + const queryFn: QueryFunction< + Awaited> + > = ({ signal }) => listCheckFunctions({ signal, ...axiosOptions }); + + return { queryKey, queryFn, ...queryOptions } as UseQueryOptions< + Awaited>, + TError, + TData + > & { queryKey: DataTag }; +}; + +export type ListCheckFunctionsQueryResult = NonNullable< + Awaited> +>; +export type ListCheckFunctionsQueryError = AxiosError; + +export function useListCheckFunctions< + TData = Awaited>, + TError = AxiosError, +>( + options: { + query: Partial< + UseQueryOptions< + Awaited>, + TError, + TData + > + > & + Pick< + DefinedInitialDataOptions< + Awaited>, + TError, + Awaited> + >, + "initialData" + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): DefinedUseQueryResult & { + queryKey: DataTag; +}; +export function useListCheckFunctions< + TData = Awaited>, + TError = AxiosError, +>( + options?: { + query?: Partial< + UseQueryOptions< + Awaited>, + TError, + TData + > + > & + Pick< + UndefinedInitialDataOptions< + Awaited>, + TError, + Awaited> + >, + "initialData" + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): UseQueryResult & { + queryKey: DataTag; +}; +export function useListCheckFunctions< + TData = Awaited>, + TError = AxiosError, +>( + options?: { + query?: Partial< + UseQueryOptions< + Awaited>, + TError, + TData + > + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): UseQueryResult & { + queryKey: DataTag; +}; +/** + * @summary List Check Functions + */ + +export function useListCheckFunctions< + TData = Awaited>, + TError = AxiosError, +>( + options?: { + query?: Partial< + UseQueryOptions< + Awaited>, + TError, + TData + > + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): UseQueryResult & { + queryKey: DataTag; +} { + const queryOptions = getListCheckFunctionsQueryOptions(options); + + const query = useQuery(queryOptions, queryClient) as 
UseQueryResult< + TData, + TError + > & { queryKey: DataTag }; + + query.queryKey = queryOptions.queryKey; + + return query; +} + +export const getListCheckFunctionsSuspenseQueryOptions = < + TData = Awaited>, + TError = AxiosError, +>(options?: { + query?: Partial< + UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > + >; + axios?: AxiosRequestConfig; +}) => { + const { query: queryOptions, axios: axiosOptions } = options ?? {}; + + const queryKey = queryOptions?.queryKey ?? getListCheckFunctionsQueryKey(); + + const queryFn: QueryFunction< + Awaited> + > = ({ signal }) => listCheckFunctions({ signal, ...axiosOptions }); + + return { queryKey, queryFn, ...queryOptions } as UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > & { queryKey: DataTag }; +}; + +export type ListCheckFunctionsSuspenseQueryResult = NonNullable< + Awaited> +>; +export type ListCheckFunctionsSuspenseQueryError = AxiosError; + +export function useListCheckFunctionsSuspense< + TData = Awaited>, + TError = AxiosError, +>( + options: { + query: Partial< + UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): UseSuspenseQueryResult & { + queryKey: DataTag; +}; +export function useListCheckFunctionsSuspense< + TData = Awaited>, + TError = AxiosError, +>( + options?: { + query?: Partial< + UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): UseSuspenseQueryResult & { + queryKey: DataTag; +}; +export function useListCheckFunctionsSuspense< + TData = Awaited>, + TError = AxiosError, +>( + options?: { + query?: Partial< + UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): UseSuspenseQueryResult & { + queryKey: DataTag; +}; +/** + * @summary List Check Functions + */ + +export function useListCheckFunctionsSuspense< + TData = Awaited>, + TError = 
AxiosError, +>( + options?: { + query?: Partial< + UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): UseSuspenseQueryResult & { + queryKey: DataTag; +} { + const queryOptions = getListCheckFunctionsSuspenseQueryOptions(options); + + const query = useSuspenseQuery( + queryOptions, + queryClient, + ) as UseSuspenseQueryResult & { + queryKey: DataTag; + }; + + query.queryKey = queryOptions.queryKey; + + return query; +} + +/** + * Return validation (dry-run) history filtered to user-accessible catalogs. + * @summary List Validation Runs + */ +export const listValidationRuns = ( + params?: ListValidationRunsParams, + options?: AxiosRequestConfig, +): Promise> => { + return axios.default.get(`/api/v1/dryrun/runs`, { + ...options, + params: { ...params, ...options?.params }, + }); +}; + +export const getListValidationRunsQueryKey = ( + params?: ListValidationRunsParams, +) => { + return [`/api/v1/dryrun/runs`, ...(params ? [params] : [])] as const; +}; + +export const getListValidationRunsQueryOptions = < + TData = Awaited>, + TError = AxiosError, +>( + params?: ListValidationRunsParams, + options?: { + query?: Partial< + UseQueryOptions< + Awaited>, + TError, + TData + > + >; + axios?: AxiosRequestConfig; + }, +) => { + const { query: queryOptions, axios: axiosOptions } = options ?? {}; + + const queryKey = + queryOptions?.queryKey ?? 
getListValidationRunsQueryKey(params); + + const queryFn: QueryFunction< + Awaited> + > = ({ signal }) => listValidationRuns(params, { signal, ...axiosOptions }); + + return { queryKey, queryFn, ...queryOptions } as UseQueryOptions< + Awaited>, + TError, + TData + > & { queryKey: DataTag }; +}; + +export type ListValidationRunsQueryResult = NonNullable< + Awaited> +>; +export type ListValidationRunsQueryError = AxiosError; + +export function useListValidationRuns< + TData = Awaited>, + TError = AxiosError, +>( + params: undefined | ListValidationRunsParams, + options: { + query: Partial< + UseQueryOptions< + Awaited>, + TError, + TData + > + > & + Pick< + DefinedInitialDataOptions< + Awaited>, + TError, + Awaited> + >, + "initialData" + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): DefinedUseQueryResult & { + queryKey: DataTag; +}; +export function useListValidationRuns< + TData = Awaited>, + TError = AxiosError, +>( + params?: ListValidationRunsParams, + options?: { + query?: Partial< + UseQueryOptions< + Awaited>, + TError, + TData + > + > & + Pick< + UndefinedInitialDataOptions< + Awaited>, + TError, + Awaited> + >, + "initialData" + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): UseQueryResult & { + queryKey: DataTag; +}; +export function useListValidationRuns< + TData = Awaited>, + TError = AxiosError, +>( + params?: ListValidationRunsParams, + options?: { + query?: Partial< + UseQueryOptions< + Awaited>, + TError, + TData + > + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): UseQueryResult & { + queryKey: DataTag; +}; +/** + * @summary List Validation Runs */ -export const useApproveRule = < + +export function useListValidationRuns< + TData = Awaited>, TError = AxiosError, - TContext = unknown, >( + params?: ListValidationRunsParams, options?: { - mutation?: UseMutationOptions< - Awaited>, - TError, - { ruleId: string; data: ApproveRuleBody }, - TContext + query?: Partial< + 
UseQueryOptions< + Awaited>, + TError, + TData + > >; axios?: AxiosRequestConfig; }, queryClient?: QueryClient, -): UseMutationResult< - Awaited>, - TError, - { ruleId: string; data: ApproveRuleBody }, - TContext -> => { - const mutationOptions = getApproveRuleMutationOptions(options); +): UseQueryResult & { + queryKey: DataTag; +} { + const queryOptions = getListValidationRunsQueryOptions(params, options); - return useMutation(mutationOptions, queryClient); -}; + const query = useQuery(queryOptions, queryClient) as UseQueryResult< + TData, + TError + > & { queryKey: DataTag }; -/** - * Assign UUIDs to rules that are missing a rule_id (legacy rows). - * @summary Backfill Rule Ids - */ -export const backfillRuleIds = ( - options?: AxiosRequestConfig, -): Promise> => { - return axios.default.post(`/api/v1/rules/backfill-ids`, undefined, options); -}; + query.queryKey = queryOptions.queryKey; -export const getBackfillRuleIdsMutationOptions = < + return query; +} + +export const getListValidationRunsSuspenseQueryOptions = < + TData = Awaited>, TError = AxiosError, - TContext = unknown, ->(options?: { - mutation?: UseMutationOptions< - Awaited>, - TError, - void, - TContext - >; - axios?: AxiosRequestConfig; -}): UseMutationOptions< - Awaited>, - TError, - void, - TContext -> => { - const mutationKey = ["backfillRuleIds"]; - const { mutation: mutationOptions, axios: axiosOptions } = options - ? options.mutation && - "mutationKey" in options.mutation && - options.mutation.mutationKey - ? options - : { ...options, mutation: { ...options.mutation, mutationKey } } - : { mutation: { mutationKey }, axios: undefined }; +>( + params?: ListValidationRunsParams, + options?: { + query?: Partial< + UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > + >; + axios?: AxiosRequestConfig; + }, +) => { + const { query: queryOptions, axios: axiosOptions } = options ?? 
{}; - const mutationFn: MutationFunction< - Awaited>, - void - > = () => { - return backfillRuleIds(axiosOptions); - }; + const queryKey = + queryOptions?.queryKey ?? getListValidationRunsQueryKey(params); - return { mutationFn, ...mutationOptions }; + const queryFn: QueryFunction< + Awaited> + > = ({ signal }) => listValidationRuns(params, { signal, ...axiosOptions }); + + return { queryKey, queryFn, ...queryOptions } as UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > & { queryKey: DataTag }; }; -export type BackfillRuleIdsMutationResult = NonNullable< - Awaited> +export type ListValidationRunsSuspenseQueryResult = NonNullable< + Awaited> >; +export type ListValidationRunsSuspenseQueryError = + AxiosError; -export type BackfillRuleIdsMutationError = AxiosError; - +export function useListValidationRunsSuspense< + TData = Awaited>, + TError = AxiosError, +>( + params: undefined | ListValidationRunsParams, + options: { + query: Partial< + UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): UseSuspenseQueryResult & { + queryKey: DataTag; +}; +export function useListValidationRunsSuspense< + TData = Awaited>, + TError = AxiosError, +>( + params?: ListValidationRunsParams, + options?: { + query?: Partial< + UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): UseSuspenseQueryResult & { + queryKey: DataTag; +}; +export function useListValidationRunsSuspense< + TData = Awaited>, + TError = AxiosError, +>( + params?: ListValidationRunsParams, + options?: { + query?: Partial< + UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): UseSuspenseQueryResult & { + queryKey: DataTag; +}; /** - * @summary Backfill Rule Ids + * @summary List Validation Runs */ -export const useBackfillRuleIds = < + +export function 
useListValidationRunsSuspense< + TData = Awaited>, TError = AxiosError, - TContext = unknown, >( + params?: ListValidationRunsParams, options?: { - mutation?: UseMutationOptions< - Awaited>, - TError, - void, - TContext + query?: Partial< + UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > >; axios?: AxiosRequestConfig; }, queryClient?: QueryClient, -): UseMutationResult< - Awaited>, - TError, - void, - TContext -> => { - const mutationOptions = getBackfillRuleIdsMutationOptions(options); +): UseSuspenseQueryResult & { + queryKey: DataTag; +} { + const queryOptions = getListValidationRunsSuspenseQueryOptions( + params, + options, + ); - return useMutation(mutationOptions, queryClient); -}; + const query = useSuspenseQuery( + queryOptions, + queryClient, + ) as UseSuspenseQueryResult & { + queryKey: DataTag; + }; + + query.queryKey = queryOptions.queryKey; + + return query; +} /** - * Reject an individual rule. - * @summary Reject Rules + * Read approved checks from the rules catalog for the given tables and submit dry-run jobs. 
+ * @summary Batch Run From Catalog */ -export const rejectRule = ( - ruleId: string, - rejectRuleBody: RejectRuleBody, +export const batchRunFromCatalog = ( + batchRunFromCatalogIn: BatchRunFromCatalogIn, options?: AxiosRequestConfig, -): Promise> => { +): Promise> => { return axios.default.post( - `/api/v1/rules/${ruleId}/reject`, - rejectRuleBody, + `/api/v1/dryrun/batch-from-catalog`, + batchRunFromCatalogIn, options, ); }; -export const getRejectRuleMutationOptions = < +export const getBatchRunFromCatalogMutationOptions = < TError = AxiosError, TContext = unknown, >(options?: { mutation?: UseMutationOptions< - Awaited>, + Awaited>, TError, - { ruleId: string; data: RejectRuleBody }, + { data: BatchRunFromCatalogIn }, TContext >; axios?: AxiosRequestConfig; }): UseMutationOptions< - Awaited>, + Awaited>, TError, - { ruleId: string; data: RejectRuleBody }, + { data: BatchRunFromCatalogIn }, TContext > => { - const mutationKey = ["rejectRule"]; + const mutationKey = ["batchRunFromCatalog"]; const { mutation: mutationOptions, axios: axiosOptions } = options ? options.mutation && "mutationKey" in options.mutation && @@ -9605,84 +11291,80 @@ export const getRejectRuleMutationOptions = < : { mutation: { mutationKey }, axios: undefined }; const mutationFn: MutationFunction< - Awaited>, - { ruleId: string; data: RejectRuleBody } + Awaited>, + { data: BatchRunFromCatalogIn } > = (props) => { - const { ruleId, data } = props ?? {}; + const { data } = props ?? 
{}; - return rejectRule(ruleId, data, axiosOptions); + return batchRunFromCatalog(data, axiosOptions); }; return { mutationFn, ...mutationOptions }; }; -export type RejectRuleMutationResult = NonNullable< - Awaited> +export type BatchRunFromCatalogMutationResult = NonNullable< + Awaited> >; -export type RejectRuleMutationBody = RejectRuleBody; -export type RejectRuleMutationError = AxiosError; +export type BatchRunFromCatalogMutationBody = BatchRunFromCatalogIn; +export type BatchRunFromCatalogMutationError = AxiosError; /** - * @summary Reject Rules + * @summary Batch Run From Catalog */ -export const useRejectRule = < +export const useBatchRunFromCatalog = < TError = AxiosError, TContext = unknown, >( options?: { mutation?: UseMutationOptions< - Awaited>, + Awaited>, TError, - { ruleId: string; data: RejectRuleBody }, + { data: BatchRunFromCatalogIn }, TContext >; axios?: AxiosRequestConfig; }, queryClient?: QueryClient, ): UseMutationResult< - Awaited>, + Awaited>, TError, - { ruleId: string; data: RejectRuleBody }, + { data: BatchRunFromCatalogIn }, TContext > => { - const mutationOptions = getRejectRuleMutationOptions(options); + const mutationOptions = getBatchRunFromCatalogMutationOptions(options); return useMutation(mutationOptions, queryClient); }; /** - * Validate a list of check definitions without saving them. - * @summary Validate Checks + * Validate checks, create a temporary view (OBO), and submit a dry-run job (SP). 
+ * @summary Submit Dry Run */ -export const validateChecks = ( - validateChecksIn: ValidateChecksIn, +export const submitDryRun = ( + dryRunIn: DryRunIn, options?: AxiosRequestConfig, -): Promise> => { - return axios.default.post( - `/api/v1/rules/validate-checks`, - validateChecksIn, - options, - ); +): Promise> => { + return axios.default.post(`/api/v1/dryrun`, dryRunIn, options); }; -export const getValidateChecksMutationOptions = < +export const getSubmitDryRunMutationOptions = < TError = AxiosError, TContext = unknown, >(options?: { mutation?: UseMutationOptions< - Awaited>, + Awaited>, TError, - { data: ValidateChecksIn }, + { data: DryRunIn }, TContext >; axios?: AxiosRequestConfig; }): UseMutationOptions< - Awaited>, + Awaited>, TError, - { data: ValidateChecksIn }, + { data: DryRunIn }, TContext > => { - const mutationKey = ["validateChecks"]; + const mutationKey = ["submitDryRun"]; const { mutation: mutationOptions, axios: axiosOptions } = options ? options.mutation && "mutationKey" in options.mutation && @@ -9692,120 +11374,145 @@ export const getValidateChecksMutationOptions = < : { mutation: { mutationKey }, axios: undefined }; const mutationFn: MutationFunction< - Awaited>, - { data: ValidateChecksIn } + Awaited>, + { data: DryRunIn } > = (props) => { const { data } = props ?? 
{}; - return validateChecks(data, axiosOptions); + return submitDryRun(data, axiosOptions); }; return { mutationFn, ...mutationOptions }; }; -export type ValidateChecksMutationResult = NonNullable< - Awaited> +export type SubmitDryRunMutationResult = NonNullable< + Awaited> >; -export type ValidateChecksMutationBody = ValidateChecksIn; -export type ValidateChecksMutationError = AxiosError; +export type SubmitDryRunMutationBody = DryRunIn; +export type SubmitDryRunMutationError = AxiosError; /** - * @summary Validate Checks + * @summary Submit Dry Run */ -export const useValidateChecks = < +export const useSubmitDryRun = < TError = AxiosError, TContext = unknown, >( options?: { mutation?: UseMutationOptions< - Awaited>, + Awaited>, TError, - { data: ValidateChecksIn }, + { data: DryRunIn }, TContext >; axios?: AxiosRequestConfig; }, queryClient?: QueryClient, ): UseMutationResult< - Awaited>, + Awaited>, TError, - { data: ValidateChecksIn }, + { data: DryRunIn }, TContext > => { - const mutationOptions = getValidateChecksMutationOptions(options); + const mutationOptions = getSubmitDryRunMutationOptions(options); return useMutation(mutationOptions, queryClient); }; /** - * Return every DQX check function the single-table editor should offer. + * Poll the status of a dry-run job. Cleans up the view when job terminates. -The response is built by introspecting DQX's own registry, so adding -a new ``@register_rule("row")`` to ``check_funcs.py`` is enough to -surface it in the UI on the next backend deploy — no frontend change -required. - * @summary List Check Functions +When *job_run_id* and optionally *view_fqn* are supplied as query +parameters the endpoint skips the database lookup, which is required +for validation dry runs that are not recorded in the history table. +Ownership of the *job_run_id* is verified against the OBO caller via the +Databricks Jobs API so a client cannot use a guessed *view_fqn* to drop +another user's temporary view. 
+ * @summary Get Dry Run Status */ -export const listCheckFunctions = ( +export const getDryRunStatus = ( + runId: string, + params?: GetDryRunStatusParams, options?: AxiosRequestConfig, -): Promise> => { - return axios.default.get(`/api/v1/check-functions`, options); +): Promise> => { + return axios.default.get(`/api/v1/dryrun/runs/${runId}/status`, { + ...options, + params: { ...params, ...options?.params }, + }); }; -export const getListCheckFunctionsQueryKey = () => { - return [`/api/v1/check-functions`] as const; +export const getGetDryRunStatusQueryKey = ( + runId?: string, + params?: GetDryRunStatusParams, +) => { + return [ + `/api/v1/dryrun/runs/${runId}/status`, + ...(params ? [params] : []), + ] as const; }; -export const getListCheckFunctionsQueryOptions = < - TData = Awaited>, - TError = AxiosError, ->(options?: { - query?: Partial< - UseQueryOptions< - Awaited>, - TError, - TData - > - >; - axios?: AxiosRequestConfig; -}) => { +export const getGetDryRunStatusQueryOptions = < + TData = Awaited>, + TError = AxiosError, +>( + runId: string, + params?: GetDryRunStatusParams, + options?: { + query?: Partial< + UseQueryOptions< + Awaited>, + TError, + TData + > + >; + axios?: AxiosRequestConfig; + }, +) => { const { query: queryOptions, axios: axiosOptions } = options ?? {}; - const queryKey = queryOptions?.queryKey ?? getListCheckFunctionsQueryKey(); + const queryKey = + queryOptions?.queryKey ?? 
getGetDryRunStatusQueryKey(runId, params); - const queryFn: QueryFunction< - Awaited> - > = ({ signal }) => listCheckFunctions({ signal, ...axiosOptions }); + const queryFn: QueryFunction>> = ({ + signal, + }) => getDryRunStatus(runId, params, { signal, ...axiosOptions }); - return { queryKey, queryFn, ...queryOptions } as UseQueryOptions< - Awaited>, + return { + queryKey, + queryFn, + enabled: !!runId, + ...queryOptions, + } as UseQueryOptions< + Awaited>, TError, TData > & { queryKey: DataTag }; }; -export type ListCheckFunctionsQueryResult = NonNullable< - Awaited> +export type GetDryRunStatusQueryResult = NonNullable< + Awaited> >; -export type ListCheckFunctionsQueryError = AxiosError; +export type GetDryRunStatusQueryError = AxiosError; -export function useListCheckFunctions< - TData = Awaited>, - TError = AxiosError, +export function useGetDryRunStatus< + TData = Awaited>, + TError = AxiosError, >( + runId: string, + params: undefined | GetDryRunStatusParams, options: { query: Partial< UseQueryOptions< - Awaited>, + Awaited>, TError, TData > > & Pick< DefinedInitialDataOptions< - Awaited>, + Awaited>, TError, - Awaited> + Awaited> >, "initialData" >; @@ -9815,23 +11522,25 @@ export function useListCheckFunctions< ): DefinedUseQueryResult & { queryKey: DataTag; }; -export function useListCheckFunctions< - TData = Awaited>, - TError = AxiosError, +export function useGetDryRunStatus< + TData = Awaited>, + TError = AxiosError, >( + runId: string, + params?: GetDryRunStatusParams, options?: { query?: Partial< UseQueryOptions< - Awaited>, + Awaited>, TError, TData > > & Pick< UndefinedInitialDataOptions< - Awaited>, + Awaited>, TError, - Awaited> + Awaited> >, "initialData" >; @@ -9841,14 +11550,16 @@ export function useListCheckFunctions< ): UseQueryResult & { queryKey: DataTag; }; -export function useListCheckFunctions< - TData = Awaited>, - TError = AxiosError, +export function useGetDryRunStatus< + TData = Awaited>, + TError = AxiosError, >( + runId: string, 
+ params?: GetDryRunStatusParams, options?: { query?: Partial< UseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -9860,17 +11571,19 @@ export function useListCheckFunctions< queryKey: DataTag; }; /** - * @summary List Check Functions + * @summary Get Dry Run Status */ -export function useListCheckFunctions< - TData = Awaited>, - TError = AxiosError, +export function useGetDryRunStatus< + TData = Awaited>, + TError = AxiosError, >( + runId: string, + params?: GetDryRunStatusParams, options?: { query?: Partial< UseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -9881,7 +11594,7 @@ export function useListCheckFunctions< ): UseQueryResult & { queryKey: DataTag; } { - const queryOptions = getListCheckFunctionsQueryOptions(options); + const queryOptions = getGetDryRunStatusQueryOptions(runId, params, options); const query = useQuery(queryOptions, queryClient) as UseQueryResult< TData, @@ -9893,47 +11606,54 @@ export function useListCheckFunctions< return query; } -export const getListCheckFunctionsSuspenseQueryOptions = < - TData = Awaited>, - TError = AxiosError, ->(options?: { - query?: Partial< - UseSuspenseQueryOptions< - Awaited>, - TError, - TData - > - >; - axios?: AxiosRequestConfig; -}) => { +export const getGetDryRunStatusSuspenseQueryOptions = < + TData = Awaited>, + TError = AxiosError, +>( + runId: string, + params?: GetDryRunStatusParams, + options?: { + query?: Partial< + UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > + >; + axios?: AxiosRequestConfig; + }, +) => { const { query: queryOptions, axios: axiosOptions } = options ?? {}; - const queryKey = queryOptions?.queryKey ?? getListCheckFunctionsQueryKey(); + const queryKey = + queryOptions?.queryKey ?? 
getGetDryRunStatusQueryKey(runId, params); - const queryFn: QueryFunction< - Awaited> - > = ({ signal }) => listCheckFunctions({ signal, ...axiosOptions }); + const queryFn: QueryFunction>> = ({ + signal, + }) => getDryRunStatus(runId, params, { signal, ...axiosOptions }); return { queryKey, queryFn, ...queryOptions } as UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > & { queryKey: DataTag }; }; -export type ListCheckFunctionsSuspenseQueryResult = NonNullable< - Awaited> +export type GetDryRunStatusSuspenseQueryResult = NonNullable< + Awaited> >; -export type ListCheckFunctionsSuspenseQueryError = AxiosError; - -export function useListCheckFunctionsSuspense< - TData = Awaited>, - TError = AxiosError, +export type GetDryRunStatusSuspenseQueryError = AxiosError; + +export function useGetDryRunStatusSuspense< + TData = Awaited>, + TError = AxiosError, >( + runId: string, + params: undefined | GetDryRunStatusParams, options: { query: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -9944,14 +11664,16 @@ export function useListCheckFunctionsSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; }; -export function useListCheckFunctionsSuspense< - TData = Awaited>, - TError = AxiosError, +export function useGetDryRunStatusSuspense< + TData = Awaited>, + TError = AxiosError, >( + runId: string, + params?: GetDryRunStatusParams, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -9962,14 +11684,16 @@ export function useListCheckFunctionsSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; }; -export function useListCheckFunctionsSuspense< - TData = Awaited>, - TError = AxiosError, +export function useGetDryRunStatusSuspense< + TData = Awaited>, + TError = AxiosError, >( + runId: string, + params?: GetDryRunStatusParams, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -9981,17 +11705,19 @@ export function 
useListCheckFunctionsSuspense< queryKey: DataTag; }; /** - * @summary List Check Functions + * @summary Get Dry Run Status */ -export function useListCheckFunctionsSuspense< - TData = Awaited>, - TError = AxiosError, +export function useGetDryRunStatusSuspense< + TData = Awaited>, + TError = AxiosError, >( + runId: string, + params?: GetDryRunStatusParams, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -10002,7 +11728,11 @@ export function useListCheckFunctionsSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; } { - const queryOptions = getListCheckFunctionsSuspenseQueryOptions(options); + const queryOptions = getGetDryRunStatusSuspenseQueryOptions( + runId, + params, + options, + ); const query = useSuspenseQuery( queryOptions, @@ -10017,234 +11747,221 @@ export function useListCheckFunctionsSuspense< } /** - * Return validation (dry-run) history filtered to user-accessible catalogs. - * @summary List Validation Runs + * Cancel a running dry-run job. + +When *job_run_id* is supplied as a query parameter the endpoint +skips the database lookup (needed for validation dry runs that were +not recorded in the history table). Ownership is still enforced via +the Databricks Jobs API — only admins/approvers may cancel others' runs. + +Note: a non-owner caller can confirm whether a given *job_run_id* exists +(and belongs to someone else) by observing the 403 response. This is +accepted — Databricks job IDs are large random integers and the response +leaks no identifying information beyond existence. 
+ * @summary Cancel Dry Run */ -export const listValidationRuns = ( +export const cancelDryRun = ( + runId: string, + params?: CancelDryRunParams, options?: AxiosRequestConfig, -): Promise> => { - return axios.default.get(`/api/v1/dryrun/runs`, options); -}; - -export const getListValidationRunsQueryKey = () => { - return [`/api/v1/dryrun/runs`] as const; +): Promise> => { + return axios.default.post(`/api/v1/dryrun/runs/${runId}/cancel`, undefined, { + ...options, + params: { ...params, ...options?.params }, + }); }; -export const getListValidationRunsQueryOptions = < - TData = Awaited>, +export const getCancelDryRunMutationOptions = < TError = AxiosError, + TContext = unknown, >(options?: { - query?: Partial< - UseQueryOptions< - Awaited>, - TError, - TData - > + mutation?: UseMutationOptions< + Awaited>, + TError, + { runId: string; params?: CancelDryRunParams }, + TContext >; axios?: AxiosRequestConfig; -}) => { - const { query: queryOptions, axios: axiosOptions } = options ?? {}; +}): UseMutationOptions< + Awaited>, + TError, + { runId: string; params?: CancelDryRunParams }, + TContext +> => { + const mutationKey = ["cancelDryRun"]; + const { mutation: mutationOptions, axios: axiosOptions } = options + ? options.mutation && + "mutationKey" in options.mutation && + options.mutation.mutationKey + ? options + : { ...options, mutation: { ...options.mutation, mutationKey } } + : { mutation: { mutationKey }, axios: undefined }; - const queryKey = queryOptions?.queryKey ?? getListValidationRunsQueryKey(); + const mutationFn: MutationFunction< + Awaited>, + { runId: string; params?: CancelDryRunParams } + > = (props) => { + const { runId, params } = props ?? 
{}; - const queryFn: QueryFunction< - Awaited> - > = ({ signal }) => listValidationRuns({ signal, ...axiosOptions }); + return cancelDryRun(runId, params, axiosOptions); + }; - return { queryKey, queryFn, ...queryOptions } as UseQueryOptions< - Awaited>, - TError, - TData - > & { queryKey: DataTag }; + return { mutationFn, ...mutationOptions }; }; -export type ListValidationRunsQueryResult = NonNullable< - Awaited> +export type CancelDryRunMutationResult = NonNullable< + Awaited> >; -export type ListValidationRunsQueryError = AxiosError; -export function useListValidationRuns< - TData = Awaited>, - TError = AxiosError, ->( - options: { - query: Partial< - UseQueryOptions< - Awaited>, - TError, - TData - > - > & - Pick< - DefinedInitialDataOptions< - Awaited>, - TError, - Awaited> - >, - "initialData" - >; - axios?: AxiosRequestConfig; - }, - queryClient?: QueryClient, -): DefinedUseQueryResult & { - queryKey: DataTag; -}; -export function useListValidationRuns< - TData = Awaited>, - TError = AxiosError, ->( - options?: { - query?: Partial< - UseQueryOptions< - Awaited>, - TError, - TData - > - > & - Pick< - UndefinedInitialDataOptions< - Awaited>, - TError, - Awaited> - >, - "initialData" - >; - axios?: AxiosRequestConfig; - }, - queryClient?: QueryClient, -): UseQueryResult & { - queryKey: DataTag; -}; -export function useListValidationRuns< - TData = Awaited>, +export type CancelDryRunMutationError = AxiosError; + +/** + * @summary Cancel Dry Run + */ +export const useCancelDryRun = < TError = AxiosError, + TContext = unknown, >( options?: { - query?: Partial< - UseQueryOptions< - Awaited>, - TError, - TData - > + mutation?: UseMutationOptions< + Awaited>, + TError, + { runId: string; params?: CancelDryRunParams }, + TContext >; axios?: AxiosRequestConfig; }, queryClient?: QueryClient, -): UseQueryResult & { - queryKey: DataTag; +): UseMutationResult< + Awaited>, + TError, + { runId: string; params?: CancelDryRunParams }, + TContext +> => { + const 
mutationOptions = getCancelDryRunMutationOptions(options); + + return useMutation(mutationOptions, queryClient); }; + /** - * @summary List Validation Runs + * Read dry-run results from the Delta table. + * @summary Get Dry Run Results */ +export const getDryRunResults = ( + runId: string, + options?: AxiosRequestConfig, +): Promise> => { + return axios.default.get(`/api/v1/dryrun/runs/${runId}/results`, options); +}; -export function useListValidationRuns< - TData = Awaited>, +export const getGetDryRunResultsQueryKey = (runId?: string) => { + return [`/api/v1/dryrun/runs/${runId}/results`] as const; +}; + +export const getGetDryRunResultsQueryOptions = < + TData = Awaited>, TError = AxiosError, >( + runId: string, options?: { query?: Partial< UseQueryOptions< - Awaited>, + Awaited>, TError, TData > >; axios?: AxiosRequestConfig; }, - queryClient?: QueryClient, -): UseQueryResult & { - queryKey: DataTag; -} { - const queryOptions = getListValidationRunsQueryOptions(options); - - const query = useQuery(queryOptions, queryClient) as UseQueryResult< - TData, - TError - > & { queryKey: DataTag }; - - query.queryKey = queryOptions.queryKey; - - return query; -} - -export const getListValidationRunsSuspenseQueryOptions = < - TData = Awaited>, - TError = AxiosError, ->(options?: { - query?: Partial< - UseSuspenseQueryOptions< - Awaited>, - TError, - TData - > - >; - axios?: AxiosRequestConfig; -}) => { +) => { const { query: queryOptions, axios: axiosOptions } = options ?? {}; - const queryKey = queryOptions?.queryKey ?? getListValidationRunsQueryKey(); + const queryKey = queryOptions?.queryKey ?? 
getGetDryRunResultsQueryKey(runId); const queryFn: QueryFunction< - Awaited> - > = ({ signal }) => listValidationRuns({ signal, ...axiosOptions }); + Awaited> + > = ({ signal }) => getDryRunResults(runId, { signal, ...axiosOptions }); - return { queryKey, queryFn, ...queryOptions } as UseSuspenseQueryOptions< - Awaited>, + return { + queryKey, + queryFn, + enabled: !!runId, + ...queryOptions, + } as UseQueryOptions< + Awaited>, TError, TData > & { queryKey: DataTag }; }; -export type ListValidationRunsSuspenseQueryResult = NonNullable< - Awaited> +export type GetDryRunResultsQueryResult = NonNullable< + Awaited> >; -export type ListValidationRunsSuspenseQueryError = - AxiosError; +export type GetDryRunResultsQueryError = AxiosError; -export function useListValidationRunsSuspense< - TData = Awaited>, +export function useGetDryRunResults< + TData = Awaited>, TError = AxiosError, >( + runId: string, options: { query: Partial< - UseSuspenseQueryOptions< - Awaited>, + UseQueryOptions< + Awaited>, TError, TData > - >; + > & + Pick< + DefinedInitialDataOptions< + Awaited>, + TError, + Awaited> + >, + "initialData" + >; axios?: AxiosRequestConfig; }, queryClient?: QueryClient, -): UseSuspenseQueryResult & { +): DefinedUseQueryResult & { queryKey: DataTag; }; -export function useListValidationRunsSuspense< - TData = Awaited>, +export function useGetDryRunResults< + TData = Awaited>, TError = AxiosError, >( + runId: string, options?: { query?: Partial< - UseSuspenseQueryOptions< - Awaited>, + UseQueryOptions< + Awaited>, TError, TData > - >; + > & + Pick< + UndefinedInitialDataOptions< + Awaited>, + TError, + Awaited> + >, + "initialData" + >; axios?: AxiosRequestConfig; }, queryClient?: QueryClient, -): UseSuspenseQueryResult & { +): UseQueryResult & { queryKey: DataTag; }; -export function useListValidationRunsSuspense< - TData = Awaited>, +export function useGetDryRunResults< + TData = Awaited>, TError = AxiosError, >( + runId: string, options?: { query?: Partial< - 
UseSuspenseQueryOptions< - Awaited>, + UseQueryOptions< + Awaited>, TError, TData > @@ -10252,21 +11969,22 @@ export function useListValidationRunsSuspense< axios?: AxiosRequestConfig; }, queryClient?: QueryClient, -): UseSuspenseQueryResult & { +): UseQueryResult & { queryKey: DataTag; }; /** - * @summary List Validation Runs + * @summary Get Dry Run Results */ -export function useListValidationRunsSuspense< - TData = Awaited>, +export function useGetDryRunResults< + TData = Awaited>, TError = AxiosError, >( + runId: string, options?: { query?: Partial< - UseSuspenseQueryOptions< - Awaited>, + UseQueryOptions< + Awaited>, TError, TData > @@ -10274,287 +11992,212 @@ export function useListValidationRunsSuspense< axios?: AxiosRequestConfig; }, queryClient?: QueryClient, -): UseSuspenseQueryResult & { +): UseQueryResult & { queryKey: DataTag; } { - const queryOptions = getListValidationRunsSuspenseQueryOptions(options); + const queryOptions = getGetDryRunResultsQueryOptions(runId, options); - const query = useSuspenseQuery( - queryOptions, - queryClient, - ) as UseSuspenseQueryResult & { - queryKey: DataTag; - }; + const query = useQuery(queryOptions, queryClient) as UseQueryResult< + TData, + TError + > & { queryKey: DataTag }; query.queryKey = queryOptions.queryKey; return query; } -/** - * Read approved checks from the rules catalog for the given tables and submit dry-run jobs. 
- * @summary Batch Run From Catalog - */ -export const batchRunFromCatalog = ( - batchRunFromCatalogIn: BatchRunFromCatalogIn, - options?: AxiosRequestConfig, -): Promise> => { - return axios.default.post( - `/api/v1/dryrun/batch-from-catalog`, - batchRunFromCatalogIn, - options, - ); -}; - -export const getBatchRunFromCatalogMutationOptions = < +export const getGetDryRunResultsSuspenseQueryOptions = < + TData = Awaited>, TError = AxiosError, - TContext = unknown, ->(options?: { - mutation?: UseMutationOptions< - Awaited>, - TError, - { data: BatchRunFromCatalogIn }, - TContext - >; - axios?: AxiosRequestConfig; -}): UseMutationOptions< - Awaited>, - TError, - { data: BatchRunFromCatalogIn }, - TContext -> => { - const mutationKey = ["batchRunFromCatalog"]; - const { mutation: mutationOptions, axios: axiosOptions } = options - ? options.mutation && - "mutationKey" in options.mutation && - options.mutation.mutationKey - ? options - : { ...options, mutation: { ...options.mutation, mutationKey } } - : { mutation: { mutationKey }, axios: undefined }; +>( + runId: string, + options?: { + query?: Partial< + UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > + >; + axios?: AxiosRequestConfig; + }, +) => { + const { query: queryOptions, axios: axiosOptions } = options ?? {}; - const mutationFn: MutationFunction< - Awaited>, - { data: BatchRunFromCatalogIn } - > = (props) => { - const { data } = props ?? {}; + const queryKey = queryOptions?.queryKey ?? 
getGetDryRunResultsQueryKey(runId); - return batchRunFromCatalog(data, axiosOptions); - }; + const queryFn: QueryFunction< + Awaited> + > = ({ signal }) => getDryRunResults(runId, { signal, ...axiosOptions }); - return { mutationFn, ...mutationOptions }; + return { queryKey, queryFn, ...queryOptions } as UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > & { queryKey: DataTag }; }; -export type BatchRunFromCatalogMutationResult = NonNullable< - Awaited> +export type GetDryRunResultsSuspenseQueryResult = NonNullable< + Awaited> >; -export type BatchRunFromCatalogMutationBody = BatchRunFromCatalogIn; -export type BatchRunFromCatalogMutationError = AxiosError; +export type GetDryRunResultsSuspenseQueryError = + AxiosError; -/** - * @summary Batch Run From Catalog - */ -export const useBatchRunFromCatalog = < +export function useGetDryRunResultsSuspense< + TData = Awaited>, TError = AxiosError, - TContext = unknown, >( - options?: { - mutation?: UseMutationOptions< - Awaited>, - TError, - { data: BatchRunFromCatalogIn }, - TContext + runId: string, + options: { + query: Partial< + UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > >; axios?: AxiosRequestConfig; }, queryClient?: QueryClient, -): UseMutationResult< - Awaited>, - TError, - { data: BatchRunFromCatalogIn }, - TContext -> => { - const mutationOptions = getBatchRunFromCatalogMutationOptions(options); - - return useMutation(mutationOptions, queryClient); -}; - -/** - * Validate checks, create a temporary view (OBO), and submit a dry-run job (SP). 
- * @summary Submit Dry Run - */ -export const submitDryRun = ( - dryRunIn: DryRunIn, - options?: AxiosRequestConfig, -): Promise> => { - return axios.default.post(`/api/v1/dryrun`, dryRunIn, options); +): UseSuspenseQueryResult & { + queryKey: DataTag; }; - -export const getSubmitDryRunMutationOptions = < - TError = AxiosError, - TContext = unknown, ->(options?: { - mutation?: UseMutationOptions< - Awaited>, - TError, - { data: DryRunIn }, - TContext - >; - axios?: AxiosRequestConfig; -}): UseMutationOptions< - Awaited>, - TError, - { data: DryRunIn }, - TContext -> => { - const mutationKey = ["submitDryRun"]; - const { mutation: mutationOptions, axios: axiosOptions } = options - ? options.mutation && - "mutationKey" in options.mutation && - options.mutation.mutationKey - ? options - : { ...options, mutation: { ...options.mutation, mutationKey } } - : { mutation: { mutationKey }, axios: undefined }; - - const mutationFn: MutationFunction< - Awaited>, - { data: DryRunIn } - > = (props) => { - const { data } = props ?? 
{}; - - return submitDryRun(data, axiosOptions); - }; - - return { mutationFn, ...mutationOptions }; +export function useGetDryRunResultsSuspense< + TData = Awaited>, + TError = AxiosError, +>( + runId: string, + options?: { + query?: Partial< + UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): UseSuspenseQueryResult & { + queryKey: DataTag; }; - -export type SubmitDryRunMutationResult = NonNullable< - Awaited> ->; -export type SubmitDryRunMutationBody = DryRunIn; -export type SubmitDryRunMutationError = AxiosError; - -/** - * @summary Submit Dry Run - */ -export const useSubmitDryRun = < +export function useGetDryRunResultsSuspense< + TData = Awaited>, TError = AxiosError, - TContext = unknown, >( + runId: string, options?: { - mutation?: UseMutationOptions< - Awaited>, - TError, - { data: DryRunIn }, - TContext + query?: Partial< + UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > >; axios?: AxiosRequestConfig; }, queryClient?: QueryClient, -): UseMutationResult< - Awaited>, - TError, - { data: DryRunIn }, - TContext -> => { - const mutationOptions = getSubmitDryRunMutationOptions(options); - - return useMutation(mutationOptions, queryClient); +): UseSuspenseQueryResult & { + queryKey: DataTag; }; - /** - * Poll the status of a dry-run job. Cleans up the view when job terminates. - -When *job_run_id* and optionally *view_fqn* are supplied as query -parameters the endpoint skips the database lookup, which is required -for validation dry runs that are not recorded in the history table. -Ownership of the *job_run_id* is verified against the OBO caller via the -Databricks Jobs API so a client cannot use a guessed *view_fqn* to drop -another user's temporary view. 
- * @summary Get Dry Run Status + * @summary Get Dry Run Results */ -export const getDryRunStatus = ( - runId: string, - params?: GetDryRunStatusParams, - options?: AxiosRequestConfig, -): Promise> => { - return axios.default.get(`/api/v1/dryrun/runs/${runId}/status`, { - ...options, - params: { ...params, ...options?.params }, - }); -}; - -export const getGetDryRunStatusQueryKey = ( - runId?: string, - params?: GetDryRunStatusParams, -) => { - return [ - `/api/v1/dryrun/runs/${runId}/status`, - ...(params ? [params] : []), - ] as const; -}; -export const getGetDryRunStatusQueryOptions = < - TData = Awaited>, +export function useGetDryRunResultsSuspense< + TData = Awaited>, TError = AxiosError, >( runId: string, - params?: GetDryRunStatusParams, options?: { query?: Partial< - UseQueryOptions< - Awaited>, + UseSuspenseQueryOptions< + Awaited>, TError, TData > >; axios?: AxiosRequestConfig; }, -) => { + queryClient?: QueryClient, +): UseSuspenseQueryResult & { + queryKey: DataTag; +} { + const queryOptions = getGetDryRunResultsSuspenseQueryOptions(runId, options); + + const query = useSuspenseQuery( + queryOptions, + queryClient, + ) as UseSuspenseQueryResult & { + queryKey: DataTag; + }; + + query.queryKey = queryOptions.queryKey; + + return query; +} + +/** + * Return profiling run history, newest first. + * @summary List Profile Runs + */ +export const listProfileRuns = ( + options?: AxiosRequestConfig, +): Promise> => { + return axios.default.get(`/api/v1/profiler/runs`, options); +}; + +export const getListProfileRunsQueryKey = () => { + return [`/api/v1/profiler/runs`] as const; +}; + +export const getListProfileRunsQueryOptions = < + TData = Awaited>, + TError = AxiosError, +>(options?: { + query?: Partial< + UseQueryOptions>, TError, TData> + >; + axios?: AxiosRequestConfig; +}) => { const { query: queryOptions, axios: axiosOptions } = options ?? {}; - const queryKey = - queryOptions?.queryKey ?? 
getGetDryRunStatusQueryKey(runId, params); + const queryKey = queryOptions?.queryKey ?? getListProfileRunsQueryKey(); - const queryFn: QueryFunction>> = ({ + const queryFn: QueryFunction>> = ({ signal, - }) => getDryRunStatus(runId, params, { signal, ...axiosOptions }); + }) => listProfileRuns({ signal, ...axiosOptions }); - return { - queryKey, - queryFn, - enabled: !!runId, - ...queryOptions, - } as UseQueryOptions< - Awaited>, + return { queryKey, queryFn, ...queryOptions } as UseQueryOptions< + Awaited>, TError, TData > & { queryKey: DataTag }; }; -export type GetDryRunStatusQueryResult = NonNullable< - Awaited> +export type ListProfileRunsQueryResult = NonNullable< + Awaited> >; -export type GetDryRunStatusQueryError = AxiosError; +export type ListProfileRunsQueryError = AxiosError; -export function useGetDryRunStatus< - TData = Awaited>, +export function useListProfileRuns< + TData = Awaited>, TError = AxiosError, >( - runId: string, - params: undefined | GetDryRunStatusParams, options: { query: Partial< UseQueryOptions< - Awaited>, + Awaited>, TError, TData > > & Pick< DefinedInitialDataOptions< - Awaited>, + Awaited>, TError, - Awaited> + Awaited> >, "initialData" >; @@ -10564,25 +12207,23 @@ export function useGetDryRunStatus< ): DefinedUseQueryResult & { queryKey: DataTag; }; -export function useGetDryRunStatus< - TData = Awaited>, +export function useListProfileRuns< + TData = Awaited>, TError = AxiosError, >( - runId: string, - params?: GetDryRunStatusParams, options?: { query?: Partial< UseQueryOptions< - Awaited>, + Awaited>, TError, TData > > & Pick< UndefinedInitialDataOptions< - Awaited>, + Awaited>, TError, - Awaited> + Awaited> >, "initialData" >; @@ -10592,16 +12233,14 @@ export function useGetDryRunStatus< ): UseQueryResult & { queryKey: DataTag; }; -export function useGetDryRunStatus< - TData = Awaited>, +export function useListProfileRuns< + TData = Awaited>, TError = AxiosError, >( - runId: string, - params?: GetDryRunStatusParams, options?: 
{ query?: Partial< UseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -10613,19 +12252,17 @@ export function useGetDryRunStatus< queryKey: DataTag; }; /** - * @summary Get Dry Run Status + * @summary List Profile Runs */ -export function useGetDryRunStatus< - TData = Awaited>, +export function useListProfileRuns< + TData = Awaited>, TError = AxiosError, >( - runId: string, - params?: GetDryRunStatusParams, options?: { query?: Partial< UseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -10636,7 +12273,7 @@ export function useGetDryRunStatus< ): UseQueryResult & { queryKey: DataTag; } { - const queryOptions = getGetDryRunStatusQueryOptions(runId, params, options); + const queryOptions = getListProfileRunsQueryOptions(options); const query = useQuery(queryOptions, queryClient) as UseQueryResult< TData, @@ -10648,54 +12285,47 @@ export function useGetDryRunStatus< return query; } -export const getGetDryRunStatusSuspenseQueryOptions = < - TData = Awaited>, +export const getListProfileRunsSuspenseQueryOptions = < + TData = Awaited>, TError = AxiosError, ->( - runId: string, - params?: GetDryRunStatusParams, - options?: { - query?: Partial< - UseSuspenseQueryOptions< - Awaited>, - TError, - TData - > - >; - axios?: AxiosRequestConfig; - }, -) => { +>(options?: { + query?: Partial< + UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > + >; + axios?: AxiosRequestConfig; +}) => { const { query: queryOptions, axios: axiosOptions } = options ?? {}; - const queryKey = - queryOptions?.queryKey ?? getGetDryRunStatusQueryKey(runId, params); + const queryKey = queryOptions?.queryKey ?? 
getListProfileRunsQueryKey(); - const queryFn: QueryFunction>> = ({ + const queryFn: QueryFunction>> = ({ signal, - }) => getDryRunStatus(runId, params, { signal, ...axiosOptions }); + }) => listProfileRuns({ signal, ...axiosOptions }); return { queryKey, queryFn, ...queryOptions } as UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > & { queryKey: DataTag }; }; -export type GetDryRunStatusSuspenseQueryResult = NonNullable< - Awaited> +export type ListProfileRunsSuspenseQueryResult = NonNullable< + Awaited> >; -export type GetDryRunStatusSuspenseQueryError = AxiosError; +export type ListProfileRunsSuspenseQueryError = AxiosError; -export function useGetDryRunStatusSuspense< - TData = Awaited>, +export function useListProfileRunsSuspense< + TData = Awaited>, TError = AxiosError, >( - runId: string, - params: undefined | GetDryRunStatusParams, options: { query: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -10705,17 +12335,15 @@ export function useGetDryRunStatusSuspense< queryClient?: QueryClient, ): UseSuspenseQueryResult & { queryKey: DataTag; -}; -export function useGetDryRunStatusSuspense< - TData = Awaited>, +}; +export function useListProfileRunsSuspense< + TData = Awaited>, TError = AxiosError, >( - runId: string, - params?: GetDryRunStatusParams, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -10726,16 +12354,14 @@ export function useGetDryRunStatusSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; }; -export function useGetDryRunStatusSuspense< - TData = Awaited>, +export function useListProfileRunsSuspense< + TData = Awaited>, TError = AxiosError, >( - runId: string, - params?: GetDryRunStatusParams, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -10747,19 +12373,17 @@ export function useGetDryRunStatusSuspense< queryKey: DataTag; }; /** - * @summary Get Dry Run Status + * @summary List Profile Runs */ 
-export function useGetDryRunStatusSuspense< - TData = Awaited>, +export function useListProfileRunsSuspense< + TData = Awaited>, TError = AxiosError, >( - runId: string, - params?: GetDryRunStatusParams, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -10770,11 +12394,7 @@ export function useGetDryRunStatusSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; } { - const queryOptions = getGetDryRunStatusSuspenseQueryOptions( - runId, - params, - options, - ); + const queryOptions = getListProfileRunsSuspenseQueryOptions(options); const query = useSuspenseQuery( queryOptions, @@ -10789,48 +12409,121 @@ export function useGetDryRunStatusSuspense< } /** - * Cancel a running dry-run job. + * Create a temporary view (OBO) and submit a profiler job (SP). + * @summary Submit Profile Run + */ +export const submitProfileRun = ( + profileRunIn: ProfileRunIn, + options?: AxiosRequestConfig, +): Promise> => { + return axios.default.post(`/api/v1/profiler/run`, profileRunIn, options); +}; -When *job_run_id* is supplied as a query parameter the endpoint -skips the database lookup (needed for validation dry runs that were -not recorded in the history table). Ownership is still enforced via -the Databricks Jobs API — only admins/approvers may cancel others' runs. +export const getSubmitProfileRunMutationOptions = < + TError = AxiosError, + TContext = unknown, +>(options?: { + mutation?: UseMutationOptions< + Awaited>, + TError, + { data: ProfileRunIn }, + TContext + >; + axios?: AxiosRequestConfig; +}): UseMutationOptions< + Awaited>, + TError, + { data: ProfileRunIn }, + TContext +> => { + const mutationKey = ["submitProfileRun"]; + const { mutation: mutationOptions, axios: axiosOptions } = options + ? options.mutation && + "mutationKey" in options.mutation && + options.mutation.mutationKey + ? 
options + : { ...options, mutation: { ...options.mutation, mutationKey } } + : { mutation: { mutationKey }, axios: undefined }; -Note: a non-owner caller can confirm whether a given *job_run_id* exists -(and belongs to someone else) by observing the 403 response. This is -accepted — Databricks job IDs are large random integers and the response -leaks no identifying information beyond existence. - * @summary Cancel Dry Run + const mutationFn: MutationFunction< + Awaited>, + { data: ProfileRunIn } + > = (props) => { + const { data } = props ?? {}; + + return submitProfileRun(data, axiosOptions); + }; + + return { mutationFn, ...mutationOptions }; +}; + +export type SubmitProfileRunMutationResult = NonNullable< + Awaited> +>; +export type SubmitProfileRunMutationBody = ProfileRunIn; +export type SubmitProfileRunMutationError = AxiosError; + +/** + * @summary Submit Profile Run */ -export const cancelDryRun = ( - runId: string, - params?: CancelDryRunParams, +export const useSubmitProfileRun = < + TError = AxiosError, + TContext = unknown, +>( + options?: { + mutation?: UseMutationOptions< + Awaited>, + TError, + { data: ProfileRunIn }, + TContext + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): UseMutationResult< + Awaited>, + TError, + { data: ProfileRunIn }, + TContext +> => { + const mutationOptions = getSubmitProfileRunMutationOptions(options); + + return useMutation(mutationOptions, queryClient); +}; + +/** + * Create temporary views and submit profiler jobs for multiple tables in parallel. 
+ * @summary Submit Batch Profile Run + */ +export const submitBatchProfileRun = ( + batchProfileRunIn: BatchProfileRunIn, options?: AxiosRequestConfig, -): Promise> => { - return axios.default.post(`/api/v1/dryrun/runs/${runId}/cancel`, undefined, { - ...options, - params: { ...params, ...options?.params }, - }); +): Promise> => { + return axios.default.post( + `/api/v1/profiler/batch-run`, + batchProfileRunIn, + options, + ); }; -export const getCancelDryRunMutationOptions = < +export const getSubmitBatchProfileRunMutationOptions = < TError = AxiosError, TContext = unknown, >(options?: { mutation?: UseMutationOptions< - Awaited>, + Awaited>, TError, - { runId: string; params?: CancelDryRunParams }, + { data: BatchProfileRunIn }, TContext >; axios?: AxiosRequestConfig; }): UseMutationOptions< - Awaited>, + Awaited>, TError, - { runId: string; params?: CancelDryRunParams }, + { data: BatchProfileRunIn }, TContext > => { - const mutationKey = ["cancelDryRun"]; + const mutationKey = ["submitBatchProfileRun"]; const { mutation: mutationOptions, axios: axiosOptions } = options ? options.mutation && "mutationKey" in options.mutation && @@ -10840,75 +12533,76 @@ export const getCancelDryRunMutationOptions = < : { mutation: { mutationKey }, axios: undefined }; const mutationFn: MutationFunction< - Awaited>, - { runId: string; params?: CancelDryRunParams } + Awaited>, + { data: BatchProfileRunIn } > = (props) => { - const { runId, params } = props ?? {}; + const { data } = props ?? 
{}; - return cancelDryRun(runId, params, axiosOptions); + return submitBatchProfileRun(data, axiosOptions); }; return { mutationFn, ...mutationOptions }; }; -export type CancelDryRunMutationResult = NonNullable< - Awaited> +export type SubmitBatchProfileRunMutationResult = NonNullable< + Awaited> >; - -export type CancelDryRunMutationError = AxiosError; +export type SubmitBatchProfileRunMutationBody = BatchProfileRunIn; +export type SubmitBatchProfileRunMutationError = + AxiosError; /** - * @summary Cancel Dry Run + * @summary Submit Batch Profile Run */ -export const useCancelDryRun = < +export const useSubmitBatchProfileRun = < TError = AxiosError, TContext = unknown, >( options?: { mutation?: UseMutationOptions< - Awaited>, + Awaited>, TError, - { runId: string; params?: CancelDryRunParams }, + { data: BatchProfileRunIn }, TContext >; axios?: AxiosRequestConfig; }, queryClient?: QueryClient, ): UseMutationResult< - Awaited>, + Awaited>, TError, - { runId: string; params?: CancelDryRunParams }, + { data: BatchProfileRunIn }, TContext > => { - const mutationOptions = getCancelDryRunMutationOptions(options); + const mutationOptions = getSubmitBatchProfileRunMutationOptions(options); return useMutation(mutationOptions, queryClient); }; /** - * Read dry-run results from the Delta table. - * @summary Get Dry Run Results + * Poll the status of a profiler job run. Cleans up the view when job terminates. 
+ * @summary Get Profile Run Status */ -export const getDryRunResults = ( +export const getProfileRunStatus = ( runId: string, options?: AxiosRequestConfig, -): Promise> => { - return axios.default.get(`/api/v1/dryrun/runs/${runId}/results`, options); +): Promise> => { + return axios.default.get(`/api/v1/profiler/runs/${runId}/status`, options); }; -export const getGetDryRunResultsQueryKey = (runId?: string) => { - return [`/api/v1/dryrun/runs/${runId}/results`] as const; +export const getGetProfileRunStatusQueryKey = (runId?: string) => { + return [`/api/v1/profiler/runs/${runId}/status`] as const; }; -export const getGetDryRunResultsQueryOptions = < - TData = Awaited>, +export const getGetProfileRunStatusQueryOptions = < + TData = Awaited>, TError = AxiosError, >( runId: string, options?: { query?: Partial< UseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -10918,11 +12612,12 @@ export const getGetDryRunResultsQueryOptions = < ) => { const { query: queryOptions, axios: axiosOptions } = options ?? {}; - const queryKey = queryOptions?.queryKey ?? getGetDryRunResultsQueryKey(runId); + const queryKey = + queryOptions?.queryKey ?? 
getGetProfileRunStatusQueryKey(runId); const queryFn: QueryFunction< - Awaited> - > = ({ signal }) => getDryRunResults(runId, { signal, ...axiosOptions }); + Awaited> + > = ({ signal }) => getProfileRunStatus(runId, { signal, ...axiosOptions }); return { queryKey, @@ -10930,35 +12625,35 @@ export const getGetDryRunResultsQueryOptions = < enabled: !!runId, ...queryOptions, } as UseQueryOptions< - Awaited>, + Awaited>, TError, TData > & { queryKey: DataTag }; }; -export type GetDryRunResultsQueryResult = NonNullable< - Awaited> +export type GetProfileRunStatusQueryResult = NonNullable< + Awaited> >; -export type GetDryRunResultsQueryError = AxiosError; +export type GetProfileRunStatusQueryError = AxiosError; -export function useGetDryRunResults< - TData = Awaited>, +export function useGetProfileRunStatus< + TData = Awaited>, TError = AxiosError, >( runId: string, options: { query: Partial< UseQueryOptions< - Awaited>, + Awaited>, TError, TData > > & Pick< DefinedInitialDataOptions< - Awaited>, + Awaited>, TError, - Awaited> + Awaited> >, "initialData" >; @@ -10968,24 +12663,24 @@ export function useGetDryRunResults< ): DefinedUseQueryResult & { queryKey: DataTag; }; -export function useGetDryRunResults< - TData = Awaited>, +export function useGetProfileRunStatus< + TData = Awaited>, TError = AxiosError, >( runId: string, options?: { query?: Partial< UseQueryOptions< - Awaited>, + Awaited>, TError, TData > > & Pick< UndefinedInitialDataOptions< - Awaited>, + Awaited>, TError, - Awaited> + Awaited> >, "initialData" >; @@ -10995,15 +12690,15 @@ export function useGetDryRunResults< ): UseQueryResult & { queryKey: DataTag; }; -export function useGetDryRunResults< - TData = Awaited>, +export function useGetProfileRunStatus< + TData = Awaited>, TError = AxiosError, >( runId: string, options?: { query?: Partial< UseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -11015,18 +12710,18 @@ export function useGetDryRunResults< queryKey: DataTag; }; /** - * @summary Get 
Dry Run Results + * @summary Get Profile Run Status */ -export function useGetDryRunResults< - TData = Awaited>, +export function useGetProfileRunStatus< + TData = Awaited>, TError = AxiosError, >( runId: string, options?: { query?: Partial< UseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -11037,7 +12732,7 @@ export function useGetDryRunResults< ): UseQueryResult & { queryKey: DataTag; } { - const queryOptions = getGetDryRunResultsQueryOptions(runId, options); + const queryOptions = getGetProfileRunStatusQueryOptions(runId, options); const query = useQuery(queryOptions, queryClient) as UseQueryResult< TData, @@ -11049,15 +12744,15 @@ export function useGetDryRunResults< return query; } -export const getGetDryRunResultsSuspenseQueryOptions = < - TData = Awaited>, +export const getGetProfileRunStatusSuspenseQueryOptions = < + TData = Awaited>, TError = AxiosError, >( runId: string, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -11067,34 +12762,35 @@ export const getGetDryRunResultsSuspenseQueryOptions = < ) => { const { query: queryOptions, axios: axiosOptions } = options ?? {}; - const queryKey = queryOptions?.queryKey ?? getGetDryRunResultsQueryKey(runId); + const queryKey = + queryOptions?.queryKey ?? 
getGetProfileRunStatusQueryKey(runId); const queryFn: QueryFunction< - Awaited> - > = ({ signal }) => getDryRunResults(runId, { signal, ...axiosOptions }); + Awaited> + > = ({ signal }) => getProfileRunStatus(runId, { signal, ...axiosOptions }); return { queryKey, queryFn, ...queryOptions } as UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > & { queryKey: DataTag }; }; -export type GetDryRunResultsSuspenseQueryResult = NonNullable< - Awaited> +export type GetProfileRunStatusSuspenseQueryResult = NonNullable< + Awaited> >; -export type GetDryRunResultsSuspenseQueryError = +export type GetProfileRunStatusSuspenseQueryError = AxiosError; -export function useGetDryRunResultsSuspense< - TData = Awaited>, +export function useGetProfileRunStatusSuspense< + TData = Awaited>, TError = AxiosError, >( runId: string, options: { query: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -11105,15 +12801,15 @@ export function useGetDryRunResultsSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; }; -export function useGetDryRunResultsSuspense< - TData = Awaited>, +export function useGetProfileRunStatusSuspense< + TData = Awaited>, TError = AxiosError, >( runId: string, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -11124,15 +12820,15 @@ export function useGetDryRunResultsSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; }; -export function useGetDryRunResultsSuspense< - TData = Awaited>, +export function useGetProfileRunStatusSuspense< + TData = Awaited>, TError = AxiosError, >( runId: string, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -11144,18 +12840,18 @@ export function useGetDryRunResultsSuspense< queryKey: DataTag; }; /** - * @summary Get Dry Run Results + * @summary Get Profile Run Status */ -export function useGetDryRunResultsSuspense< - TData = Awaited>, +export function useGetProfileRunStatusSuspense< + TData 
= Awaited>, TError = AxiosError, >( runId: string, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -11166,7 +12862,10 @@ export function useGetDryRunResultsSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; } { - const queryOptions = getGetDryRunResultsSuspenseQueryOptions(runId, options); + const queryOptions = getGetProfileRunStatusSuspenseQueryOptions( + runId, + options, + ); const query = useSuspenseQuery( queryOptions, @@ -11181,229 +12880,212 @@ export function useGetDryRunResultsSuspense< } /** - * Return profiling run history, newest first. - * @summary List Profile Runs + * Cancel a running profiler job. + * @summary Cancel Profile Run */ -export const listProfileRuns = ( +export const cancelProfileRun = ( + runId: string, options?: AxiosRequestConfig, -): Promise> => { - return axios.default.get(`/api/v1/profiler/runs`, options); -}; - -export const getListProfileRunsQueryKey = () => { - return [`/api/v1/profiler/runs`] as const; +): Promise> => { + return axios.default.post( + `/api/v1/profiler/runs/${runId}/cancel`, + undefined, + options, + ); }; -export const getListProfileRunsQueryOptions = < - TData = Awaited>, +export const getCancelProfileRunMutationOptions = < TError = AxiosError, + TContext = unknown, >(options?: { - query?: Partial< - UseQueryOptions>, TError, TData> + mutation?: UseMutationOptions< + Awaited>, + TError, + { runId: string }, + TContext >; axios?: AxiosRequestConfig; -}) => { - const { query: queryOptions, axios: axiosOptions } = options ?? {}; +}): UseMutationOptions< + Awaited>, + TError, + { runId: string }, + TContext +> => { + const mutationKey = ["cancelProfileRun"]; + const { mutation: mutationOptions, axios: axiosOptions } = options + ? options.mutation && + "mutationKey" in options.mutation && + options.mutation.mutationKey + ? 
options + : { ...options, mutation: { ...options.mutation, mutationKey } } + : { mutation: { mutationKey }, axios: undefined }; - const queryKey = queryOptions?.queryKey ?? getListProfileRunsQueryKey(); + const mutationFn: MutationFunction< + Awaited>, + { runId: string } + > = (props) => { + const { runId } = props ?? {}; - const queryFn: QueryFunction>> = ({ - signal, - }) => listProfileRuns({ signal, ...axiosOptions }); + return cancelProfileRun(runId, axiosOptions); + }; - return { queryKey, queryFn, ...queryOptions } as UseQueryOptions< - Awaited>, - TError, - TData - > & { queryKey: DataTag }; + return { mutationFn, ...mutationOptions }; }; -export type ListProfileRunsQueryResult = NonNullable< - Awaited> +export type CancelProfileRunMutationResult = NonNullable< + Awaited> >; -export type ListProfileRunsQueryError = AxiosError; -export function useListProfileRuns< - TData = Awaited>, - TError = AxiosError, ->( - options: { - query: Partial< - UseQueryOptions< - Awaited>, - TError, - TData - > - > & - Pick< - DefinedInitialDataOptions< - Awaited>, - TError, - Awaited> - >, - "initialData" - >; - axios?: AxiosRequestConfig; - }, - queryClient?: QueryClient, -): DefinedUseQueryResult & { - queryKey: DataTag; -}; -export function useListProfileRuns< - TData = Awaited>, - TError = AxiosError, ->( - options?: { - query?: Partial< - UseQueryOptions< - Awaited>, - TError, - TData - > - > & - Pick< - UndefinedInitialDataOptions< - Awaited>, - TError, - Awaited> - >, - "initialData" - >; - axios?: AxiosRequestConfig; - }, - queryClient?: QueryClient, -): UseQueryResult & { - queryKey: DataTag; -}; -export function useListProfileRuns< - TData = Awaited>, - TError = AxiosError, ->( - options?: { - query?: Partial< - UseQueryOptions< - Awaited>, - TError, - TData - > - >; - axios?: AxiosRequestConfig; - }, - queryClient?: QueryClient, -): UseQueryResult & { - queryKey: DataTag; -}; +export type CancelProfileRunMutationError = AxiosError; + /** - * @summary List Profile 
Runs + * @summary Cancel Profile Run */ - -export function useListProfileRuns< - TData = Awaited>, +export const useCancelProfileRun = < TError = AxiosError, + TContext = unknown, >( options?: { - query?: Partial< - UseQueryOptions< - Awaited>, - TError, - TData - > + mutation?: UseMutationOptions< + Awaited>, + TError, + { runId: string }, + TContext >; axios?: AxiosRequestConfig; }, queryClient?: QueryClient, -): UseQueryResult & { - queryKey: DataTag; -} { - const queryOptions = getListProfileRunsQueryOptions(options); +): UseMutationResult< + Awaited>, + TError, + { runId: string }, + TContext +> => { + const mutationOptions = getCancelProfileRunMutationOptions(options); - const query = useQuery(queryOptions, queryClient) as UseQueryResult< - TData, - TError - > & { queryKey: DataTag }; + return useMutation(mutationOptions, queryClient); +}; - query.queryKey = queryOptions.queryKey; +/** + * Read profiler results from the Delta table. + * @summary Get Profile Run Results + */ +export const getProfileRunResults = ( + runId: string, + options?: AxiosRequestConfig, +): Promise> => { + return axios.default.get(`/api/v1/profiler/runs/${runId}/results`, options); +}; - return query; -} +export const getGetProfileRunResultsQueryKey = (runId?: string) => { + return [`/api/v1/profiler/runs/${runId}/results`] as const; +}; -export const getListProfileRunsSuspenseQueryOptions = < - TData = Awaited>, +export const getGetProfileRunResultsQueryOptions = < + TData = Awaited>, TError = AxiosError, ->(options?: { - query?: Partial< - UseSuspenseQueryOptions< - Awaited>, - TError, - TData - > - >; - axios?: AxiosRequestConfig; -}) => { +>( + runId: string, + options?: { + query?: Partial< + UseQueryOptions< + Awaited>, + TError, + TData + > + >; + axios?: AxiosRequestConfig; + }, +) => { const { query: queryOptions, axios: axiosOptions } = options ?? {}; - const queryKey = queryOptions?.queryKey ?? getListProfileRunsQueryKey(); + const queryKey = + queryOptions?.queryKey ?? 
getGetProfileRunResultsQueryKey(runId); - const queryFn: QueryFunction>> = ({ - signal, - }) => listProfileRuns({ signal, ...axiosOptions }); + const queryFn: QueryFunction< + Awaited> + > = ({ signal }) => getProfileRunResults(runId, { signal, ...axiosOptions }); - return { queryKey, queryFn, ...queryOptions } as UseSuspenseQueryOptions< - Awaited>, + return { + queryKey, + queryFn, + enabled: !!runId, + ...queryOptions, + } as UseQueryOptions< + Awaited>, TError, TData > & { queryKey: DataTag }; }; -export type ListProfileRunsSuspenseQueryResult = NonNullable< - Awaited> +export type GetProfileRunResultsQueryResult = NonNullable< + Awaited> >; -export type ListProfileRunsSuspenseQueryError = AxiosError; +export type GetProfileRunResultsQueryError = AxiosError; -export function useListProfileRunsSuspense< - TData = Awaited>, +export function useGetProfileRunResults< + TData = Awaited>, TError = AxiosError, >( + runId: string, options: { query: Partial< - UseSuspenseQueryOptions< - Awaited>, + UseQueryOptions< + Awaited>, TError, TData > - >; + > & + Pick< + DefinedInitialDataOptions< + Awaited>, + TError, + Awaited> + >, + "initialData" + >; axios?: AxiosRequestConfig; }, queryClient?: QueryClient, -): UseSuspenseQueryResult & { +): DefinedUseQueryResult & { queryKey: DataTag; }; -export function useListProfileRunsSuspense< - TData = Awaited>, +export function useGetProfileRunResults< + TData = Awaited>, TError = AxiosError, >( + runId: string, options?: { query?: Partial< - UseSuspenseQueryOptions< - Awaited>, + UseQueryOptions< + Awaited>, TError, TData > - >; + > & + Pick< + UndefinedInitialDataOptions< + Awaited>, + TError, + Awaited> + >, + "initialData" + >; axios?: AxiosRequestConfig; }, queryClient?: QueryClient, -): UseSuspenseQueryResult & { +): UseQueryResult & { queryKey: DataTag; }; -export function useListProfileRunsSuspense< - TData = Awaited>, +export function useGetProfileRunResults< + TData = Awaited>, TError = AxiosError, >( + runId: string, 
options?: { query?: Partial< - UseSuspenseQueryOptions< - Awaited>, + UseQueryOptions< + Awaited>, TError, TData > @@ -11411,21 +13093,22 @@ export function useListProfileRunsSuspense< axios?: AxiosRequestConfig; }, queryClient?: QueryClient, -): UseSuspenseQueryResult & { +): UseQueryResult & { queryKey: DataTag; }; /** - * @summary List Profile Runs + * @summary Get Profile Run Results */ -export function useListProfileRunsSuspense< - TData = Awaited>, +export function useGetProfileRunResults< + TData = Awaited>, TError = AxiosError, >( + runId: string, options?: { query?: Partial< - UseSuspenseQueryOptions< - Awaited>, + UseQueryOptions< + Awaited>, TError, TData > @@ -11433,269 +13116,211 @@ export function useListProfileRunsSuspense< axios?: AxiosRequestConfig; }, queryClient?: QueryClient, -): UseSuspenseQueryResult & { +): UseQueryResult & { queryKey: DataTag; } { - const queryOptions = getListProfileRunsSuspenseQueryOptions(options); + const queryOptions = getGetProfileRunResultsQueryOptions(runId, options); - const query = useSuspenseQuery( - queryOptions, - queryClient, - ) as UseSuspenseQueryResult & { - queryKey: DataTag; - }; + const query = useQuery(queryOptions, queryClient) as UseQueryResult< + TData, + TError + > & { queryKey: DataTag }; query.queryKey = queryOptions.queryKey; return query; } -/** - * Create a temporary view (OBO) and submit a profiler job (SP). 
- * @summary Submit Profile Run - */ -export const submitProfileRun = ( - profileRunIn: ProfileRunIn, - options?: AxiosRequestConfig, -): Promise> => { - return axios.default.post(`/api/v1/profiler/run`, profileRunIn, options); -}; - -export const getSubmitProfileRunMutationOptions = < - TError = AxiosError, - TContext = unknown, ->(options?: { - mutation?: UseMutationOptions< - Awaited>, - TError, - { data: ProfileRunIn }, - TContext - >; - axios?: AxiosRequestConfig; -}): UseMutationOptions< - Awaited>, - TError, - { data: ProfileRunIn }, - TContext -> => { - const mutationKey = ["submitProfileRun"]; - const { mutation: mutationOptions, axios: axiosOptions } = options - ? options.mutation && - "mutationKey" in options.mutation && - options.mutation.mutationKey - ? options - : { ...options, mutation: { ...options.mutation, mutationKey } } - : { mutation: { mutationKey }, axios: undefined }; - - const mutationFn: MutationFunction< - Awaited>, - { data: ProfileRunIn } - > = (props) => { - const { data } = props ?? 
{}; - - return submitProfileRun(data, axiosOptions); - }; - - return { mutationFn, ...mutationOptions }; -}; - -export type SubmitProfileRunMutationResult = NonNullable< - Awaited> ->; -export type SubmitProfileRunMutationBody = ProfileRunIn; -export type SubmitProfileRunMutationError = AxiosError; - -/** - * @summary Submit Profile Run - */ -export const useSubmitProfileRun = < +export const getGetProfileRunResultsSuspenseQueryOptions = < + TData = Awaited>, TError = AxiosError, - TContext = unknown, >( + runId: string, options?: { - mutation?: UseMutationOptions< - Awaited>, - TError, - { data: ProfileRunIn }, - TContext + query?: Partial< + UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > >; axios?: AxiosRequestConfig; }, - queryClient?: QueryClient, -): UseMutationResult< - Awaited>, - TError, - { data: ProfileRunIn }, - TContext -> => { - const mutationOptions = getSubmitProfileRunMutationOptions(options); +) => { + const { query: queryOptions, axios: axiosOptions } = options ?? {}; - return useMutation(mutationOptions, queryClient); -}; + const queryKey = + queryOptions?.queryKey ?? getGetProfileRunResultsQueryKey(runId); -/** - * Create temporary views and submit profiler jobs for multiple tables in parallel. 
- * @summary Submit Batch Profile Run - */ -export const submitBatchProfileRun = ( - batchProfileRunIn: BatchProfileRunIn, - options?: AxiosRequestConfig, -): Promise> => { - return axios.default.post( - `/api/v1/profiler/batch-run`, - batchProfileRunIn, - options, - ); -}; + const queryFn: QueryFunction< + Awaited> + > = ({ signal }) => getProfileRunResults(runId, { signal, ...axiosOptions }); -export const getSubmitBatchProfileRunMutationOptions = < - TError = AxiosError, - TContext = unknown, ->(options?: { - mutation?: UseMutationOptions< - Awaited>, + return { queryKey, queryFn, ...queryOptions } as UseSuspenseQueryOptions< + Awaited>, TError, - { data: BatchProfileRunIn }, - TContext - >; - axios?: AxiosRequestConfig; -}): UseMutationOptions< - Awaited>, - TError, - { data: BatchProfileRunIn }, - TContext -> => { - const mutationKey = ["submitBatchProfileRun"]; - const { mutation: mutationOptions, axios: axiosOptions } = options - ? options.mutation && - "mutationKey" in options.mutation && - options.mutation.mutationKey - ? options - : { ...options, mutation: { ...options.mutation, mutationKey } } - : { mutation: { mutationKey }, axios: undefined }; - - const mutationFn: MutationFunction< - Awaited>, - { data: BatchProfileRunIn } - > = (props) => { - const { data } = props ?? 
{}; - - return submitBatchProfileRun(data, axiosOptions); - }; - - return { mutationFn, ...mutationOptions }; + TData + > & { queryKey: DataTag }; }; -export type SubmitBatchProfileRunMutationResult = NonNullable< - Awaited> +export type GetProfileRunResultsSuspenseQueryResult = NonNullable< + Awaited> >; -export type SubmitBatchProfileRunMutationBody = BatchProfileRunIn; -export type SubmitBatchProfileRunMutationError = +export type GetProfileRunResultsSuspenseQueryError = AxiosError; -/** - * @summary Submit Batch Profile Run - */ -export const useSubmitBatchProfileRun = < +export function useGetProfileRunResultsSuspense< + TData = Awaited>, TError = AxiosError, - TContext = unknown, >( - options?: { - mutation?: UseMutationOptions< - Awaited>, - TError, - { data: BatchProfileRunIn }, - TContext + runId: string, + options: { + query: Partial< + UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > >; axios?: AxiosRequestConfig; }, queryClient?: QueryClient, -): UseMutationResult< - Awaited>, - TError, - { data: BatchProfileRunIn }, - TContext -> => { - const mutationOptions = getSubmitBatchProfileRunMutationOptions(options); - - return useMutation(mutationOptions, queryClient); +): UseSuspenseQueryResult & { + queryKey: DataTag; }; - -/** - * Poll the status of a profiler job run. Cleans up the view when job terminates. 
- * @summary Get Profile Run Status - */ -export const getProfileRunStatus = ( +export function useGetProfileRunResultsSuspense< + TData = Awaited>, + TError = AxiosError, +>( runId: string, - options?: AxiosRequestConfig, -): Promise> => { - return axios.default.get(`/api/v1/profiler/runs/${runId}/status`, options); + options?: { + query?: Partial< + UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): UseSuspenseQueryResult & { + queryKey: DataTag; }; - -export const getGetProfileRunStatusQueryKey = (runId?: string) => { - return [`/api/v1/profiler/runs/${runId}/status`] as const; +export function useGetProfileRunResultsSuspense< + TData = Awaited>, + TError = AxiosError, +>( + runId: string, + options?: { + query?: Partial< + UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): UseSuspenseQueryResult & { + queryKey: DataTag; }; +/** + * @summary Get Profile Run Results + */ -export const getGetProfileRunStatusQueryOptions = < - TData = Awaited>, +export function useGetProfileRunResultsSuspense< + TData = Awaited>, TError = AxiosError, >( runId: string, options?: { query?: Partial< - UseQueryOptions< - Awaited>, + UseSuspenseQueryOptions< + Awaited>, TError, TData > >; axios?: AxiosRequestConfig; }, -) => { + queryClient?: QueryClient, +): UseSuspenseQueryResult & { + queryKey: DataTag; +} { + const queryOptions = getGetProfileRunResultsSuspenseQueryOptions( + runId, + options, + ); + + const query = useSuspenseQuery( + queryOptions, + queryClient, + ) as UseSuspenseQueryResult & { + queryKey: DataTag; + }; + + query.queryKey = queryOptions.queryKey; + + return query; +} + +/** + * @summary Get Settings + */ +export const getSettings = ( + options?: AxiosRequestConfig, +): Promise> => { + return axios.default.get(`/api/v1/settings`, options); +}; + +export const getGetSettingsQueryKey = () => { + return 
[`/api/v1/settings`] as const; +}; + +export const getGetSettingsQueryOptions = < + TData = Awaited>, + TError = AxiosError, +>(options?: { + query?: Partial< + UseQueryOptions>, TError, TData> + >; + axios?: AxiosRequestConfig; +}) => { const { query: queryOptions, axios: axiosOptions } = options ?? {}; - const queryKey = - queryOptions?.queryKey ?? getGetProfileRunStatusQueryKey(runId); + const queryKey = queryOptions?.queryKey ?? getGetSettingsQueryKey(); - const queryFn: QueryFunction< - Awaited> - > = ({ signal }) => getProfileRunStatus(runId, { signal, ...axiosOptions }); + const queryFn: QueryFunction>> = ({ + signal, + }) => getSettings({ signal, ...axiosOptions }); - return { - queryKey, - queryFn, - enabled: !!runId, - ...queryOptions, - } as UseQueryOptions< - Awaited>, + return { queryKey, queryFn, ...queryOptions } as UseQueryOptions< + Awaited>, TError, TData > & { queryKey: DataTag }; }; -export type GetProfileRunStatusQueryResult = NonNullable< - Awaited> +export type GetSettingsQueryResult = NonNullable< + Awaited> >; -export type GetProfileRunStatusQueryError = AxiosError; +export type GetSettingsQueryError = AxiosError; -export function useGetProfileRunStatus< - TData = Awaited>, +export function useGetSettings< + TData = Awaited>, TError = AxiosError, >( - runId: string, options: { query: Partial< - UseQueryOptions< - Awaited>, - TError, - TData - > + UseQueryOptions>, TError, TData> > & Pick< DefinedInitialDataOptions< - Awaited>, + Awaited>, TError, - Awaited> + Awaited> >, "initialData" >; @@ -11705,24 +13330,19 @@ export function useGetProfileRunStatus< ): DefinedUseQueryResult & { queryKey: DataTag; }; -export function useGetProfileRunStatus< - TData = Awaited>, +export function useGetSettings< + TData = Awaited>, TError = AxiosError, >( - runId: string, options?: { query?: Partial< - UseQueryOptions< - Awaited>, - TError, - TData - > + UseQueryOptions>, TError, TData> > & Pick< UndefinedInitialDataOptions< - Awaited>, + Awaited>, TError, - 
Awaited> + Awaited> >, "initialData" >; @@ -11732,18 +13352,13 @@ export function useGetProfileRunStatus< ): UseQueryResult & { queryKey: DataTag; }; -export function useGetProfileRunStatus< - TData = Awaited>, +export function useGetSettings< + TData = Awaited>, TError = AxiosError, >( - runId: string, options?: { query?: Partial< - UseQueryOptions< - Awaited>, - TError, - TData - > + UseQueryOptions>, TError, TData> >; axios?: AxiosRequestConfig; }, @@ -11752,21 +13367,16 @@ export function useGetProfileRunStatus< queryKey: DataTag; }; /** - * @summary Get Profile Run Status + * @summary Get Settings */ -export function useGetProfileRunStatus< - TData = Awaited>, +export function useGetSettings< + TData = Awaited>, TError = AxiosError, >( - runId: string, options?: { query?: Partial< - UseQueryOptions< - Awaited>, - TError, - TData - > + UseQueryOptions>, TError, TData> >; axios?: AxiosRequestConfig; }, @@ -11774,7 +13384,7 @@ export function useGetProfileRunStatus< ): UseQueryResult & { queryKey: DataTag; } { - const queryOptions = getGetProfileRunStatusQueryOptions(runId, options); + const queryOptions = getGetSettingsQueryOptions(options); const query = useQuery(queryOptions, queryClient) as UseQueryResult< TData, @@ -11786,53 +13396,47 @@ export function useGetProfileRunStatus< return query; } -export const getGetProfileRunStatusSuspenseQueryOptions = < - TData = Awaited>, +export const getGetSettingsSuspenseQueryOptions = < + TData = Awaited>, TError = AxiosError, ->( - runId: string, - options?: { - query?: Partial< - UseSuspenseQueryOptions< - Awaited>, - TError, - TData - > - >; - axios?: AxiosRequestConfig; - }, -) => { +>(options?: { + query?: Partial< + UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > + >; + axios?: AxiosRequestConfig; +}) => { const { query: queryOptions, axios: axiosOptions } = options ?? {}; - const queryKey = - queryOptions?.queryKey ?? getGetProfileRunStatusQueryKey(runId); + const queryKey = queryOptions?.queryKey ?? 
getGetSettingsQueryKey(); - const queryFn: QueryFunction< - Awaited> - > = ({ signal }) => getProfileRunStatus(runId, { signal, ...axiosOptions }); + const queryFn: QueryFunction>> = ({ + signal, + }) => getSettings({ signal, ...axiosOptions }); return { queryKey, queryFn, ...queryOptions } as UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > & { queryKey: DataTag }; }; -export type GetProfileRunStatusSuspenseQueryResult = NonNullable< - Awaited> +export type GetSettingsSuspenseQueryResult = NonNullable< + Awaited> >; -export type GetProfileRunStatusSuspenseQueryError = - AxiosError; +export type GetSettingsSuspenseQueryError = AxiosError; -export function useGetProfileRunStatusSuspense< - TData = Awaited>, +export function useGetSettingsSuspense< + TData = Awaited>, TError = AxiosError, >( - runId: string, options: { query: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -11843,15 +13447,14 @@ export function useGetProfileRunStatusSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; }; -export function useGetProfileRunStatusSuspense< - TData = Awaited>, +export function useGetSettingsSuspense< + TData = Awaited>, TError = AxiosError, >( - runId: string, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -11862,15 +13465,14 @@ export function useGetProfileRunStatusSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; }; -export function useGetProfileRunStatusSuspense< - TData = Awaited>, +export function useGetSettingsSuspense< + TData = Awaited>, TError = AxiosError, >( - runId: string, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -11882,18 +13484,17 @@ export function useGetProfileRunStatusSuspense< queryKey: DataTag; }; /** - * @summary Get Profile Run Status + * @summary Get Settings */ -export function useGetProfileRunStatusSuspense< - TData = Awaited>, +export function useGetSettingsSuspense< + TData = 
Awaited>, TError = AxiosError, >( - runId: string, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -11904,10 +13505,7 @@ export function useGetProfileRunStatusSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; } { - const queryOptions = getGetProfileRunStatusSuspenseQueryOptions( - runId, - options, - ); + const queryOptions = getGetSettingsSuspenseQueryOptions(options); const query = useSuspenseQuery( queryOptions, @@ -11922,38 +13520,33 @@ export function useGetProfileRunStatusSuspense< } /** - * Cancel a running profiler job. - * @summary Cancel Profile Run + * @summary Save Settings */ -export const cancelProfileRun = ( - runId: string, +export const saveSettings = ( + installationSettings: InstallationSettings, options?: AxiosRequestConfig, -): Promise> => { - return axios.default.post( - `/api/v1/profiler/runs/${runId}/cancel`, - undefined, - options, - ); +): Promise> => { + return axios.default.post(`/api/v1/settings`, installationSettings, options); }; -export const getCancelProfileRunMutationOptions = < +export const getSaveSettingsMutationOptions = < TError = AxiosError, TContext = unknown, >(options?: { mutation?: UseMutationOptions< - Awaited>, + Awaited>, TError, - { runId: string }, + { data: InstallationSettings }, TContext >; axios?: AxiosRequestConfig; }): UseMutationOptions< - Awaited>, + Awaited>, TError, - { runId: string }, + { data: InstallationSettings }, TContext > => { - const mutationKey = ["cancelProfileRun"]; + const mutationKey = ["saveSettings"]; const { mutation: mutationOptions, axios: axiosOptions } = options ? options.mutation && "mutationKey" in options.mutation && @@ -11963,126 +13556,198 @@ export const getCancelProfileRunMutationOptions = < : { mutation: { mutationKey }, axios: undefined }; const mutationFn: MutationFunction< - Awaited>, - { runId: string } + Awaited>, + { data: InstallationSettings } > = (props) => { - const { runId } = props ?? 
{}; + const { data } = props ?? {}; - return cancelProfileRun(runId, axiosOptions); + return saveSettings(data, axiosOptions); }; return { mutationFn, ...mutationOptions }; }; -export type CancelProfileRunMutationResult = NonNullable< - Awaited> +export type SaveSettingsMutationResult = NonNullable< + Awaited> >; +export type SaveSettingsMutationBody = InstallationSettings; +export type SaveSettingsMutationError = AxiosError; -export type CancelProfileRunMutationError = AxiosError; +/** + * @summary Save Settings + */ +export const useSaveSettings = < + TError = AxiosError, + TContext = unknown, +>( + options?: { + mutation?: UseMutationOptions< + Awaited>, + TError, + { data: InstallationSettings }, + TContext + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): UseMutationResult< + Awaited>, + TError, + { data: InstallationSettings }, + TContext +> => { + const mutationOptions = getSaveSettingsMutationOptions(options); + + return useMutation(mutationOptions, queryClient); +}; /** - * @summary Cancel Profile Run + * Add a comment to a run or rule. + * @summary Add Comment */ -export const useCancelProfileRun = < +export const addComment = ( + addCommentIn: AddCommentIn, + options?: AxiosRequestConfig, +): Promise> => { + return axios.default.post(`/api/v1/comments`, addCommentIn, options); +}; + +export const getAddCommentMutationOptions = < + TError = AxiosError, + TContext = unknown, +>(options?: { + mutation?: UseMutationOptions< + Awaited>, + TError, + { data: AddCommentIn }, + TContext + >; + axios?: AxiosRequestConfig; +}): UseMutationOptions< + Awaited>, + TError, + { data: AddCommentIn }, + TContext +> => { + const mutationKey = ["addComment"]; + const { mutation: mutationOptions, axios: axiosOptions } = options + ? options.mutation && + "mutationKey" in options.mutation && + options.mutation.mutationKey + ? 
options + : { ...options, mutation: { ...options.mutation, mutationKey } } + : { mutation: { mutationKey }, axios: undefined }; + + const mutationFn: MutationFunction< + Awaited>, + { data: AddCommentIn } + > = (props) => { + const { data } = props ?? {}; + + return addComment(data, axiosOptions); + }; + + return { mutationFn, ...mutationOptions }; +}; + +export type AddCommentMutationResult = NonNullable< + Awaited> +>; +export type AddCommentMutationBody = AddCommentIn; +export type AddCommentMutationError = AxiosError; + +/** + * @summary Add Comment + */ +export const useAddComment = < TError = AxiosError, TContext = unknown, >( options?: { mutation?: UseMutationOptions< - Awaited>, + Awaited>, TError, - { runId: string }, + { data: AddCommentIn }, TContext >; axios?: AxiosRequestConfig; }, queryClient?: QueryClient, ): UseMutationResult< - Awaited>, + Awaited>, TError, - { runId: string }, + { data: AddCommentIn }, TContext > => { - const mutationOptions = getCancelProfileRunMutationOptions(options); + const mutationOptions = getAddCommentMutationOptions(options); return useMutation(mutationOptions, queryClient); }; /** - * Read profiler results from the Delta table. - * @summary Get Profile Run Results + * List comments for a specific run or rule. + * @summary List Comments */ -export const getProfileRunResults = ( - runId: string, +export const listComments = ( + params: ListCommentsParams, options?: AxiosRequestConfig, -): Promise> => { - return axios.default.get(`/api/v1/profiler/runs/${runId}/results`, options); +): Promise> => { + return axios.default.get(`/api/v1/comments`, { + ...options, + params: { ...params, ...options?.params }, + }); }; -export const getGetProfileRunResultsQueryKey = (runId?: string) => { - return [`/api/v1/profiler/runs/${runId}/results`] as const; +export const getListCommentsQueryKey = (params?: ListCommentsParams) => { + return [`/api/v1/comments`, ...(params ? 
[params] : [])] as const; }; -export const getGetProfileRunResultsQueryOptions = < - TData = Awaited>, +export const getListCommentsQueryOptions = < + TData = Awaited>, TError = AxiosError, >( - runId: string, + params: ListCommentsParams, options?: { query?: Partial< - UseQueryOptions< - Awaited>, - TError, - TData - > + UseQueryOptions>, TError, TData> >; axios?: AxiosRequestConfig; }, ) => { const { query: queryOptions, axios: axiosOptions } = options ?? {}; - const queryKey = - queryOptions?.queryKey ?? getGetProfileRunResultsQueryKey(runId); + const queryKey = queryOptions?.queryKey ?? getListCommentsQueryKey(params); - const queryFn: QueryFunction< - Awaited> - > = ({ signal }) => getProfileRunResults(runId, { signal, ...axiosOptions }); + const queryFn: QueryFunction>> = ({ + signal, + }) => listComments(params, { signal, ...axiosOptions }); - return { - queryKey, - queryFn, - enabled: !!runId, - ...queryOptions, - } as UseQueryOptions< - Awaited>, + return { queryKey, queryFn, ...queryOptions } as UseQueryOptions< + Awaited>, TError, TData > & { queryKey: DataTag }; }; -export type GetProfileRunResultsQueryResult = NonNullable< - Awaited> +export type ListCommentsQueryResult = NonNullable< + Awaited> >; -export type GetProfileRunResultsQueryError = AxiosError; +export type ListCommentsQueryError = AxiosError; -export function useGetProfileRunResults< - TData = Awaited>, +export function useListComments< + TData = Awaited>, TError = AxiosError, >( - runId: string, + params: ListCommentsParams, options: { query: Partial< - UseQueryOptions< - Awaited>, - TError, - TData - > + UseQueryOptions>, TError, TData> > & Pick< DefinedInitialDataOptions< - Awaited>, + Awaited>, TError, - Awaited> + Awaited> >, "initialData" >; @@ -12092,24 +13757,20 @@ export function useGetProfileRunResults< ): DefinedUseQueryResult & { queryKey: DataTag; }; -export function useGetProfileRunResults< - TData = Awaited>, +export function useListComments< + TData = Awaited>, TError = 
AxiosError, >( - runId: string, + params: ListCommentsParams, options?: { query?: Partial< - UseQueryOptions< - Awaited>, - TError, - TData - > + UseQueryOptions>, TError, TData> > & Pick< UndefinedInitialDataOptions< - Awaited>, + Awaited>, TError, - Awaited> + Awaited> >, "initialData" >; @@ -12119,18 +13780,14 @@ export function useGetProfileRunResults< ): UseQueryResult & { queryKey: DataTag; }; -export function useGetProfileRunResults< - TData = Awaited>, +export function useListComments< + TData = Awaited>, TError = AxiosError, >( - runId: string, + params: ListCommentsParams, options?: { query?: Partial< - UseQueryOptions< - Awaited>, - TError, - TData - > + UseQueryOptions>, TError, TData> >; axios?: AxiosRequestConfig; }, @@ -12139,21 +13796,17 @@ export function useGetProfileRunResults< queryKey: DataTag; }; /** - * @summary Get Profile Run Results + * @summary List Comments */ -export function useGetProfileRunResults< - TData = Awaited>, +export function useListComments< + TData = Awaited>, TError = AxiosError, >( - runId: string, + params: ListCommentsParams, options?: { query?: Partial< - UseQueryOptions< - Awaited>, - TError, - TData - > + UseQueryOptions>, TError, TData> >; axios?: AxiosRequestConfig; }, @@ -12161,7 +13814,7 @@ export function useGetProfileRunResults< ): UseQueryResult & { queryKey: DataTag; } { - const queryOptions = getGetProfileRunResultsQueryOptions(runId, options); + const queryOptions = getListCommentsQueryOptions(params, options); const query = useQuery(queryOptions, queryClient) as UseQueryResult< TData, @@ -12173,15 +13826,15 @@ export function useGetProfileRunResults< return query; } -export const getGetProfileRunResultsSuspenseQueryOptions = < - TData = Awaited>, +export const getListCommentsSuspenseQueryOptions = < + TData = Awaited>, TError = AxiosError, >( - runId: string, + params: ListCommentsParams, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -12191,35 +13844,33 @@ 
export const getGetProfileRunResultsSuspenseQueryOptions = < ) => { const { query: queryOptions, axios: axiosOptions } = options ?? {}; - const queryKey = - queryOptions?.queryKey ?? getGetProfileRunResultsQueryKey(runId); + const queryKey = queryOptions?.queryKey ?? getListCommentsQueryKey(params); - const queryFn: QueryFunction< - Awaited> - > = ({ signal }) => getProfileRunResults(runId, { signal, ...axiosOptions }); + const queryFn: QueryFunction>> = ({ + signal, + }) => listComments(params, { signal, ...axiosOptions }); return { queryKey, queryFn, ...queryOptions } as UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > & { queryKey: DataTag }; }; -export type GetProfileRunResultsSuspenseQueryResult = NonNullable< - Awaited> +export type ListCommentsSuspenseQueryResult = NonNullable< + Awaited> >; -export type GetProfileRunResultsSuspenseQueryError = - AxiosError; +export type ListCommentsSuspenseQueryError = AxiosError; -export function useGetProfileRunResultsSuspense< - TData = Awaited>, +export function useListCommentsSuspense< + TData = Awaited>, TError = AxiosError, >( - runId: string, + params: ListCommentsParams, options: { query: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -12230,15 +13881,15 @@ export function useGetProfileRunResultsSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; }; -export function useGetProfileRunResultsSuspense< - TData = Awaited>, +export function useListCommentsSuspense< + TData = Awaited>, TError = AxiosError, >( - runId: string, + params: ListCommentsParams, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -12249,15 +13900,15 @@ export function useGetProfileRunResultsSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; }; -export function useGetProfileRunResultsSuspense< - TData = Awaited>, +export function useListCommentsSuspense< + TData = Awaited>, TError = AxiosError, >( - runId: string, + params: 
ListCommentsParams, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -12269,18 +13920,18 @@ export function useGetProfileRunResultsSuspense< queryKey: DataTag; }; /** - * @summary Get Profile Run Results + * @summary List Comments */ -export function useGetProfileRunResultsSuspense< - TData = Awaited>, +export function useListCommentsSuspense< + TData = Awaited>, TError = AxiosError, >( - runId: string, + params: ListCommentsParams, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -12291,10 +13942,7 @@ export function useGetProfileRunResultsSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; } { - const queryOptions = getGetProfileRunResultsSuspenseQueryOptions( - runId, - options, - ); + const queryOptions = getListCommentsSuspenseQueryOptions(params, options); const query = useSuspenseQuery( queryOptions, @@ -12309,60 +13957,175 @@ export function useGetProfileRunResultsSuspense< } /** - * @summary Get Settings + * Delete a comment (only the author can delete their own comments). 
+ * @summary Delete Comment */ -export const getSettings = ( +export const deleteComment = ( + commentId: string, options?: AxiosRequestConfig, -): Promise> => { - return axios.default.get(`/api/v1/settings`, options); -}; - -export const getGetSettingsQueryKey = () => { - return [`/api/v1/settings`] as const; +): Promise> => { + return axios.default.delete(`/api/v1/comments/${commentId}`, options); }; -export const getGetSettingsQueryOptions = < - TData = Awaited>, +export const getDeleteCommentMutationOptions = < TError = AxiosError, + TContext = unknown, >(options?: { - query?: Partial< - UseQueryOptions>, TError, TData> + mutation?: UseMutationOptions< + Awaited>, + TError, + { commentId: string }, + TContext >; axios?: AxiosRequestConfig; -}) => { +}): UseMutationOptions< + Awaited>, + TError, + { commentId: string }, + TContext +> => { + const mutationKey = ["deleteComment"]; + const { mutation: mutationOptions, axios: axiosOptions } = options + ? options.mutation && + "mutationKey" in options.mutation && + options.mutation.mutationKey + ? options + : { ...options, mutation: { ...options.mutation, mutationKey } } + : { mutation: { mutationKey }, axios: undefined }; + + const mutationFn: MutationFunction< + Awaited>, + { commentId: string } + > = (props) => { + const { commentId } = props ?? 
{}; + + return deleteComment(commentId, axiosOptions); + }; + + return { mutationFn, ...mutationOptions }; +}; + +export type DeleteCommentMutationResult = NonNullable< + Awaited> +>; + +export type DeleteCommentMutationError = AxiosError; + +/** + * @summary Delete Comment + */ +export const useDeleteComment = < + TError = AxiosError, + TContext = unknown, +>( + options?: { + mutation?: UseMutationOptions< + Awaited>, + TError, + { commentId: string }, + TContext + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): UseMutationResult< + Awaited>, + TError, + { commentId: string }, + TContext +> => { + const mutationOptions = getDeleteCommentMutationOptions(options); + + return useMutation(mutationOptions, queryClient); +}; + +/** + * @summary List Quarantine Records + */ +export const listQuarantineRecords = ( + runId: string, + params?: ListQuarantineRecordsParams, + options?: AxiosRequestConfig, +): Promise> => { + return axios.default.get(`/api/v1/quarantine/runs/${runId}`, { + ...options, + params: { ...params, ...options?.params }, + }); +}; + +export const getListQuarantineRecordsQueryKey = ( + runId?: string, + params?: ListQuarantineRecordsParams, +) => { + return [ + `/api/v1/quarantine/runs/${runId}`, + ...(params ? [params] : []), + ] as const; +}; + +export const getListQuarantineRecordsQueryOptions = < + TData = Awaited>, + TError = AxiosError, +>( + runId: string, + params?: ListQuarantineRecordsParams, + options?: { + query?: Partial< + UseQueryOptions< + Awaited>, + TError, + TData + > + >; + axios?: AxiosRequestConfig; + }, +) => { const { query: queryOptions, axios: axiosOptions } = options ?? {}; - const queryKey = queryOptions?.queryKey ?? getGetSettingsQueryKey(); + const queryKey = + queryOptions?.queryKey ?? 
getListQuarantineRecordsQueryKey(runId, params); - const queryFn: QueryFunction>> = ({ - signal, - }) => getSettings({ signal, ...axiosOptions }); + const queryFn: QueryFunction< + Awaited> + > = ({ signal }) => + listQuarantineRecords(runId, params, { signal, ...axiosOptions }); - return { queryKey, queryFn, ...queryOptions } as UseQueryOptions< - Awaited>, + return { + queryKey, + queryFn, + enabled: !!runId, + ...queryOptions, + } as UseQueryOptions< + Awaited>, TError, TData > & { queryKey: DataTag }; }; -export type GetSettingsQueryResult = NonNullable< - Awaited> +export type ListQuarantineRecordsQueryResult = NonNullable< + Awaited> >; -export type GetSettingsQueryError = AxiosError; +export type ListQuarantineRecordsQueryError = AxiosError; -export function useGetSettings< - TData = Awaited>, +export function useListQuarantineRecords< + TData = Awaited>, TError = AxiosError, >( + runId: string, + params: undefined | ListQuarantineRecordsParams, options: { query: Partial< - UseQueryOptions>, TError, TData> + UseQueryOptions< + Awaited>, + TError, + TData + > > & Pick< DefinedInitialDataOptions< - Awaited>, + Awaited>, TError, - Awaited> + Awaited> >, "initialData" >; @@ -12372,19 +14135,25 @@ export function useGetSettings< ): DefinedUseQueryResult & { queryKey: DataTag; }; -export function useGetSettings< - TData = Awaited>, +export function useListQuarantineRecords< + TData = Awaited>, TError = AxiosError, >( + runId: string, + params?: ListQuarantineRecordsParams, options?: { query?: Partial< - UseQueryOptions>, TError, TData> + UseQueryOptions< + Awaited>, + TError, + TData + > > & Pick< UndefinedInitialDataOptions< - Awaited>, + Awaited>, TError, - Awaited> + Awaited> >, "initialData" >; @@ -12394,13 +14163,19 @@ export function useGetSettings< ): UseQueryResult & { queryKey: DataTag; }; -export function useGetSettings< - TData = Awaited>, +export function useListQuarantineRecords< + TData = Awaited>, TError = AxiosError, >( + runId: string, + params?: 
ListQuarantineRecordsParams, options?: { query?: Partial< - UseQueryOptions>, TError, TData> + UseQueryOptions< + Awaited>, + TError, + TData + > >; axios?: AxiosRequestConfig; }, @@ -12409,16 +14184,22 @@ export function useGetSettings< queryKey: DataTag; }; /** - * @summary Get Settings + * @summary List Quarantine Records */ -export function useGetSettings< - TData = Awaited>, +export function useListQuarantineRecords< + TData = Awaited>, TError = AxiosError, >( + runId: string, + params?: ListQuarantineRecordsParams, options?: { query?: Partial< - UseQueryOptions>, TError, TData> + UseQueryOptions< + Awaited>, + TError, + TData + > >; axios?: AxiosRequestConfig; }, @@ -12426,7 +14207,11 @@ export function useGetSettings< ): UseQueryResult & { queryKey: DataTag; } { - const queryOptions = getGetSettingsQueryOptions(options); + const queryOptions = getListQuarantineRecordsQueryOptions( + runId, + params, + options, + ); const query = useQuery(queryOptions, queryClient) as UseQueryResult< TData, @@ -12438,83 +14223,56 @@ export function useGetSettings< return query; } -export const getGetSettingsSuspenseQueryOptions = < - TData = Awaited>, +export const getListQuarantineRecordsSuspenseQueryOptions = < + TData = Awaited>, TError = AxiosError, ->(options?: { - query?: Partial< - UseSuspenseQueryOptions< - Awaited>, - TError, - TData - > - >; - axios?: AxiosRequestConfig; -}) => { +>( + runId: string, + params?: ListQuarantineRecordsParams, + options?: { + query?: Partial< + UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > + >; + axios?: AxiosRequestConfig; + }, +) => { const { query: queryOptions, axios: axiosOptions } = options ?? {}; - const queryKey = queryOptions?.queryKey ?? getGetSettingsQueryKey(); + const queryKey = + queryOptions?.queryKey ?? 
getListQuarantineRecordsQueryKey(runId, params); - const queryFn: QueryFunction>> = ({ - signal, - }) => getSettings({ signal, ...axiosOptions }); + const queryFn: QueryFunction< + Awaited> + > = ({ signal }) => + listQuarantineRecords(runId, params, { signal, ...axiosOptions }); return { queryKey, queryFn, ...queryOptions } as UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > & { queryKey: DataTag }; }; -export type GetSettingsSuspenseQueryResult = NonNullable< - Awaited> +export type ListQuarantineRecordsSuspenseQueryResult = NonNullable< + Awaited> >; -export type GetSettingsSuspenseQueryError = AxiosError; +export type ListQuarantineRecordsSuspenseQueryError = + AxiosError; -export function useGetSettingsSuspense< - TData = Awaited>, +export function useListQuarantineRecordsSuspense< + TData = Awaited>, TError = AxiosError, >( + runId: string, + params: undefined | ListQuarantineRecordsParams, options: { query: Partial< UseSuspenseQueryOptions< - Awaited>, - TError, - TData - > - >; - axios?: AxiosRequestConfig; - }, - queryClient?: QueryClient, -): UseSuspenseQueryResult & { - queryKey: DataTag; -}; -export function useGetSettingsSuspense< - TData = Awaited>, - TError = AxiosError, ->( - options?: { - query?: Partial< - UseSuspenseQueryOptions< - Awaited>, - TError, - TData - > - >; - axios?: AxiosRequestConfig; - }, - queryClient?: QueryClient, -): UseSuspenseQueryResult & { - queryKey: DataTag; -}; -export function useGetSettingsSuspense< - TData = Awaited>, - TError = AxiosError, ->( - options?: { - query?: Partial< - UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -12525,18 +14283,16 @@ export function useGetSettingsSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; }; -/** - * @summary Get Settings - */ - -export function useGetSettingsSuspense< - TData = Awaited>, +export function useListQuarantineRecordsSuspense< + TData = Awaited>, TError = AxiosError, >( + runId: string, + params?: 
ListQuarantineRecordsParams, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -12546,250 +14302,143 @@ export function useGetSettingsSuspense< queryClient?: QueryClient, ): UseSuspenseQueryResult & { queryKey: DataTag; -} { - const queryOptions = getGetSettingsSuspenseQueryOptions(options); - - const query = useSuspenseQuery( - queryOptions, - queryClient, - ) as UseSuspenseQueryResult & { - queryKey: DataTag; - }; - - query.queryKey = queryOptions.queryKey; - - return query; -} - -/** - * @summary Save Settings - */ -export const saveSettings = ( - installationSettings: InstallationSettings, - options?: AxiosRequestConfig, -): Promise> => { - return axios.default.post(`/api/v1/settings`, installationSettings, options); -}; - -export const getSaveSettingsMutationOptions = < - TError = AxiosError, - TContext = unknown, ->(options?: { - mutation?: UseMutationOptions< - Awaited>, - TError, - { data: InstallationSettings }, - TContext - >; - axios?: AxiosRequestConfig; -}): UseMutationOptions< - Awaited>, - TError, - { data: InstallationSettings }, - TContext -> => { - const mutationKey = ["saveSettings"]; - const { mutation: mutationOptions, axios: axiosOptions } = options - ? options.mutation && - "mutationKey" in options.mutation && - options.mutation.mutationKey - ? options - : { ...options, mutation: { ...options.mutation, mutationKey } } - : { mutation: { mutationKey }, axios: undefined }; - - const mutationFn: MutationFunction< - Awaited>, - { data: InstallationSettings } - > = (props) => { - const { data } = props ?? 
{}; - - return saveSettings(data, axiosOptions); - }; - - return { mutationFn, ...mutationOptions }; }; - -export type SaveSettingsMutationResult = NonNullable< - Awaited> ->; -export type SaveSettingsMutationBody = InstallationSettings; -export type SaveSettingsMutationError = AxiosError; - -/** - * @summary Save Settings - */ -export const useSaveSettings = < +export function useListQuarantineRecordsSuspense< + TData = Awaited>, TError = AxiosError, - TContext = unknown, >( - options?: { - mutation?: UseMutationOptions< - Awaited>, - TError, - { data: InstallationSettings }, - TContext + runId: string, + params?: ListQuarantineRecordsParams, + options?: { + query?: Partial< + UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > >; axios?: AxiosRequestConfig; }, queryClient?: QueryClient, -): UseMutationResult< - Awaited>, - TError, - { data: InstallationSettings }, - TContext -> => { - const mutationOptions = getSaveSettingsMutationOptions(options); - - return useMutation(mutationOptions, queryClient); +): UseSuspenseQueryResult & { + queryKey: DataTag; }; - /** - * Add a comment to a run or rule. - * @summary Add Comment + * @summary List Quarantine Records */ -export const addComment = ( - addCommentIn: AddCommentIn, - options?: AxiosRequestConfig, -): Promise> => { - return axios.default.post(`/api/v1/comments`, addCommentIn, options); -}; - -export const getAddCommentMutationOptions = < - TError = AxiosError, - TContext = unknown, ->(options?: { - mutation?: UseMutationOptions< - Awaited>, - TError, - { data: AddCommentIn }, - TContext - >; - axios?: AxiosRequestConfig; -}): UseMutationOptions< - Awaited>, - TError, - { data: AddCommentIn }, - TContext -> => { - const mutationKey = ["addComment"]; - const { mutation: mutationOptions, axios: axiosOptions } = options - ? options.mutation && - "mutationKey" in options.mutation && - options.mutation.mutationKey - ? 
options - : { ...options, mutation: { ...options.mutation, mutationKey } } - : { mutation: { mutationKey }, axios: undefined }; - - const mutationFn: MutationFunction< - Awaited>, - { data: AddCommentIn } - > = (props) => { - const { data } = props ?? {}; - - return addComment(data, axiosOptions); - }; - - return { mutationFn, ...mutationOptions }; -}; - -export type AddCommentMutationResult = NonNullable< - Awaited> ->; -export type AddCommentMutationBody = AddCommentIn; -export type AddCommentMutationError = AxiosError; -/** - * @summary Add Comment - */ -export const useAddComment = < +export function useListQuarantineRecordsSuspense< + TData = Awaited>, TError = AxiosError, - TContext = unknown, >( + runId: string, + params?: ListQuarantineRecordsParams, options?: { - mutation?: UseMutationOptions< - Awaited>, - TError, - { data: AddCommentIn }, - TContext + query?: Partial< + UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > >; axios?: AxiosRequestConfig; }, queryClient?: QueryClient, -): UseMutationResult< - Awaited>, - TError, - { data: AddCommentIn }, - TContext -> => { - const mutationOptions = getAddCommentMutationOptions(options); +): UseSuspenseQueryResult & { + queryKey: DataTag; +} { + const queryOptions = getListQuarantineRecordsSuspenseQueryOptions( + runId, + params, + options, + ); - return useMutation(mutationOptions, queryClient); -}; + const query = useSuspenseQuery( + queryOptions, + queryClient, + ) as UseSuspenseQueryResult & { + queryKey: DataTag; + }; + + query.queryKey = queryOptions.queryKey; + + return query; +} /** - * List comments for a specific run or rule. 
- * @summary List Comments + * @summary Get Quarantine Count */ -export const listComments = ( - params: ListCommentsParams, +export const getQuarantineCount = ( + runId: string, options?: AxiosRequestConfig, -): Promise> => { - return axios.default.get(`/api/v1/comments`, { - ...options, - params: { ...params, ...options?.params }, - }); +): Promise> => { + return axios.default.get(`/api/v1/quarantine/runs/${runId}/count`, options); }; -export const getListCommentsQueryKey = (params?: ListCommentsParams) => { - return [`/api/v1/comments`, ...(params ? [params] : [])] as const; +export const getGetQuarantineCountQueryKey = (runId?: string) => { + return [`/api/v1/quarantine/runs/${runId}/count`] as const; }; -export const getListCommentsQueryOptions = < - TData = Awaited>, +export const getGetQuarantineCountQueryOptions = < + TData = Awaited>, TError = AxiosError, >( - params: ListCommentsParams, + runId: string, options?: { query?: Partial< - UseQueryOptions>, TError, TData> + UseQueryOptions< + Awaited>, + TError, + TData + > >; axios?: AxiosRequestConfig; }, ) => { const { query: queryOptions, axios: axiosOptions } = options ?? {}; - const queryKey = queryOptions?.queryKey ?? getListCommentsQueryKey(params); + const queryKey = + queryOptions?.queryKey ?? 
getGetQuarantineCountQueryKey(runId); - const queryFn: QueryFunction>> = ({ - signal, - }) => listComments(params, { signal, ...axiosOptions }); + const queryFn: QueryFunction< + Awaited> + > = ({ signal }) => getQuarantineCount(runId, { signal, ...axiosOptions }); - return { queryKey, queryFn, ...queryOptions } as UseQueryOptions< - Awaited>, + return { + queryKey, + queryFn, + enabled: !!runId, + ...queryOptions, + } as UseQueryOptions< + Awaited>, TError, TData > & { queryKey: DataTag }; }; -export type ListCommentsQueryResult = NonNullable< - Awaited> +export type GetQuarantineCountQueryResult = NonNullable< + Awaited> >; -export type ListCommentsQueryError = AxiosError; +export type GetQuarantineCountQueryError = AxiosError; -export function useListComments< - TData = Awaited>, +export function useGetQuarantineCount< + TData = Awaited>, TError = AxiosError, >( - params: ListCommentsParams, + runId: string, options: { query: Partial< - UseQueryOptions>, TError, TData> + UseQueryOptions< + Awaited>, + TError, + TData + > > & Pick< DefinedInitialDataOptions< - Awaited>, + Awaited>, TError, - Awaited> + Awaited> >, "initialData" >; @@ -12799,20 +14448,24 @@ export function useListComments< ): DefinedUseQueryResult & { queryKey: DataTag; }; -export function useListComments< - TData = Awaited>, +export function useGetQuarantineCount< + TData = Awaited>, TError = AxiosError, >( - params: ListCommentsParams, + runId: string, options?: { query?: Partial< - UseQueryOptions>, TError, TData> + UseQueryOptions< + Awaited>, + TError, + TData + > > & Pick< UndefinedInitialDataOptions< - Awaited>, + Awaited>, TError, - Awaited> + Awaited> >, "initialData" >; @@ -12822,14 +14475,18 @@ export function useListComments< ): UseQueryResult & { queryKey: DataTag; }; -export function useListComments< - TData = Awaited>, +export function useGetQuarantineCount< + TData = Awaited>, TError = AxiosError, >( - params: ListCommentsParams, + runId: string, options?: { query?: Partial< - 
UseQueryOptions>, TError, TData> + UseQueryOptions< + Awaited>, + TError, + TData + > >; axios?: AxiosRequestConfig; }, @@ -12838,17 +14495,21 @@ export function useListComments< queryKey: DataTag; }; /** - * @summary List Comments + * @summary Get Quarantine Count */ -export function useListComments< - TData = Awaited>, +export function useGetQuarantineCount< + TData = Awaited>, TError = AxiosError, >( - params: ListCommentsParams, + runId: string, options?: { query?: Partial< - UseQueryOptions>, TError, TData> + UseQueryOptions< + Awaited>, + TError, + TData + > >; axios?: AxiosRequestConfig; }, @@ -12856,7 +14517,7 @@ export function useListComments< ): UseQueryResult & { queryKey: DataTag; } { - const queryOptions = getListCommentsQueryOptions(params, options); + const queryOptions = getGetQuarantineCountQueryOptions(runId, options); const query = useQuery(queryOptions, queryClient) as UseQueryResult< TData, @@ -12868,15 +14529,15 @@ export function useListComments< return query; } -export const getListCommentsSuspenseQueryOptions = < - TData = Awaited>, +export const getGetQuarantineCountSuspenseQueryOptions = < + TData = Awaited>, TError = AxiosError, >( - params: ListCommentsParams, + runId: string, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -12886,33 +14547,35 @@ export const getListCommentsSuspenseQueryOptions = < ) => { const { query: queryOptions, axios: axiosOptions } = options ?? {}; - const queryKey = queryOptions?.queryKey ?? getListCommentsQueryKey(params); + const queryKey = + queryOptions?.queryKey ?? 
getGetQuarantineCountQueryKey(runId); - const queryFn: QueryFunction>> = ({ - signal, - }) => listComments(params, { signal, ...axiosOptions }); + const queryFn: QueryFunction< + Awaited> + > = ({ signal }) => getQuarantineCount(runId, { signal, ...axiosOptions }); return { queryKey, queryFn, ...queryOptions } as UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > & { queryKey: DataTag }; }; -export type ListCommentsSuspenseQueryResult = NonNullable< - Awaited> +export type GetQuarantineCountSuspenseQueryResult = NonNullable< + Awaited> >; -export type ListCommentsSuspenseQueryError = AxiosError; +export type GetQuarantineCountSuspenseQueryError = + AxiosError; -export function useListCommentsSuspense< - TData = Awaited>, +export function useGetQuarantineCountSuspense< + TData = Awaited>, TError = AxiosError, >( - params: ListCommentsParams, + runId: string, options: { query: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -12923,15 +14586,15 @@ export function useListCommentsSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; }; -export function useListCommentsSuspense< - TData = Awaited>, +export function useGetQuarantineCountSuspense< + TData = Awaited>, TError = AxiosError, >( - params: ListCommentsParams, + runId: string, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -12942,15 +14605,15 @@ export function useListCommentsSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; }; -export function useListCommentsSuspense< - TData = Awaited>, +export function useGetQuarantineCountSuspense< + TData = Awaited>, TError = AxiosError, >( - params: ListCommentsParams, + runId: string, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -12962,18 +14625,18 @@ export function useListCommentsSuspense< queryKey: DataTag; }; /** - * @summary List Comments + * @summary Get Quarantine Count */ -export function 
useListCommentsSuspense< - TData = Awaited>, +export function useGetQuarantineCountSuspense< + TData = Awaited>, TError = AxiosError, >( - params: ListCommentsParams, + runId: string, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -12984,7 +14647,10 @@ export function useListCommentsSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; } { - const queryOptions = getListCommentsSuspenseQueryOptions(params, options); + const queryOptions = getGetQuarantineCountSuspenseQueryOptions( + runId, + options, + ); const query = useSuspenseQuery( queryOptions, @@ -12999,122 +14665,40 @@ export function useListCommentsSuspense< } /** - * Delete a comment (only the author can delete their own comments). - * @summary Delete Comment - */ -export const deleteComment = ( - commentId: string, - options?: AxiosRequestConfig, -): Promise> => { - return axios.default.delete(`/api/v1/comments/${commentId}`, options); -}; - -export const getDeleteCommentMutationOptions = < - TError = AxiosError, - TContext = unknown, ->(options?: { - mutation?: UseMutationOptions< - Awaited>, - TError, - { commentId: string }, - TContext - >; - axios?: AxiosRequestConfig; -}): UseMutationOptions< - Awaited>, - TError, - { commentId: string }, - TContext -> => { - const mutationKey = ["deleteComment"]; - const { mutation: mutationOptions, axios: axiosOptions } = options - ? options.mutation && - "mutationKey" in options.mutation && - options.mutation.mutationKey - ? options - : { ...options, mutation: { ...options.mutation, mutationKey } } - : { mutation: { mutationKey }, axios: undefined }; - - const mutationFn: MutationFunction< - Awaited>, - { commentId: string } - > = (props) => { - const { commentId } = props ?? 
{}; - - return deleteComment(commentId, axiosOptions); - }; - - return { mutationFn, ...mutationOptions }; -}; - -export type DeleteCommentMutationResult = NonNullable< - Awaited> ->; - -export type DeleteCommentMutationError = AxiosError; - -/** - * @summary Delete Comment - */ -export const useDeleteComment = < - TError = AxiosError, - TContext = unknown, ->( - options?: { - mutation?: UseMutationOptions< - Awaited>, - TError, - { commentId: string }, - TContext - >; - axios?: AxiosRequestConfig; - }, - queryClient?: QueryClient, -): UseMutationResult< - Awaited>, - TError, - { commentId: string }, - TContext -> => { - const mutationOptions = getDeleteCommentMutationOptions(options); - - return useMutation(mutationOptions, queryClient); -}; - -/** - * @summary List Quarantine Records + * Export quarantine records for a run as CSV or JSON download (capped). + * @summary Export Quarantine Records */ -export const listQuarantineRecords = ( +export const exportQuarantineRecords = ( runId: string, - params?: ListQuarantineRecordsParams, + params?: ExportQuarantineRecordsParams, options?: AxiosRequestConfig, -): Promise> => { - return axios.default.get(`/api/v1/quarantine/runs/${runId}`, { +): Promise> => { + return axios.default.get(`/api/v1/quarantine/runs/${runId}/export`, { ...options, params: { ...params, ...options?.params }, }); }; -export const getListQuarantineRecordsQueryKey = ( +export const getExportQuarantineRecordsQueryKey = ( runId?: string, - params?: ListQuarantineRecordsParams, + params?: ExportQuarantineRecordsParams, ) => { return [ - `/api/v1/quarantine/runs/${runId}`, + `/api/v1/quarantine/runs/${runId}/export`, ...(params ? 
[params] : []), ] as const; }; -export const getListQuarantineRecordsQueryOptions = < - TData = Awaited>, +export const getExportQuarantineRecordsQueryOptions = < + TData = Awaited>, TError = AxiosError, >( runId: string, - params?: ListQuarantineRecordsParams, + params?: ExportQuarantineRecordsParams, options?: { query?: Partial< UseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -13125,12 +14709,12 @@ export const getListQuarantineRecordsQueryOptions = < const { query: queryOptions, axios: axiosOptions } = options ?? {}; const queryKey = - queryOptions?.queryKey ?? getListQuarantineRecordsQueryKey(runId, params); + queryOptions?.queryKey ?? getExportQuarantineRecordsQueryKey(runId, params); const queryFn: QueryFunction< - Awaited> + Awaited> > = ({ signal }) => - listQuarantineRecords(runId, params, { signal, ...axiosOptions }); + exportQuarantineRecords(runId, params, { signal, ...axiosOptions }); return { queryKey, @@ -13138,36 +14722,36 @@ export const getListQuarantineRecordsQueryOptions = < enabled: !!runId, ...queryOptions, } as UseQueryOptions< - Awaited>, + Awaited>, TError, TData > & { queryKey: DataTag }; }; -export type ListQuarantineRecordsQueryResult = NonNullable< - Awaited> +export type ExportQuarantineRecordsQueryResult = NonNullable< + Awaited> >; -export type ListQuarantineRecordsQueryError = AxiosError; +export type ExportQuarantineRecordsQueryError = AxiosError; -export function useListQuarantineRecords< - TData = Awaited>, +export function useExportQuarantineRecords< + TData = Awaited>, TError = AxiosError, >( runId: string, - params: undefined | ListQuarantineRecordsParams, + params: undefined | ExportQuarantineRecordsParams, options: { query: Partial< UseQueryOptions< - Awaited>, + Awaited>, TError, TData > > & Pick< DefinedInitialDataOptions< - Awaited>, + Awaited>, TError, - Awaited> + Awaited> >, "initialData" >; @@ -13177,25 +14761,25 @@ export function useListQuarantineRecords< ): DefinedUseQueryResult & { queryKey: DataTag; 
}; -export function useListQuarantineRecords< - TData = Awaited>, +export function useExportQuarantineRecords< + TData = Awaited>, TError = AxiosError, >( runId: string, - params?: ListQuarantineRecordsParams, + params?: ExportQuarantineRecordsParams, options?: { query?: Partial< UseQueryOptions< - Awaited>, + Awaited>, TError, TData > > & Pick< UndefinedInitialDataOptions< - Awaited>, + Awaited>, TError, - Awaited> + Awaited> >, "initialData" >; @@ -13205,16 +14789,16 @@ export function useListQuarantineRecords< ): UseQueryResult & { queryKey: DataTag; }; -export function useListQuarantineRecords< - TData = Awaited>, +export function useExportQuarantineRecords< + TData = Awaited>, TError = AxiosError, >( runId: string, - params?: ListQuarantineRecordsParams, + params?: ExportQuarantineRecordsParams, options?: { query?: Partial< UseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -13226,19 +14810,19 @@ export function useListQuarantineRecords< queryKey: DataTag; }; /** - * @summary List Quarantine Records + * @summary Export Quarantine Records */ -export function useListQuarantineRecords< - TData = Awaited>, +export function useExportQuarantineRecords< + TData = Awaited>, TError = AxiosError, >( runId: string, - params?: ListQuarantineRecordsParams, + params?: ExportQuarantineRecordsParams, options?: { query?: Partial< UseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -13249,7 +14833,7 @@ export function useListQuarantineRecords< ): UseQueryResult & { queryKey: DataTag; } { - const queryOptions = getListQuarantineRecordsQueryOptions( + const queryOptions = getExportQuarantineRecordsQueryOptions( runId, params, options, @@ -13265,16 +14849,16 @@ export function useListQuarantineRecords< return query; } -export const getListQuarantineRecordsSuspenseQueryOptions = < - TData = Awaited>, +export const getExportQuarantineRecordsSuspenseQueryOptions = < + TData = Awaited>, TError = AxiosError, >( runId: string, - params?: ListQuarantineRecordsParams, + 
params?: ExportQuarantineRecordsParams, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -13285,36 +14869,36 @@ export const getListQuarantineRecordsSuspenseQueryOptions = < const { query: queryOptions, axios: axiosOptions } = options ?? {}; const queryKey = - queryOptions?.queryKey ?? getListQuarantineRecordsQueryKey(runId, params); + queryOptions?.queryKey ?? getExportQuarantineRecordsQueryKey(runId, params); const queryFn: QueryFunction< - Awaited> + Awaited> > = ({ signal }) => - listQuarantineRecords(runId, params, { signal, ...axiosOptions }); + exportQuarantineRecords(runId, params, { signal, ...axiosOptions }); return { queryKey, queryFn, ...queryOptions } as UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > & { queryKey: DataTag }; }; -export type ListQuarantineRecordsSuspenseQueryResult = NonNullable< - Awaited> +export type ExportQuarantineRecordsSuspenseQueryResult = NonNullable< + Awaited> >; -export type ListQuarantineRecordsSuspenseQueryError = +export type ExportQuarantineRecordsSuspenseQueryError = AxiosError; -export function useListQuarantineRecordsSuspense< - TData = Awaited>, +export function useExportQuarantineRecordsSuspense< + TData = Awaited>, TError = AxiosError, >( runId: string, - params: undefined | ListQuarantineRecordsParams, + params: undefined | ExportQuarantineRecordsParams, options: { query: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -13325,16 +14909,16 @@ export function useListQuarantineRecordsSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; }; -export function useListQuarantineRecordsSuspense< - TData = Awaited>, +export function useExportQuarantineRecordsSuspense< + TData = Awaited>, TError = AxiosError, >( runId: string, - params?: ListQuarantineRecordsParams, + params?: ExportQuarantineRecordsParams, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -13345,16 +14929,16 @@ 
export function useListQuarantineRecordsSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; }; -export function useListQuarantineRecordsSuspense< - TData = Awaited>, +export function useExportQuarantineRecordsSuspense< + TData = Awaited>, TError = AxiosError, >( runId: string, - params?: ListQuarantineRecordsParams, + params?: ExportQuarantineRecordsParams, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -13366,19 +14950,19 @@ export function useListQuarantineRecordsSuspense< queryKey: DataTag; }; /** - * @summary List Quarantine Records + * @summary Export Quarantine Records */ -export function useListQuarantineRecordsSuspense< - TData = Awaited>, +export function useExportQuarantineRecordsSuspense< + TData = Awaited>, TError = AxiosError, >( runId: string, - params?: ListQuarantineRecordsParams, + params?: ExportQuarantineRecordsParams, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -13389,7 +14973,7 @@ export function useListQuarantineRecordsSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; } { - const queryOptions = getListQuarantineRecordsSuspenseQueryOptions( + const queryOptions = getExportQuarantineRecordsSuspenseQueryOptions( runId, params, options, @@ -13408,28 +14992,41 @@ export function useListQuarantineRecordsSuspense< } /** - * @summary Get Quarantine Count + * Return quality metric snapshots for a specific table, ordered by time (newest first). + +Joins ``dq_metrics`` (long-format observations) to ``dq_validation_runs`` +so we recover ``run_type``, ``requesting_user`` and ``created_at`` from +the lifecycle table without duplicating them across every metric row. 
+ * @summary Get Metrics Trend */ -export const getQuarantineCount = ( - runId: string, +export const getMetricsTrend = ( + tableFqn: string, + params?: GetMetricsTrendParams, options?: AxiosRequestConfig, -): Promise> => { - return axios.default.get(`/api/v1/quarantine/runs/${runId}/count`, options); +): Promise> => { + return axios.default.get(`/api/v1/metrics/${tableFqn}`, { + ...options, + params: { ...params, ...options?.params }, + }); }; -export const getGetQuarantineCountQueryKey = (runId?: string) => { - return [`/api/v1/quarantine/runs/${runId}/count`] as const; +export const getGetMetricsTrendQueryKey = ( + tableFqn?: string, + params?: GetMetricsTrendParams, +) => { + return [`/api/v1/metrics/${tableFqn}`, ...(params ? [params] : [])] as const; }; -export const getGetQuarantineCountQueryOptions = < - TData = Awaited>, +export const getGetMetricsTrendQueryOptions = < + TData = Awaited>, TError = AxiosError, >( - runId: string, + tableFqn: string, + params?: GetMetricsTrendParams, options?: { query?: Partial< UseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -13440,47 +15037,48 @@ export const getGetQuarantineCountQueryOptions = < const { query: queryOptions, axios: axiosOptions } = options ?? {}; const queryKey = - queryOptions?.queryKey ?? getGetQuarantineCountQueryKey(runId); + queryOptions?.queryKey ?? 
getGetMetricsTrendQueryKey(tableFqn, params); - const queryFn: QueryFunction< - Awaited> - > = ({ signal }) => getQuarantineCount(runId, { signal, ...axiosOptions }); + const queryFn: QueryFunction>> = ({ + signal, + }) => getMetricsTrend(tableFqn, params, { signal, ...axiosOptions }); return { queryKey, queryFn, - enabled: !!runId, + enabled: !!tableFqn, ...queryOptions, } as UseQueryOptions< - Awaited>, + Awaited>, TError, TData > & { queryKey: DataTag }; }; -export type GetQuarantineCountQueryResult = NonNullable< - Awaited> +export type GetMetricsTrendQueryResult = NonNullable< + Awaited> >; -export type GetQuarantineCountQueryError = AxiosError; +export type GetMetricsTrendQueryError = AxiosError; -export function useGetQuarantineCount< - TData = Awaited>, +export function useGetMetricsTrend< + TData = Awaited>, TError = AxiosError, >( - runId: string, + tableFqn: string, + params: undefined | GetMetricsTrendParams, options: { query: Partial< UseQueryOptions< - Awaited>, + Awaited>, TError, TData > > & Pick< DefinedInitialDataOptions< - Awaited>, + Awaited>, TError, - Awaited> + Awaited> >, "initialData" >; @@ -13490,24 +15088,25 @@ export function useGetQuarantineCount< ): DefinedUseQueryResult & { queryKey: DataTag; }; -export function useGetQuarantineCount< - TData = Awaited>, +export function useGetMetricsTrend< + TData = Awaited>, TError = AxiosError, >( - runId: string, + tableFqn: string, + params?: GetMetricsTrendParams, options?: { query?: Partial< UseQueryOptions< - Awaited>, + Awaited>, TError, TData > > & Pick< UndefinedInitialDataOptions< - Awaited>, + Awaited>, TError, - Awaited> + Awaited> >, "initialData" >; @@ -13517,15 +15116,16 @@ export function useGetQuarantineCount< ): UseQueryResult & { queryKey: DataTag; }; -export function useGetQuarantineCount< - TData = Awaited>, +export function useGetMetricsTrend< + TData = Awaited>, TError = AxiosError, >( - runId: string, + tableFqn: string, + params?: GetMetricsTrendParams, options?: { query?: 
Partial< UseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -13537,18 +15137,19 @@ export function useGetQuarantineCount< queryKey: DataTag; }; /** - * @summary Get Quarantine Count + * @summary Get Metrics Trend */ -export function useGetQuarantineCount< - TData = Awaited>, +export function useGetMetricsTrend< + TData = Awaited>, TError = AxiosError, >( - runId: string, + tableFqn: string, + params?: GetMetricsTrendParams, options?: { query?: Partial< UseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -13559,7 +15160,11 @@ export function useGetQuarantineCount< ): UseQueryResult & { queryKey: DataTag; } { - const queryOptions = getGetQuarantineCountQueryOptions(runId, options); + const queryOptions = getGetMetricsTrendQueryOptions( + tableFqn, + params, + options, + ); const query = useQuery(queryOptions, queryClient) as UseQueryResult< TData, @@ -13571,15 +15176,16 @@ export function useGetQuarantineCount< return query; } -export const getGetQuarantineCountSuspenseQueryOptions = < - TData = Awaited>, +export const getGetMetricsTrendSuspenseQueryOptions = < + TData = Awaited>, TError = AxiosError, >( - runId: string, + tableFqn: string, + params?: GetMetricsTrendParams, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -13590,34 +15196,34 @@ export const getGetQuarantineCountSuspenseQueryOptions = < const { query: queryOptions, axios: axiosOptions } = options ?? {}; const queryKey = - queryOptions?.queryKey ?? getGetQuarantineCountQueryKey(runId); + queryOptions?.queryKey ?? 
getGetMetricsTrendQueryKey(tableFqn, params); - const queryFn: QueryFunction< - Awaited> - > = ({ signal }) => getQuarantineCount(runId, { signal, ...axiosOptions }); + const queryFn: QueryFunction>> = ({ + signal, + }) => getMetricsTrend(tableFqn, params, { signal, ...axiosOptions }); return { queryKey, queryFn, ...queryOptions } as UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > & { queryKey: DataTag }; }; -export type GetQuarantineCountSuspenseQueryResult = NonNullable< - Awaited> +export type GetMetricsTrendSuspenseQueryResult = NonNullable< + Awaited> >; -export type GetQuarantineCountSuspenseQueryError = - AxiosError; +export type GetMetricsTrendSuspenseQueryError = AxiosError; -export function useGetQuarantineCountSuspense< - TData = Awaited>, +export function useGetMetricsTrendSuspense< + TData = Awaited>, TError = AxiosError, >( - runId: string, + tableFqn: string, + params: undefined | GetMetricsTrendParams, options: { query: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -13628,15 +15234,16 @@ export function useGetQuarantineCountSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; }; -export function useGetQuarantineCountSuspense< - TData = Awaited>, +export function useGetMetricsTrendSuspense< + TData = Awaited>, TError = AxiosError, >( - runId: string, + tableFqn: string, + params?: GetMetricsTrendParams, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -13647,15 +15254,16 @@ export function useGetQuarantineCountSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; }; -export function useGetQuarantineCountSuspense< - TData = Awaited>, +export function useGetMetricsTrendSuspense< + TData = Awaited>, TError = AxiosError, >( - runId: string, + tableFqn: string, + params?: GetMetricsTrendParams, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -13667,18 +15275,19 @@ export function 
useGetQuarantineCountSuspense< queryKey: DataTag; }; /** - * @summary Get Quarantine Count + * @summary Get Metrics Trend */ -export function useGetQuarantineCountSuspense< - TData = Awaited>, +export function useGetMetricsTrendSuspense< + TData = Awaited>, TError = AxiosError, >( - runId: string, + tableFqn: string, + params?: GetMetricsTrendParams, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -13689,8 +15298,9 @@ export function useGetQuarantineCountSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; } { - const queryOptions = getGetQuarantineCountSuspenseQueryOptions( - runId, + const queryOptions = getGetMetricsTrendSuspenseQueryOptions( + tableFqn, + params, options, ); @@ -13707,93 +15317,72 @@ export function useGetQuarantineCountSuspense< } /** - * Export quarantine records for a run as CSV or JSON download (capped). - * @summary Export Quarantine Records + * Return the latest pass-rate per tracked table. + +Computes pass rate inline from ``valid_row_count`` and +``input_row_count`` so we don't need a stored ``pass_rate`` column. + * @summary Get Metrics Summary */ -export const exportQuarantineRecords = ( - runId: string, - params?: ExportQuarantineRecordsParams, +export const getMetricsSummary = ( options?: AxiosRequestConfig, -): Promise> => { - return axios.default.get(`/api/v1/quarantine/runs/${runId}/export`, { - ...options, - params: { ...params, ...options?.params }, - }); +): Promise> => { + return axios.default.get(`/api/v1/metrics`, options); }; -export const getExportQuarantineRecordsQueryKey = ( - runId?: string, - params?: ExportQuarantineRecordsParams, -) => { - return [ - `/api/v1/quarantine/runs/${runId}/export`, - ...(params ? 
[params] : []), - ] as const; +export const getGetMetricsSummaryQueryKey = () => { + return [`/api/v1/metrics`] as const; }; -export const getExportQuarantineRecordsQueryOptions = < - TData = Awaited>, +export const getGetMetricsSummaryQueryOptions = < + TData = Awaited>, TError = AxiosError, ->( - runId: string, - params?: ExportQuarantineRecordsParams, - options?: { - query?: Partial< - UseQueryOptions< - Awaited>, - TError, - TData - > - >; - axios?: AxiosRequestConfig; - }, -) => { +>(options?: { + query?: Partial< + UseQueryOptions< + Awaited>, + TError, + TData + > + >; + axios?: AxiosRequestConfig; +}) => { const { query: queryOptions, axios: axiosOptions } = options ?? {}; - const queryKey = - queryOptions?.queryKey ?? getExportQuarantineRecordsQueryKey(runId, params); - - const queryFn: QueryFunction< - Awaited> - > = ({ signal }) => - exportQuarantineRecords(runId, params, { signal, ...axiosOptions }); - - return { - queryKey, - queryFn, - enabled: !!runId, - ...queryOptions, - } as UseQueryOptions< - Awaited>, + const queryKey = queryOptions?.queryKey ?? 
getGetMetricsSummaryQueryKey(); + + const queryFn: QueryFunction< + Awaited> + > = ({ signal }) => getMetricsSummary({ signal, ...axiosOptions }); + + return { queryKey, queryFn, ...queryOptions } as UseQueryOptions< + Awaited>, TError, TData > & { queryKey: DataTag }; }; -export type ExportQuarantineRecordsQueryResult = NonNullable< - Awaited> +export type GetMetricsSummaryQueryResult = NonNullable< + Awaited> >; -export type ExportQuarantineRecordsQueryError = AxiosError; +export type GetMetricsSummaryQueryError = AxiosError; -export function useExportQuarantineRecords< - TData = Awaited>, +export function useGetMetricsSummary< + TData = Awaited>, TError = AxiosError, >( - runId: string, - params: undefined | ExportQuarantineRecordsParams, options: { query: Partial< UseQueryOptions< - Awaited>, + Awaited>, TError, TData > > & Pick< DefinedInitialDataOptions< - Awaited>, + Awaited>, TError, - Awaited> + Awaited> >, "initialData" >; @@ -13803,25 +15392,23 @@ export function useExportQuarantineRecords< ): DefinedUseQueryResult & { queryKey: DataTag; }; -export function useExportQuarantineRecords< - TData = Awaited>, +export function useGetMetricsSummary< + TData = Awaited>, TError = AxiosError, >( - runId: string, - params?: ExportQuarantineRecordsParams, options?: { query?: Partial< UseQueryOptions< - Awaited>, + Awaited>, TError, TData > > & Pick< UndefinedInitialDataOptions< - Awaited>, + Awaited>, TError, - Awaited> + Awaited> >, "initialData" >; @@ -13831,16 +15418,14 @@ export function useExportQuarantineRecords< ): UseQueryResult & { queryKey: DataTag; }; -export function useExportQuarantineRecords< - TData = Awaited>, +export function useGetMetricsSummary< + TData = Awaited>, TError = AxiosError, >( - runId: string, - params?: ExportQuarantineRecordsParams, options?: { query?: Partial< UseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -13852,19 +15437,17 @@ export function useExportQuarantineRecords< queryKey: DataTag; }; /** - * @summary Export 
Quarantine Records + * @summary Get Metrics Summary */ -export function useExportQuarantineRecords< - TData = Awaited>, +export function useGetMetricsSummary< + TData = Awaited>, TError = AxiosError, >( - runId: string, - params?: ExportQuarantineRecordsParams, options?: { query?: Partial< UseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -13875,11 +15458,7 @@ export function useExportQuarantineRecords< ): UseQueryResult & { queryKey: DataTag; } { - const queryOptions = getExportQuarantineRecordsQueryOptions( - runId, - params, - options, - ); + const queryOptions = getGetMetricsSummaryQueryOptions(options); const query = useQuery(queryOptions, queryClient) as UseQueryResult< TData, @@ -13891,56 +15470,48 @@ export function useExportQuarantineRecords< return query; } -export const getExportQuarantineRecordsSuspenseQueryOptions = < - TData = Awaited>, +export const getGetMetricsSummarySuspenseQueryOptions = < + TData = Awaited>, TError = AxiosError, ->( - runId: string, - params?: ExportQuarantineRecordsParams, - options?: { - query?: Partial< - UseSuspenseQueryOptions< - Awaited>, - TError, - TData - > - >; - axios?: AxiosRequestConfig; - }, -) => { +>(options?: { + query?: Partial< + UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > + >; + axios?: AxiosRequestConfig; +}) => { const { query: queryOptions, axios: axiosOptions } = options ?? {}; - const queryKey = - queryOptions?.queryKey ?? getExportQuarantineRecordsQueryKey(runId, params); + const queryKey = queryOptions?.queryKey ?? 
getGetMetricsSummaryQueryKey(); const queryFn: QueryFunction< - Awaited> - > = ({ signal }) => - exportQuarantineRecords(runId, params, { signal, ...axiosOptions }); + Awaited> + > = ({ signal }) => getMetricsSummary({ signal, ...axiosOptions }); return { queryKey, queryFn, ...queryOptions } as UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > & { queryKey: DataTag }; }; -export type ExportQuarantineRecordsSuspenseQueryResult = NonNullable< - Awaited> +export type GetMetricsSummarySuspenseQueryResult = NonNullable< + Awaited> >; -export type ExportQuarantineRecordsSuspenseQueryError = +export type GetMetricsSummarySuspenseQueryError = AxiosError; -export function useExportQuarantineRecordsSuspense< - TData = Awaited>, +export function useGetMetricsSummarySuspense< + TData = Awaited>, TError = AxiosError, >( - runId: string, - params: undefined | ExportQuarantineRecordsParams, options: { query: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -13951,16 +15522,14 @@ export function useExportQuarantineRecordsSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; }; -export function useExportQuarantineRecordsSuspense< - TData = Awaited>, +export function useGetMetricsSummarySuspense< + TData = Awaited>, TError = AxiosError, >( - runId: string, - params?: ExportQuarantineRecordsParams, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -13971,16 +15540,14 @@ export function useExportQuarantineRecordsSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; }; -export function useExportQuarantineRecordsSuspense< - TData = Awaited>, +export function useGetMetricsSummarySuspense< + TData = Awaited>, TError = AxiosError, >( - runId: string, - params?: ExportQuarantineRecordsParams, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -13992,19 +15559,17 @@ export function useExportQuarantineRecordsSuspense< queryKey: DataTag; }; /** - * 
@summary Export Quarantine Records + * @summary Get Metrics Summary */ -export function useExportQuarantineRecordsSuspense< - TData = Awaited>, +export function useGetMetricsSummarySuspense< + TData = Awaited>, TError = AxiosError, >( - runId: string, - params?: ExportQuarantineRecordsParams, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -14015,11 +15580,7 @@ export function useExportQuarantineRecordsSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; } { - const queryOptions = getExportQuarantineRecordsSuspenseQueryOptions( - runId, - params, - options, - ); + const queryOptions = getGetMetricsSummarySuspenseQueryOptions(options); const query = useSuspenseQuery( queryOptions, @@ -14034,41 +15595,34 @@ export function useExportQuarantineRecordsSuspense< } /** - * Return quality metric snapshots for a specific table, ordered by time (newest first). + * Return the effective status for the run. -Joins ``dq_metrics`` (long-format observations) to ``dq_validation_runs`` -so we recover ``run_type``, ``requesting_user`` and ``created_at`` from -the lifecycle table without duplicating them across every metric row. - * @summary Get Metrics Trend +Falls back to the catalogue default when no explicit row exists — +the UI uses ``is_default`` to render an "(auto)" hint and skip the +``updated_by`` / ``updated_at`` metadata that would otherwise be +misleading for an unreviewed run. 
+ * @summary Get Run Review Status */ -export const getMetricsTrend = ( - tableFqn: string, - params?: GetMetricsTrendParams, +export const getRunReviewStatus = ( + runId: string, options?: AxiosRequestConfig, -): Promise> => { - return axios.default.get(`/api/v1/metrics/${tableFqn}`, { - ...options, - params: { ...params, ...options?.params }, - }); +): Promise> => { + return axios.default.get(`/api/v1/runs/${runId}/review-status`, options); }; -export const getGetMetricsTrendQueryKey = ( - tableFqn?: string, - params?: GetMetricsTrendParams, -) => { - return [`/api/v1/metrics/${tableFqn}`, ...(params ? [params] : [])] as const; +export const getGetRunReviewStatusQueryKey = (runId?: string) => { + return [`/api/v1/runs/${runId}/review-status`] as const; }; -export const getGetMetricsTrendQueryOptions = < - TData = Awaited>, +export const getGetRunReviewStatusQueryOptions = < + TData = Awaited>, TError = AxiosError, >( - tableFqn: string, - params?: GetMetricsTrendParams, + runId: string, options?: { query?: Partial< UseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -14079,48 +15633,47 @@ export const getGetMetricsTrendQueryOptions = < const { query: queryOptions, axios: axiosOptions } = options ?? {}; const queryKey = - queryOptions?.queryKey ?? getGetMetricsTrendQueryKey(tableFqn, params); + queryOptions?.queryKey ?? 
getGetRunReviewStatusQueryKey(runId); - const queryFn: QueryFunction>> = ({ - signal, - }) => getMetricsTrend(tableFqn, params, { signal, ...axiosOptions }); + const queryFn: QueryFunction< + Awaited> + > = ({ signal }) => getRunReviewStatus(runId, { signal, ...axiosOptions }); return { queryKey, queryFn, - enabled: !!tableFqn, + enabled: !!runId, ...queryOptions, } as UseQueryOptions< - Awaited>, + Awaited>, TError, TData > & { queryKey: DataTag }; }; -export type GetMetricsTrendQueryResult = NonNullable< - Awaited> +export type GetRunReviewStatusQueryResult = NonNullable< + Awaited> >; -export type GetMetricsTrendQueryError = AxiosError; +export type GetRunReviewStatusQueryError = AxiosError; -export function useGetMetricsTrend< - TData = Awaited>, +export function useGetRunReviewStatus< + TData = Awaited>, TError = AxiosError, >( - tableFqn: string, - params: undefined | GetMetricsTrendParams, + runId: string, options: { query: Partial< UseQueryOptions< - Awaited>, + Awaited>, TError, TData > > & Pick< DefinedInitialDataOptions< - Awaited>, + Awaited>, TError, - Awaited> + Awaited> >, "initialData" >; @@ -14130,25 +15683,24 @@ export function useGetMetricsTrend< ): DefinedUseQueryResult & { queryKey: DataTag; }; -export function useGetMetricsTrend< - TData = Awaited>, +export function useGetRunReviewStatus< + TData = Awaited>, TError = AxiosError, >( - tableFqn: string, - params?: GetMetricsTrendParams, + runId: string, options?: { query?: Partial< UseQueryOptions< - Awaited>, + Awaited>, TError, TData > > & Pick< UndefinedInitialDataOptions< - Awaited>, + Awaited>, TError, - Awaited> + Awaited> >, "initialData" >; @@ -14158,16 +15710,15 @@ export function useGetMetricsTrend< ): UseQueryResult & { queryKey: DataTag; }; -export function useGetMetricsTrend< - TData = Awaited>, +export function useGetRunReviewStatus< + TData = Awaited>, TError = AxiosError, >( - tableFqn: string, - params?: GetMetricsTrendParams, + runId: string, options?: { query?: Partial< 
UseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -14179,19 +15730,18 @@ export function useGetMetricsTrend< queryKey: DataTag; }; /** - * @summary Get Metrics Trend + * @summary Get Run Review Status */ -export function useGetMetricsTrend< - TData = Awaited>, +export function useGetRunReviewStatus< + TData = Awaited>, TError = AxiosError, >( - tableFqn: string, - params?: GetMetricsTrendParams, + runId: string, options?: { query?: Partial< UseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -14202,11 +15752,7 @@ export function useGetMetricsTrend< ): UseQueryResult & { queryKey: DataTag; } { - const queryOptions = getGetMetricsTrendQueryOptions( - tableFqn, - params, - options, - ); + const queryOptions = getGetRunReviewStatusQueryOptions(runId, options); const query = useQuery(queryOptions, queryClient) as UseQueryResult< TData, @@ -14218,16 +15764,15 @@ export function useGetMetricsTrend< return query; } -export const getGetMetricsTrendSuspenseQueryOptions = < - TData = Awaited>, +export const getGetRunReviewStatusSuspenseQueryOptions = < + TData = Awaited>, TError = AxiosError, >( - tableFqn: string, - params?: GetMetricsTrendParams, + runId: string, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -14238,34 +15783,34 @@ export const getGetMetricsTrendSuspenseQueryOptions = < const { query: queryOptions, axios: axiosOptions } = options ?? {}; const queryKey = - queryOptions?.queryKey ?? getGetMetricsTrendQueryKey(tableFqn, params); + queryOptions?.queryKey ?? 
getGetRunReviewStatusQueryKey(runId); - const queryFn: QueryFunction>> = ({ - signal, - }) => getMetricsTrend(tableFqn, params, { signal, ...axiosOptions }); + const queryFn: QueryFunction< + Awaited> + > = ({ signal }) => getRunReviewStatus(runId, { signal, ...axiosOptions }); return { queryKey, queryFn, ...queryOptions } as UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > & { queryKey: DataTag }; }; -export type GetMetricsTrendSuspenseQueryResult = NonNullable< - Awaited> +export type GetRunReviewStatusSuspenseQueryResult = NonNullable< + Awaited> >; -export type GetMetricsTrendSuspenseQueryError = AxiosError; +export type GetRunReviewStatusSuspenseQueryError = + AxiosError; -export function useGetMetricsTrendSuspense< - TData = Awaited>, +export function useGetRunReviewStatusSuspense< + TData = Awaited>, TError = AxiosError, >( - tableFqn: string, - params: undefined | GetMetricsTrendParams, + runId: string, options: { query: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -14276,16 +15821,15 @@ export function useGetMetricsTrendSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; }; -export function useGetMetricsTrendSuspense< - TData = Awaited>, +export function useGetRunReviewStatusSuspense< + TData = Awaited>, TError = AxiosError, >( - tableFqn: string, - params?: GetMetricsTrendParams, + runId: string, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -14296,16 +15840,15 @@ export function useGetMetricsTrendSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; }; -export function useGetMetricsTrendSuspense< - TData = Awaited>, +export function useGetRunReviewStatusSuspense< + TData = Awaited>, TError = AxiosError, >( - tableFqn: string, - params?: GetMetricsTrendParams, + runId: string, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -14317,19 +15860,18 @@ export function useGetMetricsTrendSuspense< 
queryKey: DataTag; }; /** - * @summary Get Metrics Trend + * @summary Get Run Review Status */ -export function useGetMetricsTrendSuspense< - TData = Awaited>, +export function useGetRunReviewStatusSuspense< + TData = Awaited>, TError = AxiosError, ->( - tableFqn: string, - params?: GetMetricsTrendParams, +>( + runId: string, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -14340,9 +15882,8 @@ export function useGetMetricsTrendSuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; } { - const queryOptions = getGetMetricsTrendSuspenseQueryOptions( - tableFqn, - params, + const queryOptions = getGetRunReviewStatusSuspenseQueryOptions( + runId, options, ); @@ -14359,72 +15900,265 @@ export function useGetMetricsTrendSuspense< } /** - * Return the latest pass-rate per tracked table. + * Set the review status for a run. -Computes pass rate inline from ``valid_row_count`` and -``input_row_count`` so we don't need a stored ``pass_rate`` column. - * @summary Get Metrics Summary +Records the change in the audit history with the previous effective +value (virtual default included) so the run detail page can render +"Pending review → Acknowledged" naturally. 
+ * @summary Set Run Review Status */ -export const getMetricsSummary = ( +export const setRunReviewStatus = ( + runId: string, + setReviewStatusIn: SetReviewStatusIn, options?: AxiosRequestConfig, -): Promise> => { - return axios.default.get(`/api/v1/metrics`, options); +): Promise> => { + return axios.default.put( + `/api/v1/runs/${runId}/review-status`, + setReviewStatusIn, + options, + ); }; -export const getGetMetricsSummaryQueryKey = () => { - return [`/api/v1/metrics`] as const; +export const getSetRunReviewStatusMutationOptions = < + TError = AxiosError, + TContext = unknown, +>(options?: { + mutation?: UseMutationOptions< + Awaited>, + TError, + { runId: string; data: SetReviewStatusIn }, + TContext + >; + axios?: AxiosRequestConfig; +}): UseMutationOptions< + Awaited>, + TError, + { runId: string; data: SetReviewStatusIn }, + TContext +> => { + const mutationKey = ["setRunReviewStatus"]; + const { mutation: mutationOptions, axios: axiosOptions } = options + ? options.mutation && + "mutationKey" in options.mutation && + options.mutation.mutationKey + ? options + : { ...options, mutation: { ...options.mutation, mutationKey } } + : { mutation: { mutationKey }, axios: undefined }; + + const mutationFn: MutationFunction< + Awaited>, + { runId: string; data: SetReviewStatusIn } + > = (props) => { + const { runId, data } = props ?? 
{}; + + return setRunReviewStatus(runId, data, axiosOptions); + }; + + return { mutationFn, ...mutationOptions }; }; -export const getGetMetricsSummaryQueryOptions = < - TData = Awaited>, +export type SetRunReviewStatusMutationResult = NonNullable< + Awaited> +>; +export type SetRunReviewStatusMutationBody = SetReviewStatusIn; +export type SetRunReviewStatusMutationError = AxiosError; + +/** + * @summary Set Run Review Status + */ +export const useSetRunReviewStatus = < TError = AxiosError, ->(options?: { - query?: Partial< - UseQueryOptions< - Awaited>, + TContext = unknown, +>( + options?: { + mutation?: UseMutationOptions< + Awaited>, TError, - TData - > + { runId: string; data: SetReviewStatusIn }, + TContext + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): UseMutationResult< + Awaited>, + TError, + { runId: string; data: SetReviewStatusIn }, + TContext +> => { + const mutationOptions = getSetRunReviewStatusMutationOptions(options); + + return useMutation(mutationOptions, queryClient); +}; + +/** + * Revert the run to the catalogue default. + +Drops the explicit row and appends a history entry. Useful when a +reviewer wants to "unacknowledge" something without picking another +explicit value (e.g. the run was re-classified and should go back +into the unreviewed queue). 
+ * @summary Clear Run Review Status + */ +export const clearRunReviewStatus = ( + runId: string, + options?: AxiosRequestConfig, +): Promise> => { + return axios.default.delete(`/api/v1/runs/${runId}/review-status`, options); +}; + +export const getClearRunReviewStatusMutationOptions = < + TError = AxiosError, + TContext = unknown, +>(options?: { + mutation?: UseMutationOptions< + Awaited>, + TError, + { runId: string }, + TContext >; axios?: AxiosRequestConfig; -}) => { +}): UseMutationOptions< + Awaited>, + TError, + { runId: string }, + TContext +> => { + const mutationKey = ["clearRunReviewStatus"]; + const { mutation: mutationOptions, axios: axiosOptions } = options + ? options.mutation && + "mutationKey" in options.mutation && + options.mutation.mutationKey + ? options + : { ...options, mutation: { ...options.mutation, mutationKey } } + : { mutation: { mutationKey }, axios: undefined }; + + const mutationFn: MutationFunction< + Awaited>, + { runId: string } + > = (props) => { + const { runId } = props ?? {}; + + return clearRunReviewStatus(runId, axiosOptions); + }; + + return { mutationFn, ...mutationOptions }; +}; + +export type ClearRunReviewStatusMutationResult = NonNullable< + Awaited> +>; + +export type ClearRunReviewStatusMutationError = AxiosError; + +/** + * @summary Clear Run Review Status + */ +export const useClearRunReviewStatus = < + TError = AxiosError, + TContext = unknown, +>( + options?: { + mutation?: UseMutationOptions< + Awaited>, + TError, + { runId: string }, + TContext + >; + axios?: AxiosRequestConfig; + }, + queryClient?: QueryClient, +): UseMutationResult< + Awaited>, + TError, + { runId: string }, + TContext +> => { + const mutationOptions = getClearRunReviewStatusMutationOptions(options); + + return useMutation(mutationOptions, queryClient); +}; + +/** + * Return up to 200 most-recent audit rows, newest first. 
+ * @summary Get Run Review Status History + */ +export const getRunReviewStatusHistory = ( + runId: string, + options?: AxiosRequestConfig, +): Promise> => { + return axios.default.get( + `/api/v1/runs/${runId}/review-status/history`, + options, + ); +}; + +export const getGetRunReviewStatusHistoryQueryKey = (runId?: string) => { + return [`/api/v1/runs/${runId}/review-status/history`] as const; +}; + +export const getGetRunReviewStatusHistoryQueryOptions = < + TData = Awaited>, + TError = AxiosError, +>( + runId: string, + options?: { + query?: Partial< + UseQueryOptions< + Awaited>, + TError, + TData + > + >; + axios?: AxiosRequestConfig; + }, +) => { const { query: queryOptions, axios: axiosOptions } = options ?? {}; - const queryKey = queryOptions?.queryKey ?? getGetMetricsSummaryQueryKey(); + const queryKey = + queryOptions?.queryKey ?? getGetRunReviewStatusHistoryQueryKey(runId); const queryFn: QueryFunction< - Awaited> - > = ({ signal }) => getMetricsSummary({ signal, ...axiosOptions }); + Awaited> + > = ({ signal }) => + getRunReviewStatusHistory(runId, { signal, ...axiosOptions }); - return { queryKey, queryFn, ...queryOptions } as UseQueryOptions< - Awaited>, + return { + queryKey, + queryFn, + enabled: !!runId, + ...queryOptions, + } as UseQueryOptions< + Awaited>, TError, TData > & { queryKey: DataTag }; }; -export type GetMetricsSummaryQueryResult = NonNullable< - Awaited> +export type GetRunReviewStatusHistoryQueryResult = NonNullable< + Awaited> >; -export type GetMetricsSummaryQueryError = AxiosError; +export type GetRunReviewStatusHistoryQueryError = + AxiosError; -export function useGetMetricsSummary< - TData = Awaited>, +export function useGetRunReviewStatusHistory< + TData = Awaited>, TError = AxiosError, >( + runId: string, options: { query: Partial< UseQueryOptions< - Awaited>, + Awaited>, TError, TData > > & Pick< DefinedInitialDataOptions< - Awaited>, + Awaited>, TError, - Awaited> + Awaited> >, "initialData" >; @@ -14434,23 +16168,24 @@ 
export function useGetMetricsSummary< ): DefinedUseQueryResult & { queryKey: DataTag; }; -export function useGetMetricsSummary< - TData = Awaited>, +export function useGetRunReviewStatusHistory< + TData = Awaited>, TError = AxiosError, >( + runId: string, options?: { query?: Partial< UseQueryOptions< - Awaited>, + Awaited>, TError, TData > > & Pick< UndefinedInitialDataOptions< - Awaited>, + Awaited>, TError, - Awaited> + Awaited> >, "initialData" >; @@ -14460,14 +16195,15 @@ export function useGetMetricsSummary< ): UseQueryResult & { queryKey: DataTag; }; -export function useGetMetricsSummary< - TData = Awaited>, +export function useGetRunReviewStatusHistory< + TData = Awaited>, TError = AxiosError, >( + runId: string, options?: { query?: Partial< UseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -14479,17 +16215,18 @@ export function useGetMetricsSummary< queryKey: DataTag; }; /** - * @summary Get Metrics Summary + * @summary Get Run Review Status History */ -export function useGetMetricsSummary< - TData = Awaited>, +export function useGetRunReviewStatusHistory< + TData = Awaited>, TError = AxiosError, >( + runId: string, options?: { query?: Partial< UseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -14500,7 +16237,7 @@ export function useGetMetricsSummary< ): UseQueryResult & { queryKey: DataTag; } { - const queryOptions = getGetMetricsSummaryQueryOptions(options); + const queryOptions = getGetRunReviewStatusHistoryQueryOptions(runId, options); const query = useQuery(queryOptions, queryClient) as UseQueryResult< TData, @@ -14512,48 +16249,54 @@ export function useGetMetricsSummary< return query; } -export const getGetMetricsSummarySuspenseQueryOptions = < - TData = Awaited>, +export const getGetRunReviewStatusHistorySuspenseQueryOptions = < + TData = Awaited>, TError = AxiosError, ->(options?: { - query?: Partial< - UseSuspenseQueryOptions< - Awaited>, - TError, - TData - > - >; - axios?: AxiosRequestConfig; -}) => { +>( + runId: string, + 
options?: { + query?: Partial< + UseSuspenseQueryOptions< + Awaited>, + TError, + TData + > + >; + axios?: AxiosRequestConfig; + }, +) => { const { query: queryOptions, axios: axiosOptions } = options ?? {}; - const queryKey = queryOptions?.queryKey ?? getGetMetricsSummaryQueryKey(); + const queryKey = + queryOptions?.queryKey ?? getGetRunReviewStatusHistoryQueryKey(runId); const queryFn: QueryFunction< - Awaited> - > = ({ signal }) => getMetricsSummary({ signal, ...axiosOptions }); + Awaited> + > = ({ signal }) => + getRunReviewStatusHistory(runId, { signal, ...axiosOptions }); return { queryKey, queryFn, ...queryOptions } as UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > & { queryKey: DataTag }; }; -export type GetMetricsSummarySuspenseQueryResult = NonNullable< - Awaited> +export type GetRunReviewStatusHistorySuspenseQueryResult = NonNullable< + Awaited> >; -export type GetMetricsSummarySuspenseQueryError = +export type GetRunReviewStatusHistorySuspenseQueryError = AxiosError; -export function useGetMetricsSummarySuspense< - TData = Awaited>, +export function useGetRunReviewStatusHistorySuspense< + TData = Awaited>, TError = AxiosError, >( + runId: string, options: { query: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -14564,14 +16307,15 @@ export function useGetMetricsSummarySuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; }; -export function useGetMetricsSummarySuspense< - TData = Awaited>, +export function useGetRunReviewStatusHistorySuspense< + TData = Awaited>, TError = AxiosError, >( + runId: string, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -14582,14 +16326,15 @@ export function useGetMetricsSummarySuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; }; -export function useGetMetricsSummarySuspense< - TData = Awaited>, +export function useGetRunReviewStatusHistorySuspense< + TData = Awaited>, TError = AxiosError, >( + runId: string, 
options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -14601,17 +16346,18 @@ export function useGetMetricsSummarySuspense< queryKey: DataTag; }; /** - * @summary Get Metrics Summary + * @summary Get Run Review Status History */ -export function useGetMetricsSummarySuspense< - TData = Awaited>, +export function useGetRunReviewStatusHistorySuspense< + TData = Awaited>, TError = AxiosError, >( + runId: string, options?: { query?: Partial< UseSuspenseQueryOptions< - Awaited>, + Awaited>, TError, TData > @@ -14622,7 +16368,10 @@ export function useGetMetricsSummarySuspense< ): UseSuspenseQueryResult & { queryKey: DataTag; } { - const queryOptions = getGetMetricsSummarySuspenseQueryOptions(options); + const queryOptions = getGetRunReviewStatusHistorySuspenseQueryOptions( + runId, + options, + ); const query = useSuspenseQuery( queryOptions, diff --git a/app/src/databricks_labs_dqx_app/ui/routes/_sidebar/config.tsx b/app/src/databricks_labs_dqx_app/ui/routes/_sidebar/config.tsx index 997467709..84c638fd6 100644 --- a/app/src/databricks_labs_dqx_app/ui/routes/_sidebar/config.tsx +++ b/app/src/databricks_labs_dqx_app/ui/routes/_sidebar/config.tsx @@ -3,7 +3,7 @@ import { QueryErrorResetBoundary, useQueryClient } from "@tanstack/react-query"; import { ErrorBoundary } from "react-error-boundary"; import { Button } from "@/components/ui/button"; import { PageBreadcrumb } from "@/components/apx/PageBreadcrumb"; -import { AlertCircle, Clock, Globe, Loader2, Search, Tags, Plus, Trash2, X } from "lucide-react"; +import { AlertCircle, CheckCircle2, Circle, Clock, Globe, LayoutDashboard, Loader2, Search, Tags, Plus, Trash2, X, ExternalLink, RotateCcw, ShieldCheck } from "lucide-react"; import { FadeIn } from "@/components/anim/FadeIn"; import { ShinyText } from "@/components/anim/ShinyText"; import { RoleManagement } from "@/components/RoleManagement"; @@ -28,8 +28,16 @@ import { useRetentionSettings, useSaveRetentionSettings, 
getRetentionSettingsQueryKey, + useEmbeddedDashboard, + useSaveEmbeddedDashboard, + useDeleteEmbeddedDashboard, + getEmbeddedDashboardQueryKey, + useRunReviewStatuses, + useSaveRunReviewStatuses, + getRunReviewStatusesQueryKey, type LabelDefinition, type RetentionSettingsOut, + type RunReviewStatusOption, } from "@/lib/api-custom"; import type { AxiosError } from "axios"; import { toast } from "sonner"; @@ -805,6 +813,601 @@ function RetentionSettings() { ); } +// ───────────────────────────────────────────────────────────────────────────── +// Embedded Dashboard — pins a Databricks AI/BI dashboard ID into app state so +// the Insights page can render it inside an iframe. Falls back to the env +// default (set by the bundle's DQX_DEFAULT_DASHBOARD_ID) when unset, so a +// shipped starter dashboard works out-of-the-box. +// ───────────────────────────────────────────────────────────────────────────── + +function EmbeddedDashboardSettings() { + const { data, isLoading } = useEmbeddedDashboard(); + const queryClient = useQueryClient(); + const saveMutation = useSaveEmbeddedDashboard(); + const deleteMutation = useDeleteEmbeddedDashboard(); + const { data: role } = useCurrentUserRoleSuspense(); + const isAdmin = role?.data?.role === "admin"; + + const [dashboardId, setDashboardId] = useState(""); + const [title, setTitle] = useState(""); + const [hydrated, setHydrated] = useState(false); + + useEffect(() => { + if (data && !hydrated) { + // Only seed the inputs with admin-saved values. If only the env + // default is in play, leave the inputs blank so the placeholder + // copy makes clear the field is empty (and saving an empty value + // would be rejected). + if (data.is_set) { + setDashboardId(data.dashboard_id); + setTitle(data.title ?? 
""); + } + setHydrated(true); + } + }, [data, hydrated]); + + const trimmedId = dashboardId.trim(); + const trimmedTitle = title.trim(); + + const isDirty = useMemo(() => { + if (!data) return false; + if (!data.is_set) return trimmedId !== ""; + return trimmedId !== data.dashboard_id || trimmedTitle !== (data.title ?? ""); + }, [data, trimmedId, trimmedTitle]); + + const validationError = useMemo(() => { + if (!trimmedId) return null; + if (!/^[A-Za-z0-9_-]{1,128}$/.test(trimmedId)) { + return "Use the ID only (letters/digits/_/-, ≤128 chars) — not a full URL."; + } + return null; + }, [trimmedId]); + + const previewUrl = useMemo(() => { + if (!data?.workspace_host || !trimmedId || validationError) return null; + return `${data.workspace_host}/dashboardsv3/${trimmedId}`; + }, [data?.workspace_host, trimmedId, validationError]); + + const handleSave = () => { + if (!trimmedId || validationError) return; + saveMutation.mutate( + { data: { dashboard_id: trimmedId, title: trimmedTitle || null } }, + { + onSuccess: () => { + queryClient.invalidateQueries({ queryKey: getEmbeddedDashboardQueryKey() }); + toast.success("Dashboard settings saved."); + }, + onError: (err: unknown) => { + const axErr = err as AxiosError<{ detail?: string }>; + toast.error(axErr?.response?.data?.detail ?? "Failed to save dashboard settings."); + }, + }, + ); + }; + + const handleClear = () => { + deleteMutation.mutate(undefined, { + onSuccess: () => { + setDashboardId(""); + setTitle(""); + setHydrated(false); + queryClient.invalidateQueries({ queryKey: getEmbeddedDashboardQueryKey() }); + toast.success("Cleared dashboard override."); + }, + onError: () => toast.error("Failed to clear dashboard override."), + }); + }; + + if (isLoading || !data) return ; + + return ( + + + + + Insights dashboard + + + +

+ Pin a Databricks AI/BI dashboard to the Insights page. + Anyone with access to this app sees the dashboard rendered as an iframe; row-level visibility + is enforced by Unity Catalog on the underlying tables. Build your dashboard against{" "} + dq_validation_runs, dq_metrics, dq_quarantine_records, and{" "} + dq_profiling_results, then paste the ID below. +

+ + {data.is_default && !data.is_set && ( +
+ A default dashboard is configured by the deployment bundle. Saving below overrides it + for this workspace; "Restore default" reverts. +
+ )} + +
+
+ + setDashboardId(e.target.value)} + placeholder={ + data.is_default + ? `e.g. ${data.dashboard_id} (default)` + : "e.g. 01abc23d456789..." + } + disabled={!isAdmin || saveMutation.isPending || deleteMutation.isPending} + className={cn("h-8 font-mono text-xs", validationError && "border-destructive")} + autoComplete="off" + /> + {validationError && ( +

+ + {validationError} +

+ )} +

+ Find the ID in the dashboard URL after /dashboardsv3/. +

+
+
+ + setTitle(e.target.value)} + placeholder="e.g. Quality Overview" + maxLength={200} + disabled={!isAdmin || saveMutation.isPending || deleteMutation.isPending} + className="h-8 text-xs" + /> +

Shown on the Insights page header.

+
+
+ +
+ + {data.is_set && ( + + )} + {previewUrl && ( + + )} + {!isAdmin && ( + Only admins can change this setting + )} +
+
+
+ ); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Run review statuses — admin-managed catalogue surfaced as the per-run +// review dropdown (Runs detail page) and as a filter on the Runs History +// page. The backend enforces the invariant "exactly one entry has +// is_default == true"; the UI mirrors that with a single radio group +// rather than per-row toggles so the constraint is obvious and the save +// button can stay enabled. +// +// Colors are mapped through a small token table here so the catalogue +// data only ever stores token names ("amber", "green", ...) — the +// design system can rebrand without a data migration. +// ───────────────────────────────────────────────────────────────────────────── + +// Token → tailwind classes. Adding a new color elsewhere in the UI +// means adding one entry here; everything else stays data-only. +const REVIEW_STATUS_COLOR_TOKENS = ["gray", "amber", "green", "blue", "red", "purple"] as const; +type ReviewStatusColorToken = (typeof REVIEW_STATUS_COLOR_TOKENS)[number]; + +const REVIEW_STATUS_COLOR_CLASSES: Record = { + gray: { + swatch: "bg-gray-300 dark:bg-gray-700 border-gray-400 dark:border-gray-600", + badge: "bg-gray-100 text-gray-800 dark:bg-gray-800 dark:text-gray-200 border-gray-300 dark:border-gray-700", + }, + amber: { + swatch: "bg-amber-400 border-amber-500", + badge: "bg-amber-100 text-amber-900 dark:bg-amber-950 dark:text-amber-200 border-amber-300 dark:border-amber-800", + }, + green: { + swatch: "bg-green-500 border-green-600", + badge: "bg-green-100 text-green-900 dark:bg-green-950 dark:text-green-200 border-green-300 dark:border-green-800", + }, + blue: { + swatch: "bg-blue-500 border-blue-600", + badge: "bg-blue-100 text-blue-900 dark:bg-blue-950 dark:text-blue-200 border-blue-300 dark:border-blue-800", + }, + red: { + swatch: "bg-red-500 border-red-600", + badge: "bg-red-100 text-red-900 dark:bg-red-950 dark:text-red-200 border-red-300 
dark:border-red-800", + }, + purple: { + swatch: "bg-purple-500 border-purple-600", + badge: "bg-purple-100 text-purple-900 dark:bg-purple-950 dark:text-purple-200 border-purple-300 dark:border-purple-800", + }, +}; + +/** Normalise an arbitrary color string to a known token; unknown values fall back to gray. */ +function normaliseReviewStatusColor(value: string | undefined | null): ReviewStatusColorToken { + const lower = (value || "").trim().toLowerCase(); + return (REVIEW_STATUS_COLOR_TOKENS as readonly string[]).includes(lower) + ? (lower as ReviewStatusColorToken) + : "gray"; +} + +function ReviewStatusColorSwatch({ color }: { color: string }) { + const token = normaliseReviewStatusColor(color); + return ( + + ); +} + +// Exported helpers — re-used by the Runs detail dropdown and the +// Runs History badge so all three places render consistent colors +// without each one duplicating the token table. +export function reviewStatusBadgeClasses(color: string) { + return REVIEW_STATUS_COLOR_CLASSES[normaliseReviewStatusColor(color)].badge; +} +export { REVIEW_STATUS_COLOR_TOKENS }; + +function RunReviewStatusesSettings() { + const { data, isLoading } = useRunReviewStatuses(); + const queryClient = useQueryClient(); + const saveMutation = useSaveRunReviewStatuses(); + const { data: role } = useCurrentUserRoleSuspense(); + const isAdmin = role?.data?.role === "admin"; + + // Local working copy; never mutated in-place. We re-hydrate from + // the server response on first load and after every successful save + // so concurrent edits from another admin don't get clobbered if the + // user navigates away and back. 
+ const [draft, setDraft] = useState([]); + const [hydrated, setHydrated] = useState(false); + + useEffect(() => { + if (data && !hydrated) { + setDraft(data.statuses.map((s) => ({ ...s }))); + setHydrated(true); + } + }, [data, hydrated]); + + // Re-derive whether the form has unsaved changes from the canonical + // server response rather than tracking a separate dirty flag — + // cheaper and avoids drift after a partial save. + const isDirty = useMemo(() => { + if (!data) return false; + if (data.statuses.length !== draft.length) return true; + return data.statuses.some((s, i) => { + const d = draft[i]; + return ( + !d || + d.value !== s.value || + d.description !== s.description || + d.color !== s.color || + d.is_default !== s.is_default + ); + }); + }, [data, draft]); + + // The save endpoint enforces exactly-one-default; mirror that in the + // UI so the button stays disabled when the constraint can't be met. + const validationError = useMemo(() => { + if (draft.length === 0) { + return "At least one status is required."; + } + const seen = new Set(); + for (const entry of draft) { + const trimmed = entry.value.trim(); + if (!trimmed) return "Every status needs a value."; + if (!/^[A-Za-z0-9][A-Za-z0-9 _\-/.]{0,79}$/.test(trimmed)) { + return `Invalid value "${trimmed}". 
Use letters, digits, spaces, hyphens (max 80 chars).`; + } + if (seen.has(trimmed)) return `Duplicate value: "${trimmed}".`; + seen.add(trimmed); + } + const defaults = draft.filter((d) => d.is_default).length; + if (defaults === 0) return "Pick one status as the default for unreviewed runs."; + if (defaults > 1) return "Only one status can be marked default."; + return null; + }, [draft]); + + const handleAdd = () => { + setDraft((d) => [ + ...d, + { value: "", description: "", color: "gray", is_default: false }, + ]); + }; + + const handleRemove = (idx: number) => { + setDraft((d) => d.filter((_, i) => i !== idx)); + }; + + const handlePatch = (idx: number, patch: Partial) => { + setDraft((d) => d.map((entry, i) => (i === idx ? { ...entry, ...patch } : entry))); + }; + + // Radio-group semantics on top of an array — selecting a default + // unsets the previous one rather than allowing the constraint + // violation to slip into the validation message. + const handleMakeDefault = (idx: number) => { + setDraft((d) => d.map((entry, i) => ({ ...entry, is_default: i === idx }))); + }; + + const handleSave = () => { + if (validationError) return; + const cleaned = draft.map((entry) => ({ + value: entry.value.trim(), + description: (entry.description || "").trim(), + color: normaliseReviewStatusColor(entry.color), + is_default: Boolean(entry.is_default), + })); + saveMutation.mutate( + { data: { statuses: cleaned } }, + { + onSuccess: (resp) => { + queryClient.invalidateQueries({ queryKey: getRunReviewStatusesQueryKey() }); + setDraft(resp.data.statuses.map((s) => ({ ...s }))); + toast.success("Review statuses saved."); + }, + onError: (err: unknown) => { + const axErr = err as AxiosError<{ detail?: string }>; + toast.error(axErr?.response?.data?.detail ?? 
"Failed to save review statuses."); + }, + }, + ); + }; + + const handleReset = () => { + if (data) setDraft(data.statuses.map((s) => ({ ...s }))); + }; + + if (isLoading || !data) { + return ; + } + + return ( + + + + + Run review statuses + + + +

+ Reviewers tag each validation run with one of these values on the{" "} + Runs detail page (next to comments). + The dropdown is filterable on the Runs History{" "} + page so the team can answer questions like "what's been acknowledged?" at a glance. + One value is the default — newly completed runs surface that value until a reviewer + changes it, so dashboards never see an empty state. +

+ +
+ {draft.map((entry, idx) => { + const colorToken = normaliseReviewStatusColor(entry.color); + return ( +
+
+
+ + handlePatch(idx, { value: e.target.value })} + placeholder="e.g. Acknowledged" + maxLength={80} + disabled={!isAdmin || saveMutation.isPending} + className="h-8 text-xs" + autoComplete="off" + /> +
+
+ + handlePatch(idx, { description: e.target.value })} + placeholder="Shown as a tooltip on the dropdown" + maxLength={200} + disabled={!isAdmin || saveMutation.isPending} + className="h-8 text-xs" + /> +
+
+ + + + + + +
+ {REVIEW_STATUS_COLOR_TOKENS.map((tok) => ( + + ))} +
+
+
+
+
+ + +
+
+ +
+ ); + })} +
+ + + + {validationError && ( +

+ + {validationError} +

+ )} + +
+ + {isDirty && ( + + )} + {!isAdmin && ( + Only admins can change this setting + )} +
+ +
+

+ Renaming a value doesn't rewrite existing + run history — historical entries keep the old text so the audit trail stays accurate. + To retire a value cleanly, leave it in the list (not as default) until the affected + runs age out. +

+
+
+
+ ); +} + function ConfigPage() { const { isAdmin } = usePermissions(); const navigate = useNavigate(); @@ -853,11 +1456,25 @@ function ConfigPage() { }> - + + + }> + + + + + + + }> + + + + + diff --git a/app/src/databricks_labs_dqx_app/ui/routes/_sidebar/insights.tsx b/app/src/databricks_labs_dqx_app/ui/routes/_sidebar/insights.tsx new file mode 100644 index 000000000..7f5036011 --- /dev/null +++ b/app/src/databricks_labs_dqx_app/ui/routes/_sidebar/insights.tsx @@ -0,0 +1,159 @@ +import { createFileRoute, Link } from "@tanstack/react-router"; +import { QueryErrorResetBoundary } from "@tanstack/react-query"; +import { ErrorBoundary } from "react-error-boundary"; +import { Suspense, useMemo } from "react"; +import { AlertCircle, ExternalLink, LayoutDashboard, Settings } from "lucide-react"; + +import { Button } from "@/components/ui/button"; +import { Skeleton } from "@/components/ui/skeleton"; +import { PageBreadcrumb } from "@/components/apx/PageBreadcrumb"; +import { ShinyText } from "@/components/anim/ShinyText"; +import { FadeIn } from "@/components/anim/FadeIn"; +import { usePermissions } from "@/hooks/use-permissions"; +import { useEmbeddedDashboard } from "@/lib/api-custom"; + +export const Route = createFileRoute("/_sidebar/insights")({ + component: () => , +}); + +function SectionError({ resetErrorBoundary }: { resetErrorBoundary: () => void }) { + return ( +
+

+ Failed to load Insights +

+ +
+ ); +} + +function EmptyState({ isAdmin }: { isAdmin: boolean }) { + return ( +
+
+
+ +
+
+

No dashboard configured

+

+ {isAdmin + ? "Pin a Databricks AI/BI dashboard ID in Configuration to render it here." + : "Ask an administrator to pin a dashboard in Configuration."} +

+
+ {isAdmin && ( + + )} +
+
+ ); +} + +function DashboardFrame() { + const { data, isLoading } = useEmbeddedDashboard(); + const { isAdmin } = usePermissions(); + + const embedUrl = useMemo(() => { + if (!data?.dashboard_id || !data.workspace_host) return null; + return `${data.workspace_host}/embed/dashboardsv3/${data.dashboard_id}`; + }, [data]); + + const openUrl = useMemo(() => { + if (!data?.dashboard_id || !data.workspace_host) return null; + return `${data.workspace_host}/dashboardsv3/${data.dashboard_id}`; + }, [data]); + + if (isLoading) { + return ; + } + + if (!data || !embedUrl) { + return ; + } + + const title = data.title || "Quality dashboard"; + + return ( +
+
+
+

{title}

+ {data.is_default && !data.is_set && ( + + Default + + )} +
+
+ {openUrl && ( + + )} + {isAdmin && ( + + )} +
+
+ {/* Same-workspace iframes inherit Databricks Apps' user-token-passthrough + cookies, so the dashboard sees each viewer's identity and UC enforces + row-level visibility on the data behind it. We deliberately don't + inject tokens or proxy the response — keeps PII off the app server. */} +