From 284058cac86c641040668ba8fc82eed0f76468a8 Mon Sep 17 00:00:00 2001 From: Alberto Hernandez <22517436+tetelio@users.noreply.github.com> Date: Fri, 29 May 2026 09:03:10 +0200 Subject: [PATCH 01/15] Add prek checks --- .gitignore | 3 + .prek/README.md | 118 ++++++++++++++++++++++ .prek/fingerprint.py | 87 ++++++++++++++++ .prek/gate.py | 212 +++++++++++++++++++++++++++++++++++++++ .prek/local.example.toml | 8 ++ .prek/pre-push-gate.sh | 4 + .prek/prek.toml | 11 ++ .prek/scopes.toml | 34 +++++++ CONTRIBUTING.md | 7 ++ Makefile | 23 ++++- 10 files changed, 505 insertions(+), 2 deletions(-) create mode 100644 .prek/README.md create mode 100644 .prek/fingerprint.py create mode 100644 .prek/gate.py create mode 100644 .prek/local.example.toml create mode 100755 .prek/pre-push-gate.sh create mode 100644 .prek/prek.toml create mode 100644 .prek/scopes.toml diff --git a/.gitignore b/.gitignore index a6090d76b1..934fbb428e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ +# Prek automatic checks +.prek/local.toml +.prek/.state.toml # Claude Code .worktrees diff --git a/.prek/README.md b/.prek/README.md new file mode 100644 index 0000000000..e2da41c69a --- /dev/null +++ b/.prek/README.md @@ -0,0 +1,118 @@ +# Pre-push local verification + +Optional pre-push checks via [prek](https://github.com/pre-commit/prek): run `make lint` and/or +`make test-common-p` before you push, but only when files in each check’s scope have changed since +the last successful run. + +The hook is **opt-in**. Without `.prek/local.toml`, the pre-push hook does nothing. + +## Quick start + +1. Copy `local.example.toml` to `local.toml` and set each check `mode` (`off`, `auto`, or `confirm`). +2. From the repo root: `make setup-hooks` +3. Push as usual. Bypass once: `git push --no-verify` +4. Preview without pushing: `make prek-dry`. Run the gate manually: `make prek` +5. 
Remove the hook: `make uninstall-hooks` + +## Prerequisites + +- Root dev env: `make dev` (for `make lint` and `make test-common-p`) +- Docs lint is part of `make lint`. Set up the docs project once: `cd docs && make dev` +- Optional `[gate] only_when_pr_open = true`: requires [GitHub CLI](https://cli.github.com/) (`gh auth login`) + +## Configuration (`local.toml`) + +Copy from `local.example.toml`. Gitignored — per-developer only. + +| Section | Keys | Meaning | +|---------|------|---------| +| `[gate]` | `only_when_pr_open` | If `true`, skip all checks unless the current branch has an open PR (`gh pr view`) | +| `[lint]` | `mode` | How to handle stale lint scope | +| `[test_common_p]` | `mode` | How to handle stale common-test scope | + +### Modes + +| Mode | When scope is stale | +|------|---------------------| +| `off` | Never run this check | +| `auto` | Run the make target | +| `confirm` | Ask on the terminal; declining aborts the push | + +**Confirm prompts** look like: `Run make lint before push? [Y/n] ` + +- Enter or `y` / `yes` → run the check +- `n` / `no` → abort push +- Non-interactive stdin (no TTY) → treated as declined (push blocked) + +## What runs + +Checks run in order; a failed lint blocks tests. + +| Check | Make target | Command recorded in state | +|-------|-------------|---------------------------| +| `lint` | `lint` | `make lint` | +| `test_common_p` | `test-common-p` | `make test-common-p` | + +`make lint` includes root linters plus docs lint (`cd docs && make lint`). + +A check runs only when its **fingerprint** (hash of tracked files in scope) differs from the last +successful entry in `.prek/.state.toml` (also gitignored). Passing updates state for that check only. + +## Scopes (`scopes.toml`) + +Defines which tracked files invalidate each check. Edit when adding new trees that should trigger +re-lint or re-test. 
+ +**Lint** — `dlt`, `tests`, `tools`, `docs` (`.py`, `.md`, `.ipynb`), plus root/docs config and +embedded-snippet lint setup files. + +**Common tests** — `dlt` and selected `tests/*` suites (see `scopes.toml`), plus `pyproject.toml`, +`uv.lock`, `tests/conftest.py`, `tests/load/test_dummy_client.py`. + +Inspect a fingerprint: + +```bash +uv run python .prek/fingerprint.py lint +uv run python .prek/fingerprint.py test_common_p +``` + +## Makefile targets + +| Target | Purpose | +|--------|---------| +| `make setup-hooks` | Install prek and the pre-push hook | +| `make uninstall-hooks` | Remove the pre-push hook | +| `make prek` | Run the gate now (same logic as on push) | +| `make prek-dry` | Show what would run; no make, no state update | + +prek is installed with `uv tool install`, not as a repo dependency. + +## Troubleshooting + +**Hook never runs checks** — Ensure `.prek/local.toml` exists and at least one check has `mode` not +`off`. Run `make prek-dry` to see whether the gate is active and which checks are stale. + +**Gate skipped** — With `only_when_pr_open = true`, there must be an open PR on the current branch. + +**Docs lint fails** — Run `cd docs && make dev`, then `cd docs && make lint` to see errors. + +**Want to re-run after a pass** — Delete the check’s section from `.prek/.state.toml`, or change a +file in that check’s scope. + +**Stale fingerprint / wrong cache** — Same as above; state stores the last successful fingerprint +per check. 
+ +## Files in this directory + +| File | Role | +|------|------| +| `README.md` | This guide | +| `local.example.toml` | Config template | +| `scopes.toml` | Fingerprint inputs per check | +| `gate.py` | Gate logic, prompts, make invocation, state | +| `fingerprint.py` | Scope hashing | +| `pre-push-gate.sh` | prek entrypoint | +| `prek.toml` | prek hook definition | +| `plan.md` | Maintainer notes / design sketch | + +Gitignored: `local.toml`, `.state.toml` diff --git a/.prek/fingerprint.py b/.prek/fingerprint.py new file mode 100644 index 0000000000..0f541a7c98 --- /dev/null +++ b/.prek/fingerprint.py @@ -0,0 +1,87 @@ +"""Compute content fingerprints for pre-push scope checks.""" + +from __future__ import annotations + +import fnmatch +import hashlib +import os +import subprocess +import sys +import tomllib +from pathlib import Path + +ROOT = Path(__file__).resolve().parent.parent +PREK_DIR = Path(__file__).resolve().parent +SCOPES_PATH = PREK_DIR / "scopes.toml" + + +def _git_ls_files(pathspecs: list[str]) -> list[str]: + if not pathspecs: + return [] + result = subprocess.run( + ["git", "ls-files", "--", *pathspecs], + cwd=ROOT, + check=True, + capture_output=True, + text=True, + ) + return [line for line in result.stdout.splitlines() if line] + + +def _matches_globs(path: str, globs: list[str]) -> bool: + name = os.path.basename(path) + return any(fnmatch.fnmatch(name, pattern) for pattern in globs) + + +def resolve_scope_files(scope: dict[str, list[str]]) -> list[str]: + files: set[str] = set(scope.get("files", [])) + paths = scope.get("paths", []) + globs = scope.get("globs", []) + + for path_prefix in paths: + candidates = _git_ls_files([path_prefix]) + if globs: + files.update(path for path in candidates if _matches_globs(path, globs)) + else: + files.update(candidates) + + existing = [path for path in files if (ROOT / path).is_file()] + # LC_ALL=C byte order matches default sort for ASCII repo paths. 
+ return sorted(existing) + + +def _file_digest(path: str) -> bytes: + digest = hashlib.sha256() + with open(ROOT / path, "rb") as file: + for chunk in iter(lambda: file.read(65536), b""): + digest.update(chunk) + return digest.digest() + + +def compute_fingerprint(scope_name: str) -> str: + with open(SCOPES_PATH, "rb") as file: + scopes = tomllib.load(file) + + try: + scope = scopes["scopes"][scope_name] + except KeyError as exc: + raise SystemExit(f"Unknown scope: {scope_name}") from exc + + aggregate = hashlib.sha256() + for path in resolve_scope_files(scope): + aggregate.update(path.encode()) + aggregate.update(b"\0") + aggregate.update(_file_digest(path)) + + return aggregate.hexdigest() + + +def main() -> None: + if len(sys.argv) != 2: + raise SystemExit(f"Usage: {sys.argv[0]} ") + + print(compute_fingerprint(sys.argv[1])) + + +if __name__ == "__main__": + main() diff --git a/.prek/gate.py b/.prek/gate.py new file mode 100644 index 0000000000..28200513fc --- /dev/null +++ b/.prek/gate.py @@ -0,0 +1,212 @@ +"""Pre-push gate: run lint/tests when scope fingerprints change.""" + +from __future__ import annotations + +import argparse +import subprocess +import sys +import tomllib +from datetime import datetime, timezone +from pathlib import Path +from typing import Any, Literal + +PREK_DIR = Path(__file__).resolve().parent +ROOT = PREK_DIR.parent +sys.path.insert(0, str(PREK_DIR)) + +from fingerprint import compute_fingerprint # noqa: E402 + +LOCAL_CONFIG_PATH = PREK_DIR / "local.toml" +STATE_PATH = PREK_DIR / ".state.toml" + +TMode = Literal["off", "auto", "confirm"] +TCheck = tuple[str, str, str] + +CHECKS: list[TCheck] = [ + ("lint", "lint", "make lint"), + ("test_common_p", "test-common-p", "make test-common-p"), +] +VALID_MODES = {"off", "auto", "confirm"} + + +def _load_local_config() -> dict[str, Any]: + if not LOCAL_CONFIG_PATH.is_file(): + return {} + with open(LOCAL_CONFIG_PATH, "rb") as file: + return tomllib.load(file) + + +def _load_state() -> 
dict[str, dict[str, str]]: + if not STATE_PATH.is_file(): + return {} + with open(STATE_PATH, "rb") as file: + return tomllib.load(file) + + +def _write_state(state: dict[str, dict[str, str]]) -> None: + lines: list[str] = [] + for check_name, data in state.items(): + lines.append(f"[{check_name}]") + for key, value in data.items(): + lines.append(f'{key} = "{value}"') + lines.append("") + STATE_PATH.write_text("\n".join(lines).rstrip() + "\n", encoding="utf-8") + + +def _as_bool(value: Any, *, key: str) -> bool: + if isinstance(value, bool): + return value + raise SystemExit(f"Invalid boolean {value!r} for {key} in {LOCAL_CONFIG_PATH}") + + +def _only_when_pr_open(local_config: dict[str, Any]) -> bool: + gate = local_config.get("gate", {}) + if not isinstance(gate, dict): + raise SystemExit(f"Invalid [gate] section in {LOCAL_CONFIG_PATH}") + value = gate.get("only_when_pr_open", False) + return _as_bool(value, key="gate.only_when_pr_open") + + +def _has_open_pr() -> bool: + result = subprocess.run( + ["gh", "pr", "view", "--json", "state", "-q", ".state"], + cwd=ROOT, + capture_output=True, + text=True, + ) + if result.returncode != 0: + return False + return result.stdout.strip().upper() == "OPEN" + + +def _gate_active(local_config: dict[str, Any]) -> tuple[bool, str]: + if not _only_when_pr_open(local_config): + return True, "only_when_pr_open=false" + if _has_open_pr(): + return True, "open PR on current branch" + return False, "only_when_pr_open=true and no open PR for current branch" + + +def _get_mode(local_config: dict[str, Any], check_name: str) -> TMode: + section = local_config.get(check_name, {}) + if not isinstance(section, dict): + raise SystemExit(f"Invalid [{check_name}] section in {LOCAL_CONFIG_PATH}") + mode = section.get("mode", "off") + if mode not in VALID_MODES: + raise SystemExit(f"Invalid mode {mode!r} for check {check_name!r} in {LOCAL_CONFIG_PATH}") + return mode # type: ignore[return-value] + + +def _confirm_run(make_command: str) -> 
bool: + if not sys.stdin.isatty(): + return False + reply = input(f"Run {make_command} before push? [Y/n] ").strip().lower() + if not reply: + return True + return reply in {"y", "yes"} + + +def _run_make(target: str) -> int: + print(f"Running make {target}...", flush=True) + return subprocess.run(["make", target], cwd=ROOT).returncode + + +def _plan_checks( + local_config: dict[str, Any], state: dict[str, dict[str, str]] +) -> list[tuple[TCheck, TMode, str, str, bool]]: + """Return (check, mode, fingerprint, cached_fingerprint, is_stale) per configured check.""" + planned: list[tuple[TCheck, TMode, str, str, bool]] = [] + for check in CHECKS: + check_name = check[0] + mode = _get_mode(local_config, check_name) + if mode == "off": + continue + fingerprint = compute_fingerprint(check_name) + cached = state.get(check_name, {}).get("fingerprint", "") + planned.append((check, mode, fingerprint, cached, fingerprint != cached)) + return planned + + +def _dry_run() -> int: + if not LOCAL_CONFIG_PATH.is_file(): + print("prek gate (dry-run): no .prek/local.toml — hook would no-op on push") + return 0 + + local_config = _load_local_config() + active, reason = _gate_active(local_config) + print(f"prek gate (dry-run): {LOCAL_CONFIG_PATH}") + print(f"gate active: {active} ({reason})") + if not active: + return 0 + + planned = _plan_checks(local_config, _load_state()) + if not planned: + print("no checks enabled (all off or empty config)") + return 0 + + for (check_name, _make_target, make_command), mode, fingerprint, cached, stale in planned: + if not stale: + print(f"[{check_name}] mode={mode} up to date ({fingerprint[:12]}…)") + continue + action = f"would run {make_command}" + if mode == "confirm": + if sys.stdin.isatty(): + action = f"would prompt, then run {make_command}" + else: + action = f"would block push ({make_command}, non-interactive)" + cached_label = cached[:12] if cached else "none" + print( + f"[{check_name}] mode={mode} stale ({fingerprint[:12]}…, was 
{cached_label}…) → {action}" + ) + return 0 + + +def main() -> int: + if not LOCAL_CONFIG_PATH.is_file(): + return 0 + + local_config = _load_local_config() + active, reason = _gate_active(local_config) + if not active: + print(f"prek gate: skipped ({reason})", file=sys.stderr) + return 0 + + state = _load_state() + + for check_name, make_target, make_command in CHECKS: + mode = _get_mode(local_config, check_name) + if mode == "off": + continue + + fingerprint = compute_fingerprint(check_name) + cached = state.get(check_name, {}) + if cached.get("fingerprint") == fingerprint: + continue + + if mode == "confirm" and not _confirm_run(make_command): + print(f"Declined {make_command}. Push aborted.", file=sys.stderr) + return 1 + + if _run_make(make_target) != 0: + print(f"{make_command} failed. Push aborted.", file=sys.stderr) + return 1 + + state[check_name] = { + "fingerprint": fingerprint, + "passed_at": datetime.now(timezone.utc).replace(microsecond=0).isoformat(), + "command": make_command, + } + _write_state(state) + + return 0 + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Pre-push gate for lint and common tests") + parser.add_argument( + "--dry-run", + action="store_true", + help="Show which checks would run without executing them or updating state", + ) + args = parser.parse_args() + raise SystemExit(_dry_run() if args.dry_run else main()) diff --git a/.prek/local.example.toml b/.prek/local.example.toml new file mode 100644 index 0000000000..e608b23104 --- /dev/null +++ b/.prek/local.example.toml @@ -0,0 +1,8 @@ +[gate] +only_when_pr_open = true # if true, run checks only when the current branch has an open PR + +[lint] +mode = "auto" # off | auto | confirm + +[test_common_p] +mode = "confirm" # off | auto | confirm diff --git a/.prek/pre-push-gate.sh b/.prek/pre-push-gate.sh new file mode 100755 index 0000000000..9d081d2a27 --- /dev/null +++ b/.prek/pre-push-gate.sh @@ -0,0 +1,4 @@ +#!/usr/bin/env bash +set -euo pipefail +cd 
"$(git rev-parse --show-toplevel)" +exec uv run python .prek/gate.py diff --git a/.prek/prek.toml b/.prek/prek.toml new file mode 100644 index 0000000000..e7fd989c8f --- /dev/null +++ b/.prek/prek.toml @@ -0,0 +1,11 @@ +[[repos]] +repo = "local" + +[[repos.hooks]] +id = "pre-push-gate" +name = "dlt pre-push gate" +language = "system" +entry = "bash .prek/pre-push-gate.sh" +pass_filenames = false +always_run = true +stages = ["pre-push"] diff --git a/.prek/scopes.toml b/.prek/scopes.toml new file mode 100644 index 0000000000..8612224f56 --- /dev/null +++ b/.prek/scopes.toml @@ -0,0 +1,34 @@ +[scopes.lint] +paths = ["dlt", "tests", "tools", "docs"] +globs = ["*.py", "*.md", "*.ipynb"] +files = [ + "pyproject.toml", + "uv.lock", + "Makefile", + "docs/pyproject.toml", + "docs/uv.lock", + "docs/Makefile", + "docs/docs_tools/snippets/lint_setup/template.py", + "docs/docs_tools/snippets/lint_setup/mypy.ini", +] + +[scopes.test_common_p] +paths = [ + "dlt", + "tests/common", + "tests/normalize", + "tests/extract", + "tests/pipeline", + "tests/reflection", + "tests/sources", + "tests/workspace", + "tests/libs", + "tests/destinations", +] +globs = ["*.py"] +files = [ + "pyproject.toml", + "uv.lock", + "tests/conftest.py", + "tests/load/test_dummy_client.py", +] diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index ec96ea3561..e37395f01c 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -152,6 +152,11 @@ Our goal is to maintain stability and compatibility across all environments. Ple `dlt` uses `mypy` and `flake8` (with several plugins) for linting. You can run the linter locally with `make lint`. We also run a code formatter with `black` which you can run with `make format`. The lint step will also ensure that the code is formatted correctly. It is good practice to run `make format && make lint` before every commit. +### Pre-push hooks (optional) + +You can run `make lint` and/or `make test-common-p` automatically before each push when tracked +files in scope change. 
Setup and configuration: [`.prek/README.md`](.prek/README.md) (`make setup-hooks`). + ## Testing `dlt` uses `pytest` for testing. @@ -180,6 +185,8 @@ If, for any reason, you need to access the `pytest-xdist` worker id, do it with You can view our GitHub Actions setup in `.github/workflows` to see which tests are run with which dependencies / extras installed, and which platforms and python versions are used for linting and testing. The main entry point is `.github/workflows/main.yml` which orchestrates all other workflows. Certain dependencies exist, for example no tests will be run if the linter reports problems. Some workflows use test matrixes to test several destinations or run tests on various operating systems and with various python versions or dependency resolution strategies. To reduce CI execution time and improve feedback cycles, parallel test execution via `pytest-xdist` has been enabled in CI. Try to run any test suite that is involved in your development work in parallel if possible, since that is how it will be run in CI. Some CI tests have been restricted the number of workers due to destination performance reasons. +PR label `test-remote-early`: jobs that normally wait for `test_common` (destination, sources, dbt runner, etc.) start in parallel with lint instead. Lint and common still run serially. Use this label only if you run lint and common locally (see [`.prek/README.md`](.prek/README.md)). 
+ ### Common Components To test components that don’t require external resources, run: diff --git a/Makefile b/Makefile index f77ecd4203..51f16892a2 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ .DEFAULT_GOAL := help -.PHONY: install-uv has-uv dev lint test test-common test-common-p reset-test-storage recreate-compiled-deps build-library-prerelease build-library publish-library test-load-local test-load-local-p test-load-local-postgres test-load-local-postgres-p install-snowflake-extras test-remote-snowflake test-remote-snowflake-p install-common-core test-common-core install-common-core-source test-common-core-source install-common-source install-pipeline-min test-pipeline-min install-pipeline-arrow test-pipeline-arrow install-pipeline-min-arrow test-pipeline-min-arrow install-workspace test-workspace test-workspace-dashboard install-hub-minimal test-hub-minimal test-hub install-pipeline-full test-pipeline-full install-pipeline-full-sql test-pipeline-full-sql install-sqlalchemy2 test-with-sqlalchemy-2 test-dest-load test-dest-remote-essential test-dest-remote-nonessential test-dbt-no-venv test-dbt-runner-venv test-sources-load test-sources-sql-database +.PHONY: install-uv has-uv dev lint lint-docs test test-common test-common-p reset-test-storage recreate-compiled-deps build-library-prerelease build-library publish-library test-load-local test-load-local-p test-load-local-postgres test-load-local-postgres-p install-snowflake-extras test-remote-snowflake test-remote-snowflake-p install-common-core test-common-core install-common-core-source test-common-core-source install-common-source install-pipeline-min test-pipeline-min install-pipeline-arrow test-pipeline-arrow install-pipeline-min-arrow test-pipeline-min-arrow install-workspace test-workspace test-workspace-dashboard install-hub-minimal test-hub-minimal test-hub install-pipeline-full test-pipeline-full install-pipeline-full-sql test-pipeline-full-sql install-sqlalchemy2 test-with-sqlalchemy-2 
test-dest-load test-dest-remote-essential test-dest-remote-nonessential test-dbt-no-venv test-dbt-runner-venv test-sources-load test-sources-sql-database setup-hooks uninstall-hooks prek prek-dry PYV=$(shell python3 -c "import sys;t='{v[0]}.{v[1]}'.format(v=list(sys.version_info[:2]));sys.stdout.write(t)") .SILENT:has-uv @@ -37,7 +37,10 @@ dev-airflow: has-uv ## Prepares development environment with airflow support dev-hub: has-uv ## Prepares development environment with hub support uv sync --all-extras --group workspace-deps --group dev --group providers --group pipeline --group sources --group sentry-sdk --group ibis --group adbc --group dashboard-tests -lint: lint-core lint-security lint-docstrings lint-lock lint-deps ## Runs all linters (mypy, ruff, flake8, bandit, docstrings, lockfile, deps) +lint: lint-core lint-security lint-docstrings lint-lock lint-deps lint-docs ## Runs all linters (mypy, ruff, flake8, bandit, docstrings, lockfile, deps, docs) + +lint-docs: ## Runs docs linting (embedded snippets, notebooks, docs tooling) + cd docs && $(MAKE) lint lint-lock: ## Checks uv lockfile is in sync uv lock --check @@ -151,6 +154,7 @@ TEST_COMMON_PATHS = \ tests/libs \ tests/destinations +test-common: PYTEST_MARKERS = not rfam test-common: ## Tests common components without external resources $(call RUN_XDIST_SAFE_SPLIT,$(TEST_COMMON_PATHS)) @@ -432,3 +436,18 @@ test-e2e-dashboard-headed: ## Runs dashboard e2e tests with visible browser create-test-pipelines: ## Creates test pipelines for manual dashboard testing uv run python tests/workspace/helpers/dashboard/example_pipelines.py + +PREK_VERSION ?= 0.4.2 + +prek: ## Run pre-push gate now (same as the git hook) + uv run python .prek/gate.py + +prek-dry: ## Show what the pre-push gate would run (no make, no state update) + uv run python .prek/gate.py --dry-run + +setup-hooks: ## Install prek and pre-push hook + uv tool install prek@$(PREK_VERSION) + prek install --hook-type pre-push --config .prek/prek.toml -f + 
+uninstall-hooks: ## Remove pre-push hook + prek uninstall --hook-type pre-push --config .prek/prek.toml || true From 038bd476bda48223680d9e109976e9d5c35451a9 Mon Sep 17 00:00:00 2001 From: Alberto Hernandez <22517436+tetelio@users.noreply.github.com> Date: Fri, 29 May 2026 12:39:05 +0200 Subject: [PATCH 02/15] Add better structure of commands --- .gitignore | 1 + .prek/README.md | 42 +++++++++++++------ .prek/gate.py | 46 +++++++++++++++++---- CONTRIBUTING.md | 4 +- Makefile | 43 ++++++++++++++++--- tests/load/ducklake/test_ducklake_client.py | 2 +- 6 files changed, 110 insertions(+), 28 deletions(-) diff --git a/.gitignore b/.gitignore index 934fbb428e..b94b0db950 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ # Prek automatic checks .prek/local.toml .prek/.state.toml +.prek/.enabled # Claude Code .worktrees diff --git a/.prek/README.md b/.prek/README.md index e2da41c69a..8add13d477 100644 --- a/.prek/README.md +++ b/.prek/README.md @@ -1,6 +1,6 @@ # Pre-push local verification -Optional pre-push checks via [prek](https://github.com/pre-commit/prek): run `make lint` and/or +Optional pre-push checks via [prek](https://github.com/j178/prek): run `make fl` and/or `make test-common-p` before you push, but only when files in each check’s scope have changed since the last successful run. @@ -9,15 +9,15 @@ The hook is **opt-in**. Without `.prek/local.toml`, the pre-push hook does nothi ## Quick start 1. Copy `local.example.toml` to `local.toml` and set each check `mode` (`off`, `auto`, or `confirm`). -2. From the repo root: `make setup-hooks` +2. From the repo root: `make install-prepush-hooks` 3. Push as usual. Bypass once: `git push --no-verify` 4. Preview without pushing: `make prek-dry`. Run the gate manually: `make prek` -5. Remove the hook: `make uninstall-hooks` +5. Remove the hook: `make uninstall-prepush-hooks` ## Prerequisites -- Root dev env: `make dev` (for `make lint` and `make test-common-p`) -- Docs lint is part of `make lint`.
Set up the docs project once: `cd docs && make dev` +- Root dev env: `make dev` (for `make fl` and `make test-common-p`) +- Docs Python env: `cd docs && make dev` (once). `make fl` also runs `npm install` in `docs/website` for Biome. - Optional `[gate] only_when_pr_open = true`: requires [GitHub CLI](https://cli.github.com/) (`gh auth login`) ## Configuration (`local.toml`) @@ -38,7 +38,7 @@ Copy from `local.example.toml`. Gitignored — per-developer only. | `auto` | Run the make target | | `confirm` | Ask on the terminal; declining aborts the push | -**Confirm prompts** look like: `Run make lint before push? [Y/n] ` +**Confirm prompts** look like: `Run make fl before push? [Y/n] ` - Enter or `y` / `yes` → run the check - `n` / `no` → abort push @@ -50,14 +50,23 @@ Checks run in order; a failed lint blocks tests. | Check | Make target | Command recorded in state | |-------|-------------|---------------------------| -| `lint` | `lint` | `make lint` | +| `lint` | `fl` | `make fl` | | `test_common_p` | `test-common-p` | `make test-common-p` | -`make lint` includes root linters plus docs lint (`cd docs && make lint`). +`make fl` runs format (root, docs, website deps) in parallel, then root and docs lint in parallel. A check runs only when its **fingerprint** (hash of tracked files in scope) differs from the last successful entry in `.prek/.state.toml` (also gitignored). Passing updates state for that check only. +After `make install-prepush-hooks`, successful `make fl` and `make test-common-p` also update +state (no extra commands). Plain `make lint` does not update prek state. + +## Unstaged changes on push + +prek may stash unstaged edits to `~/.cache/prek/patches/` while the hook runs, then restore them. +Built-in prek behavior (from pre-commit), not configurable. Keeps lint/tests from failing on WIP you +are not pushing. + ## Scopes (`scopes.toml`) Defines which tracked files invalidate each check. 
Edit when adding new trees that should trigger @@ -80,21 +89,30 @@ uv run python .prek/fingerprint.py test_common_p | Target | Purpose | |--------|---------| -| `make setup-hooks` | Install prek and the pre-push hook | -| `make uninstall-hooks` | Remove the pre-push hook | +| `make install-prepush-hooks` | Install prek pre-push hook and enable state recording (fails if another pre-push hook exists) | +| `make uninstall-prepush-hooks` | Remove the prek pre-push hook (no-op if none; fails if hook is not from prek) | | `make prek` | Run the gate now (same logic as on push) | | `make prek-dry` | Show what would run; no make, no state update | +| `make fl` | Format root + docs (parallel), then lint root + docs (parallel) | prek is installed with `uv tool install`, not as a repo dependency. ## Troubleshooting +**Existing pre-push hook** — `make install-prepush-hooks` refuses to install if `.git/hooks/pre-push` +already exists and is not from prek. prek cannot share the hook file with another tool. Remove or +relocate your hook first, or skip prek setup for now. + +**Uninstall with a foreign hook** — `make uninstall-prepush-hooks` only removes a prek-managed hook. +If `.git/hooks/pre-push` exists but was not installed via `make install-prepush-hooks`, uninstall +refuses to run so your hook is not deleted. + **Hook never runs checks** — Ensure `.prek/local.toml` exists and at least one check has `mode` not `off`. Run `make prek-dry` to see whether the gate is active and which checks are stale. **Gate skipped** — With `only_when_pr_open = true`, there must be an open PR on the current branch. -**Docs lint fails** — Run `cd docs && make dev`, then `cd docs && make lint` to see errors. +**Docs lint fails** — Run `cd docs && make dev`, then `make fl` (or `cd docs && make format && make lint`). **Want to re-run after a pass** — Delete the check’s section from `.prek/.state.toml`, or change a file in that check’s scope. @@ -115,4 +133,4 @@ per check. 
| `prek.toml` | prek hook definition | | `plan.md` | Maintainer notes / design sketch | -Gitignored: `local.toml`, `.state.toml` +Gitignored: `local.toml`, `.state.toml`, `.enabled` diff --git a/.prek/gate.py b/.prek/gate.py index 28200513fc..c7ab356b2e 100644 --- a/.prek/gate.py +++ b/.prek/gate.py @@ -18,14 +18,16 @@ LOCAL_CONFIG_PATH = PREK_DIR / "local.toml" STATE_PATH = PREK_DIR / ".state.toml" +ENABLED_PATH = PREK_DIR / ".enabled" TMode = Literal["off", "auto", "confirm"] TCheck = tuple[str, str, str] CHECKS: list[TCheck] = [ - ("lint", "lint", "make lint"), + ("lint", "fl", "make fl"), ("test_common_p", "test-common-p", "make test-common-p"), ] +CHECK_NAMES = frozenset(check[0] for check in CHECKS) VALID_MODES = {"off", "auto", "confirm"} @@ -111,6 +113,34 @@ def _run_make(target: str) -> int: return subprocess.run(["make", target], cwd=ROOT).returncode +def _make_command_for(check_name: str) -> str: + for name, _, make_command in CHECKS: + if name == check_name: + return make_command + valid = ", ".join(sorted(CHECK_NAMES)) + raise SystemExit(f"Unknown check {check_name!r}; expected one of: {valid}") + + +def _save_passed_check(check_name: str) -> None: + fingerprint = compute_fingerprint(check_name) + state = _load_state() + state[check_name] = { + "fingerprint": fingerprint, + "passed_at": datetime.now(timezone.utc).replace(microsecond=0).isoformat(), + "command": _make_command_for(check_name), + } + _write_state(state) + + +def record_check(check_name: str) -> int: + """Record a successful check from make fl / make test-common-p (requires .enabled).""" + if not ENABLED_PATH.is_file(): + return 0 + _make_command_for(check_name) + _save_passed_check(check_name) + return 0 + + def _plan_checks( local_config: dict[str, Any], state: dict[str, dict[str, str]] ) -> list[tuple[TCheck, TMode, str, str, bool]]: @@ -191,12 +221,7 @@ def main() -> int: print(f"{make_command} failed. 
Push aborted.", file=sys.stderr) return 1 - state[check_name] = { - "fingerprint": fingerprint, - "passed_at": datetime.now(timezone.utc).replace(microsecond=0).isoformat(), - "command": make_command, - } - _write_state(state) + _save_passed_check(check_name) return 0 @@ -208,5 +233,12 @@ def main() -> int: action="store_true", help="Show which checks would run without executing them or updating state", ) + parser.add_argument( + "--record", + metavar="CHECK", + help="Record a successful check (lint, test_common_p); used after make fl / make test-common-p", + ) args = parser.parse_args() + if args.record: + raise SystemExit(record_check(args.record)) raise SystemExit(_dry_run() if args.dry_run else main()) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index e37395f01c..0b4045d0ee 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -154,8 +154,8 @@ Our goal is to maintain stability and compatibility across all environments. Ple ### Pre-push hooks (optional) -You can run `make lint` and/or `make test-common-p` automatically before each push when tracked -files in scope change. Setup and configuration: [`.prek/README.md`](.prek/README.md) (`make setup-hooks`). +You can run `make fl` and/or `make test-common-p` automatically before each push when tracked +files in scope change. Setup and configuration: [`.prek/README.md`](.prek/README.md) (`make install-prepush-hooks`). 
## Testing diff --git a/Makefile b/Makefile index 51f16892a2..acadae9061 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ .DEFAULT_GOAL := help -.PHONY: install-uv has-uv dev lint lint-docs test test-common test-common-p reset-test-storage recreate-compiled-deps build-library-prerelease build-library publish-library test-load-local test-load-local-p test-load-local-postgres test-load-local-postgres-p install-snowflake-extras test-remote-snowflake test-remote-snowflake-p install-common-core test-common-core install-common-core-source test-common-core-source install-common-source install-pipeline-min test-pipeline-min install-pipeline-arrow test-pipeline-arrow install-pipeline-min-arrow test-pipeline-min-arrow install-workspace test-workspace test-workspace-dashboard install-hub-minimal test-hub-minimal test-hub install-pipeline-full test-pipeline-full install-pipeline-full-sql test-pipeline-full-sql install-sqlalchemy2 test-with-sqlalchemy-2 test-dest-load test-dest-remote-essential test-dest-remote-nonessential test-dbt-no-venv test-dbt-runner-venv test-sources-load test-sources-sql-database setup-hooks uninstall-hooks prek prek-dry +.PHONY: install-uv has-uv dev lint lint-root lint-docs format format-docs docs-website-deps fl test test-common test-common-p reset-test-storage recreate-compiled-deps build-library-prerelease build-library publish-library test-load-local test-load-local-p test-load-local-postgres test-load-local-postgres-p install-snowflake-extras test-remote-snowflake test-remote-snowflake-p install-common-core test-common-core install-common-core-source test-common-core-source install-common-source install-pipeline-min test-pipeline-min install-pipeline-arrow test-pipeline-arrow install-pipeline-min-arrow test-pipeline-min-arrow install-workspace test-workspace test-workspace-dashboard install-hub-minimal test-hub-minimal test-hub install-pipeline-full test-pipeline-full install-pipeline-full-sql test-pipeline-full-sql install-sqlalchemy2 
test-with-sqlalchemy-2 test-dest-load test-dest-remote-essential test-dest-remote-nonessential test-dbt-no-venv test-dbt-runner-venv test-sources-load test-sources-sql-database install-prepush-hooks uninstall-prepush-hooks prek prek-dry PYV=$(shell python3 -c "import sys;t='{v[0]}.{v[1]}'.format(v=list(sys.version_info[:2]));sys.stdout.write(t)") .SILENT:has-uv @@ -37,7 +37,21 @@ dev-airflow: has-uv ## Prepares development environment with airflow support dev-hub: has-uv ## Prepares development environment with hub support uv sync --all-extras --group workspace-deps --group dev --group providers --group pipeline --group sources --group sentry-sdk --group ibis --group adbc --group dashboard-tests -lint: lint-core lint-security lint-docstrings lint-lock lint-deps lint-docs ## Runs all linters (mypy, ruff, flake8, bandit, docstrings, lockfile, deps, docs) +lint: lint-root lint-docs ## Runs all linters (mypy, ruff, flake8, bandit, docstrings, lockfile, deps, docs) + +lint-root: lint-core lint-security lint-docstrings lint-lock lint-deps ## Root linters only (no docs) + +format-docs: ## Formats docs tooling, website, examples, and notebooks + cd docs && $(MAKE) format + +docs-website-deps: ## Install docs website node deps (biome; used by make fl) + cd docs/website && npm install + +fl: ## Format then lint root and docs in parallel (prek pre-push gate) + set -e; \ + $(MAKE) format & $(MAKE) format-docs & $(MAKE) docs-website-deps & wait; \ + $(MAKE) lint-root & $(MAKE) -C docs lint & wait + @if [ -f .prek/.enabled ]; then uv run python .prek/gate.py --record lint; fi lint-docs: ## Runs docs linting (embedded snippets, notebooks, docs tooling) cd docs && $(MAKE) lint @@ -160,6 +174,7 @@ test-common: ## Tests common components without external resources test-common-p: ## Tests common components in parallel $(MAKE) test-common PYTEST_XDIST_N=auto + @if [ -f .prek/.enabled ]; then uv run python .prek/gate.py --record test_common_p; fi # 
---------------------------------------------------------------------- # Local load tests @@ -445,9 +460,25 @@ prek: ## Run pre-push gate now (same as the git hook) prek-dry: ## Show what the pre-push gate would run (no make, no state update) uv run python .prek/gate.py --dry-run -setup-hooks: ## Install prek and pre-push hook +install-prepush-hooks: ## Install prek pre-push hook (fails if another pre-push hook exists) + @if [ -f .git/hooks/pre-push ] && ! grep -Fq 'File generated by prek' .git/hooks/pre-push 2>/dev/null; then \ + echo "Error: .git/hooks/pre-push already exists."; \ + echo "prek is not compatible with an existing pre-push hook."; \ + echo "Remove or relocate your hook, then run make install-prepush-hooks again."; \ + exit 1; \ + fi uv tool install prek@$(PREK_VERSION) prek install --hook-type pre-push --config .prek/prek.toml -f - -uninstall-hooks: ## Remove pre-push hook - prek uninstall --hook-type pre-push --config .prek/prek.toml || true + @touch .prek/.enabled + +uninstall-prepush-hooks: ## Remove prek pre-push hook (no-op if none; fails if hook is not from prek) + @if [ ! -f .git/hooks/pre-push ]; then \ + echo "No pre-push hook to remove."; \ + elif ! 
grep -Fq 'File generated by prek' .git/hooks/pre-push 2>/dev/null; then \ + echo "Error: .git/hooks/pre-push exists but was not installed by make install-prepush-hooks."; \ + echo "make uninstall-prepush-hooks will not remove it."; \ + exit 1; \ + else \ + prek uninstall --hook-type pre-push --config .prek/prek.toml; \ + fi + @rm -f .prek/.enabled diff --git a/tests/load/ducklake/test_ducklake_client.py b/tests/load/ducklake/test_ducklake_client.py index 8c942755ae..319ae51461 100644 --- a/tests/load/ducklake/test_ducklake_client.py +++ b/tests/load/ducklake/test_ducklake_client.py @@ -438,4 +438,4 @@ def test_ducklake_factory_instantiation() -> None: credentials = DuckLakeCredentials( "lake_catalog", catalog=catalog_credentials, - ) \ No newline at end of file + ) From 63e8e0259beae8b1f89523dedaae9d0a8631f7ca Mon Sep 17 00:00:00 2001 From: Alberto Hernandez <22517436+tetelio@users.noreply.github.com> Date: Fri, 29 May 2026 13:09:16 +0200 Subject: [PATCH 03/15] Better code orga --- .prek/README.md | 11 +- .prek/fingerprint.py | 87 ------- .prek/gate.py | 244 ------------------ .prek/local.example.toml | 4 +- .prek/pre-push-gate.sh | 4 - .prek/prek.toml | 2 +- Makefile | 8 +- tools/prek.py | 517 +++++++++++++++++++++++++++++++++++++++ tools/tests/test_prek.py | 194 +++++++++++++++ 9 files changed, 723 insertions(+), 348 deletions(-) delete mode 100644 .prek/fingerprint.py delete mode 100644 .prek/gate.py delete mode 100755 .prek/pre-push-gate.sh create mode 100644 tools/prek.py create mode 100644 tools/tests/test_prek.py diff --git a/.prek/README.md b/.prek/README.md index 8add13d477..33eb15bd6c 100644 --- a/.prek/README.md +++ b/.prek/README.md @@ -81,8 +81,8 @@ embedded-snippet lint setup files. 
Inspect a fingerprint: ```bash -uv run python .prek/fingerprint.py lint -uv run python .prek/fingerprint.py test_common_p +uv run python -m tools.prek fingerprint lint +uv run python -m tools.prek fingerprint test_common_p ``` ## Makefile targets @@ -127,10 +127,9 @@ per check. | `README.md` | This guide | | `local.example.toml` | Config template | | `scopes.toml` | Fingerprint inputs per check | -| `gate.py` | Gate logic, prompts, make invocation, state | -| `fingerprint.py` | Scope hashing | -| `pre-push-gate.sh` | prek entrypoint | -| `prek.toml` | prek hook definition | +| `prek.toml` | prek hook definition (`uv run python -m tools.prek`) | | `plan.md` | Maintainer notes / design sketch | +Implementation and tests: `tools/prek.py` (run via `python -m tools.prek`). + Gitignored: `local.toml`, `.state.toml`, `.enabled` diff --git a/.prek/fingerprint.py b/.prek/fingerprint.py deleted file mode 100644 index 0f541a7c98..0000000000 --- a/.prek/fingerprint.py +++ /dev/null @@ -1,87 +0,0 @@ -"""Compute content fingerprints for pre-push scope checks.""" - -from __future__ import annotations - -import fnmatch -import hashlib -import os -import subprocess -import sys -import tomllib -from pathlib import Path - -ROOT = Path(__file__).resolve().parent.parent -PREK_DIR = Path(__file__).resolve().parent -SCOPES_PATH = PREK_DIR / "scopes.toml" - - -def _git_ls_files(pathspecs: list[str]) -> list[str]: - if not pathspecs: - return [] - result = subprocess.run( - ["git", "ls-files", "--", *pathspecs], - cwd=ROOT, - check=True, - capture_output=True, - text=True, - ) - return [line for line in result.stdout.splitlines() if line] - - -def _matches_globs(path: str, globs: list[str]) -> bool: - name = os.path.basename(path) - return any(fnmatch.fnmatch(name, pattern) for pattern in globs) - - -def resolve_scope_files(scope: dict[str, list[str]]) -> list[str]: - files: set[str] = set(scope.get("files", [])) - paths = scope.get("paths", []) - globs = scope.get("globs", []) - - for 
path_prefix in paths: - candidates = _git_ls_files([path_prefix]) - if globs: - files.update(path for path in candidates if _matches_globs(path, globs)) - else: - files.update(candidates) - - existing = [path for path in files if (ROOT / path).is_file()] - # LC_ALL=C byte order matches default sort for ASCII repo paths. - return sorted(existing) - - -def _file_digest(path: str) -> bytes: - digest = hashlib.sha256() - with open(ROOT / path, "rb") as file: - for chunk in iter(lambda: file.read(65536), b""): - digest.update(chunk) - return digest.digest() - - -def compute_fingerprint(scope_name: str) -> str: - with open(SCOPES_PATH, "rb") as file: - scopes = tomllib.load(file) - - try: - scope = scopes["scopes"][scope_name] - except KeyError as exc: - raise SystemExit(f"Unknown scope: {scope_name}") from exc - - aggregate = hashlib.sha256() - for path in resolve_scope_files(scope): - aggregate.update(path.encode()) - aggregate.update(b"\0") - aggregate.update(_file_digest(path)) - - return aggregate.hexdigest() - - -def main() -> None: - if len(sys.argv) != 2: - raise SystemExit(f"Usage: {sys.argv[0]} ") - - print(compute_fingerprint(sys.argv[1])) - - -if __name__ == "__main__": - main() diff --git a/.prek/gate.py b/.prek/gate.py deleted file mode 100644 index c7ab356b2e..0000000000 --- a/.prek/gate.py +++ /dev/null @@ -1,244 +0,0 @@ -"""Pre-push gate: run lint/tests when scope fingerprints change.""" - -from __future__ import annotations - -import argparse -import subprocess -import sys -import tomllib -from datetime import datetime, timezone -from pathlib import Path -from typing import Any, Literal - -PREK_DIR = Path(__file__).resolve().parent -ROOT = PREK_DIR.parent -sys.path.insert(0, str(PREK_DIR)) - -from fingerprint import compute_fingerprint # noqa: E402 - -LOCAL_CONFIG_PATH = PREK_DIR / "local.toml" -STATE_PATH = PREK_DIR / ".state.toml" -ENABLED_PATH = PREK_DIR / ".enabled" - -TMode = Literal["off", "auto", "confirm"] -TCheck = tuple[str, str, str] - 
-CHECKS: list[TCheck] = [ - ("lint", "fl", "make fl"), - ("test_common_p", "test-common-p", "make test-common-p"), -] -CHECK_NAMES = frozenset(check[0] for check in CHECKS) -VALID_MODES = {"off", "auto", "confirm"} - - -def _load_local_config() -> dict[str, Any]: - if not LOCAL_CONFIG_PATH.is_file(): - return {} - with open(LOCAL_CONFIG_PATH, "rb") as file: - return tomllib.load(file) - - -def _load_state() -> dict[str, dict[str, str]]: - if not STATE_PATH.is_file(): - return {} - with open(STATE_PATH, "rb") as file: - return tomllib.load(file) - - -def _write_state(state: dict[str, dict[str, str]]) -> None: - lines: list[str] = [] - for check_name, data in state.items(): - lines.append(f"[{check_name}]") - for key, value in data.items(): - lines.append(f'{key} = "{value}"') - lines.append("") - STATE_PATH.write_text("\n".join(lines).rstrip() + "\n", encoding="utf-8") - - -def _as_bool(value: Any, *, key: str) -> bool: - if isinstance(value, bool): - return value - raise SystemExit(f"Invalid boolean {value!r} for {key} in {LOCAL_CONFIG_PATH}") - - -def _only_when_pr_open(local_config: dict[str, Any]) -> bool: - gate = local_config.get("gate", {}) - if not isinstance(gate, dict): - raise SystemExit(f"Invalid [gate] section in {LOCAL_CONFIG_PATH}") - value = gate.get("only_when_pr_open", False) - return _as_bool(value, key="gate.only_when_pr_open") - - -def _has_open_pr() -> bool: - result = subprocess.run( - ["gh", "pr", "view", "--json", "state", "-q", ".state"], - cwd=ROOT, - capture_output=True, - text=True, - ) - if result.returncode != 0: - return False - return result.stdout.strip().upper() == "OPEN" - - -def _gate_active(local_config: dict[str, Any]) -> tuple[bool, str]: - if not _only_when_pr_open(local_config): - return True, "only_when_pr_open=false" - if _has_open_pr(): - return True, "open PR on current branch" - return False, "only_when_pr_open=true and no open PR for current branch" - - -def _get_mode(local_config: dict[str, Any], check_name: str) -> 
TMode: - section = local_config.get(check_name, {}) - if not isinstance(section, dict): - raise SystemExit(f"Invalid [{check_name}] section in {LOCAL_CONFIG_PATH}") - mode = section.get("mode", "off") - if mode not in VALID_MODES: - raise SystemExit(f"Invalid mode {mode!r} for check {check_name!r} in {LOCAL_CONFIG_PATH}") - return mode # type: ignore[return-value] - - -def _confirm_run(make_command: str) -> bool: - if not sys.stdin.isatty(): - return False - reply = input(f"Run {make_command} before push? [Y/n] ").strip().lower() - if not reply: - return True - return reply in {"y", "yes"} - - -def _run_make(target: str) -> int: - print(f"Running make {target}...", flush=True) - return subprocess.run(["make", target], cwd=ROOT).returncode - - -def _make_command_for(check_name: str) -> str: - for name, _, make_command in CHECKS: - if name == check_name: - return make_command - valid = ", ".join(sorted(CHECK_NAMES)) - raise SystemExit(f"Unknown check {check_name!r}; expected one of: {valid}") - - -def _save_passed_check(check_name: str) -> None: - fingerprint = compute_fingerprint(check_name) - state = _load_state() - state[check_name] = { - "fingerprint": fingerprint, - "passed_at": datetime.now(timezone.utc).replace(microsecond=0).isoformat(), - "command": _make_command_for(check_name), - } - _write_state(state) - - -def record_check(check_name: str) -> int: - """Record a successful check from make fl / make test-common-p (requires .enabled).""" - if not ENABLED_PATH.is_file(): - return 0 - _make_command_for(check_name) - _save_passed_check(check_name) - return 0 - - -def _plan_checks( - local_config: dict[str, Any], state: dict[str, dict[str, str]] -) -> list[tuple[TCheck, TMode, str, str, bool]]: - """Return (check, mode, fingerprint, cached_fingerprint, is_stale) per configured check.""" - planned: list[tuple[TCheck, TMode, str, str, bool]] = [] - for check in CHECKS: - check_name = check[0] - mode = _get_mode(local_config, check_name) - if mode == "off": - 
continue - fingerprint = compute_fingerprint(check_name) - cached = state.get(check_name, {}).get("fingerprint", "") - planned.append((check, mode, fingerprint, cached, fingerprint != cached)) - return planned - - -def _dry_run() -> int: - if not LOCAL_CONFIG_PATH.is_file(): - print("prek gate (dry-run): no .prek/local.toml — hook would no-op on push") - return 0 - - local_config = _load_local_config() - active, reason = _gate_active(local_config) - print(f"prek gate (dry-run): {LOCAL_CONFIG_PATH}") - print(f"gate active: {active} ({reason})") - if not active: - return 0 - - planned = _plan_checks(local_config, _load_state()) - if not planned: - print("no checks enabled (all off or empty config)") - return 0 - - for (check_name, _make_target, make_command), mode, fingerprint, cached, stale in planned: - if not stale: - print(f"[{check_name}] mode={mode} up to date ({fingerprint[:12]}…)") - continue - action = f"would run {make_command}" - if mode == "confirm": - if sys.stdin.isatty(): - action = f"would prompt, then run {make_command}" - else: - action = f"would block push ({make_command}, non-interactive)" - cached_label = cached[:12] if cached else "none" - print( - f"[{check_name}] mode={mode} stale ({fingerprint[:12]}…, was {cached_label}…) → {action}" - ) - return 0 - - -def main() -> int: - if not LOCAL_CONFIG_PATH.is_file(): - return 0 - - local_config = _load_local_config() - active, reason = _gate_active(local_config) - if not active: - print(f"prek gate: skipped ({reason})", file=sys.stderr) - return 0 - - state = _load_state() - - for check_name, make_target, make_command in CHECKS: - mode = _get_mode(local_config, check_name) - if mode == "off": - continue - - fingerprint = compute_fingerprint(check_name) - cached = state.get(check_name, {}) - if cached.get("fingerprint") == fingerprint: - continue - - if mode == "confirm" and not _confirm_run(make_command): - print(f"Declined {make_command}. 
Push aborted.", file=sys.stderr) - return 1 - - if _run_make(make_target) != 0: - print(f"{make_command} failed. Push aborted.", file=sys.stderr) - return 1 - - _save_passed_check(check_name) - - return 0 - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Pre-push gate for lint and common tests") - parser.add_argument( - "--dry-run", - action="store_true", - help="Show which checks would run without executing them or updating state", - ) - parser.add_argument( - "--record", - metavar="CHECK", - help="Record a successful check (lint, test_common_p); used after make fl / make test-common-p", - ) - args = parser.parse_args() - if args.record: - raise SystemExit(record_check(args.record)) - raise SystemExit(_dry_run() if args.dry_run else main()) diff --git a/.prek/local.example.toml b/.prek/local.example.toml index e608b23104..54b35d63d6 100644 --- a/.prek/local.example.toml +++ b/.prek/local.example.toml @@ -1,8 +1,8 @@ [gate] -only_when_pr_open = true # if true, run checks only when the current branch has an open PR +only_when_pr_open = false # if true, run checks only when the current branch has an open PR [lint] mode = "auto" # off | auto | confirm [test_common_p] -mode = "confirm" # off | auto | confirm +mode = "off" # off | auto | confirm diff --git a/.prek/pre-push-gate.sh b/.prek/pre-push-gate.sh deleted file mode 100755 index 9d081d2a27..0000000000 --- a/.prek/pre-push-gate.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail -cd "$(git rev-parse --show-toplevel)" -exec uv run python .prek/gate.py diff --git a/.prek/prek.toml b/.prek/prek.toml index e7fd989c8f..bdf1dfdb32 100644 --- a/.prek/prek.toml +++ b/.prek/prek.toml @@ -5,7 +5,7 @@ repo = "local" id = "pre-push-gate" name = "dlt pre-push gate" language = "system" -entry = "bash .prek/pre-push-gate.sh" +entry = "uv run python -m tools.prek" pass_filenames = false always_run = true stages = ["pre-push"] diff --git a/Makefile b/Makefile index 
acadae9061..e2d2c4b399 100644 --- a/Makefile +++ b/Makefile @@ -51,7 +51,7 @@ fl: ## Format then lint root and docs in parallel (prek pre-push gate) set -e; \ $(MAKE) format & $(MAKE) format-docs & $(MAKE) docs-website-deps & wait; \ $(MAKE) lint-root & $(MAKE) -C docs lint & wait - @if [ -f .prek/.enabled ]; then uv run python .prek/gate.py --record lint; fi + @if [ -f .prek/.enabled ]; then uv run python -m tools.prek --record lint; fi lint-docs: ## Runs docs linting (embedded snippets, notebooks, docs tooling) cd docs && $(MAKE) lint @@ -174,7 +174,7 @@ test-common: ## Tests common components without external resources test-common-p: ## Tests common components in parallel $(MAKE) test-common PYTEST_XDIST_N=auto - @if [ -f .prek/.enabled ]; then uv run python .prek/gate.py --record test_common_p; fi + @if [ -f .prek/.enabled ]; then uv run python -m tools.prek --record test_common_p; fi # ---------------------------------------------------------------------- # Local load tests @@ -455,10 +455,10 @@ create-test-pipelines: ## Creates test pipelines for manual dashboard testing PREK_VERSION ?= 0.4.2 prek: ## Run pre-push gate now (same as the git hook) - uv run python .prek/gate.py + uv run python -m tools.prek prek-dry: ## Show what the pre-push gate would run (no make, no state update) - uv run python .prek/gate.py --dry-run + uv run python -m tools.prek --dry-run install-prepush-hooks: ## Install prek pre-push hook (fails if another pre-push hook exists) @if [ -f .git/hooks/pre-push ] && ! 
grep -Fq 'File generated by prek' .git/hooks/pre-push 2>/dev/null; then \ diff --git a/tools/prek.py b/tools/prek.py new file mode 100644 index 0000000000..3ab7ee4cd0 --- /dev/null +++ b/tools/prek.py @@ -0,0 +1,517 @@ +"""Pre-push gate: run lint/tests when scope fingerprints change.""" + +from __future__ import annotations + +import argparse +import fnmatch +import hashlib +import os +import subprocess +import sys +import tomllib +from collections.abc import Callable, Sequence +from dataclasses import dataclass +from datetime import datetime, timezone +from pathlib import Path +from typing import Any, Literal, NamedTuple + +Mode = Literal["off", "auto", "confirm"] +VALID_MODES = frozenset({"off", "auto", "confirm"}) + + +class Check(NamedTuple): + name: str + make_target: str + make_command: str + + +CHECKS: tuple[Check, ...] = ( + Check("lint", "fl", "make fl"), + Check("test_common_p", "test-common-p", "make test-common-p"), +) +CHECK_NAMES = frozenset(check.name for check in CHECKS) + + +class ScopeDef(NamedTuple): + files: tuple[str, ...] + paths: tuple[str, ...] + globs: tuple[str, ...] + + +class PlannedCheck(NamedTuple): + check: Check + mode: Mode + fingerprint: str + cached_fingerprint: str + stale: bool + + +class GateOutcome(NamedTuple): + exit_code: int + new_state: dict[str, dict[str, str]] + stderr_lines: tuple[str, ...] 
+ + +class ConfigError(Exception): + """Invalid prek local configuration.""" + + +class UnknownScopeError(Exception): + """Scope name is missing from scopes.toml.""" + + +def repo_root() -> Path: + result = subprocess.run( + ["git", "rev-parse", "--show-toplevel"], + capture_output=True, + text=True, + ) + if result.returncode == 0: + return Path(result.stdout.strip()) + return Path.cwd() + + +def default_prek_dir() -> Path: + return repo_root() / ".prek" + + +def parse_bool(value: Any, *, key: str) -> bool: + if isinstance(value, bool): + return value + raise ConfigError(f"Invalid boolean {value!r} for {key}") + + +def parse_only_when_pr_open(local_config: dict[str, Any]) -> bool: + gate = local_config.get("gate", {}) + if not isinstance(gate, dict): + raise ConfigError("Invalid [gate] section") + return parse_bool(gate.get("only_when_pr_open", False), key="gate.only_when_pr_open") + + +def parse_mode(local_config: dict[str, Any], check_name: str) -> Mode: + section = local_config.get(check_name, {}) + if not isinstance(section, dict): + raise ConfigError(f"Invalid [{check_name}] section") + mode = section.get("mode", "off") + if mode not in VALID_MODES: + raise ConfigError(f"Invalid mode {mode!r} for check {check_name!r}") + return mode # type: ignore[return-value] + + +def make_command_for(check_name: str) -> str: + for check in CHECKS: + if check.name == check_name: + return check.make_command + valid = ", ".join(sorted(CHECK_NAMES)) + raise ConfigError(f"Unknown check {check_name!r}; expected one of: {valid}") + + +def load_toml(path: Path) -> dict: + with open(path, "rb") as file: + return tomllib.load(file) + + +def load_local_config(path: Path) -> dict | None: + if not path.is_file(): + return None + return load_toml(path) + + +def load_state(path: Path) -> dict[str, dict[str, str]]: + if not path.is_file(): + return {} + data = load_toml(path) + return {name: dict(section) for name, section in data.items() if isinstance(section, dict)} + + +def 
write_state(path: Path, state: dict[str, dict[str, str]]) -> None: + lines: list[str] = [] + for check_name, data in state.items(): + lines.append(f"[{check_name}]") + for key, value in data.items(): + lines.append(f'{key} = "{value}"') + lines.append("") + path.write_text("\n".join(lines).rstrip() + "\n", encoding="utf-8") + + +def scope_from_dict(scope: dict[str, list[str]]) -> ScopeDef: + return ScopeDef( + files=tuple(scope.get("files", [])), + paths=tuple(scope.get("paths", [])), + globs=tuple(scope.get("globs", [])), + ) + + +def matches_globs(path: str, globs: list[str]) -> bool: + name = os.path.basename(path) + return any(fnmatch.fnmatch(name, pattern) for pattern in globs) + + +def resolve_scope_files( + scope: ScopeDef, + *, + list_tracked: Callable[[list[str]], list[str]], + root: Path, +) -> list[str]: + files: set[str] = set(scope.files) + for path_prefix in scope.paths: + candidates = list_tracked([path_prefix]) + if scope.globs: + files.update(path for path in candidates if matches_globs(path, list(scope.globs))) + else: + files.update(candidates) + return sorted(path for path in files if (root / path).is_file()) + + +def fingerprint_files(paths: list[str], read_bytes: Callable[[str], bytes]) -> str: + aggregate = hashlib.sha256() + for path in paths: + aggregate.update(path.encode()) + aggregate.update(b"\0") + aggregate.update(read_bytes(path)) + return aggregate.hexdigest() + + +def load_scopes(scopes_path: Path) -> dict[str, ScopeDef]: + raw = load_toml(scopes_path) + scopes = raw.get("scopes", {}) + if not isinstance(scopes, dict): + raise ValueError("Invalid [scopes] section in scopes.toml") + return { + name: scope_from_dict(section) + for name, section in scopes.items() + if isinstance(section, dict) + } + + +def git_ls_files(root: Path, pathspecs: list[str]) -> list[str]: + if not pathspecs: + return [] + result = subprocess.run( + ["git", "ls-files", "--", *pathspecs], + cwd=root, + check=True, + capture_output=True, + text=True, + ) + 
return [line for line in result.stdout.splitlines() if line] + + +def make_fingerprint_fn(root: Path, scopes_path: Path) -> Callable[[str], str]: + scopes = load_scopes(scopes_path) + + def fingerprint(scope_name: str) -> str: + try: + scope = scopes[scope_name] + except KeyError as exc: + raise UnknownScopeError(f"Unknown scope: {scope_name}") from exc + paths = resolve_scope_files( + scope, + list_tracked=lambda pathspecs: git_ls_files(root, pathspecs), + root=root, + ) + return fingerprint_files(paths, lambda path: (root / path).read_bytes()) + + return fingerprint + + +def gate_active(*, only_when_pr_open: bool, has_open_pr: bool) -> tuple[bool, str]: + if not only_when_pr_open: + return True, "only_when_pr_open=false" + if has_open_pr: + return True, "open PR on current branch" + return False, "only_when_pr_open=true and no open PR for current branch" + + +def plan_checks( + checks: Sequence[Check], + local_config: dict, + state: dict[str, dict[str, str]], + fingerprint: Callable[[str], str], +) -> list[PlannedCheck]: + planned: list[PlannedCheck] = [] + for check in checks: + mode = parse_mode(local_config, check.name) + if mode == "off": + continue + current = fingerprint(check.name) + cached = state.get(check.name, {}).get("fingerprint", "") + planned.append(PlannedCheck(check, mode, current, cached, current != cached)) + return planned + + +def with_passed_check( + state: dict[str, dict[str, str]], + check_name: str, + *, + fingerprint: str, + command: str, + passed_at: datetime, +) -> dict[str, dict[str, str]]: + updated = {name: dict(data) for name, data in state.items()} + updated[check_name] = { + "fingerprint": fingerprint, + "passed_at": passed_at.replace(microsecond=0).isoformat(), + "command": command, + } + return updated + + +def dry_run_no_config_line() -> str: + return "prek gate (dry-run): no .prek/local.toml — hook would no-op on push" + + +def dry_run_lines( + planned: list[PlannedCheck], + *, + local_config_path: Path, + active: bool, + 
reason: str, + is_tty: bool, +) -> list[str]: + lines = [ + f"prek gate (dry-run): {local_config_path}", + f"gate active: {active} ({reason})", + ] + if not active: + return lines + if not planned: + lines.append("no checks enabled (all off or empty config)") + return lines + + for item in planned: + check_name, make_command = item.check.name, item.check.make_command + if not item.stale: + lines.append(f"[{check_name}] mode={item.mode} up to date ({item.fingerprint[:12]}…)") + continue + action = f"would run {make_command}" + if item.mode == "confirm": + action = ( + f"would prompt, then run {make_command}" + if is_tty + else f"would block push ({make_command}, non-interactive)" + ) + cached_label = item.cached_fingerprint[:12] if item.cached_fingerprint else "none" + lines.append( + f"[{check_name}] mode={item.mode} stale ({item.fingerprint[:12]}…, " + f"was {cached_label}…) → {action}" + ) + return lines + + +@dataclass(frozen=True) +class GateDeps: + run_make: Callable[[str], int] + has_open_pr: Callable[[], bool] + confirm: Callable[[str], bool] + fingerprint: Callable[[str], str] + now: Callable[[], datetime] + is_tty: Callable[[], bool] + + @classmethod + def from_repo(cls, root: Path) -> GateDeps: + prek_dir = root / ".prek" + return cls( + run_make=lambda target: _run_make_target(root, target), + has_open_pr=lambda: _has_open_pr(root), + confirm=_confirm_run, + fingerprint=make_fingerprint_fn(root, prek_dir / "scopes.toml"), + now=lambda: datetime.now(timezone.utc), + is_tty=sys.stdin.isatty, + ) + + +def _run_make_target(root: Path, target: str) -> int: + print(f"Running make {target}...", flush=True) + return subprocess.run(["make", target], cwd=root).returncode + + +def _has_open_pr(root: Path) -> bool: + result = subprocess.run( + ["gh", "pr", "view", "--json", "state", "-q", ".state"], + cwd=root, + capture_output=True, + text=True, + ) + if result.returncode != 0: + return False + return result.stdout.strip().upper() == "OPEN" + + +def 
_confirm_run(make_command: str) -> bool: + if not sys.stdin.isatty(): + return False + reply = input(f"Run {make_command} before push? [Y/n] ").strip().lower() + return not reply or reply in {"y", "yes"} + + +def run_gate( + *, + local_config: dict, + state: dict[str, dict[str, str]], + deps: GateDeps, + checks: Sequence[Check] = CHECKS, +) -> GateOutcome: + active, reason = gate_active( + only_when_pr_open=parse_only_when_pr_open(local_config), + has_open_pr=deps.has_open_pr(), + ) + if not active: + return GateOutcome(0, state, (f"prek gate: skipped ({reason})",)) + + new_state = {name: dict(data) for name, data in state.items()} + for check in checks: + mode = parse_mode(local_config, check.name) + if mode == "off": + continue + + fingerprint = deps.fingerprint(check.name) + if new_state.get(check.name, {}).get("fingerprint") == fingerprint: + continue + + if mode == "confirm" and not deps.confirm(check.make_command): + return GateOutcome(1, state, (f"Declined {check.make_command}. Push aborted.",)) + + if deps.run_make(check.make_target) != 0: + return GateOutcome(1, state, (f"{check.make_command} failed. 
Push aborted.",)) + + new_state = with_passed_check( + new_state, + check.name, + fingerprint=fingerprint, + command=check.make_command, + passed_at=deps.now(), + ) + + return GateOutcome(0, new_state, ()) + + +def record_passed_check( + *, + check_name: str, + state: dict[str, dict[str, str]], + hooks_enabled: bool, + deps: GateDeps, +) -> tuple[dict[str, dict[str, str]], str | None]: + try: + command = make_command_for(check_name) + except ConfigError as exc: + return state, str(exc) + + if not hooks_enabled: + return state, None + + return ( + with_passed_check( + state, + check_name, + fingerprint=deps.fingerprint(check_name), + command=command, + passed_at=deps.now(), + ), + None, + ) + + +def main(*, prek_dir: Path | None = None, argv: list[str] | None = None) -> int: + prek_dir = prek_dir or default_prek_dir() + parser = argparse.ArgumentParser(description="Pre-push gate for lint and common tests") + parser.add_argument("--dry-run", action="store_true", help="Show planned checks without running them") + parser.add_argument( + "--record", + metavar="CHECK", + help="Record a successful check (lint, test_common_p)", + ) + args = parser.parse_args(argv) + + root = prek_dir.parent + deps = GateDeps.from_repo(root) + local_path = prek_dir / "local.toml" + state_path = prek_dir / ".state.toml" + enabled_path = prek_dir / ".enabled" + + try: + if args.record: + return _run_record(args.record, deps, state_path, enabled_path) + if args.dry_run: + return _run_dry_run(deps, local_path, state_path) + return _run_gate(deps, local_path, state_path) + except ConfigError as exc: + print(str(exc), file=sys.stderr) + return 1 + + +def main_fingerprint(*, prek_dir: Path | None = None, argv: list[str] | None = None) -> int: + prek_dir = prek_dir or default_prek_dir() + if argv is None: + argv = sys.argv[1:] + if len(argv) != 1: + print("Usage: python -m tools.prek fingerprint ", file=sys.stderr) + return 1 + + root = prek_dir.parent + try: + print(make_fingerprint_fn(root, 
prek_dir / "scopes.toml")(argv[0])) + except UnknownScopeError as exc: + print(str(exc), file=sys.stderr) + return 1 + return 0 + + +def _run_record(check_name: str, deps: GateDeps, state_path: Path, enabled_path: Path) -> int: + state = load_state(state_path) + new_state, error = record_passed_check( + check_name=check_name, + state=state, + hooks_enabled=enabled_path.is_file(), + deps=deps, + ) + if error: + print(error, file=sys.stderr) + return 1 + if new_state != state: + write_state(state_path, new_state) + return 0 + + +def _run_dry_run(deps: GateDeps, local_config_path: Path, state_path: Path) -> int: + local_config = load_local_config(local_config_path) + if local_config is None: + print(dry_run_no_config_line()) + return 0 + + active, reason = gate_active( + only_when_pr_open=parse_only_when_pr_open(local_config), + has_open_pr=deps.has_open_pr(), + ) + planned = plan_checks(CHECKS, local_config, load_state(state_path), deps.fingerprint) + for line in dry_run_lines( + planned, + local_config_path=local_config_path, + active=active, + reason=reason, + is_tty=deps.is_tty(), + ): + print(line) + return 0 + + +def _run_gate(deps: GateDeps, local_config_path: Path, state_path: Path) -> int: + local_config = load_local_config(local_config_path) + if local_config is None: + return 0 + + state = load_state(state_path) + outcome = run_gate(local_config=local_config, state=state, deps=deps) + for line in outcome.stderr_lines: + print(line, file=sys.stderr) + if outcome.new_state != state: + write_state(state_path, outcome.new_state) + return outcome.exit_code + + +if __name__ == "__main__": + cli_argv = sys.argv[1:] + if cli_argv and cli_argv[0] == "fingerprint": + raise SystemExit(main_fingerprint(argv=cli_argv[1:])) + raise SystemExit(main(argv=cli_argv)) diff --git a/tools/tests/test_prek.py b/tools/tests/test_prek.py new file mode 100644 index 0000000000..a3e53963ce --- /dev/null +++ b/tools/tests/test_prek.py @@ -0,0 +1,194 @@ +"""Tests for tools/prek.py.""" + 
+from datetime import datetime, timezone +from pathlib import Path + +import pytest + +from tools.prek import ( + CHECKS, + ConfigError, + GateDeps, + ScopeDef, + dry_run_lines, + fingerprint_files, + gate_active, + load_state, + make_command_for, + matches_globs, + parse_bool, + parse_mode, + plan_checks, + record_passed_check, + repo_root, + resolve_scope_files, + run_gate, + with_passed_check, + write_state, +) + +FIXED_NOW = datetime(2026, 5, 29, 12, 0, tzinfo=timezone.utc) + + +def make_deps( + *, + run_make=None, + has_open_pr=lambda: True, + confirm=lambda _: True, + fingerprint=lambda name: f"fp-{name}", + is_tty=lambda: True, +) -> GateDeps: + return GateDeps( + run_make=run_make or (lambda _target: 0), + has_open_pr=has_open_pr, + confirm=confirm, + fingerprint=fingerprint, + now=lambda: FIXED_NOW, + is_tty=is_tty, + ) + + +def test_repo_root_uses_git_toplevel(monkeypatch: pytest.MonkeyPatch) -> None: + class Result: + returncode = 0 + stdout = "/repo\n" + + monkeypatch.setattr("tools.prek.subprocess.run", lambda *args, **kwargs: Result()) + assert repo_root() == Path("/repo") + + +@pytest.mark.parametrize( + ("value", "expected"), + [(True, True), (False, False)], + ids=["true", "false"], +) +def test_parse_bool(value: bool, expected: bool) -> None: + assert parse_bool(value, key="test.key") is expected + + +def test_parse_mode_and_make_command() -> None: + assert parse_mode({"lint": {"mode": "auto"}}, "lint") == "auto" + assert make_command_for("lint") == "make fl" + with pytest.raises(ConfigError, match="Unknown check"): + make_command_for("missing") + + +@pytest.mark.parametrize( + ("path", "globs", "expected"), + [ + ("tests/common/test_utils.py", ["*.py"], True), + ("tests/common/readme.md", ["*.py"], False), + ], + ids=["match", "no-match"], +) +def test_matches_globs(path: str, globs: list[str], expected: bool) -> None: + assert matches_globs(path, globs) is expected + + +def test_resolve_scope_files_and_fingerprint(tmp_path: Path) -> None: + 
(tmp_path / "root.toml").write_text("c", encoding="utf-8") + (tmp_path / "pkg").mkdir() + (tmp_path / "pkg" / "keep.py").write_text("a", encoding="utf-8") + + scope = ScopeDef(files=("root.toml",), paths=("pkg",), globs=("*.py",)) + paths = resolve_scope_files( + scope, + list_tracked=lambda _: ["pkg/keep.py", "pkg/skip.txt"], + root=tmp_path, + ) + assert paths == ["pkg/keep.py", "root.toml"] + + files = {"a.py": b"one", "b.py": b"two"} + assert fingerprint_files(["a.py", "b.py"], files.__getitem__) == fingerprint_files( + ["a.py", "b.py"], files.__getitem__ + ) + + +@pytest.mark.parametrize( + ("only_when_pr_open", "has_open_pr", "active"), + [(False, False, True), (True, False, False), (True, True, True)], + ids=["always", "no-pr", "open-pr"], +) +def test_gate_active(only_when_pr_open: bool, has_open_pr: bool, active: bool) -> None: + result, _reason = gate_active(only_when_pr_open=only_when_pr_open, has_open_pr=has_open_pr) + assert result is active + + +def test_plan_checks_and_dry_run() -> None: + planned = plan_checks( + CHECKS, + {"lint": {"mode": "confirm"}, "test_common_p": {"mode": "off"}}, + {}, + lambda _name: "fingerprint-value", + ) + lines = dry_run_lines( + planned, + local_config_path=Path(".prek/local.toml"), + active=True, + reason="only_when_pr_open=false", + is_tty=False, + ) + assert any("would block push" in line for line in lines) + + +def test_run_gate_flow() -> None: + calls: list[str] = [] + deps = make_deps(run_make=lambda target: calls.append(target) or 0) + + skipped = run_gate( + local_config={"gate": {"only_when_pr_open": True}, "lint": {"mode": "auto"}}, + state={}, + deps=make_deps(has_open_pr=lambda: False), + ) + assert skipped.exit_code == 0 + assert "skipped" in skipped.stderr_lines[0] + + passed = run_gate(local_config={"lint": {"mode": "auto"}}, state={}, deps=deps) + assert passed.exit_code == 0 + assert calls == ["fl"] + assert passed.new_state["lint"]["fingerprint"] == "fp-lint" + + failed = run_gate( + 
local_config={"lint": {"mode": "auto"}}, + state={"lint": {"fingerprint": "old"}}, + deps=make_deps(run_make=lambda _target: 1), + ) + assert failed.exit_code == 1 + assert failed.new_state == {"lint": {"fingerprint": "old"}} + + declined = run_gate( + local_config={"lint": {"mode": "confirm"}}, + state={}, + deps=make_deps(confirm=lambda _: False), + ) + assert declined.exit_code == 1 + + +def test_record_passed_check() -> None: + deps = make_deps() + unchanged, error = record_passed_check( + check_name="lint", state={}, hooks_enabled=False, deps=deps + ) + assert error is None and unchanged == {} + + updated, error = record_passed_check( + check_name="lint", state={}, hooks_enabled=True, deps=deps + ) + assert error is None + assert updated["lint"]["command"] == "make fl" + + _, error = record_passed_check(check_name="missing", state={}, hooks_enabled=True, deps=deps) + assert error is not None + + +def test_with_passed_check_and_state_io(tmp_path: Path) -> None: + state = {"lint": {"fingerprint": "old", "passed_at": "t0", "command": "make fl"}} + updated = with_passed_check( + state, "lint", fingerprint="new", command="make fl", passed_at=FIXED_NOW + ) + assert state["lint"]["fingerprint"] == "old" + assert updated["lint"]["fingerprint"] == "new" + + state_path = tmp_path / ".state.toml" + write_state(state_path, updated) + assert load_state(state_path) == updated From 0c686732082621986f9ac3225e333c016e499e30 Mon Sep 17 00:00:00 2001 From: Alberto Hernandez <22517436+tetelio@users.noreply.github.com> Date: Fri, 29 May 2026 13:13:57 +0200 Subject: [PATCH 04/15] Fix typing --- tools/prek.py | 20 +++++++++++--------- tools/tests/test_prek.py | 23 ++++++++++++++--------- 2 files changed, 25 insertions(+), 18 deletions(-) diff --git a/tools/prek.py b/tools/prek.py index 3ab7ee4cd0..16545889fc 100644 --- a/tools/prek.py +++ b/tools/prek.py @@ -8,12 +8,12 @@ import os import subprocess import sys -import tomllib +import tomllib # type: ignore[import-untyped] from 
collections.abc import Callable, Sequence from dataclasses import dataclass from datetime import datetime, timezone from pathlib import Path -from typing import Any, Literal, NamedTuple +from typing import Any, Literal, NamedTuple, cast Mode = Literal["off", "auto", "confirm"] VALID_MODES = frozenset({"off", "auto", "confirm"}) @@ -95,7 +95,7 @@ def parse_mode(local_config: dict[str, Any], check_name: str) -> Mode: mode = section.get("mode", "off") if mode not in VALID_MODES: raise ConfigError(f"Invalid mode {mode!r} for check {check_name!r}") - return mode # type: ignore[return-value] + return cast(Mode, mode) def make_command_for(check_name: str) -> str: @@ -106,12 +106,12 @@ def make_command_for(check_name: str) -> str: raise ConfigError(f"Unknown check {check_name!r}; expected one of: {valid}") -def load_toml(path: Path) -> dict: +def load_toml(path: Path) -> dict[str, Any]: with open(path, "rb") as file: - return tomllib.load(file) + return cast(dict[str, Any], tomllib.load(file)) -def load_local_config(path: Path) -> dict | None: +def load_local_config(path: Path) -> dict[str, Any] | None: if not path.is_file(): return None return load_toml(path) @@ -225,7 +225,7 @@ def gate_active(*, only_when_pr_open: bool, has_open_pr: bool) -> tuple[bool, st def plan_checks( checks: Sequence[Check], - local_config: dict, + local_config: dict[str, Any], state: dict[str, dict[str, str]], fingerprint: Callable[[str], str], ) -> list[PlannedCheck]: @@ -347,7 +347,7 @@ def _confirm_run(make_command: str) -> bool: def run_gate( *, - local_config: dict, + local_config: dict[str, Any], state: dict[str, dict[str, str]], deps: GateDeps, checks: Sequence[Check] = CHECKS, @@ -416,7 +416,9 @@ def record_passed_check( def main(*, prek_dir: Path | None = None, argv: list[str] | None = None) -> int: prek_dir = prek_dir or default_prek_dir() parser = argparse.ArgumentParser(description="Pre-push gate for lint and common tests") - parser.add_argument("--dry-run", action="store_true", 
help="Show planned checks without running them") + parser.add_argument( + "--dry-run", action="store_true", help="Show planned checks without running them" + ) parser.add_argument( "--record", metavar="CHECK", diff --git a/tools/tests/test_prek.py b/tools/tests/test_prek.py index a3e53963ce..eca16e5a7f 100644 --- a/tools/tests/test_prek.py +++ b/tools/tests/test_prek.py @@ -1,7 +1,9 @@ """Tests for tools/prek.py.""" +from collections.abc import Callable from datetime import datetime, timezone from pathlib import Path +from typing import Optional import pytest @@ -32,11 +34,11 @@ def make_deps( *, - run_make=None, - has_open_pr=lambda: True, - confirm=lambda _: True, - fingerprint=lambda name: f"fp-{name}", - is_tty=lambda: True, + run_make: Optional[Callable[[str], int]] = None, + has_open_pr: Callable[[], bool] = lambda: True, + confirm: Callable[[str], bool] = lambda _: True, + fingerprint: Callable[[str], str] = lambda name: f"fp-{name}", + is_tty: Callable[[], bool] = lambda: True, ) -> GateDeps: return GateDeps( run_make=run_make or (lambda _target: 0), @@ -133,7 +135,12 @@ def test_plan_checks_and_dry_run() -> None: def test_run_gate_flow() -> None: calls: list[str] = [] - deps = make_deps(run_make=lambda target: calls.append(target) or 0) + + def record_make(target: str) -> int: + calls.append(target) + return 0 + + deps = make_deps(run_make=record_make) skipped = run_gate( local_config={"gate": {"only_when_pr_open": True}, "lint": {"mode": "auto"}}, @@ -171,9 +178,7 @@ def test_record_passed_check() -> None: ) assert error is None and unchanged == {} - updated, error = record_passed_check( - check_name="lint", state={}, hooks_enabled=True, deps=deps - ) + updated, error = record_passed_check(check_name="lint", state={}, hooks_enabled=True, deps=deps) assert error is None assert updated["lint"]["command"] == "make fl" From 7a718942896cc2a0e6162be5b6ac30d8291cb290 Mon Sep 17 00:00:00 2001 From: Alberto Hernandez <22517436+tetelio@users.noreply.github.com> 
Date: Fri, 29 May 2026 13:30:45 +0200 Subject: [PATCH 05/15] FL --- tools/prek.py | 13 +++++++++---- tools/tests/test_prek.py | 4 ++-- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/tools/prek.py b/tools/prek.py index 16545889fc..e84e6d2208 100644 --- a/tools/prek.py +++ b/tools/prek.py @@ -1,5 +1,8 @@ """Pre-push gate: run lint/tests when scope fingerprints change.""" +# ruff: noqa: T201 +# flake8: noqa: T201 + from __future__ import annotations import argparse @@ -11,10 +14,12 @@ import tomllib # type: ignore[import-untyped] from collections.abc import Callable, Sequence from dataclasses import dataclass -from datetime import datetime, timezone from pathlib import Path from typing import Any, Literal, NamedTuple, cast +import pendulum +from pendulum.datetime import DateTime + Mode = Literal["off", "auto", "confirm"] VALID_MODES = frozenset({"off", "auto", "confirm"}) @@ -246,7 +251,7 @@ def with_passed_check( *, fingerprint: str, command: str, - passed_at: datetime, + passed_at: DateTime, ) -> dict[str, dict[str, str]]: updated = {name: dict(data) for name, data in state.items()} updated[check_name] = { @@ -305,7 +310,7 @@ class GateDeps: has_open_pr: Callable[[], bool] confirm: Callable[[str], bool] fingerprint: Callable[[str], str] - now: Callable[[], datetime] + now: Callable[[], DateTime] is_tty: Callable[[], bool] @classmethod @@ -316,7 +321,7 @@ def from_repo(cls, root: Path) -> GateDeps: has_open_pr=lambda: _has_open_pr(root), confirm=_confirm_run, fingerprint=make_fingerprint_fn(root, prek_dir / "scopes.toml"), - now=lambda: datetime.now(timezone.utc), + now=lambda: pendulum.now("UTC"), is_tty=sys.stdin.isatty, ) diff --git a/tools/tests/test_prek.py b/tools/tests/test_prek.py index eca16e5a7f..d1a460de49 100644 --- a/tools/tests/test_prek.py +++ b/tools/tests/test_prek.py @@ -1,10 +1,10 @@ """Tests for tools/prek.py.""" from collections.abc import Callable -from datetime import datetime, timezone from pathlib import Path from typing 
import Optional +import pendulum import pytest from tools.prek import ( @@ -29,7 +29,7 @@ write_state, ) -FIXED_NOW = datetime(2026, 5, 29, 12, 0, tzinfo=timezone.utc) +FIXED_NOW = pendulum.datetime(2026, 5, 29, 12, 0, 0, tz=pendulum.UTC) def make_deps( From 7a04500f9efc3b56c7ab2a07ce3d6980a502ca08 Mon Sep 17 00:00:00 2001 From: Alberto Hernandez <22517436+tetelio@users.noreply.github.com> Date: Fri, 29 May 2026 13:41:32 +0200 Subject: [PATCH 06/15] Fix makefile and contributing --- CONTRIBUTING.md | 2 +- Makefile | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 0b4045d0ee..bf4b5e304b 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -185,7 +185,7 @@ If, for any reason, you need to access the `pytest-xdist` worker id, do it with You can view our GitHub Actions setup in `.github/workflows` to see which tests are run with which dependencies / extras installed, and which platforms and python versions are used for linting and testing. The main entry point is `.github/workflows/main.yml` which orchestrates all other workflows. Certain dependencies exist, for example no tests will be run if the linter reports problems. Some workflows use test matrixes to test several destinations or run tests on various operating systems and with various python versions or dependency resolution strategies. To reduce CI execution time and improve feedback cycles, parallel test execution via `pytest-xdist` has been enabled in CI. Try to run any test suite that is involved in your development work in parallel if possible, since that is how it will be run in CI. Some CI tests have been restricted the number of workers due to destination performance reasons. -PR label `test-remote-early`: jobs that normally wait for `test_common` (destination, sources, dbt runner, etc.) start in parallel with lint instead. Lint and common still run serially. 
Use this label only if you run lint and common locally (see [`.prek/README.md`](.prek/README.md)). +PR label `test-destinations-early`: jobs that normally wait for `test_common` (destination, sources, dbt runner, etc.) start in parallel with lint instead. Lint and common still run serially. Use this label only if you run lint and common locally (see [`.prek/README.md`](.prek/README.md)). ### Common Components diff --git a/Makefile b/Makefile index e2d2c4b399..0e4eeb19fb 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ .DEFAULT_GOAL := help -.PHONY: install-uv has-uv dev lint lint-root lint-docs format format-docs docs-website-deps fl test test-common test-common-p reset-test-storage recreate-compiled-deps build-library-prerelease build-library publish-library test-load-local test-load-local-p test-load-local-postgres test-load-local-postgres-p install-snowflake-extras test-remote-snowflake test-remote-snowflake-p install-common-core test-common-core install-common-core-source test-common-core-source install-common-source install-pipeline-min test-pipeline-min install-pipeline-arrow test-pipeline-arrow install-pipeline-min-arrow test-pipeline-min-arrow install-workspace test-workspace test-workspace-dashboard install-hub-minimal test-hub-minimal test-hub install-pipeline-full test-pipeline-full install-pipeline-full-sql test-pipeline-full-sql install-sqlalchemy2 test-with-sqlalchemy-2 test-dest-load test-dest-remote-essential test-dest-remote-nonessential test-dbt-no-venv test-dbt-runner-venv test-sources-load test-sources-sql-database install-prepush-hooks uninstall-prepush-hooks prek prek-dry +.PHONY: install-uv has-uv dev lint lint-full lint-docs format format-docs docs-website-deps fl test test-common test-common-p reset-test-storage recreate-compiled-deps build-library-prerelease build-library publish-library test-load-local test-load-local-p test-load-local-postgres test-load-local-postgres-p install-snowflake-extras test-remote-snowflake 
test-remote-snowflake-p install-common-core test-common-core install-common-core-source test-common-core-source install-common-source install-pipeline-min test-pipeline-min install-pipeline-arrow test-pipeline-arrow install-pipeline-min-arrow test-pipeline-min-arrow install-workspace test-workspace test-workspace-dashboard install-hub-minimal test-hub-minimal test-hub install-pipeline-full test-pipeline-full install-pipeline-full-sql test-pipeline-full-sql install-sqlalchemy2 test-with-sqlalchemy-2 test-dest-load test-dest-remote-essential test-dest-remote-nonessential test-dbt-no-venv test-dbt-runner-venv test-sources-load test-sources-sql-database install-prepush-hooks uninstall-prepush-hooks prek prek-dry PYV=$(shell python3 -c "import sys;t='{v[0]}.{v[1]}'.format(v=list(sys.version_info[:2]));sys.stdout.write(t)") .SILENT:has-uv @@ -37,9 +37,12 @@ dev-airflow: has-uv ## Prepares development environment with airflow support dev-hub: has-uv ## Prepares development environment with hub support uv sync --all-extras --group workspace-deps --group dev --group providers --group pipeline --group sources --group sentry-sdk --group ibis --group adbc --group dashboard-tests -lint: lint-root lint-docs ## Runs all linters (mypy, ruff, flake8, bandit, docstrings, lockfile, deps, docs) +lint: lint-core lint-security lint-docstrings lint-lock lint-deps ## Runs all linters (mypy, ruff, flake8, bandit, docstrings, lockfile, deps) -lint-root: lint-core lint-security lint-docstrings lint-lock lint-deps ## Root linters only (no docs) +lint-full: lint lint-docs ## Root + docs lint (sequential) + +lint-docs: ## Runs docs linting (embedded snippets, notebooks, docs tooling) + cd docs && $(MAKE) lint format-docs: ## Formats docs tooling, website, examples, and notebooks cd docs && $(MAKE) format @@ -50,12 +53,9 @@ docs-website-deps: ## Install docs website node deps (biome; used by make fl) fl: ## Format then lint root and docs in parallel (prek pre-push gate) set -e; \ $(MAKE) format 
& $(MAKE) format-docs & $(MAKE) docs-website-deps & wait; \ - $(MAKE) lint-root & $(MAKE) -C docs lint & wait + $(MAKE) lint & $(MAKE) -C docs lint & wait @if [ -f .prek/.enabled ]; then uv run python -m tools.prek --record lint; fi -lint-docs: ## Runs docs linting (embedded snippets, notebooks, docs tooling) - cd docs && $(MAKE) lint - lint-lock: ## Checks uv lockfile is in sync uv lock --check From 3c295c0ff96d8087c847d662a563492930094249 Mon Sep 17 00:00:00 2001 From: Alberto Hernandez <22517436+tetelio@users.noreply.github.com> Date: Fri, 29 May 2026 13:43:36 +0200 Subject: [PATCH 07/15] Update toml --- .prek/scopes.toml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.prek/scopes.toml b/.prek/scopes.toml index 8612224f56..f9f943bcf9 100644 --- a/.prek/scopes.toml +++ b/.prek/scopes.toml @@ -4,10 +4,8 @@ globs = ["*.py", "*.md", "*.ipynb"] files = [ "pyproject.toml", "uv.lock", - "Makefile", "docs/pyproject.toml", "docs/uv.lock", - "docs/Makefile", "docs/docs_tools/snippets/lint_setup/template.py", "docs/docs_tools/snippets/lint_setup/mypy.ini", ] From aa5b63024e8907f1bed05bd32d6e305562ac02c8 Mon Sep 17 00:00:00 2001 From: Alberto Hernandez <22517436+tetelio@users.noreply.github.com> Date: Fri, 29 May 2026 14:22:29 +0200 Subject: [PATCH 08/15] Use tomli to support 3.10 --- pyproject.toml | 1 + tools/prek.py | 4 ++-- uv.lock | 2 ++ 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 993bb6eeae..85f3065a55 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -246,6 +246,7 @@ dev = [ "pytest-order>=1.0.0", "pytest-cases>=3.8.6", "pytest-forked>=1.3.0", + "tomli>=2.0.1,<3", "types-PyYAML>=6.0.7", "types-cachetools>=4.2.9", "types-protobuf>=3.19.8", diff --git a/tools/prek.py b/tools/prek.py index e84e6d2208..d592a6e367 100644 --- a/tools/prek.py +++ b/tools/prek.py @@ -11,7 +11,7 @@ import os import subprocess import sys -import tomllib # type: ignore[import-untyped] +import tomli as tomllib from collections.abc 
import Callable, Sequence from dataclasses import dataclass from pathlib import Path @@ -113,7 +113,7 @@ def make_command_for(check_name: str) -> str: def load_toml(path: Path) -> dict[str, Any]: with open(path, "rb") as file: - return cast(dict[str, Any], tomllib.load(file)) + return tomllib.load(file) def load_local_config(path: Path) -> dict[str, Any] | None: diff --git a/uv.lock b/uv.lock index 95c2348184..c2e6743e4e 100644 --- a/uv.lock +++ b/uv.lock @@ -2727,6 +2727,7 @@ dev = [ { name = "requests-mock" }, { name = "ruff" }, { name = "sqlfluff" }, + { name = "tomli" }, { name = "types-cachetools" }, { name = "types-click" }, { name = "types-croniter" }, @@ -2965,6 +2966,7 @@ dev = [ { name = "requests-mock", specifier = ">=1.10.0,<2" }, { name = "ruff", specifier = ">=0.3.2,<0.4" }, { name = "sqlfluff", specifier = ">=2.3.2,<3" }, + { name = "tomli", specifier = ">=2.0.1,<3" }, { name = "types-cachetools", specifier = ">=4.2.9" }, { name = "types-click", specifier = ">=7.1.8,<8" }, { name = "types-croniter", specifier = ">=6.0.0" }, From 15ba2a0dbc5ec5351bef2d23f0721ed324445410 Mon Sep 17 00:00:00 2001 From: Alberto Hernandez <22517436+tetelio@users.noreply.github.com> Date: Fri, 29 May 2026 14:24:57 +0200 Subject: [PATCH 09/15] ADdd tomli to uv lock from docs too --- docs/uv.lock | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/uv.lock b/docs/uv.lock index 2520d7ee4a..00a591c540 100644 --- a/docs/uv.lock +++ b/docs/uv.lock @@ -1096,6 +1096,7 @@ dev = [ { name = "requests-mock", specifier = ">=1.10.0,<2" }, { name = "ruff", specifier = ">=0.3.2,<0.4" }, { name = "sqlfluff", specifier = ">=2.3.2,<3" }, + { name = "tomli", specifier = ">=2.0.1,<3" }, { name = "types-cachetools", specifier = ">=4.2.9" }, { name = "types-click", specifier = ">=7.1.8,<8" }, { name = "types-croniter", specifier = ">=6.0.0" }, @@ -1768,6 +1769,7 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/7d/ed/6bfa4109fcb23a58819600392564fea69cdc6551ffd5e69ccf1d52a40cbc/greenlet-3.2.4-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:8c68325b0d0acf8d91dde4e6f930967dd52a5302cd4062932a6b2e7c2969f47c", size = 271061, upload-time = "2025-08-07T13:17:15.373Z" }, { url = "https://files.pythonhosted.org/packages/2a/fc/102ec1a2fc015b3a7652abab7acf3541d58c04d3d17a8d3d6a44adae1eb1/greenlet-3.2.4-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:94385f101946790ae13da500603491f04a76b6e4c059dab271b3ce2e283b2590", size = 629475, upload-time = "2025-08-07T13:42:54.009Z" }, { url = "https://files.pythonhosted.org/packages/c5/26/80383131d55a4ac0fb08d71660fd77e7660b9db6bdb4e8884f46d9f2cc04/greenlet-3.2.4-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:f10fd42b5ee276335863712fa3da6608e93f70629c631bf77145021600abc23c", size = 640802, upload-time = "2025-08-07T13:45:25.52Z" }, + { url = "https://files.pythonhosted.org/packages/9f/7c/e7833dbcd8f376f3326bd728c845d31dcde4c84268d3921afcae77d90d08/greenlet-3.2.4-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:c8c9e331e58180d0d83c5b7999255721b725913ff6bc6cf39fa2a45841a4fd4b", size = 636703, upload-time = "2025-08-07T13:53:12.622Z" }, { url = "https://files.pythonhosted.org/packages/e9/49/547b93b7c0428ede7b3f309bc965986874759f7d89e4e04aeddbc9699acb/greenlet-3.2.4-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:58b97143c9cc7b86fc458f215bd0932f1757ce649e05b640fea2e79b54cedb31", size = 635417, upload-time = "2025-08-07T13:18:25.189Z" }, { url = "https://files.pythonhosted.org/packages/7f/91/ae2eb6b7979e2f9b035a9f612cf70f1bf54aad4e1d125129bef1eae96f19/greenlet-3.2.4-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c2ca18a03a8cfb5b25bc1cbe20f3d9a4c80d8c3b13ba3df49ac3961af0b1018d", size = 584358, upload-time = "2025-08-07T13:18:23.708Z" }, { url = 
"https://files.pythonhosted.org/packages/f7/85/433de0c9c0252b22b16d413c9407e6cb3b41df7389afc366ca204dbc1393/greenlet-3.2.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:9fe0a28a7b952a21e2c062cd5756d34354117796c6d9215a87f55e38d15402c5", size = 1113550, upload-time = "2025-08-07T13:42:37.467Z" }, @@ -1778,6 +1780,7 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a4/de/f28ced0a67749cac23fecb02b694f6473f47686dff6afaa211d186e2ef9c/greenlet-3.2.4-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:96378df1de302bc38e99c3a9aa311967b7dc80ced1dcc6f171e99842987882a2", size = 272305, upload-time = "2025-08-07T13:15:41.288Z" }, { url = "https://files.pythonhosted.org/packages/09/16/2c3792cba130000bf2a31c5272999113f4764fd9d874fb257ff588ac779a/greenlet-3.2.4-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1ee8fae0519a337f2329cb78bd7a8e128ec0f881073d43f023c7b8d4831d5246", size = 632472, upload-time = "2025-08-07T13:42:55.044Z" }, { url = "https://files.pythonhosted.org/packages/ae/8f/95d48d7e3d433e6dae5b1682e4292242a53f22df82e6d3dda81b1701a960/greenlet-3.2.4-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:94abf90142c2a18151632371140b3dba4dee031633fe614cb592dbb6c9e17bc3", size = 644646, upload-time = "2025-08-07T13:45:26.523Z" }, + { url = "https://files.pythonhosted.org/packages/d5/5e/405965351aef8c76b8ef7ad370e5da58d57ef6068df197548b015464001a/greenlet-3.2.4-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:4d1378601b85e2e5171b99be8d2dc85f594c79967599328f95c1dc1a40f1c633", size = 640519, upload-time = "2025-08-07T13:53:13.928Z" }, { url = "https://files.pythonhosted.org/packages/25/5d/382753b52006ce0218297ec1b628e048c4e64b155379331f25a7316eb749/greenlet-3.2.4-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0db5594dce18db94f7d1650d7489909b57afde4c580806b8d9203b6e79cdc079", size = 639707, upload-time = "2025-08-07T13:18:27.146Z" }, { url = 
"https://files.pythonhosted.org/packages/1f/8e/abdd3f14d735b2929290a018ecf133c901be4874b858dd1c604b9319f064/greenlet-3.2.4-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2523e5246274f54fdadbce8494458a2ebdcdbc7b802318466ac5606d3cded1f8", size = 587684, upload-time = "2025-08-07T13:18:25.164Z" }, { url = "https://files.pythonhosted.org/packages/5d/65/deb2a69c3e5996439b0176f6651e0052542bb6c8f8ec2e3fba97c9768805/greenlet-3.2.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:1987de92fec508535687fb807a5cea1560f6196285a4cde35c100b8cd632cc52", size = 1116647, upload-time = "2025-08-07T13:42:38.655Z" }, @@ -1788,6 +1791,7 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/44/69/9b804adb5fd0671f367781560eb5eb586c4d495277c93bde4307b9e28068/greenlet-3.2.4-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:3b67ca49f54cede0186854a008109d6ee71f66bd57bb36abd6d0a0267b540cdd", size = 274079, upload-time = "2025-08-07T13:15:45.033Z" }, { url = "https://files.pythonhosted.org/packages/46/e9/d2a80c99f19a153eff70bc451ab78615583b8dac0754cfb942223d2c1a0d/greenlet-3.2.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ddf9164e7a5b08e9d22511526865780a576f19ddd00d62f8a665949327fde8bb", size = 640997, upload-time = "2025-08-07T13:42:56.234Z" }, { url = "https://files.pythonhosted.org/packages/3b/16/035dcfcc48715ccd345f3a93183267167cdd162ad123cd93067d86f27ce4/greenlet-3.2.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:f28588772bb5fb869a8eb331374ec06f24a83a9c25bfa1f38b6993afe9c1e968", size = 655185, upload-time = "2025-08-07T13:45:27.624Z" }, + { url = "https://files.pythonhosted.org/packages/31/da/0386695eef69ffae1ad726881571dfe28b41970173947e7c558d9998de0f/greenlet-3.2.4-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:5c9320971821a7cb77cfab8d956fa8e39cd07ca44b6070db358ceb7f8797c8c9", size = 649926, upload-time = "2025-08-07T13:53:15.251Z" }, { url = 
"https://files.pythonhosted.org/packages/68/88/69bf19fd4dc19981928ceacbc5fd4bb6bc2215d53199e367832e98d1d8fe/greenlet-3.2.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c60a6d84229b271d44b70fb6e5fa23781abb5d742af7b808ae3f6efd7c9c60f6", size = 651839, upload-time = "2025-08-07T13:18:30.281Z" }, { url = "https://files.pythonhosted.org/packages/19/0d/6660d55f7373b2ff8152401a83e02084956da23ae58cddbfb0b330978fe9/greenlet-3.2.4-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3b3812d8d0c9579967815af437d96623f45c0f2ae5f04e366de62a12d83a8fb0", size = 607586, upload-time = "2025-08-07T13:18:28.544Z" }, { url = "https://files.pythonhosted.org/packages/8e/1a/c953fdedd22d81ee4629afbb38d2f9d71e37d23caace44775a3a969147d4/greenlet-3.2.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:abbf57b5a870d30c4675928c37278493044d7c14378350b3aa5d484fa65575f0", size = 1123281, upload-time = "2025-08-07T13:42:39.858Z" }, From 0f1999f079e3372ea814867a91359d830f87bb1e Mon Sep 17 00:00:00 2001 From: Alberto Hernandez <22517436+tetelio@users.noreply.github.com> Date: Fri, 29 May 2026 14:46:26 +0200 Subject: [PATCH 10/15] Parallel linting with make fl --- Makefile | 12 +++++++++--- docs/Makefile | 9 ++++++++- pyproject.toml | 1 + 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 0e4eeb19fb..0052495585 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ .DEFAULT_GOAL := help -.PHONY: install-uv has-uv dev lint lint-full lint-docs format format-docs docs-website-deps fl test test-common test-common-p reset-test-storage recreate-compiled-deps build-library-prerelease build-library publish-library test-load-local test-load-local-p test-load-local-postgres test-load-local-postgres-p install-snowflake-extras test-remote-snowflake test-remote-snowflake-p install-common-core test-common-core install-common-core-source test-common-core-source install-common-source install-pipeline-min test-pipeline-min 
install-pipeline-arrow test-pipeline-arrow install-pipeline-min-arrow test-pipeline-min-arrow install-workspace test-workspace test-workspace-dashboard install-hub-minimal test-hub-minimal test-hub install-pipeline-full test-pipeline-full install-pipeline-full-sql test-pipeline-full-sql install-sqlalchemy2 test-with-sqlalchemy-2 test-dest-load test-dest-remote-essential test-dest-remote-nonessential test-dbt-no-venv test-dbt-runner-venv test-sources-load test-sources-sql-database install-prepush-hooks uninstall-prepush-hooks prek prek-dry +.PHONY: install-uv has-uv dev lint lint-parallel lint-full lint-docs format format-docs docs-website-deps fl test test-common test-common-p reset-test-storage recreate-compiled-deps build-library-prerelease build-library publish-library test-load-local test-load-local-p test-load-local-postgres test-load-local-postgres-p install-snowflake-extras test-remote-snowflake test-remote-snowflake-p install-common-core test-common-core install-common-core-source test-common-core-source install-common-source install-pipeline-min test-pipeline-min install-pipeline-arrow test-pipeline-arrow install-pipeline-min-arrow test-pipeline-min-arrow install-workspace test-workspace test-workspace-dashboard install-hub-minimal test-hub-minimal test-hub install-pipeline-full test-pipeline-full install-pipeline-full-sql test-pipeline-full-sql install-sqlalchemy2 test-with-sqlalchemy-2 test-dest-load test-dest-remote-essential test-dest-remote-nonessential test-dbt-no-venv test-dbt-runner-venv test-sources-load test-sources-sql-database install-prepush-hooks uninstall-prepush-hooks prek prek-dry PYV=$(shell python3 -c "import sys;t='{v[0]}.{v[1]}'.format(v=list(sys.version_info[:2]));sys.stdout.write(t)") .SILENT:has-uv @@ -37,7 +37,13 @@ dev-airflow: has-uv ## Prepares development environment with airflow support dev-hub: has-uv ## Prepares development environment with hub support uv sync --all-extras --group workspace-deps --group dev --group providers 
--group pipeline --group sources --group sentry-sdk --group ibis --group adbc --group dashboard-tests -lint: lint-core lint-security lint-docstrings lint-lock lint-deps ## Runs all linters (mypy, ruff, flake8, bandit, docstrings, lockfile, deps) +LINT_TARGETS := lint-core lint-security lint-docstrings lint-lock lint-deps + +lint: $(LINT_TARGETS) ## Runs all linters (mypy, ruff, flake8, bandit, docstrings, lockfile, deps) + @: + +lint-parallel: ## Runs all linters in parallel (used by make fl) + $(MAKE) -j $(words $(LINT_TARGETS)) lint lint-full: lint lint-docs ## Root + docs lint (sequential) @@ -53,7 +59,7 @@ docs-website-deps: ## Install docs website node deps (biome; used by make fl) fl: ## Format then lint root and docs in parallel (prek pre-push gate) set -e; \ $(MAKE) format & $(MAKE) format-docs & $(MAKE) docs-website-deps & wait; \ - $(MAKE) lint & $(MAKE) -C docs lint & wait + $(MAKE) lint-parallel & $(MAKE) -C docs lint-parallel & wait @if [ -f .prek/.enabled ]; then uv run python -m tools.prek --record lint; fi lint-lock: ## Checks uv lockfile is in sync diff --git a/docs/Makefile b/docs/Makefile index 69a5f82277..6dda788026 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -1,4 +1,5 @@ .DEFAULT_GOAL := help +.PHONY: lint lint-parallel lint-core lint-embedded-snippets lint-notebooks lint-website # Add " ## description" after any target name to include it in `make help` output. 
# Example: my-target: ## Does something useful @@ -8,7 +9,13 @@ help: ## Shows this help message dev: ## Prepares development environment for docs tooling uv sync -lint: lint-core lint-embedded-snippets lint-notebooks lint-website ## Runs all linters +LINT_TARGETS := lint-core lint-embedded-snippets lint-notebooks lint-website + +lint: $(LINT_TARGETS) ## Runs all linters + @: + +lint-parallel: ## Runs all linters in parallel (used by make fl) + $(MAKE) -j $(words $(LINT_TARGETS)) lint lint-website: ## Lints the docusaurus website JS/TS sources with Biome cd website && npm run lint diff --git a/pyproject.toml b/pyproject.toml index 85f3065a55..f1e07bd7e6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -393,6 +393,7 @@ exclude = [ "docs/website/.dlt-repo", "tests/reflection/module_cases/syntax_error.py", "docs/website/docs/general-usage/transformations/transformation-snippets.py", + "docs/education/**/*nbqa_ipynb.py", # nbqa temp files while lint-notebooks runs in parallel ] [tool.ruff.lint] From 1d624e1ae71da86d66f07fddadcf1a3eb591aa15 Mon Sep 17 00:00:00 2001 From: Alberto Hernandez <22517436+tetelio@users.noreply.github.com> Date: Fri, 29 May 2026 21:07:30 +0200 Subject: [PATCH 11/15] Move config to pyproject.toml. Add multiline cache --- .prek/README.md | 30 +++++--- .prek/scopes.toml | 32 --------- pyproject.toml | 33 +++++++++ tools/prek.py | 144 +++++++++++++++++++++++++++++---------- tools/tests/test_prek.py | 75 ++++++++++++++++++-- 5 files changed, 231 insertions(+), 83 deletions(-) delete mode 100644 .prek/scopes.toml diff --git a/.prek/README.md b/.prek/README.md index 33eb15bd6c..5a795c6b1c 100644 --- a/.prek/README.md +++ b/.prek/README.md @@ -55,8 +55,20 @@ Checks run in order; a failed lint blocks tests. `make fl` runs format (root, docs, website deps) in parallel, then root and docs lint in parallel. 
-A check runs only when its **fingerprint** (hash of tracked files in scope) differs from the last -successful entry in `.prek/.state.toml` (also gitignored). Passing updates state for that check only. +A check runs only when its **fingerprint** (hash of tracked files in scope) is not among the +last 50 successful passes stored in `.prek/.state.toml` (also gitignored). Each pass records +`fingerprint`, `passed_at`, and `command`. That history lets branch switches and reverts reuse +a prior pass when the scoped tree matches again. Passing prepends a record and trims the list +to 50 entries per check. + +Example: + +```toml +[[lint.passes]] +fingerprint = "abc123..." +passed_at = "2026-05-29T12:00:00+00:00" +command = "make fl" +``` After `make install-prepush-hooks`, successful `make fl` and `make test-common-p` also update state (no extra commands). Plain `make lint` does not update prek state. @@ -67,15 +79,17 @@ prek may stash unstaged edits to `~/.cache/prek/patches/` while the hook runs, t Built-in prek behavior (from pre-commit), not configurable. Keeps lint/tests from failing on WIP you are not pushing. -## Scopes (`scopes.toml`) +## Scopes (`pyproject.toml`) -Defines which tracked files invalidate each check. Edit when adding new trees that should trigger +Defines which tracked files invalidate each check (`[tool.prek.scopes.lint]` and +`[tool.prek.scopes.test_common_p]`). Edit when adding new trees that should trigger re-lint or re-test. **Lint** — `dlt`, `tests`, `tools`, `docs` (`.py`, `.md`, `.ipynb`), plus root/docs config and embedded-snippet lint setup files. -**Common tests** — `dlt` and selected `tests/*` suites (see `scopes.toml`), plus `pyproject.toml`, +**Common tests** — `dlt` and selected `tests/*` suites (see `[tool.prek.scopes]` in +`pyproject.toml`), plus `pyproject.toml`, `uv.lock`, `tests/conftest.py`, `tests/load/test_dummy_client.py`. Inspect a fingerprint: @@ -117,8 +131,8 @@ refuses to run so your hook is not deleted. 
**Want to re-run after a pass** — Delete the check’s section from `.prek/.state.toml`, or change a file in that check’s scope. -**Stale fingerprint / wrong cache** — Same as above; state stores the last successful fingerprint -per check. +**Stale fingerprint / wrong cache** — Delete the check’s section from `.prek/.state.toml`, or change a +file in that check’s scope. State keeps up to 50 pass records per check for branch hopping. ## Files in this directory @@ -126,9 +140,7 @@ per check. |------|------| | `README.md` | This guide | | `local.example.toml` | Config template | -| `scopes.toml` | Fingerprint inputs per check | | `prek.toml` | prek hook definition (`uv run python -m tools.prek`) | -| `plan.md` | Maintainer notes / design sketch | Implementation and tests: `tools/prek.py` (run via `python -m tools.prek`). diff --git a/.prek/scopes.toml b/.prek/scopes.toml deleted file mode 100644 index f9f943bcf9..0000000000 --- a/.prek/scopes.toml +++ /dev/null @@ -1,32 +0,0 @@ -[scopes.lint] -paths = ["dlt", "tests", "tools", "docs"] -globs = ["*.py", "*.md", "*.ipynb"] -files = [ - "pyproject.toml", - "uv.lock", - "docs/pyproject.toml", - "docs/uv.lock", - "docs/docs_tools/snippets/lint_setup/template.py", - "docs/docs_tools/snippets/lint_setup/mypy.ini", -] - -[scopes.test_common_p] -paths = [ - "dlt", - "tests/common", - "tests/normalize", - "tests/extract", - "tests/pipeline", - "tests/reflection", - "tests/sources", - "tests/workspace", - "tests/libs", - "tests/destinations", -] -globs = ["*.py"] -files = [ - "pyproject.toml", - "uv.lock", - "tests/conftest.py", - "tests/load/test_dummy_client.py", -] diff --git a/pyproject.toml b/pyproject.toml index f1e07bd7e6..1c2fa867d8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -424,6 +424,39 @@ ignore = [ line-length = 100 preview = true +[tool.prek.scopes.lint] +paths = ["dlt", "tests", "tools", "docs"] +globs = ["*.py", "*.md", "*.ipynb"] +files = [ + "pyproject.toml", + "uv.lock", + "docs/pyproject.toml", + 
"docs/uv.lock", + "docs/docs_tools/snippets/lint_setup/template.py", + "docs/docs_tools/snippets/lint_setup/mypy.ini", +] + +[tool.prek.scopes.test_common_p] +paths = [ + "dlt", + "tests/common", + "tests/normalize", + "tests/extract", + "tests/pipeline", + "tests/reflection", + "tests/sources", + "tests/workspace", + "tests/libs", + "tests/destinations", +] +globs = ["*.py"] +files = [ + "pyproject.toml", + "uv.lock", + "tests/conftest.py", + "tests/load/test_dummy_client.py", +] + [tool.isort] color_output = true line_length = 100 diff --git a/tools/prek.py b/tools/prek.py index d592a6e367..c697d11262 100644 --- a/tools/prek.py +++ b/tools/prek.py @@ -15,13 +15,27 @@ from collections.abc import Callable, Sequence from dataclasses import dataclass from pathlib import Path -from typing import Any, Literal, NamedTuple, cast +from typing import Any, Literal, NamedTuple, TypedDict, cast import pendulum from pendulum.datetime import DateTime Mode = Literal["off", "auto", "confirm"] VALID_MODES = frozenset({"off", "auto", "confirm"}) +MAX_PASSED_FINGERPRINTS = 50 + + +class PassRecord(TypedDict): + fingerprint: str + passed_at: str + command: str + + +class CheckState(TypedDict): + passes: list[PassRecord] + + +State = dict[str, CheckState] class Check(NamedTuple): @@ -53,7 +67,7 @@ class PlannedCheck(NamedTuple): class GateOutcome(NamedTuple): exit_code: int - new_state: dict[str, dict[str, str]] + new_state: State stderr_lines: tuple[str, ...] 
@@ -62,7 +76,7 @@ class ConfigError(Exception): class UnknownScopeError(Exception): - """Scope name is missing from scopes.toml.""" + """Scope name is missing from [tool.prek.scopes] in pyproject.toml.""" def repo_root() -> Path: @@ -122,21 +136,62 @@ def load_local_config(path: Path) -> dict[str, Any] | None: return load_toml(path) -def load_state(path: Path) -> dict[str, dict[str, str]]: +def load_state(path: Path) -> State: if not path.is_file(): return {} data = load_toml(path) - return {name: dict(section) for name, section in data.items() if isinstance(section, dict)} + return { + name: normalize_check_state(section) + for name, section in data.items() + if isinstance(section, dict) + } + + +def normalize_pass_record(raw: dict[str, Any]) -> PassRecord | None: + fingerprint = raw.get("fingerprint") + if not isinstance(fingerprint, str) or not fingerprint: + return None + passed_at = raw.get("passed_at") + command = raw.get("command") + return PassRecord( + fingerprint=fingerprint, + passed_at=passed_at if isinstance(passed_at, str) else "", + command=command if isinstance(command, str) else "", + ) + +def normalize_check_state(section: dict[str, Any]) -> CheckState: + passes: list[PassRecord] = [] + raw_passes = section.get("passes") + if isinstance(raw_passes, list): + for item in raw_passes: + if isinstance(item, dict): + record = normalize_pass_record(item) + if record is not None: + passes.append(record) + return CheckState(passes=passes[:MAX_PASSED_FINGERPRINTS]) -def write_state(path: Path, state: dict[str, dict[str, str]]) -> None: + +def passed_fingerprints(check_state: CheckState | None) -> list[str]: + if not check_state: + return [] + return [record["fingerprint"] for record in check_state["passes"]] + + +def fingerprint_is_known(check_state: CheckState | None, fingerprint: str) -> bool: + return fingerprint in passed_fingerprints(check_state) + + +def write_state(path: Path, state: State) -> None: lines: list[str] = [] for check_name, data in 
state.items(): - lines.append(f"[{check_name}]") - for key, value in data.items(): - lines.append(f'{key} = "{value}"') - lines.append("") - path.write_text("\n".join(lines).rstrip() + "\n", encoding="utf-8") + for record in data["passes"]: + lines.append(f"[[{check_name}.passes]]") + lines.append(f'fingerprint = "{record["fingerprint"]}"') + lines.append(f'passed_at = "{record["passed_at"]}"') + lines.append(f'command = "{record["command"]}"') + lines.append("") + path.write_text("\n".join(lines).rstrip() + ("\n" if lines else ""), encoding="utf-8") def scope_from_dict(scope: dict[str, list[str]]) -> ScopeDef: @@ -177,11 +232,13 @@ def fingerprint_files(paths: list[str], read_bytes: Callable[[str], bytes]) -> s return aggregate.hexdigest() -def load_scopes(scopes_path: Path) -> dict[str, ScopeDef]: - raw = load_toml(scopes_path) - scopes = raw.get("scopes", {}) +def load_scopes(config_path: Path) -> dict[str, ScopeDef]: + raw = load_toml(config_path) + tool = raw.get("tool", {}) + prek = tool.get("prek", {}) if isinstance(tool, dict) else {} + scopes = prek.get("scopes", {}) if not isinstance(scopes, dict): - raise ValueError("Invalid [scopes] section in scopes.toml") + raise ValueError("Invalid [tool.prek.scopes] section in pyproject.toml") return { name: scope_from_dict(section) for name, section in scopes.items() @@ -202,8 +259,8 @@ def git_ls_files(root: Path, pathspecs: list[str]) -> list[str]: return [line for line in result.stdout.splitlines() if line] -def make_fingerprint_fn(root: Path, scopes_path: Path) -> Callable[[str], str]: - scopes = load_scopes(scopes_path) +def make_fingerprint_fn(root: Path, config_path: Path) -> Callable[[str], str]: + scopes = load_scopes(config_path) def fingerprint(scope_name: str) -> str: try: @@ -231,7 +288,7 @@ def gate_active(*, only_when_pr_open: bool, has_open_pr: bool) -> tuple[bool, st def plan_checks( checks: Sequence[Check], local_config: dict[str, Any], - state: dict[str, dict[str, str]], + state: State, 
fingerprint: Callable[[str], str], ) -> list[PlannedCheck]: planned: list[PlannedCheck] = [] @@ -240,25 +297,40 @@ def plan_checks( if mode == "off": continue current = fingerprint(check.name) - cached = state.get(check.name, {}).get("fingerprint", "") - planned.append(PlannedCheck(check, mode, current, cached, current != cached)) + history = passed_fingerprints(state.get(check.name)) + cached = history[0] if history else "" + planned.append( + PlannedCheck( + check, + mode, + current, + cached, + not fingerprint_is_known(state.get(check.name), current), + ) + ) return planned def with_passed_check( - state: dict[str, dict[str, str]], + state: State, check_name: str, *, fingerprint: str, command: str, passed_at: DateTime, -) -> dict[str, dict[str, str]]: - updated = {name: dict(data) for name, data in state.items()} - updated[check_name] = { - "fingerprint": fingerprint, - "passed_at": passed_at.replace(microsecond=0).isoformat(), - "command": command, - } +) -> State: + updated: State = {name: CheckState(passes=list(data["passes"])) for name, data in state.items()} + passes = list(updated.get(check_name, CheckState(passes=[]))["passes"]) + passes = [record for record in passes if record["fingerprint"] != fingerprint] + passes.insert( + 0, + PassRecord( + fingerprint=fingerprint, + passed_at=passed_at.replace(microsecond=0).isoformat(), + command=command, + ), + ) + updated[check_name] = CheckState(passes=passes[:MAX_PASSED_FINGERPRINTS]) return updated @@ -320,7 +392,7 @@ def from_repo(cls, root: Path) -> GateDeps: run_make=lambda target: _run_make_target(root, target), has_open_pr=lambda: _has_open_pr(root), confirm=_confirm_run, - fingerprint=make_fingerprint_fn(root, prek_dir / "scopes.toml"), + fingerprint=make_fingerprint_fn(root, root / "pyproject.toml"), now=lambda: pendulum.now("UTC"), is_tty=sys.stdin.isatty, ) @@ -353,7 +425,7 @@ def _confirm_run(make_command: str) -> bool: def run_gate( *, local_config: dict[str, Any], - state: dict[str, dict[str, 
str]], + state: State, deps: GateDeps, checks: Sequence[Check] = CHECKS, ) -> GateOutcome: @@ -364,14 +436,16 @@ def run_gate( if not active: return GateOutcome(0, state, (f"prek gate: skipped ({reason})",)) - new_state = {name: dict(data) for name, data in state.items()} + new_state: State = { + name: CheckState(passes=list(data["passes"])) for name, data in state.items() + } for check in checks: mode = parse_mode(local_config, check.name) if mode == "off": continue fingerprint = deps.fingerprint(check.name) - if new_state.get(check.name, {}).get("fingerprint") == fingerprint: + if fingerprint_is_known(new_state.get(check.name), fingerprint): continue if mode == "confirm" and not deps.confirm(check.make_command): @@ -394,10 +468,10 @@ def run_gate( def record_passed_check( *, check_name: str, - state: dict[str, dict[str, str]], + state: State, hooks_enabled: bool, deps: GateDeps, -) -> tuple[dict[str, dict[str, str]], str | None]: +) -> tuple[State, str | None]: try: command = make_command_for(check_name) except ConfigError as exc: @@ -458,7 +532,7 @@ def main_fingerprint(*, prek_dir: Path | None = None, argv: list[str] | None = N root = prek_dir.parent try: - print(make_fingerprint_fn(root, prek_dir / "scopes.toml")(argv[0])) + print(make_fingerprint_fn(root, root / "pyproject.toml")(argv[0])) except UnknownScopeError as exc: print(str(exc), file=sys.stderr) return 1 diff --git a/tools/tests/test_prek.py b/tools/tests/test_prek.py index d1a460de49..87a6e502c6 100644 --- a/tools/tests/test_prek.py +++ b/tools/tests/test_prek.py @@ -10,8 +10,12 @@ from tools.prek import ( CHECKS, ConfigError, + CheckState, GateDeps, + MAX_PASSED_FINGERPRINTS, + PassRecord, ScopeDef, + State, dry_run_lines, fingerprint_files, gate_active, @@ -20,6 +24,7 @@ matches_globs, parse_bool, parse_mode, + passed_fingerprints, plan_checks, record_passed_check, repo_root, @@ -153,15 +158,22 @@ def record_make(target: str) -> int: passed = run_gate(local_config={"lint": {"mode": "auto"}}, 
state={}, deps=deps) assert passed.exit_code == 0 assert calls == ["fl"] - assert passed.new_state["lint"]["fingerprint"] == "fp-lint" + assert passed.new_state["lint"]["passes"][0]["fingerprint"] == "fp-lint" + assert passed.new_state["lint"]["passes"][0]["command"] == "make fl" failed = run_gate( local_config={"lint": {"mode": "auto"}}, - state={"lint": {"fingerprint": "old"}}, + state={ + "lint": CheckState( + passes=[ + PassRecord(fingerprint="other", passed_at="t0", command="make fl"), + ] + ) + }, deps=make_deps(run_make=lambda _target: 1), ) assert failed.exit_code == 1 - assert failed.new_state == {"lint": {"fingerprint": "old"}} + assert failed.new_state["lint"]["passes"][0]["fingerprint"] == "other" declined = run_gate( local_config={"lint": {"mode": "confirm"}}, @@ -180,20 +192,69 @@ def test_record_passed_check() -> None: updated, error = record_passed_check(check_name="lint", state={}, hooks_enabled=True, deps=deps) assert error is None - assert updated["lint"]["command"] == "make fl" + assert updated["lint"]["passes"][0]["command"] == "make fl" _, error = record_passed_check(check_name="missing", state={}, hooks_enabled=True, deps=deps) assert error is not None def test_with_passed_check_and_state_io(tmp_path: Path) -> None: - state = {"lint": {"fingerprint": "old", "passed_at": "t0", "command": "make fl"}} + state: State = { + "lint": CheckState( + passes=[ + PassRecord(fingerprint="old", passed_at="t0", command="make fl"), + ] + ) + } updated = with_passed_check( state, "lint", fingerprint="new", command="make fl", passed_at=FIXED_NOW ) - assert state["lint"]["fingerprint"] == "old" - assert updated["lint"]["fingerprint"] == "new" + assert state["lint"]["passes"][0]["fingerprint"] == "old" + assert updated["lint"]["passes"][0]["fingerprint"] == "new" + assert updated["lint"]["passes"][1]["fingerprint"] == "old" state_path = tmp_path / ".state.toml" write_state(state_path, updated) assert load_state(state_path) == updated + + +def 
test_fingerprint_history_skips_known_tree() -> None: + deps = make_deps(fingerprint=lambda name: {"lint": "fp-a", "test_common_p": "fp-b"}[name]) + state: State = { + "lint": CheckState( + passes=[ + PassRecord(fingerprint="fp-a", passed_at="t1", command="make fl"), + PassRecord(fingerprint="fp-other", passed_at="t0", command="make fl"), + ] + ) + } + + outcome = run_gate(local_config={"lint": {"mode": "auto"}}, state=state, deps=deps) + assert outcome.exit_code == 0 + assert outcome.new_state == state + + planned = plan_checks( + CHECKS, + {"lint": {"mode": "auto"}}, + state, + deps.fingerprint, + ) + assert planned[0].stale is False + + +def test_fingerprint_history_caps_at_max() -> None: + history = [ + PassRecord(fingerprint=f"fp-{index}", passed_at=f"t{index}", command="make fl") + for index in range(MAX_PASSED_FINGERPRINTS + 5) + ] + state: State = {"lint": CheckState(passes=history)} + updated = with_passed_check( + state, + "lint", + fingerprint="fp-new", + command="make fl", + passed_at=FIXED_NOW, + ) + assert len(updated["lint"]["passes"]) == MAX_PASSED_FINGERPRINTS + assert updated["lint"]["passes"][0]["fingerprint"] == "fp-new" + assert updated["lint"]["passes"][-1]["fingerprint"] == f"fp-{MAX_PASSED_FINGERPRINTS - 2}" From 2e0f2ae0670207c39c24e07770730a9bfb35ce63 Mon Sep 17 00:00:00 2001 From: Alberto Hernandez <22517436+tetelio@users.noreply.github.com> Date: Fri, 29 May 2026 21:10:01 +0200 Subject: [PATCH 12/15] Fix docs --- .prek/README.md | 20 +++++++++++++------- tests/load/ducklake/test_ducklake_client.py | 2 +- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/.prek/README.md b/.prek/README.md index 5a795c6b1c..91b97c3564 100644 --- a/.prek/README.md +++ b/.prek/README.md @@ -1,8 +1,14 @@ # Pre-push local verification -Optional pre-push checks via [prek](https://github.com/pre-commit/prek): run `make fl` and/or -`make test-common-p` before you push, but only when files in each check’s scope have changed since -the last successful 
run. +> **Not a general prek/pre-commit rollout.** prek is used only to install a **pre-push** hook; gate logic lives in `tools/prek.py`. This is not incremental formatting or per-file lint on staged changes — it runs full `make fl` / `make test-common-p` when the hook decides they are needed. + +Optional pre-push checks: run `make fl` and/or `make test-common-p` on push when the current tree’s +fingerprint is not already in local pass history (up to 50 recorded passes per check in +`.prek/.state.toml`). + +**Manual commands always run.** `make fl` and `make test-common-p` execute in full whenever you +invoke them. The cache applies only to pre-push (`git push`), `make prek`, and `make prek-dry`. +Successful manual runs still record passes (after hook install) so the next push can skip. The hook is **opt-in**. Without `.prek/local.toml`, the pre-push hook does nothing. @@ -128,11 +134,11 @@ refuses to run so your hook is not deleted. **Docs lint fails** — Run `cd docs && make dev`, then `make fl` (or `cd docs && make format && make lint`). -**Want to re-run after a pass** — Delete the check’s section from `.prek/.state.toml`, or change a -file in that check’s scope. +**Want to re-run after a pass** — Delete `.prek/.state.toml`, remove all `[[lint.passes]]` or +`[[test_common_p.passes]]` entries for that check, or change a file in that check’s scope. -**Stale fingerprint / wrong cache** — Delete the check’s section from `.prek/.state.toml`, or change a -file in that check’s scope. State keeps up to 50 pass records per check for branch hopping. +**Stale fingerprint / wrong cache** — Same as above. State keeps up to 50 pass records per check +for branch hopping. 
## Files in this directory diff --git a/tests/load/ducklake/test_ducklake_client.py b/tests/load/ducklake/test_ducklake_client.py index 319ae51461..8c942755ae 100644 --- a/tests/load/ducklake/test_ducklake_client.py +++ b/tests/load/ducklake/test_ducklake_client.py @@ -438,4 +438,4 @@ def test_ducklake_factory_instantiation() -> None: credentials = DuckLakeCredentials( "lake_catalog", catalog=catalog_credentials, - ) + ) \ No newline at end of file From 96708d4c2914ab8b7a399868a46bd3907decd8d6 Mon Sep 17 00:00:00 2001 From: Alberto Hernandez <22517436+tetelio@users.noreply.github.com> Date: Fri, 29 May 2026 21:10:35 +0200 Subject: [PATCH 13/15] FL --- tests/load/ducklake/test_ducklake_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/load/ducklake/test_ducklake_client.py b/tests/load/ducklake/test_ducklake_client.py index 8c942755ae..319ae51461 100644 --- a/tests/load/ducklake/test_ducklake_client.py +++ b/tests/load/ducklake/test_ducklake_client.py @@ -438,4 +438,4 @@ def test_ducklake_factory_instantiation() -> None: credentials = DuckLakeCredentials( "lake_catalog", catalog=catalog_credentials, - ) \ No newline at end of file + ) From 9363cf9a7626c406dfc41df92afdfc2c30cfdb66 Mon Sep 17 00:00:00 2001 From: Alberto Hernandez <22517436+tetelio@users.noreply.github.com> Date: Fri, 29 May 2026 22:17:14 +0200 Subject: [PATCH 14/15] Rename scopes --- .prek/README.md | 20 ++++++------- pyproject.toml | 4 +-- tools/prek.py | 65 +++++++++++++++++++++------------------- tools/tests/test_prek.py | 12 ++++---- 4 files changed, 53 insertions(+), 48 deletions(-) diff --git a/.prek/README.md b/.prek/README.md index 91b97c3564..c0fd764648 100644 --- a/.prek/README.md +++ b/.prek/README.md @@ -33,12 +33,12 @@ Copy from `local.example.toml`. Gitignored — per-developer only. 
| Section | Keys | Meaning | |---------|------|---------| | `[gate]` | `only_when_pr_open` | If `true`, skip all checks unless the current branch has an open PR (`gh pr view`) | -| `[lint]` | `mode` | How to handle stale lint scope | -| `[test_common_p]` | `mode` | How to handle stale common-test scope | +| `[lint]` | `mode` | How to handle a stale lint fingerprint | +| `[test_common_p]` | `mode` | How to handle a stale common-test fingerprint | ### Modes -| Mode | When scope is stale | +| Mode | When the fingerprint is stale | |------|---------------------| | `off` | Never run this check | | `auto` | Run the make target | @@ -61,10 +61,10 @@ Checks run in order; a failed lint blocks tests. `make fl` runs format (root, docs, website deps) in parallel, then root and docs lint in parallel. -A check runs only when its **fingerprint** (hash of tracked files in scope) is not among the +A check runs only when its **fingerprint** (hash of tracked files listed for that check) is not among the last 50 successful passes stored in `.prek/.state.toml` (also gitignored). Each pass records `fingerprint`, `passed_at`, and `command`. That history lets branch switches and reverts reuse -a prior pass when the scoped tree matches again. Passing prepends a record and trims the list +a prior pass when the tree matches again. Passing prepends a record and trims the list to 50 entries per check. Example: @@ -85,16 +85,16 @@ prek may stash unstaged edits to `~/.cache/prek/patches/` while the hook runs, t Built-in prek behavior (from pre-commit), not configurable. Keeps lint/tests from failing on WIP you are not pushing. -## Scopes (`pyproject.toml`) +## Fingerprint inputs (`pyproject.toml`) -Defines which tracked files invalidate each check (`[tool.prek.scopes.lint]` and -`[tool.prek.scopes.test_common_p]`). 
Edit when adding new trees that should trigger +Defines which tracked files feed each check fingerprint (`[tool.dlt.prepush.fingerprints.lint]` and +`[tool.dlt.prepush.fingerprints.test_common_p]`). Edit when adding new trees that should trigger re-lint or re-test. **Lint** — `dlt`, `tests`, `tools`, `docs` (`.py`, `.md`, `.ipynb`), plus root/docs config and embedded-snippet lint setup files. -**Common tests** — `dlt` and selected `tests/*` suites (see `[tool.prek.scopes]` in +**Common tests** — `dlt` and selected `tests/*` suites (see `[tool.dlt.prepush.fingerprints]` in `pyproject.toml`), plus `pyproject.toml`, `uv.lock`, `tests/conftest.py`, `tests/load/test_dummy_client.py`. @@ -135,7 +135,7 @@ refuses to run so your hook is not deleted. **Docs lint fails** — Run `cd docs && make dev`, then `make fl` (or `cd docs && make format && make lint`). **Want to re-run after a pass** — Delete `.prek/.state.toml`, remove all `[[lint.passes]]` or -`[[test_common_p.passes]]` entries for that check, or change a file in that check’s scope. +`[[test_common_p.passes]]` entries for that check, or change a tracked file in that check’s fingerprint inputs. **Stale fingerprint / wrong cache** — Same as above. State keeps up to 50 pass records per check for branch hopping. 
diff --git a/pyproject.toml b/pyproject.toml index 1c2fa867d8..9e9421c81c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -424,7 +424,7 @@ ignore = [ line-length = 100 preview = true -[tool.prek.scopes.lint] +[tool.dlt.prepush.fingerprints.lint] paths = ["dlt", "tests", "tools", "docs"] globs = ["*.py", "*.md", "*.ipynb"] files = [ @@ -436,7 +436,7 @@ files = [ "docs/docs_tools/snippets/lint_setup/mypy.ini", ] -[tool.prek.scopes.test_common_p] +[tool.dlt.prepush.fingerprints.test_common_p] paths = [ "dlt", "tests/common", diff --git a/tools/prek.py b/tools/prek.py index c697d11262..278b46061f 100644 --- a/tools/prek.py +++ b/tools/prek.py @@ -1,4 +1,4 @@ -"""Pre-push gate: run lint/tests when scope fingerprints change.""" +"""Pre-push gate: run lint/tests when tracked fingerprint inputs change.""" # ruff: noqa: T201 # flake8: noqa: T201 @@ -51,7 +51,7 @@ class Check(NamedTuple): CHECK_NAMES = frozenset(check.name for check in CHECKS) -class ScopeDef(NamedTuple): +class FingerprintDef(NamedTuple): files: tuple[str, ...] paths: tuple[str, ...] globs: tuple[str, ...] 
@@ -75,8 +75,8 @@ class ConfigError(Exception): """Invalid prek local configuration.""" -class UnknownScopeError(Exception): - """Scope name is missing from [tool.prek.scopes] in pyproject.toml.""" +class UnknownCheckError(Exception): + """Check name is missing from [tool.dlt.prepush.fingerprints] in pyproject.toml.""" def repo_root() -> Path: @@ -194,11 +194,11 @@ def write_state(path: Path, state: State) -> None: path.write_text("\n".join(lines).rstrip() + ("\n" if lines else ""), encoding="utf-8") -def scope_from_dict(scope: dict[str, list[str]]) -> ScopeDef: - return ScopeDef( - files=tuple(scope.get("files", [])), - paths=tuple(scope.get("paths", [])), - globs=tuple(scope.get("globs", [])), +def fingerprint_def_from_dict(raw: dict[str, list[str]]) -> FingerprintDef: + return FingerprintDef( + files=tuple(raw.get("files", [])), + paths=tuple(raw.get("paths", [])), + globs=tuple(raw.get("globs", [])), ) @@ -207,17 +207,21 @@ def matches_globs(path: str, globs: list[str]) -> bool: return any(fnmatch.fnmatch(name, pattern) for pattern in globs) -def resolve_scope_files( - scope: ScopeDef, +def resolve_fingerprint_files( + fingerprint_def: FingerprintDef, *, list_tracked: Callable[[list[str]], list[str]], root: Path, ) -> list[str]: - files: set[str] = set(scope.files) - for path_prefix in scope.paths: + files: set[str] = set(fingerprint_def.files) + for path_prefix in fingerprint_def.paths: candidates = list_tracked([path_prefix]) - if scope.globs: - files.update(path for path in candidates if matches_globs(path, list(scope.globs))) + if fingerprint_def.globs: + files.update( + path + for path in candidates + if matches_globs(path, list(fingerprint_def.globs)) + ) else: files.update(candidates) return sorted(path for path in files if (root / path).is_file()) @@ -232,16 +236,17 @@ def fingerprint_files(paths: list[str], read_bytes: Callable[[str], bytes]) -> s return aggregate.hexdigest() -def load_scopes(config_path: Path) -> dict[str, ScopeDef]: +def 
load_fingerprint_defs(config_path: Path) -> dict[str, FingerprintDef]: raw = load_toml(config_path) tool = raw.get("tool", {}) - prek = tool.get("prek", {}) if isinstance(tool, dict) else {} - scopes = prek.get("scopes", {}) - if not isinstance(scopes, dict): - raise ValueError("Invalid [tool.prek.scopes] section in pyproject.toml") + dlt = tool.get("dlt", {}) if isinstance(tool, dict) else {} + prepush = dlt.get("prepush", {}) if isinstance(dlt, dict) else {} + fingerprints = prepush.get("fingerprints", {}) + if not isinstance(fingerprints, dict): + raise ValueError("Invalid [tool.dlt.prepush.fingerprints] section in pyproject.toml") return { - name: scope_from_dict(section) - for name, section in scopes.items() + name: fingerprint_def_from_dict(section) + for name, section in fingerprints.items() if isinstance(section, dict) } @@ -260,15 +265,15 @@ def git_ls_files(root: Path, pathspecs: list[str]) -> list[str]: def make_fingerprint_fn(root: Path, config_path: Path) -> Callable[[str], str]: - scopes = load_scopes(config_path) + fingerprint_defs = load_fingerprint_defs(config_path) - def fingerprint(scope_name: str) -> str: + def fingerprint(check_name: str) -> str: try: - scope = scopes[scope_name] + fingerprint_def = fingerprint_defs[check_name] except KeyError as exc: - raise UnknownScopeError(f"Unknown scope: {scope_name}") from exc - paths = resolve_scope_files( - scope, + raise UnknownCheckError(f"Unknown check: {check_name}") from exc + paths = resolve_fingerprint_files( + fingerprint_def, list_tracked=lambda pathspecs: git_ls_files(root, pathspecs), root=root, ) @@ -527,13 +532,13 @@ def main_fingerprint(*, prek_dir: Path | None = None, argv: list[str] | None = N if argv is None: argv = sys.argv[1:] if len(argv) != 1: - print("Usage: python -m tools.prek fingerprint ", file=sys.stderr) + print("Usage: python -m tools.prek fingerprint ", file=sys.stderr) return 1 root = prek_dir.parent try: print(make_fingerprint_fn(root, root / "pyproject.toml")(argv[0])) 
- except UnknownScopeError as exc: + except UnknownCheckError as exc: print(str(exc), file=sys.stderr) return 1 return 0 diff --git a/tools/tests/test_prek.py b/tools/tests/test_prek.py index 87a6e502c6..d3e042979d 100644 --- a/tools/tests/test_prek.py +++ b/tools/tests/test_prek.py @@ -14,7 +14,7 @@ GateDeps, MAX_PASSED_FINGERPRINTS, PassRecord, - ScopeDef, + FingerprintDef, State, dry_run_lines, fingerprint_files, @@ -28,7 +28,7 @@ plan_checks, record_passed_check, repo_root, - resolve_scope_files, + resolve_fingerprint_files, run_gate, with_passed_check, write_state, @@ -92,14 +92,14 @@ def test_matches_globs(path: str, globs: list[str], expected: bool) -> None: assert matches_globs(path, globs) is expected -def test_resolve_scope_files_and_fingerprint(tmp_path: Path) -> None: +def test_resolve_fingerprint_files_and_fingerprint(tmp_path: Path) -> None: (tmp_path / "root.toml").write_text("c", encoding="utf-8") (tmp_path / "pkg").mkdir() (tmp_path / "pkg" / "keep.py").write_text("a", encoding="utf-8") - scope = ScopeDef(files=("root.toml",), paths=("pkg",), globs=("*.py",)) - paths = resolve_scope_files( - scope, + fingerprint_def = FingerprintDef(files=("root.toml",), paths=("pkg",), globs=("*.py",)) + paths = resolve_fingerprint_files( + fingerprint_def, list_tracked=lambda _: ["pkg/keep.py", "pkg/skip.txt"], root=tmp_path, ) From a2ea0ed3429b5bc482bf822c6b56b0a1c6b0e8f3 Mon Sep 17 00:00:00 2001 From: Alberto Hernandez <22517436+tetelio@users.noreply.github.com> Date: Fri, 29 May 2026 22:20:54 +0200 Subject: [PATCH 15/15] FL --- tools/prek.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tools/prek.py b/tools/prek.py index 278b46061f..841bcbd5ff 100644 --- a/tools/prek.py +++ b/tools/prek.py @@ -218,9 +218,7 @@ def resolve_fingerprint_files( candidates = list_tracked([path_prefix]) if fingerprint_def.globs: files.update( - path - for path in candidates - if matches_globs(path, list(fingerprint_def.globs)) + path for path in candidates if 
matches_globs(path, list(fingerprint_def.globs)) ) else: files.update(candidates)