From 66f6d3bf5916a698195577ce761768bd9f712aaf Mon Sep 17 00:00:00 2001 From: Alberto Hernandez <22517436+tetelio@users.noreply.github.com> Date: Fri, 22 May 2026 15:42:18 +0200 Subject: [PATCH 01/25] More workers and cpu for ahtena tests --- .github/workflows/test_destinations_remote.yml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test_destinations_remote.yml b/.github/workflows/test_destinations_remote.yml index 5931b11dd7..c5a2a030bb 100644 --- a/.github/workflows/test_destinations_remote.yml +++ b/.github/workflows/test_destinations_remote.yml @@ -36,18 +36,24 @@ jobs: filesystem_drivers: "[\"memory\"]" excluded_destination_test_configuration_ids: "[\"athena-iceberg\", \"athena-s3-tables\"]" extras: "--extra athena" + xdist_workers: 12 + runs_on: blacksmith-8vcpu-ubuntu-2404 - name: athena iceberg destinations: "[\"athena\"]" filesystem_drivers: "[\"memory\"]" excluded_destination_test_configuration_ids: "[\"athena\", \"athena-s3-tables\"]" extras: "--extra athena" + xdist_workers: 12 + runs_on: blacksmith-8vcpu-ubuntu-2404 - name: athena s3 tables destinations: "[\"athena\"]" filesystem_drivers: "[\"memory\"]" excluded_destination_test_configuration_ids: "[\"athena\", \"athena-iceberg\"]" extras: "--extra athena" + xdist_workers: 12 + runs_on: blacksmith-8vcpu-ubuntu-2404 # BigQuery - name: bigquery @@ -193,7 +199,7 @@ jobs: defaults: run: shell: bash - runs-on: "ubuntu-latest" + runs-on: ${{ matrix.runs_on || 'ubuntu-latest' }} services: # we need local postgres to run ducklake tests quickly From 44c8333824f5fdd430e41501b97b98b5a6042c43 Mon Sep 17 00:00:00 2001 From: Alberto Hernandez <22517436+tetelio@users.noreply.github.com> Date: Fri, 22 May 2026 15:44:50 +0200 Subject: [PATCH 02/25] Separate common tests --- .github/workflows/test_common.yml | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/.github/workflows/test_common.yml b/.github/workflows/test_common.yml index 
753bb930b7..4865737438 100644 --- a/.github/workflows/test_common.yml +++ b/.github/workflows/test_common.yml @@ -14,10 +14,11 @@ env: jobs: run_common: - name: test + name: test (${{ matrix.python-version }}, ${{ matrix.suite }}) strategy: fail-fast: false matrix: + suite: [pipeline-full, sqlalchemy-2] include: # macos tests @@ -63,6 +64,11 @@ jobs: python-version: "3.13" shell: cmd + exclude: + # Python 3.14 skips the pipeline-full install/test tail; no sqlalchemy-2 leg. + - python-version: "3.14" + suite: sqlalchemy-2 + defaults: run: shell: ${{ matrix.shell }} @@ -104,9 +110,11 @@ jobs: run: make test-common-core env: PYTEST_XDIST_N: auto + if: matrix.suite == 'pipeline-full' - name: Run tools tests run: make test-tools + if: matrix.suite == 'pipeline-full' - name: Install min dependencies to import core source run: make install-common-source @@ -115,6 +123,7 @@ jobs: - name: Run min dependencies source tests run: make test-common-source + if: matrix.suite == 'pipeline-full' - name: Install duckdb dependencies run: make install-pipeline-min @@ -125,6 +134,7 @@ jobs: run: make test-pipeline-min env: PYTEST_XDIST_N: auto + if: matrix.suite == 'pipeline-full' - name: Install pyarrow run: make install-pipeline-arrow @@ -132,7 +142,7 @@ jobs: - name: Run pipeline tests with pyarrow but no pandas installed run: make test-pipeline-arrow - if: matrix.python-version != '3.14' + if: matrix.suite == 'pipeline-full' && matrix.python-version != '3.14' - name: Install workspace dependencies run: make install-workspace @@ -144,7 +154,7 @@ jobs: run: make test-workspace env: PYTEST_XDIST_N: auto - if: matrix.python-version != '3.14' + if: matrix.suite == 'pipeline-full' && matrix.python-version != '3.14' - name: Install pipeline and sources dependencies run: make install-pipeline-full @@ -156,18 +166,17 @@ jobs: run: make test-pipeline-full env: PYTEST_XDIST_N: auto - if: matrix.python-version != '3.14' + if: matrix.suite == 'pipeline-full' && matrix.python-version != '3.14' - 
# here we upgrade sql alchemy to 2 an run the sql_database tests again - name: Upgrade sql alchemy run: make install-sqlalchemy2 - if: matrix.python-version != '3.14' + if: matrix.suite == 'sqlalchemy-2' && matrix.python-version != '3.14' - name: Run sql database tests and others that require sqlalchemy 2.0 run: make test-with-sqlalchemy-2 env: PYTEST_XDIST_N: auto - if: matrix.python-version != '3.14' + if: matrix.suite == 'sqlalchemy-2' && matrix.python-version != '3.14' matrix_job_required_check: name: common | common tests From 7f0434969206efcd1a055dc5eb14a45c2f1c8700 Mon Sep 17 00:00:00 2001 From: Alberto Hernandez <22517436+tetelio@users.noreply.github.com> Date: Fri, 22 May 2026 15:48:15 +0200 Subject: [PATCH 03/25] Fix yml --- .github/workflows/test_common.yml | 53 ++++++++++++++++++++++++++++--- 1 file changed, 48 insertions(+), 5 deletions(-) diff --git a/.github/workflows/test_common.yml b/.github/workflows/test_common.yml index 4865737438..0a81bc13c0 100644 --- a/.github/workflows/test_common.yml +++ b/.github/workflows/test_common.yml @@ -18,55 +18,98 @@ jobs: strategy: fail-fast: false matrix: - suite: [pipeline-full, sqlalchemy-2] + # `suite` is set on each row (not a top-level axis) so we can skip sqlalchemy-2 on 3.14. 
include: # macos tests - os: blacksmith-12vcpu-macos-latest python-version: "3.11" shell: bash + suite: pipeline-full + - os: blacksmith-12vcpu-macos-latest + python-version: "3.11" + shell: bash + suite: sqlalchemy-2 # linux tests - os: blacksmith-8vcpu-ubuntu-2404 python-version: "3.10" shell: bash + suite: pipeline-full + - os: blacksmith-8vcpu-ubuntu-2404 + python-version: "3.10" + shell: bash + suite: sqlalchemy-2 - os: blacksmith-8vcpu-ubuntu-2404 python-version: "3.11" shell: bash + suite: pipeline-full + - os: blacksmith-8vcpu-ubuntu-2404 + python-version: "3.11" + shell: bash + suite: sqlalchemy-2 + - os: blacksmith-8vcpu-ubuntu-2404 + python-version: "3.12" + shell: bash + suite: pipeline-full - os: blacksmith-8vcpu-ubuntu-2404 python-version: "3.12" shell: bash + suite: sqlalchemy-2 + - os: blacksmith-8vcpu-ubuntu-2404 + python-version: "3.13" + shell: bash + suite: pipeline-full - os: blacksmith-8vcpu-ubuntu-2404 python-version: "3.13" shell: bash + suite: sqlalchemy-2 - os: blacksmith-12vcpu-macos-latest python-version: "3.14" shell: bash + suite: pipeline-full # linux test with minimal dependencies - os: blacksmith-8vcpu-ubuntu-2404 python-version: "3.11" shell: bash uv_sync_args: '--resolution lowest-direct' # could also be 'direct' + suite: pipeline-full + - os: blacksmith-8vcpu-ubuntu-2404 + python-version: "3.11" + shell: bash + uv_sync_args: '--resolution lowest-direct' # could also be 'direct' + suite: sqlalchemy-2 # linux test with newest available allowed packages (will update lockfile, should not be committed if run locally) - os: blacksmith-8vcpu-ubuntu-2404 python-version: "3.11" shell: bash uv_sync_args: '--upgrade' # could also be 'direct' + suite: pipeline-full + - os: blacksmith-8vcpu-ubuntu-2404 + python-version: "3.11" + shell: bash + uv_sync_args: '--upgrade' # could also be 'direct' + suite: sqlalchemy-2 # windows tests - os: blacksmith-8vcpu-windows-2025 python-version: "3.11" shell: cmd + suite: pipeline-full + - os: 
blacksmith-8vcpu-windows-2025 + python-version: "3.11" + shell: cmd + suite: sqlalchemy-2 + - os: blacksmith-8vcpu-windows-2025 + python-version: "3.13" + shell: cmd + suite: pipeline-full - os: blacksmith-8vcpu-windows-2025 python-version: "3.13" shell: cmd - - exclude: - # Python 3.14 skips the pipeline-full install/test tail; no sqlalchemy-2 leg. - - python-version: "3.14" suite: sqlalchemy-2 defaults: From 0ea4b971d4d5788c69bd99130c4387939a60d377 Mon Sep 17 00:00:00 2001 From: Alberto Hernandez <22517436+tetelio@users.noreply.github.com> Date: Fri, 22 May 2026 16:03:34 +0200 Subject: [PATCH 04/25] Add os to matrix split --- .github/workflows/test_common.yml | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test_common.yml b/.github/workflows/test_common.yml index 0a81bc13c0..7d40bd2485 100644 --- a/.github/workflows/test_common.yml +++ b/.github/workflows/test_common.yml @@ -14,7 +14,7 @@ env: jobs: run_common: - name: test (${{ matrix.python-version }}, ${{ matrix.suite }}) + name: test (${{ matrix.platform }}, ${{ matrix.python-version }}, ${{ matrix.suite }}) strategy: fail-fast: false matrix: @@ -23,60 +23,73 @@ jobs: # macos tests - os: blacksmith-12vcpu-macos-latest + platform: macos python-version: "3.11" shell: bash suite: pipeline-full - os: blacksmith-12vcpu-macos-latest + platform: macos python-version: "3.11" shell: bash suite: sqlalchemy-2 # linux tests - os: blacksmith-8vcpu-ubuntu-2404 + platform: linux python-version: "3.10" shell: bash suite: pipeline-full - os: blacksmith-8vcpu-ubuntu-2404 + platform: linux python-version: "3.10" shell: bash suite: sqlalchemy-2 - os: blacksmith-8vcpu-ubuntu-2404 + platform: linux python-version: "3.11" shell: bash suite: pipeline-full - os: blacksmith-8vcpu-ubuntu-2404 + platform: linux python-version: "3.11" shell: bash suite: sqlalchemy-2 - os: blacksmith-8vcpu-ubuntu-2404 + platform: linux python-version: "3.12" shell: bash suite: pipeline-full - os: 
blacksmith-8vcpu-ubuntu-2404 + platform: linux python-version: "3.12" shell: bash suite: sqlalchemy-2 - os: blacksmith-8vcpu-ubuntu-2404 + platform: linux python-version: "3.13" shell: bash suite: pipeline-full - os: blacksmith-8vcpu-ubuntu-2404 + platform: linux python-version: "3.13" shell: bash suite: sqlalchemy-2 - os: blacksmith-12vcpu-macos-latest + platform: macos python-version: "3.14" shell: bash suite: pipeline-full # linux test with minimal dependencies - os: blacksmith-8vcpu-ubuntu-2404 + platform: linux-lowest-direct python-version: "3.11" shell: bash uv_sync_args: '--resolution lowest-direct' # could also be 'direct' suite: pipeline-full - os: blacksmith-8vcpu-ubuntu-2404 + platform: linux-lowest-direct python-version: "3.11" shell: bash uv_sync_args: '--resolution lowest-direct' # could also be 'direct' @@ -84,11 +97,13 @@ jobs: # linux test with newest available allowed packages (will update lockfile, should not be committed if run locally) - os: blacksmith-8vcpu-ubuntu-2404 + platform: linux-upgrade python-version: "3.11" shell: bash uv_sync_args: '--upgrade' # could also be 'direct' suite: pipeline-full - os: blacksmith-8vcpu-ubuntu-2404 + platform: linux-upgrade python-version: "3.11" shell: bash uv_sync_args: '--upgrade' # could also be 'direct' @@ -96,18 +111,22 @@ jobs: # windows tests - os: blacksmith-8vcpu-windows-2025 + platform: windows python-version: "3.11" shell: cmd suite: pipeline-full - os: blacksmith-8vcpu-windows-2025 + platform: windows python-version: "3.11" shell: cmd suite: sqlalchemy-2 - os: blacksmith-8vcpu-windows-2025 + platform: windows python-version: "3.13" shell: cmd suite: pipeline-full - os: blacksmith-8vcpu-windows-2025 + platform: windows python-version: "3.13" shell: cmd suite: sqlalchemy-2 From 8870c05b3a2b40fc4635b754c4b1a999cb0114dd Mon Sep 17 00:00:00 2001 From: Alberto Hernandez <22517436+tetelio@users.noreply.github.com> Date: Fri, 22 May 2026 16:14:14 +0200 Subject: [PATCH 05/25] Parallelize more tests --- 
.github/workflows/test_sources_local.yml | 29 ++++++++++++++------- .github/workflows/test_tools_dbt_runner.yml | 13 +++++++-- 2 files changed, 31 insertions(+), 11 deletions(-) diff --git a/.github/workflows/test_sources_local.yml b/.github/workflows/test_sources_local.yml index d4ed622532..c682256d65 100644 --- a/.github/workflows/test_sources_local.yml +++ b/.github/workflows/test_sources_local.yml @@ -16,7 +16,7 @@ env: jobs: run_loader: - name: src | rest_api, sql_database, filesystem + name: src | ${{ matrix.name }} (${{ matrix.suite }}) strategy: fail-fast: false matrix: @@ -25,17 +25,28 @@ jobs: dbms_extras: "--extra postgres --extra postgis" needs_postgres: true pytest_mark: "not mssql and not oracle" - allows_sqlalchemy_1_4: true + suite: sources-load + - name: postgres etc. + dbms_extras: "--extra postgres --extra postgis" + needs_postgres: true + pytest_mark: "not mssql and not oracle" + suite: sql-database + - name: mssql + dbms_extras: "--extra mssql" + needs_mssql: true + pytest_mark: "mssql" + suite: sources-load - name: mssql dbms_extras: "--extra mssql" needs_mssql: true pytest_mark: "mssql" - allows_sqlalchemy_1_4: true + suite: sql-database + # Oracle: sqlalchemy 2 / sql_database only (no sources-load leg on SA 1.4) - name: oracle dbms_extras: "--extra oracle" needs_oracle: true pytest_mark: "oracle" - allows_sqlalchemy_1_4: false + suite: sql-database defaults: run: shell: bash @@ -79,23 +90,23 @@ jobs: run: | cp tests/.dlt/dev.secrets.toml tests/.dlt/secrets.toml - # run sources tests in load against configured destinations - - name: Run tests linux + - name: Run sources load tests (sqlalchemy 1.4) run: make test-sources-load env: PYTEST_XDIST_N: auto PYTEST_MARKERS: ${{ matrix.pytest_mark }} - if: ${{ matrix.allows_sqlalchemy_1_4 }} + if: matrix.suite == 'sources-load' - # here we upgrade sql alchemy to 2 an run the sql_database tests again - name: Upgrade sql alchemy run: make install-sqlalchemy2 + if: matrix.suite == 'sql-database' - - 
name: Run tests Linux + - name: Run sql_database source tests (sqlalchemy 2) run: make test-sources-sql-database env: PYTEST_XDIST_N: auto PYTEST_MARKERS: ${{ matrix.pytest_mark }} + if: matrix.suite == 'sql-database' - name: Stop databases run: docker compose -f "tests/load/sources/sql_database/docker-compose.yml" down -v diff --git a/.github/workflows/test_tools_dbt_runner.yml b/.github/workflows/test_tools_dbt_runner.yml index d1377a1a16..dcb9d70898 100644 --- a/.github/workflows/test_tools_dbt_runner.yml +++ b/.github/workflows/test_tools_dbt_runner.yml @@ -12,7 +12,13 @@ env: jobs: run_dbt: - name: tools | dbt runner tests + name: tools | dbt (${{ matrix.suite }}) + strategy: + fail-fast: false + matrix: + include: + - suite: no-venv + - suite: venv defaults: run: shell: bash @@ -48,11 +54,14 @@ jobs: run: make test-dbt-no-venv env: PYTEST_XDIST_N: 3 + if: matrix.suite == 'no-venv' - name: Remove dbt-core run: uv run pip uninstall dbt-core -y + if: matrix.suite == 'venv' - name: Run dbt runner with venv - Linux/MAC run: make test-dbt-runner-venv env: - PYTEST_XDIST_N: 3 \ No newline at end of file + PYTEST_XDIST_N: 3 + if: matrix.suite == 'venv' From d6f2e6972bfc7ff374bf3ed87b29ac31ec1d293a Mon Sep 17 00:00:00 2001 From: Alberto Hernandez <22517436+tetelio@users.noreply.github.com> Date: Fri, 22 May 2026 16:19:06 +0200 Subject: [PATCH 06/25] Faster common --- .github/workflows/test_common.yml | 74 ++++++++++++++++++++++++++----- 1 file changed, 63 insertions(+), 11 deletions(-) diff --git a/.github/workflows/test_common.yml b/.github/workflows/test_common.yml index 7d40bd2485..6fe780036d 100644 --- a/.github/workflows/test_common.yml +++ b/.github/workflows/test_common.yml @@ -18,10 +18,14 @@ jobs: strategy: fail-fast: false matrix: - # `suite` is set on each row (not a top-level axis) so we can skip sqlalchemy-2 on 3.14. 
include: # macos tests + - os: blacksmith-12vcpu-macos-latest + platform: macos + python-version: "3.11" + shell: bash + suite: common/pipeline-mindeps-workspace - os: blacksmith-12vcpu-macos-latest platform: macos python-version: "3.11" @@ -34,6 +38,11 @@ jobs: suite: sqlalchemy-2 # linux tests + - os: blacksmith-8vcpu-ubuntu-2404 + platform: linux + python-version: "3.10" + shell: bash + suite: common/pipeline-mindeps-workspace - os: blacksmith-8vcpu-ubuntu-2404 platform: linux python-version: "3.10" @@ -44,6 +53,11 @@ jobs: python-version: "3.10" shell: bash suite: sqlalchemy-2 + - os: blacksmith-8vcpu-ubuntu-2404 + platform: linux + python-version: "3.11" + shell: bash + suite: common/pipeline-mindeps-workspace - os: blacksmith-8vcpu-ubuntu-2404 platform: linux python-version: "3.11" @@ -54,6 +68,11 @@ jobs: python-version: "3.11" shell: bash suite: sqlalchemy-2 + - os: blacksmith-8vcpu-ubuntu-2404 + platform: linux + python-version: "3.12" + shell: bash + suite: common/pipeline-mindeps-workspace - os: blacksmith-8vcpu-ubuntu-2404 platform: linux python-version: "3.12" @@ -64,6 +83,11 @@ jobs: python-version: "3.12" shell: bash suite: sqlalchemy-2 + - os: blacksmith-8vcpu-ubuntu-2404 + platform: linux + python-version: "3.13" + shell: bash + suite: common/pipeline-mindeps-workspace - os: blacksmith-8vcpu-ubuntu-2404 platform: linux python-version: "3.13" @@ -75,13 +99,20 @@ jobs: shell: bash suite: sqlalchemy-2 + # 3.14: early suite only (no pipeline-full / sqlalchemy-2 tail) - os: blacksmith-12vcpu-macos-latest platform: macos python-version: "3.14" shell: bash - suite: pipeline-full + suite: common/pipeline-mindeps-workspace # linux test with minimal dependencies + - os: blacksmith-8vcpu-ubuntu-2404 + platform: linux-lowest-direct + python-version: "3.11" + shell: bash + uv_sync_args: '--resolution lowest-direct' # could also be 'direct' + suite: common/pipeline-mindeps-workspace - os: blacksmith-8vcpu-ubuntu-2404 platform: linux-lowest-direct python-version: 
"3.11" @@ -96,6 +127,12 @@ jobs: suite: sqlalchemy-2 # linux test with newest available allowed packages (will update lockfile, should not be committed if run locally) + - os: blacksmith-8vcpu-ubuntu-2404 + platform: linux-upgrade + python-version: "3.11" + shell: bash + uv_sync_args: '--upgrade' # could also be 'direct' + suite: common/pipeline-mindeps-workspace - os: blacksmith-8vcpu-ubuntu-2404 platform: linux-upgrade python-version: "3.11" @@ -110,6 +147,11 @@ jobs: suite: sqlalchemy-2 # windows tests + - os: blacksmith-8vcpu-windows-2025 + platform: windows + python-version: "3.11" + shell: cmd + suite: common/pipeline-mindeps-workspace - os: blacksmith-8vcpu-windows-2025 platform: windows python-version: "3.11" @@ -120,6 +162,11 @@ jobs: python-version: "3.11" shell: cmd suite: sqlalchemy-2 + - os: blacksmith-8vcpu-windows-2025 + platform: windows + python-version: "3.13" + shell: cmd + suite: common/pipeline-mindeps-workspace - os: blacksmith-8vcpu-windows-2025 platform: windows python-version: "3.13" @@ -168,61 +215,65 @@ jobs: env: UV_SYNC_ARGS: ${{ matrix.uv_sync_args }} + # --- common / pipeline-mindeps / workspace (sqlalchemy 1.4 path) --- - name: Run common tests with minimum dependencies run: make test-common-core env: PYTEST_XDIST_N: auto - if: matrix.suite == 'pipeline-full' + if: matrix.suite == 'common/pipeline-mindeps-workspace' - name: Run tools tests run: make test-tools - if: matrix.suite == 'pipeline-full' + if: matrix.suite == 'common/pipeline-mindeps-workspace' - name: Install min dependencies to import core source run: make install-common-source env: UV_SYNC_ARGS: ${{ matrix.uv_sync_args }} + if: matrix.suite == 'common/pipeline-mindeps-workspace' || matrix.suite == 'pipeline-full' || matrix.suite == 'sqlalchemy-2' - name: Run min dependencies source tests run: make test-common-source - if: matrix.suite == 'pipeline-full' + if: matrix.suite == 'common/pipeline-mindeps-workspace' - name: Install duckdb dependencies run: make 
install-pipeline-min env: UV_SYNC_ARGS: ${{ matrix.uv_sync_args }} + if: matrix.suite == 'common/pipeline-mindeps-workspace' || matrix.suite == 'pipeline-full' || matrix.suite == 'sqlalchemy-2' - name: Run pipeline tests with minimum deps run: make test-pipeline-min env: PYTEST_XDIST_N: auto - if: matrix.suite == 'pipeline-full' + if: matrix.suite == 'common/pipeline-mindeps-workspace' - name: Install pyarrow run: make install-pipeline-arrow - if: matrix.python-version != '3.14' + if: (matrix.suite == 'common/pipeline-mindeps-workspace' || matrix.suite == 'pipeline-full' || matrix.suite == 'sqlalchemy-2') && matrix.python-version != '3.14' - name: Run pipeline tests with pyarrow but no pandas installed run: make test-pipeline-arrow - if: matrix.suite == 'pipeline-full' && matrix.python-version != '3.14' + if: matrix.suite == 'common/pipeline-mindeps-workspace' && matrix.python-version != '3.14' - name: Install workspace dependencies run: make install-workspace env: UV_SYNC_ARGS: ${{ matrix.uv_sync_args }} - if: matrix.python-version != '3.14' + if: (matrix.suite == 'common/pipeline-mindeps-workspace' || matrix.suite == 'pipeline-full' || matrix.suite == 'sqlalchemy-2') && matrix.python-version != '3.14' - name: Run workspace tests run: make test-workspace env: PYTEST_XDIST_N: auto - if: matrix.suite == 'pipeline-full' && matrix.python-version != '3.14' + if: matrix.suite == 'common/pipeline-mindeps-workspace' && matrix.python-version != '3.14' + # --- pipeline-full (install chain only, then full extract/pipeline suite) --- - name: Install pipeline and sources dependencies run: make install-pipeline-full env: UV_SYNC_ARGS: ${{ matrix.uv_sync_args }} - if: matrix.python-version != '3.14' + if: (matrix.suite == 'pipeline-full' || matrix.suite == 'sqlalchemy-2') && matrix.python-version != '3.14' - name: Run extract and pipeline tests run: make test-pipeline-full @@ -230,6 +281,7 @@ jobs: PYTEST_XDIST_N: auto if: matrix.suite == 'pipeline-full' && matrix.python-version 
!= '3.14' + # --- sqlalchemy 2 (sql_database + pyiceberg libs) --- - name: Upgrade sql alchemy run: make install-sqlalchemy2 if: matrix.suite == 'sqlalchemy-2' && matrix.python-version != '3.14' From 8a5ba76583adf1121da80ca2d9531f584ce9921f Mon Sep 17 00:00:00 2001 From: Alberto Hernandez <22517436+tetelio@users.noreply.github.com> Date: Fri, 22 May 2026 16:21:27 +0200 Subject: [PATCH 07/25] Remove blacksmith from remote destinations --- .github/workflows/test_destinations_remote.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/test_destinations_remote.yml b/.github/workflows/test_destinations_remote.yml index c5a2a030bb..3bfe74c1bc 100644 --- a/.github/workflows/test_destinations_remote.yml +++ b/.github/workflows/test_destinations_remote.yml @@ -37,7 +37,6 @@ jobs: excluded_destination_test_configuration_ids: "[\"athena-iceberg\", \"athena-s3-tables\"]" extras: "--extra athena" xdist_workers: 12 - runs_on: blacksmith-8vcpu-ubuntu-2404 - name: athena iceberg destinations: "[\"athena\"]" @@ -45,7 +44,6 @@ jobs: excluded_destination_test_configuration_ids: "[\"athena\", \"athena-s3-tables\"]" extras: "--extra athena" xdist_workers: 12 - runs_on: blacksmith-8vcpu-ubuntu-2404 - name: athena s3 tables destinations: "[\"athena\"]" @@ -53,7 +51,6 @@ jobs: excluded_destination_test_configuration_ids: "[\"athena\", \"athena-iceberg\"]" extras: "--extra athena" xdist_workers: 12 - runs_on: blacksmith-8vcpu-ubuntu-2404 # BigQuery - name: bigquery From c5fbfeb15892c97a008fd8529975900dc1d7dcd0 Mon Sep 17 00:00:00 2001 From: Alberto Hernandez <22517436+tetelio@users.noreply.github.com> Date: Fri, 22 May 2026 16:35:10 +0200 Subject: [PATCH 08/25] Better readability --- .github/workflows/fork_tests_with_secrets.yml | 4 ++-- .github/workflows/main.yml | 18 +++++++++--------- .github/workflows/test_common.yml | 6 +++--- .github/workflows/test_destinations_local.yml | 4 ++-- .github/workflows/test_destinations_remote.yml | 4 ++-- 
.github/workflows/test_hub.yml | 6 +++--- .github/workflows/test_sources_local.yml | 4 ++-- .github/workflows/test_tools_airflow.yml | 4 ++-- .github/workflows/test_tools_build_images.yml | 4 ++-- .github/workflows/test_tools_dashboard.yml | 6 +++--- .github/workflows/test_tools_dbt_runner.yml | 4 ++-- 11 files changed, 32 insertions(+), 32 deletions(-) diff --git a/.github/workflows/fork_tests_with_secrets.yml b/.github/workflows/fork_tests_with_secrets.yml index 7f65ea9b4d..6139730fb9 100644 --- a/.github/workflows/fork_tests_with_secrets.yml +++ b/.github/workflows/fork_tests_with_secrets.yml @@ -25,7 +25,7 @@ jobs: - run: echo "Fork PR authorized — running secret-requiring tests against ${{ github.event.pull_request.head.sha }}" test_destinations_remote: - name: test remote destinations with secrets (fork) + name: dest remote needs: [authorize] uses: ./.github/workflows/test_destinations_remote.yml secrets: inherit @@ -33,7 +33,7 @@ jobs: run_full_test_suite: ${{ contains(github.event.pull_request.labels.*.name, 'ci full') }} test_tools_dbt_runner: - name: test dbt runner (fork) + name: dbt needs: [authorize] uses: ./.github/workflows/test_tools_dbt_runner.yml secrets: inherit diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 189c169403..eb5d7c01ae 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -44,12 +44,12 @@ jobs: if: needs.get_docs_changes.outputs.changes_outside_docs == 'true' test_common: - name: run common tests on all python versions and OSes + name: common needs: [lint] uses: ./.github/workflows/test_common.yml test_dashboard: - name: run dashboard tests + name: dashboard needs: [lint] uses: ./.github/workflows/test_tools_dashboard.yml @@ -58,27 +58,27 @@ jobs: # Other tests that do not require remote connections # test_destinations_local: - name: run local destination tests without secrets + name: dest local needs: test_common uses: ./.github/workflows/test_destinations_local.yml 
test_sources_local: - name: run local source tests without secrets + name: src local needs: test_common uses: ./.github/workflows/test_sources_local.yml test_hub: - name: test dlthub features + name: hub needs: lint uses: ./.github/workflows/test_hub.yml test_tools_airflow: - name: test airflow helpers + name: airflow needs: lint uses: ./.github/workflows/test_tools_airflow.yml test_tools_build_images: - name: test build images + name: build images needs: lint uses: ./.github/workflows/test_tools_build_images.yml @@ -88,7 +88,7 @@ jobs: # test_destinations_remote: - name: test remote destinations with secrets + name: dest remote needs: [authorize_secrets, test_common] uses: ./.github/workflows/test_destinations_remote.yml secrets: inherit @@ -99,7 +99,7 @@ jobs: # Other tools and tests that require secrets # test_tools_dbt_runner: - name: test dbt runner + name: dbt needs: [test_common, authorize_secrets] uses: ./.github/workflows/test_tools_dbt_runner.yml secrets: inherit diff --git a/.github/workflows/test_common.yml b/.github/workflows/test_common.yml index 6fe780036d..d2acce9e5c 100644 --- a/.github/workflows/test_common.yml +++ b/.github/workflows/test_common.yml @@ -1,4 +1,4 @@ -name: common | common +name: common on: workflow_call: @@ -14,7 +14,7 @@ env: jobs: run_common: - name: test (${{ matrix.platform }}, ${{ matrix.python-version }}, ${{ matrix.suite }}) + name: ${{ matrix.platform }} · py${{ matrix.python-version }} · ${{ matrix.suite }} strategy: fail-fast: false matrix: @@ -293,7 +293,7 @@ jobs: if: matrix.suite == 'sqlalchemy-2' && matrix.python-version != '3.14' matrix_job_required_check: - name: common | common tests + name: common · check needs: run_common runs-on: ubuntu-latest if: always() diff --git a/.github/workflows/test_destinations_local.yml b/.github/workflows/test_destinations_local.yml index 89e79a0432..12361d340f 100644 --- a/.github/workflows/test_destinations_local.yml +++ b/.github/workflows/test_destinations_local.yml @@ -1,7 +1,7 
@@ # Tests destinations that can run without credentials. # i.e. local postgres, duckdb, filesystem (with local fs/memory bucket) -name: dest | postgres, duckdb, fs, weaviate, qdrant +name: dest local on: workflow_call: @@ -15,7 +15,7 @@ env: jobs: run_destinations_local: - name: dest | local + name: ${{ matrix.name }} strategy: fail-fast: false diff --git a/.github/workflows/test_destinations_remote.yml b/.github/workflows/test_destinations_remote.yml index 3bfe74c1bc..1c341c7b08 100644 --- a/.github/workflows/test_destinations_remote.yml +++ b/.github/workflows/test_destinations_remote.yml @@ -1,5 +1,5 @@ -name: dest | remote +name: dest remote on: workflow_call: @@ -24,7 +24,7 @@ env: jobs: run_destinations_remote: - name: dest | remote + name: ${{ matrix.name }} strategy: fail-fast: false matrix: diff --git a/.github/workflows/test_hub.yml b/.github/workflows/test_hub.yml index 36150869aa..95790e55b0 100644 --- a/.github/workflows/test_hub.yml +++ b/.github/workflows/test_hub.yml @@ -1,4 +1,4 @@ -name: hub | dlthub features +name: hub # # dlthub smoke tests against the nightly build. 
@@ -13,7 +13,7 @@ env: jobs: run_hub_features: - name: test + name: ${{ matrix.os }} · py${{ matrix.python-version }} strategy: fail-fast: false matrix: @@ -99,7 +99,7 @@ jobs: # if: ${{ matrix.python-version == '3.11' && matrix.os == 'ubuntu-latest' }} matrix_job_required_check: - name: hub | dlthub features tests + name: hub · check needs: run_hub_features runs-on: ubuntu-latest if: always() diff --git a/.github/workflows/test_sources_local.yml b/.github/workflows/test_sources_local.yml index c682256d65..a1e2872be1 100644 --- a/.github/workflows/test_sources_local.yml +++ b/.github/workflows/test_sources_local.yml @@ -1,6 +1,6 @@ # Tests sources against a couple of local destinations -name: src | rest_api, sql_database, filesystem +name: src local on: workflow_call: @@ -16,7 +16,7 @@ env: jobs: run_loader: - name: src | ${{ matrix.name }} (${{ matrix.suite }}) + name: ${{ matrix.name }} · ${{ matrix.suite }} strategy: fail-fast: false matrix: diff --git a/.github/workflows/test_tools_airflow.yml b/.github/workflows/test_tools_airflow.yml index 2ad85a6f78..d87fbc908a 100644 --- a/.github/workflows/test_tools_airflow.yml +++ b/.github/workflows/test_tools_airflow.yml @@ -1,4 +1,4 @@ -name: tools | airflow +name: airflow on: workflow_call: @@ -6,7 +6,7 @@ on: jobs: run_airflow: - name: tools | airflow ${{ matrix.airflow-version }} tests + name: airflow ${{ matrix.airflow-version }} runs-on: blacksmith-8vcpu-ubuntu-2404 strategy: fail-fast: false diff --git a/.github/workflows/test_tools_build_images.yml b/.github/workflows/test_tools_build_images.yml index 79cf25b1c4..46d0e301b5 100644 --- a/.github/workflows/test_tools_build_images.yml +++ b/.github/workflows/test_tools_build_images.yml @@ -1,4 +1,4 @@ -name: tools | docker images +name: build images on: workflow_call: @@ -6,7 +6,7 @@ on: jobs: run_airflow: - name: tools | docker images build + name: build runs-on: blacksmith-8vcpu-ubuntu-2404 steps: diff --git a/.github/workflows/test_tools_dashboard.yml 
b/.github/workflows/test_tools_dashboard.yml index 25cb8a8969..eb9e679569 100644 --- a/.github/workflows/test_tools_dashboard.yml +++ b/.github/workflows/test_tools_dashboard.yml @@ -1,4 +1,4 @@ -name: common | common +name: dashboard on: workflow_call: @@ -15,7 +15,7 @@ env: jobs: run_common: - name: test + name: py${{ matrix.python-version }} · ${{ matrix.os }} strategy: fail-fast: false matrix: @@ -111,7 +111,7 @@ jobs: if: matrix.python-version != '3.14.0-beta.4' matrix_job_required_check: - name: common | common tests + name: dashboard · check needs: run_common runs-on: blacksmith-2vcpu-ubuntu-2404 if: always() diff --git a/.github/workflows/test_tools_dbt_runner.yml b/.github/workflows/test_tools_dbt_runner.yml index dcb9d70898..3ef6785c93 100644 --- a/.github/workflows/test_tools_dbt_runner.yml +++ b/.github/workflows/test_tools_dbt_runner.yml @@ -1,5 +1,5 @@ -name: tools | dbt runner +name: dbt on: workflow_call: @@ -12,7 +12,7 @@ env: jobs: run_dbt: - name: tools | dbt (${{ matrix.suite }}) + name: ${{ matrix.suite }} strategy: fail-fast: false matrix: From 62bd1b8641e35d8488d33efb98aa03c2ebe256d0 Mon Sep 17 00:00:00 2001 From: Alberto Hernandez <22517436+tetelio@users.noreply.github.com> Date: Fri, 22 May 2026 17:43:03 +0200 Subject: [PATCH 09/25] More durations listed --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index c0d6b9bc8a..dd7d6c5ee8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -530,7 +530,7 @@ ignore_missing_imports = true [tool.pytest.ini_options] pythonpath = ["dlt"] norecursedirs = [".direnv", ".eggs", "build", "dist"] -addopts = "-p no:xdist --showlocals --durations 10 -m 'not rfam'" +addopts = "-p no:xdist --showlocals --durations 200 -m 'not rfam'" xfail_strict = true timeout = 1800 faulthandler_timeout = 1500 From 25c3c46d2313ee63d3ffb5f4b1516a28f7b415d4 Mon Sep 17 00:00:00 2001 From: Alberto Hernandez <22517436+tetelio@users.noreply.github.com> Date: Fri, 22
May 2026 17:54:07 +0200 Subject: [PATCH 10/25] Better labels --- .github/workflows/build_docs.yml | 2 +- .github/workflows/lint.yml | 25 +++- .github/workflows/main.yml | 6 +- .github/workflows/test_common.yml | 117 +++++++++++++----- .github/workflows/test_destinations_local.yml | 17 +-- .../workflows/test_destinations_remote.yml | 45 +++---- .github/workflows/test_docs.yml | 2 +- .github/workflows/test_hub.yml | 47 ++++--- .github/workflows/test_sources_local.yml | 18 ++- .github/workflows/test_tools_airflow.yml | 11 +- .github/workflows/test_tools_dashboard.yml | 37 ++++-- .github/workflows/test_tools_dbt_runner.yml | 11 +- 12 files changed, 227 insertions(+), 111 deletions(-) diff --git a/.github/workflows/build_docs.yml b/.github/workflows/build_docs.yml index fbbb040dd1..a83e3c659f 100644 --- a/.github/workflows/build_docs.yml +++ b/.github/workflows/build_docs.yml @@ -6,7 +6,7 @@ on: jobs: build_docs: - name: docs | build docs + name: build runs-on: blacksmith-8vcpu-ubuntu-2404 steps: diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 39beef10a9..b16ab68608 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -8,13 +8,28 @@ on: jobs: run_lint: - name: lint + # Job title: matrix.display_name + optional "-{N}vcpu" (runner image is matrix.os only). 
+ name: ${{ matrix.display_name }}${{ matrix.vcpus && format('-{0}vcpu', matrix.vcpus) || '' }} strategy: fail-fast: true matrix: - os: - - blacksmith-8vcpu-ubuntu-2404 - python-version: ["3.10", "3.11", "3.12", "3.13"] + include: + - display_name: "3.10" + vcpus: 8 + os: blacksmith-8vcpu-ubuntu-2404 + python-version: "3.10" + - display_name: "3.11" + vcpus: 8 + os: blacksmith-8vcpu-ubuntu-2404 + python-version: "3.11" + - display_name: "3.12" + vcpus: 8 + os: blacksmith-8vcpu-ubuntu-2404 + python-version: "3.12" + - display_name: "3.13" + vcpus: 8 + os: blacksmith-8vcpu-ubuntu-2404 + python-version: "3.13" defaults: run: @@ -62,7 +77,7 @@ jobs: matrix_job_required_check: - name: lint | code & tests + name: lint-check needs: run_lint runs-on: blacksmith-2vcpu-ubuntu-2404 if: always() diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index eb5d7c01ae..56503ab280 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -19,7 +19,7 @@ jobs: # check for secret-requiring jobs: secrets are only available for internal (same-repo) PRs under pull_request. # fork PRs with the `ci from fork` label are handled separately by fork_tests_with_secrets.yml (pull_request_target). 
authorize_secrets: - name: secrets available (internal PRs only) + name: secrets (internal PRs) if: ${{ github.event.pull_request.head.repo.full_name == github.repository && (github.event.action == 'opened' || github.event.action == 'synchronize') }} runs-on: blacksmith-2vcpu-ubuntu-2404 steps: @@ -28,7 +28,7 @@ jobs: # testing and linting of docs snippets is always run to catch problems in the docs # NOTE: we could splint linting and testing of docs so linter can also always run for fork PRs test_docs: - name: lint and test snippets, examples and notebooks in docs, lints tools + name: docs uses: ./.github/workflows/test_docs.yml secrets: inherit @@ -38,7 +38,7 @@ jobs: uses: ./.github/workflows/build_docs.yml lint: - name: lint on all python versions + name: lint needs: get_docs_changes uses: ./.github/workflows/lint.yml if: needs.get_docs_changes.outputs.changes_outside_docs == 'true' diff --git a/.github/workflows/test_common.yml b/.github/workflows/test_common.yml index d2acce9e5c..62ae187d1d 100644 --- a/.github/workflows/test_common.yml +++ b/.github/workflows/test_common.yml @@ -14,112 +14,151 @@ env: jobs: run_common: - name: ${{ matrix.platform }} · py${{ matrix.python-version }} · ${{ matrix.suite }} + # Job title: matrix.display_name + optional "-{N}vcpu" (runner image is matrix.os only). 
+ name: ${{ matrix.display_name }}${{ matrix.vcpus && format('-{0}vcpu', matrix.vcpus) || '' }} strategy: fail-fast: false matrix: include: # macos tests - - os: blacksmith-12vcpu-macos-latest + - display_name: macos-3.11-common/pipeline-mindeps-workspace + vcpus: 12 + os: blacksmith-12vcpu-macos-latest platform: macos python-version: "3.11" shell: bash suite: common/pipeline-mindeps-workspace - - os: blacksmith-12vcpu-macos-latest + - display_name: macos-3.11-pipeline-full + vcpus: 12 + os: blacksmith-12vcpu-macos-latest platform: macos python-version: "3.11" shell: bash suite: pipeline-full - - os: blacksmith-12vcpu-macos-latest + - display_name: macos-3.11-sqlalchemy-2 + vcpus: 12 + os: blacksmith-12vcpu-macos-latest platform: macos python-version: "3.11" shell: bash suite: sqlalchemy-2 # linux tests - - os: blacksmith-8vcpu-ubuntu-2404 + - display_name: linux-3.10-common/pipeline-mindeps-workspace + vcpus: 8 + os: blacksmith-8vcpu-ubuntu-2404 platform: linux python-version: "3.10" shell: bash suite: common/pipeline-mindeps-workspace - - os: blacksmith-8vcpu-ubuntu-2404 + - display_name: linux-3.10-pipeline-full + vcpus: 8 + os: blacksmith-8vcpu-ubuntu-2404 platform: linux python-version: "3.10" shell: bash suite: pipeline-full - - os: blacksmith-8vcpu-ubuntu-2404 + - display_name: linux-3.10-sqlalchemy-2 + vcpus: 8 + os: blacksmith-8vcpu-ubuntu-2404 platform: linux python-version: "3.10" shell: bash suite: sqlalchemy-2 - - os: blacksmith-8vcpu-ubuntu-2404 + - display_name: linux-3.11-common/pipeline-mindeps-workspace + vcpus: 8 + os: blacksmith-8vcpu-ubuntu-2404 platform: linux python-version: "3.11" shell: bash suite: common/pipeline-mindeps-workspace - - os: blacksmith-8vcpu-ubuntu-2404 + - display_name: linux-3.11-pipeline-full + vcpus: 8 + os: blacksmith-8vcpu-ubuntu-2404 platform: linux python-version: "3.11" shell: bash suite: pipeline-full - - os: blacksmith-8vcpu-ubuntu-2404 + - display_name: linux-3.11-sqlalchemy-2 + vcpus: 8 + os: 
blacksmith-8vcpu-ubuntu-2404 platform: linux python-version: "3.11" shell: bash suite: sqlalchemy-2 - - os: blacksmith-8vcpu-ubuntu-2404 + - display_name: linux-3.12-common/pipeline-mindeps-workspace + vcpus: 8 + os: blacksmith-8vcpu-ubuntu-2404 platform: linux python-version: "3.12" shell: bash suite: common/pipeline-mindeps-workspace - - os: blacksmith-8vcpu-ubuntu-2404 + - display_name: linux-3.12-pipeline-full + vcpus: 8 + os: blacksmith-8vcpu-ubuntu-2404 platform: linux python-version: "3.12" shell: bash suite: pipeline-full - - os: blacksmith-8vcpu-ubuntu-2404 + - display_name: linux-3.12-sqlalchemy-2 + vcpus: 8 + os: blacksmith-8vcpu-ubuntu-2404 platform: linux python-version: "3.12" shell: bash suite: sqlalchemy-2 - - os: blacksmith-8vcpu-ubuntu-2404 + - display_name: linux-3.13-common/pipeline-mindeps-workspace + vcpus: 8 + os: blacksmith-8vcpu-ubuntu-2404 platform: linux python-version: "3.13" shell: bash suite: common/pipeline-mindeps-workspace - - os: blacksmith-8vcpu-ubuntu-2404 + - display_name: linux-3.13-pipeline-full + vcpus: 8 + os: blacksmith-8vcpu-ubuntu-2404 platform: linux python-version: "3.13" shell: bash suite: pipeline-full - - os: blacksmith-8vcpu-ubuntu-2404 + - display_name: linux-3.13-sqlalchemy-2 + vcpus: 8 + os: blacksmith-8vcpu-ubuntu-2404 platform: linux python-version: "3.13" shell: bash suite: sqlalchemy-2 # 3.14: early suite only (no pipeline-full / sqlalchemy-2 tail) - - os: blacksmith-12vcpu-macos-latest + - display_name: macos-3.14-common/pipeline-mindeps-workspace + vcpus: 12 + os: blacksmith-12vcpu-macos-latest platform: macos python-version: "3.14" shell: bash suite: common/pipeline-mindeps-workspace # linux test with minimal dependencies - - os: blacksmith-8vcpu-ubuntu-2404 + - display_name: linux-lowest-direct-3.11-common/pipeline-mindeps-workspace + vcpus: 8 + os: blacksmith-8vcpu-ubuntu-2404 platform: linux-lowest-direct python-version: "3.11" shell: bash uv_sync_args: '--resolution lowest-direct' # could also be 
'direct' suite: common/pipeline-mindeps-workspace - - os: blacksmith-8vcpu-ubuntu-2404 + - display_name: linux-lowest-direct-3.11-pipeline-full + vcpus: 8 + os: blacksmith-8vcpu-ubuntu-2404 platform: linux-lowest-direct python-version: "3.11" shell: bash uv_sync_args: '--resolution lowest-direct' # could also be 'direct' suite: pipeline-full - - os: blacksmith-8vcpu-ubuntu-2404 + - display_name: linux-lowest-direct-3.11-sqlalchemy-2 + vcpus: 8 + os: blacksmith-8vcpu-ubuntu-2404 platform: linux-lowest-direct python-version: "3.11" shell: bash @@ -127,19 +166,25 @@ jobs: suite: sqlalchemy-2 # linux test with newest available allowed packages (will update lockfile, should not be committed if run locally) - - os: blacksmith-8vcpu-ubuntu-2404 + - display_name: linux-upgrade-3.11-common/pipeline-mindeps-workspace + vcpus: 8 + os: blacksmith-8vcpu-ubuntu-2404 platform: linux-upgrade python-version: "3.11" shell: bash uv_sync_args: '--upgrade' # could also be 'direct' suite: common/pipeline-mindeps-workspace - - os: blacksmith-8vcpu-ubuntu-2404 + - display_name: linux-upgrade-3.11-pipeline-full + vcpus: 8 + os: blacksmith-8vcpu-ubuntu-2404 platform: linux-upgrade python-version: "3.11" shell: bash uv_sync_args: '--upgrade' # could also be 'direct' suite: pipeline-full - - os: blacksmith-8vcpu-ubuntu-2404 + - display_name: linux-upgrade-3.11-sqlalchemy-2 + vcpus: 8 + os: blacksmith-8vcpu-ubuntu-2404 platform: linux-upgrade python-version: "3.11" shell: bash @@ -147,32 +192,44 @@ jobs: suite: sqlalchemy-2 # windows tests - - os: blacksmith-8vcpu-windows-2025 + - display_name: windows-3.11-common/pipeline-mindeps-workspace + vcpus: 8 + os: blacksmith-8vcpu-windows-2025 platform: windows python-version: "3.11" shell: cmd suite: common/pipeline-mindeps-workspace - - os: blacksmith-8vcpu-windows-2025 + - display_name: windows-3.11-pipeline-full + vcpus: 8 + os: blacksmith-8vcpu-windows-2025 platform: windows python-version: "3.11" shell: cmd suite: pipeline-full - - os: 
blacksmith-8vcpu-windows-2025 + - display_name: windows-3.11-sqlalchemy-2 + vcpus: 8 + os: blacksmith-8vcpu-windows-2025 platform: windows python-version: "3.11" shell: cmd suite: sqlalchemy-2 - - os: blacksmith-8vcpu-windows-2025 + - display_name: windows-3.13-common/pipeline-mindeps-workspace + vcpus: 8 + os: blacksmith-8vcpu-windows-2025 platform: windows python-version: "3.13" shell: cmd suite: common/pipeline-mindeps-workspace - - os: blacksmith-8vcpu-windows-2025 + - display_name: windows-3.13-pipeline-full + vcpus: 8 + os: blacksmith-8vcpu-windows-2025 platform: windows python-version: "3.13" shell: cmd suite: pipeline-full - - os: blacksmith-8vcpu-windows-2025 + - display_name: windows-3.13-sqlalchemy-2 + vcpus: 8 + os: blacksmith-8vcpu-windows-2025 platform: windows python-version: "3.13" shell: cmd @@ -293,7 +350,7 @@ jobs: if: matrix.suite == 'sqlalchemy-2' && matrix.python-version != '3.14' matrix_job_required_check: - name: common · check + name: common-check needs: run_common runs-on: ubuntu-latest if: always() diff --git a/.github/workflows/test_destinations_local.yml b/.github/workflows/test_destinations_local.yml index 12361d340f..4123910bf0 100644 --- a/.github/workflows/test_destinations_local.yml +++ b/.github/workflows/test_destinations_local.yml @@ -15,7 +15,8 @@ env: jobs: run_destinations_local: - name: ${{ matrix.name }} + # Job title: matrix.display_name + optional "-{N}vcpu" (runner image is matrix.os only). + name: ${{ matrix.display_name }}${{ matrix.vcpus && format('-{0}vcpu', matrix.vcpus) || '' }} strategy: fail-fast: false @@ -23,7 +24,7 @@ jobs: include: # Duckdb, Postgres, Filesystem, Weaviate, Qdrant (could be split?) 
- - name: filesystem, weaviate + - display_name: filesystem, weaviate destinations: "[\"filesystem\", \"weaviate\"]" filesystem_drivers: "[\"memory\", \"file\", \"sftp\"]" extras: "--extra parquet --extra cli --extra filesystem --extra weaviate --extra deltalake --extra pyiceberg --extra sftp" @@ -32,7 +33,7 @@ jobs: post_install_commands: "uv run pip install sqlalchemy==2.0.18" # minimum version required by `pyiceberg` # Qdrant runs test serially, concurrency is problematic - - name: qdrant + - display_name: qdrant destinations: "[\"qdrant\"]" filesystem_drivers: "[\"memory\", \"file\", \"sftp\"]" extras: "--extra parquet --extra cli --extra qdrant --extra sftp" @@ -41,7 +42,7 @@ jobs: pytest_xdist_n: 1 # TODO: also test ducklake in remote mode with a buckets and remote postgres - - name: postgres, duckdb, ducklake, and dummy + - display_name: postgres, duckdb, ducklake, and dummy destinations: "[\"postgres\", \"duckdb\", \"ducklake\", \"dummy\"]" filesystem_drivers: "[\"memory\", \"file\"]" extras: "--group adbc --extra postgres --extra postgis --extra parquet --extra duckdb --extra cli --extra filesystem" @@ -49,7 +50,7 @@ jobs: needs_postgres: true # Clickhouse OSS (TODO: test with minio s3) - - name: clickhouse + - display_name: clickhouse destinations: "[\"clickhouse\"]" filesystem_drivers: "[\"memory\", \"file\"]" extras: "--extra clickhouse --extra parquet" @@ -58,7 +59,7 @@ jobs: excluded_destination_configurations: "[\"clickhouse-parquet-staging-s3-authorization\", \"clickhouse-parquet-staging-az-authorization\", \"clickhouse-jsonl-staging-az-authorization\", \"clickhouse-jsonl-staging-s3-authorization\"]" # Dremio - - name: dremio + - display_name: dremio destinations: "[\"dremio\"]" filesystem_drivers: "[\"memory\"]" extras: "--extra s3 --extra gs --extra az --extra parquet" @@ -66,7 +67,7 @@ jobs: # SQLAlchemy 1.4 - - name: sqlalchemy + - display_name: sqlalchemy-1.4 destinations: "[\"sqlalchemy\"]" filesystem_drivers: "[\"memory\", \"file\"]" extras: 
"--extra sqlalchemy --extra filesystem --extra parquet --extra postgres --extra oracle --group adbc" @@ -74,7 +75,7 @@ jobs: post_install_commands: "uv run pip install pymysql && uv run pip install sqlalchemy==1.4 && uv run dbc install mysql && uv run dbc install sqlite" # SQLAlchemy 2.0 (same as above but with sqlalchemy 2.0) - - name: sqlalchemy + - display_name: sqlalchemy-2.0 destinations: "[\"sqlalchemy\"]" filesystem_drivers: "[\"memory\", \"file\"]" extras: "--extra sqlalchemy --extra filesystem --extra parquet --extra postgres --extra oracle --group adbc" diff --git a/.github/workflows/test_destinations_remote.yml b/.github/workflows/test_destinations_remote.yml index 1c341c7b08..c87f42c05a 100644 --- a/.github/workflows/test_destinations_remote.yml +++ b/.github/workflows/test_destinations_remote.yml @@ -24,28 +24,29 @@ env: jobs: run_destinations_remote: - name: ${{ matrix.name }} + # Job title: matrix.display_name + optional "-{N}vcpu" (runner image is matrix.os only). + name: ${{ matrix.display_name }}${{ matrix.vcpus && format('-{0}vcpu', matrix.vcpus) || '' }} strategy: fail-fast: false matrix: include: # Athena - - name: athena + - display_name: athena destinations: "[\"athena\"]" filesystem_drivers: "[\"memory\"]" excluded_destination_test_configuration_ids: "[\"athena-iceberg\", \"athena-s3-tables\"]" extras: "--extra athena" xdist_workers: 12 - - name: athena iceberg + - display_name: athena iceberg destinations: "[\"athena\"]" filesystem_drivers: "[\"memory\"]" excluded_destination_test_configuration_ids: "[\"athena\", \"athena-s3-tables\"]" extras: "--extra athena" xdist_workers: 12 - - name: athena s3 tables + - display_name: athena s3 tables destinations: "[\"athena\"]" filesystem_drivers: "[\"memory\"]" excluded_destination_test_configuration_ids: "[\"athena\", \"athena-iceberg\"]" @@ -53,33 +54,33 @@ jobs: xdist_workers: 12 # BigQuery - - name: bigquery + - display_name: bigquery destinations: "[\"bigquery\"]" filesystem_drivers: 
"[\"memory\"]" extras: "--extra bigquery --extra parquet" # Clickhouse - - name: clickhouse + - display_name: clickhouse destinations: "[\"clickhouse\"]" filesystem_drivers: "[\"memory\", \"file\"]" extras: "--extra clickhouse --extra parquet" # Databricks - - name: databricks + - display_name: databricks destinations: "[\"databricks\"]" filesystem_drivers: "[\"memory\"]" extras: "--extra databricks --extra s3 --extra gs --extra az --extra parquet" # Filesystem - - name: filesystem_s3_local + - display_name: filesystem_s3_local destinations: "[\"filesystem\"]" # note that all buckets are enabled for testing filesystem_drivers: "[\"memory\", \"file\", \"r2\", \"s3\"]" # excludes sftp which is run in local tests extras: "--extra s3 --extra parquet --extra duckdb --extra filesystem --extra deltalake --extra pyiceberg" post_install_commands: "uv run pip install sqlalchemy==2.0.18" # minimum version required by `pyiceberg` - - name: filesystem_az + - display_name: filesystem_az destinations: "[\"filesystem\"]" # note that all buckets are enabled for testing filesystem_drivers: "[\"memory\", \"az\", \"abfss\"]" # excludes sftp which is run in local tests @@ -87,14 +88,14 @@ jobs: post_install_commands: | # uv run pip install due to minimum version required by `pyiceberg` uv run pip install sqlalchemy==2.0.18 # minimum version required by `pyiceberg` - - name: filesystem_gs_gdrive + - display_name: filesystem_gs_gdrive destinations: "[\"filesystem\"]" # note that all buckets are enabled for testing filesystem_drivers: "[\"memory\", \"gs\", \"gdrive\"]" # excludes sftp which is run in local tests extras: "--extra gs --extra parquet --extra duckdb --extra filesystem --extra deltalake --extra pyiceberg" post_install_commands: "uv run pip install sqlalchemy==2.0.18" # minimum version required by `pyiceberg` - - name: filesystem_hf + - display_name: filesystem_hf destinations: "[\"filesystem\"]" filesystem_drivers: "[\"memory\", \"hf\"]" # although, `hf` does not support 
delta/iceberg, we include `deltalake`/`pyiceberg` extras to prevent missing imports in tests @@ -102,7 +103,7 @@ jobs: post_install_commands: "uv run pip install sqlalchemy==2.0.18" # minimum version required by `pyiceberg` # LanceDB - - name: lancedb + - display_name: lancedb destinations: "[\"lancedb\"]" filesystem_drivers: "[\"memory\"]" extras: "--extra lancedb --extra parquet" @@ -111,14 +112,14 @@ jobs: # Lance — file and s3 protocols only, no need to test az and gs as all cloud storages go # through same code paths thanks to `object_store` Rust crate - - name: lance_local + - display_name: lance_local destinations: "[\"lance\"]" filesystem_drivers: "[\"file\"]" extras: "--extra lance" post_install_commands: "uv run pip install openai" always_run_all_tests: true - - name: lance_s3 + - display_name: lance_s3 destinations: "[\"lance\"]" filesystem_drivers: "[\"s3\"]" extras: "--extra lance --extra s3" @@ -126,21 +127,21 @@ jobs: always_run_all_tests: true # Motherduck - - name: motherduck + - display_name: motherduck destinations: "[\"motherduck\"]" filesystem_drivers: "[\"memory\"]" extras: "--extra motherduck --extra s3 --extra gs --extra az --extra parquet" xdist_workers: 1 # Ducklake - - name: ducklake + - display_name: ducklake destinations: "[\"ducklake\"]" filesystem_drivers: "[\"s3\", \"gs\", \"abfss\"]" extras: "--extra ducklake --extra s3 --extra gs --extra az --extra parquet" always_run_all_tests: true # MSSQL - - name: mssql + - display_name: mssql destinations: "[\"mssql\"]" filesystem_drivers: "[\"memory\"]" extras: "--extra mssql --extra s3 --extra gs --extra az --extra parquet --group adbc" @@ -149,26 +150,26 @@ jobs: always_run_all_tests: true # Synapse - - name: synapse + - display_name: synapse destinations: "[\"synapse\"]" filesystem_drivers: "[\"memory\"]" extras: "--extra synapse --extra parquet" pre_install_commands: "sudo apt-get update && sudo ACCEPT_EULA=Y apt-get install --yes msodbcsql18" # Fabric - - name: fabric + - display_name: 
fabric destinations: "[\"fabric\"]" filesystem_drivers: "[\"memory\"]" extras: "--extra fabric" pre_install_commands: "sudo apt-get update && sudo ACCEPT_EULA=Y apt-get install --yes msodbcsql18" # Postgres and Redshift (used to be test_destinations.yml) - - name: redshift + - display_name: redshift destinations: "[\"redshift\"]" filesystem_drivers: "[\"memory\", \"file\"]" extras: "--group adbc --extra postgres --extra redshift --extra postgis --extra s3 --extra gs --extra az --extra parquet --extra duckdb" - - name: postgres + - display_name: postgres destinations: "[\"postgres\"]" filesystem_drivers: "[\"memory\", \"file\"]" extras: "--group adbc --extra postgres --extra postgis --extra parquet --extra duckdb" @@ -183,7 +184,7 @@ jobs: # extras: "--extra qdrant --extra parquet" # Snowflake - - name: snowflake + - display_name: snowflake destinations: "[\"snowflake\"]" filesystem_drivers: "[\"memory\"]" extras: "--extra snowflake --extra s3 --extra gs --extra az --extra parquet" diff --git a/.github/workflows/test_docs.yml b/.github/workflows/test_docs.yml index 92be641cb8..41165a6c8f 100644 --- a/.github/workflows/test_docs.yml +++ b/.github/workflows/test_docs.yml @@ -25,7 +25,7 @@ env: jobs: run_lint: - name: docs | snippets + name: snippets runs-on: ubuntu-latest # Service containers to run with `container-job` diff --git a/.github/workflows/test_hub.yml b/.github/workflows/test_hub.yml index 95790e55b0..27c991c516 100644 --- a/.github/workflows/test_hub.yml +++ b/.github/workflows/test_hub.yml @@ -13,27 +13,36 @@ env: jobs: run_hub_features: - name: ${{ matrix.os }} · py${{ matrix.python-version }} + # Job title: matrix.display_name + optional "-{N}vcpu" (runner image is matrix.os only). 
+ name: ${{ matrix.display_name }}${{ matrix.vcpus && format('-{0}vcpu', matrix.vcpus) || '' }} strategy: fail-fast: false matrix: - os: ["ubuntu-latest", "macos-latest", "windows-latest"] - python-version: ["3.10", "3.11", "3.12", "3.13"] - dlthub_dep: [""] # , "https://dlt-packages.fra1.digitaloceanspaces.com/dlthub/dlthub-0.0.0+nightly-py3-none-any.whl"] - # Test all python versions on ubuntu only - exclude: - - os: "macos-latest" - python-version: "3.10" - - os: "macos-latest" - python-version: "3.12" - - os: "macos-latest" - python-version: "3.13" - - os: "windows-latest" - python-version: "3.10" - - os: "windows-latest" - python-version: "3.12" - - os: "windows-latest" - python-version: "3.13" + include: + - display_name: linux-3.10 + os: ubuntu-latest + python-version: "3.10" + dlthub_dep: "" + - display_name: linux-3.11 + os: ubuntu-latest + python-version: "3.11" + dlthub_dep: "" + - display_name: linux-3.12 + os: ubuntu-latest + python-version: "3.12" + dlthub_dep: "" + - display_name: linux-3.13 + os: ubuntu-latest + python-version: "3.13" + dlthub_dep: "" + - display_name: macos-3.11 + os: macos-latest + python-version: "3.11" + dlthub_dep: "" + - display_name: windows-3.11 + os: windows-latest + python-version: "3.11" + dlthub_dep: "" defaults: run: @@ -99,7 +108,7 @@ jobs: # if: ${{ matrix.python-version == '3.11' && matrix.os == 'ubuntu-latest' }} matrix_job_required_check: - name: hub · check + name: hub-check needs: run_hub_features runs-on: ubuntu-latest if: always() diff --git a/.github/workflows/test_sources_local.yml b/.github/workflows/test_sources_local.yml index a1e2872be1..e7c23e72c1 100644 --- a/.github/workflows/test_sources_local.yml +++ b/.github/workflows/test_sources_local.yml @@ -16,33 +16,39 @@ env: jobs: run_loader: - name: ${{ matrix.name }} · ${{ matrix.suite }} + # Job title: matrix.display_name + optional "-{N}vcpu" (runner image is matrix.os only). 
+ name: ${{ matrix.display_name }}${{ matrix.vcpus && format('-{0}vcpu', matrix.vcpus) || '' }} strategy: fail-fast: false matrix: include: - - name: postgres etc. + - display_name: postgres-sources-load + vcpus: 8 dbms_extras: "--extra postgres --extra postgis" needs_postgres: true pytest_mark: "not mssql and not oracle" suite: sources-load - - name: postgres etc. + - display_name: postgres-sql-database + vcpus: 8 dbms_extras: "--extra postgres --extra postgis" needs_postgres: true pytest_mark: "not mssql and not oracle" suite: sql-database - - name: mssql + - display_name: mssql-sources-load + vcpus: 8 dbms_extras: "--extra mssql" needs_mssql: true pytest_mark: "mssql" suite: sources-load - - name: mssql + - display_name: mssql-sql-database + vcpus: 8 dbms_extras: "--extra mssql" needs_mssql: true pytest_mark: "mssql" suite: sql-database # Oracle: sqlalchemy 2 / sql_database only (no sources-load leg on SA 1.4) - - name: oracle + - display_name: oracle-sql-database + vcpus: 8 dbms_extras: "--extra oracle" needs_oracle: true pytest_mark: "oracle" diff --git a/.github/workflows/test_tools_airflow.yml b/.github/workflows/test_tools_airflow.yml index d87fbc908a..0ec329828a 100644 --- a/.github/workflows/test_tools_airflow.yml +++ b/.github/workflows/test_tools_airflow.yml @@ -6,18 +6,23 @@ on: jobs: run_airflow: - name: airflow ${{ matrix.airflow-version }} + # Job title: matrix.display_name + optional "-{N}vcpu" (runner image is matrix.os only). 
+ name: ${{ matrix.display_name }}${{ matrix.vcpus && format('-{0}vcpu', matrix.vcpus) || '' }} runs-on: blacksmith-8vcpu-ubuntu-2404 strategy: fail-fast: false matrix: include: - - airflow-version: "2" + - display_name: airflow-2 + vcpus: 8 + airflow-version: "2" python-version: "3.12" airflow-pin: "" run-unit-tests: true run-smoke-test: false - - airflow-version: "3" + - display_name: airflow-3 + vcpus: 8 + airflow-version: "3" python-version: "3.12" airflow-pin: "apache-airflow>=3.1" run-unit-tests: false diff --git a/.github/workflows/test_tools_dashboard.yml b/.github/workflows/test_tools_dashboard.yml index eb9e679569..6ffcc561e7 100644 --- a/.github/workflows/test_tools_dashboard.yml +++ b/.github/workflows/test_tools_dashboard.yml @@ -15,47 +15,64 @@ env: jobs: run_common: - name: py${{ matrix.python-version }} · ${{ matrix.os }} + # Job title: matrix.display_name + optional "-{N}vcpu" (runner image is matrix.os only). + name: ${{ matrix.display_name }}${{ matrix.vcpus && format('-{0}vcpu', matrix.vcpus) || '' }} strategy: fail-fast: false matrix: include: # macos tests - - os: blacksmith-12vcpu-macos-latest + - display_name: macos-3.12 + vcpus: 12 + os: blacksmith-12vcpu-macos-latest python-version: "3.12" shell: bash # linux tests - - os: blacksmith-8vcpu-ubuntu-2404 + - display_name: linux-3.10 + vcpus: 8 + os: blacksmith-8vcpu-ubuntu-2404 python-version: "3.10" shell: bash - - os: blacksmith-8vcpu-ubuntu-2404 + - display_name: linux-3.11 + vcpus: 8 + os: blacksmith-8vcpu-ubuntu-2404 python-version: "3.11" shell: bash - - os: blacksmith-8vcpu-ubuntu-2404 + - display_name: linux-3.12 + vcpus: 8 + os: blacksmith-8vcpu-ubuntu-2404 python-version: "3.12" shell: bash - - os: blacksmith-8vcpu-ubuntu-2404 + - display_name: linux-3.13 + vcpus: 8 + os: blacksmith-8vcpu-ubuntu-2404 python-version: "3.13" shell: bash # linux test with minimal dependencies - - os: blacksmith-8vcpu-ubuntu-2404 + - display_name: linux-3.11-mindeps + vcpus: 8 + os: 
blacksmith-8vcpu-ubuntu-2404 python-version: "3.11" shell: bash uv_sync_args: '--resolution lowest-direct' # could also be 'direct' # mac test with newest available allowed packages (will update lockfile, should not be committed if run locally) # TODO: revert to linux - - os: blacksmith-12vcpu-macos-latest + - display_name: macos-3.11-upgrade + vcpus: 12 + os: blacksmith-12vcpu-macos-latest python-version: "3.11" shell: bash uv_sync_args: '--upgrade' # could also be 'direct' # windows tests - - os: blacksmith-8vcpu-windows-2025 + - display_name: windows-3.12 + vcpus: 8 + os: blacksmith-8vcpu-windows-2025 python-version: "3.12" shell: cmd @@ -111,7 +128,7 @@ jobs: if: matrix.python-version != '3.14.0-beta.4' matrix_job_required_check: - name: dashboard · check + name: dashboard-check needs: run_common runs-on: blacksmith-2vcpu-ubuntu-2404 if: always() diff --git a/.github/workflows/test_tools_dbt_runner.yml b/.github/workflows/test_tools_dbt_runner.yml index 3ef6785c93..875fa3bd42 100644 --- a/.github/workflows/test_tools_dbt_runner.yml +++ b/.github/workflows/test_tools_dbt_runner.yml @@ -12,13 +12,18 @@ env: jobs: run_dbt: - name: ${{ matrix.suite }} + # Job title: matrix.display_name + optional "-{N}vcpu" (runner image is matrix.os only). 
+ name: ${{ matrix.display_name }}${{ matrix.vcpus && format('-{0}vcpu', matrix.vcpus) || '' }} strategy: fail-fast: false matrix: include: - - suite: no-venv - - suite: venv + - display_name: no-venv + vcpus: 8 + suite: no-venv + - display_name: venv + vcpus: 8 + suite: venv defaults: run: shell: bash From 64b94ad7ff79dd686210f35eeeb2c64c6aa3f87f Mon Sep 17 00:00:00 2001 From: Alberto Hernandez <22517436+tetelio@users.noreply.github.com> Date: Fri, 22 May 2026 18:03:10 +0200 Subject: [PATCH 11/25] Better labels v2 --- .github/workflows/lint.yml | 15 +++------ .github/workflows/test_common.yml | 31 +------------------ .github/workflows/test_destinations_local.yml | 3 +- .../workflows/test_destinations_remote.yml | 3 +- .github/workflows/test_hub.yml | 3 +- .github/workflows/test_sources_local.yml | 8 +---- .github/workflows/test_tools_airflow.yml | 5 +-- .github/workflows/test_tools_dashboard.yml | 11 +------ .github/workflows/test_tools_dbt_runner.yml | 5 +-- 9 files changed, 13 insertions(+), 71 deletions(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index b16ab68608..b6a3db30c9 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -8,26 +8,21 @@ on: jobs: run_lint: - # Job title: matrix.display_name + optional "-{N}vcpu" (runner image is matrix.os only). 
- name: ${{ matrix.display_name }}${{ matrix.vcpus && format('-{0}vcpu', matrix.vcpus) || '' }} + name: ${{ matrix.display_name }} strategy: fail-fast: true matrix: include: - - display_name: "3.10" - vcpus: 8 + - display_name: linux-3.10 os: blacksmith-8vcpu-ubuntu-2404 python-version: "3.10" - - display_name: "3.11" - vcpus: 8 + - display_name: linux-3.11 os: blacksmith-8vcpu-ubuntu-2404 python-version: "3.11" - - display_name: "3.12" - vcpus: 8 + - display_name: linux-3.12 os: blacksmith-8vcpu-ubuntu-2404 python-version: "3.12" - - display_name: "3.13" - vcpus: 8 + - display_name: linux-3.13 os: blacksmith-8vcpu-ubuntu-2404 python-version: "3.13" diff --git a/.github/workflows/test_common.yml b/.github/workflows/test_common.yml index 62ae187d1d..a223cdbefd 100644 --- a/.github/workflows/test_common.yml +++ b/.github/workflows/test_common.yml @@ -14,8 +14,7 @@ env: jobs: run_common: - # Job title: matrix.display_name + optional "-{N}vcpu" (runner image is matrix.os only). - name: ${{ matrix.display_name }}${{ matrix.vcpus && format('-{0}vcpu', matrix.vcpus) || '' }} + name: ${{ matrix.display_name }} strategy: fail-fast: false matrix: @@ -23,21 +22,18 @@ jobs: # macos tests - display_name: macos-3.11-common/pipeline-mindeps-workspace - vcpus: 12 os: blacksmith-12vcpu-macos-latest platform: macos python-version: "3.11" shell: bash suite: common/pipeline-mindeps-workspace - display_name: macos-3.11-pipeline-full - vcpus: 12 os: blacksmith-12vcpu-macos-latest platform: macos python-version: "3.11" shell: bash suite: pipeline-full - display_name: macos-3.11-sqlalchemy-2 - vcpus: 12 os: blacksmith-12vcpu-macos-latest platform: macos python-version: "3.11" @@ -46,84 +42,72 @@ jobs: # linux tests - display_name: linux-3.10-common/pipeline-mindeps-workspace - vcpus: 8 os: blacksmith-8vcpu-ubuntu-2404 platform: linux python-version: "3.10" shell: bash suite: common/pipeline-mindeps-workspace - display_name: linux-3.10-pipeline-full - vcpus: 8 os: 
blacksmith-8vcpu-ubuntu-2404 platform: linux python-version: "3.10" shell: bash suite: pipeline-full - display_name: linux-3.10-sqlalchemy-2 - vcpus: 8 os: blacksmith-8vcpu-ubuntu-2404 platform: linux python-version: "3.10" shell: bash suite: sqlalchemy-2 - display_name: linux-3.11-common/pipeline-mindeps-workspace - vcpus: 8 os: blacksmith-8vcpu-ubuntu-2404 platform: linux python-version: "3.11" shell: bash suite: common/pipeline-mindeps-workspace - display_name: linux-3.11-pipeline-full - vcpus: 8 os: blacksmith-8vcpu-ubuntu-2404 platform: linux python-version: "3.11" shell: bash suite: pipeline-full - display_name: linux-3.11-sqlalchemy-2 - vcpus: 8 os: blacksmith-8vcpu-ubuntu-2404 platform: linux python-version: "3.11" shell: bash suite: sqlalchemy-2 - display_name: linux-3.12-common/pipeline-mindeps-workspace - vcpus: 8 os: blacksmith-8vcpu-ubuntu-2404 platform: linux python-version: "3.12" shell: bash suite: common/pipeline-mindeps-workspace - display_name: linux-3.12-pipeline-full - vcpus: 8 os: blacksmith-8vcpu-ubuntu-2404 platform: linux python-version: "3.12" shell: bash suite: pipeline-full - display_name: linux-3.12-sqlalchemy-2 - vcpus: 8 os: blacksmith-8vcpu-ubuntu-2404 platform: linux python-version: "3.12" shell: bash suite: sqlalchemy-2 - display_name: linux-3.13-common/pipeline-mindeps-workspace - vcpus: 8 os: blacksmith-8vcpu-ubuntu-2404 platform: linux python-version: "3.13" shell: bash suite: common/pipeline-mindeps-workspace - display_name: linux-3.13-pipeline-full - vcpus: 8 os: blacksmith-8vcpu-ubuntu-2404 platform: linux python-version: "3.13" shell: bash suite: pipeline-full - display_name: linux-3.13-sqlalchemy-2 - vcpus: 8 os: blacksmith-8vcpu-ubuntu-2404 platform: linux python-version: "3.13" @@ -132,7 +116,6 @@ jobs: # 3.14: early suite only (no pipeline-full / sqlalchemy-2 tail) - display_name: macos-3.14-common/pipeline-mindeps-workspace - vcpus: 12 os: blacksmith-12vcpu-macos-latest platform: macos python-version: "3.14" @@ -141,7 
+124,6 @@ jobs: # linux test with minimal dependencies - display_name: linux-lowest-direct-3.11-common/pipeline-mindeps-workspace - vcpus: 8 os: blacksmith-8vcpu-ubuntu-2404 platform: linux-lowest-direct python-version: "3.11" @@ -149,7 +131,6 @@ jobs: uv_sync_args: '--resolution lowest-direct' # could also be 'direct' suite: common/pipeline-mindeps-workspace - display_name: linux-lowest-direct-3.11-pipeline-full - vcpus: 8 os: blacksmith-8vcpu-ubuntu-2404 platform: linux-lowest-direct python-version: "3.11" @@ -157,7 +138,6 @@ jobs: uv_sync_args: '--resolution lowest-direct' # could also be 'direct' suite: pipeline-full - display_name: linux-lowest-direct-3.11-sqlalchemy-2 - vcpus: 8 os: blacksmith-8vcpu-ubuntu-2404 platform: linux-lowest-direct python-version: "3.11" @@ -167,7 +147,6 @@ jobs: # linux test with newest available allowed packages (will update lockfile, should not be committed if run locally) - display_name: linux-upgrade-3.11-common/pipeline-mindeps-workspace - vcpus: 8 os: blacksmith-8vcpu-ubuntu-2404 platform: linux-upgrade python-version: "3.11" @@ -175,7 +154,6 @@ jobs: uv_sync_args: '--upgrade' # could also be 'direct' suite: common/pipeline-mindeps-workspace - display_name: linux-upgrade-3.11-pipeline-full - vcpus: 8 os: blacksmith-8vcpu-ubuntu-2404 platform: linux-upgrade python-version: "3.11" @@ -183,7 +161,6 @@ jobs: uv_sync_args: '--upgrade' # could also be 'direct' suite: pipeline-full - display_name: linux-upgrade-3.11-sqlalchemy-2 - vcpus: 8 os: blacksmith-8vcpu-ubuntu-2404 platform: linux-upgrade python-version: "3.11" @@ -193,42 +170,36 @@ jobs: # windows tests - display_name: windows-3.11-common/pipeline-mindeps-workspace - vcpus: 8 os: blacksmith-8vcpu-windows-2025 platform: windows python-version: "3.11" shell: cmd suite: common/pipeline-mindeps-workspace - display_name: windows-3.11-pipeline-full - vcpus: 8 os: blacksmith-8vcpu-windows-2025 platform: windows python-version: "3.11" shell: cmd suite: pipeline-full - display_name: 
windows-3.11-sqlalchemy-2 - vcpus: 8 os: blacksmith-8vcpu-windows-2025 platform: windows python-version: "3.11" shell: cmd suite: sqlalchemy-2 - display_name: windows-3.13-common/pipeline-mindeps-workspace - vcpus: 8 os: blacksmith-8vcpu-windows-2025 platform: windows python-version: "3.13" shell: cmd suite: common/pipeline-mindeps-workspace - display_name: windows-3.13-pipeline-full - vcpus: 8 os: blacksmith-8vcpu-windows-2025 platform: windows python-version: "3.13" shell: cmd suite: pipeline-full - display_name: windows-3.13-sqlalchemy-2 - vcpus: 8 os: blacksmith-8vcpu-windows-2025 platform: windows python-version: "3.13" diff --git a/.github/workflows/test_destinations_local.yml b/.github/workflows/test_destinations_local.yml index 4123910bf0..b094f2f404 100644 --- a/.github/workflows/test_destinations_local.yml +++ b/.github/workflows/test_destinations_local.yml @@ -15,8 +15,7 @@ env: jobs: run_destinations_local: - # Job title: matrix.display_name + optional "-{N}vcpu" (runner image is matrix.os only). - name: ${{ matrix.display_name }}${{ matrix.vcpus && format('-{0}vcpu', matrix.vcpus) || '' }} + name: ${{ matrix.display_name }} strategy: fail-fast: false diff --git a/.github/workflows/test_destinations_remote.yml b/.github/workflows/test_destinations_remote.yml index c87f42c05a..2a557b97df 100644 --- a/.github/workflows/test_destinations_remote.yml +++ b/.github/workflows/test_destinations_remote.yml @@ -24,8 +24,7 @@ env: jobs: run_destinations_remote: - # Job title: matrix.display_name + optional "-{N}vcpu" (runner image is matrix.os only). 
- name: ${{ matrix.display_name }}${{ matrix.vcpus && format('-{0}vcpu', matrix.vcpus) || '' }} + name: ${{ matrix.display_name }} strategy: fail-fast: false matrix: diff --git a/.github/workflows/test_hub.yml b/.github/workflows/test_hub.yml index 27c991c516..e82217623e 100644 --- a/.github/workflows/test_hub.yml +++ b/.github/workflows/test_hub.yml @@ -13,8 +13,7 @@ env: jobs: run_hub_features: - # Job title: matrix.display_name + optional "-{N}vcpu" (runner image is matrix.os only). - name: ${{ matrix.display_name }}${{ matrix.vcpus && format('-{0}vcpu', matrix.vcpus) || '' }} + name: ${{ matrix.display_name }} strategy: fail-fast: false matrix: diff --git a/.github/workflows/test_sources_local.yml b/.github/workflows/test_sources_local.yml index e7c23e72c1..40361a26f6 100644 --- a/.github/workflows/test_sources_local.yml +++ b/.github/workflows/test_sources_local.yml @@ -16,39 +16,33 @@ env: jobs: run_loader: - # Job title: matrix.display_name + optional "-{N}vcpu" (runner image is matrix.os only). 
- name: ${{ matrix.display_name }}${{ matrix.vcpus && format('-{0}vcpu', matrix.vcpus) || '' }} + name: ${{ matrix.display_name }} strategy: fail-fast: false matrix: include: - display_name: postgres-sources-load - vcpus: 8 dbms_extras: "--extra postgres --extra postgis" needs_postgres: true pytest_mark: "not mssql and not oracle" suite: sources-load - display_name: postgres-sql-database - vcpus: 8 dbms_extras: "--extra postgres --extra postgis" needs_postgres: true pytest_mark: "not mssql and not oracle" suite: sql-database - display_name: mssql-sources-load - vcpus: 8 dbms_extras: "--extra mssql" needs_mssql: true pytest_mark: "mssql" suite: sources-load - display_name: mssql-sql-database - vcpus: 8 dbms_extras: "--extra mssql" needs_mssql: true pytest_mark: "mssql" suite: sql-database # Oracle: sqlalchemy 2 / sql_database only (no sources-load leg on SA 1.4) - display_name: oracle-sql-database - vcpus: 8 dbms_extras: "--extra oracle" needs_oracle: true pytest_mark: "oracle" diff --git a/.github/workflows/test_tools_airflow.yml b/.github/workflows/test_tools_airflow.yml index 0ec329828a..eff705c8ed 100644 --- a/.github/workflows/test_tools_airflow.yml +++ b/.github/workflows/test_tools_airflow.yml @@ -6,22 +6,19 @@ on: jobs: run_airflow: - # Job title: matrix.display_name + optional "-{N}vcpu" (runner image is matrix.os only). 
- name: ${{ matrix.display_name }}${{ matrix.vcpus && format('-{0}vcpu', matrix.vcpus) || '' }} + name: ${{ matrix.display_name }} runs-on: blacksmith-8vcpu-ubuntu-2404 strategy: fail-fast: false matrix: include: - display_name: airflow-2 - vcpus: 8 airflow-version: "2" python-version: "3.12" airflow-pin: "" run-unit-tests: true run-smoke-test: false - display_name: airflow-3 - vcpus: 8 airflow-version: "3" python-version: "3.12" airflow-pin: "apache-airflow>=3.1" diff --git a/.github/workflows/test_tools_dashboard.yml b/.github/workflows/test_tools_dashboard.yml index 6ffcc561e7..3bb6ebe0c4 100644 --- a/.github/workflows/test_tools_dashboard.yml +++ b/.github/workflows/test_tools_dashboard.yml @@ -15,8 +15,7 @@ env: jobs: run_common: - # Job title: matrix.display_name + optional "-{N}vcpu" (runner image is matrix.os only). - name: ${{ matrix.display_name }}${{ matrix.vcpus && format('-{0}vcpu', matrix.vcpus) || '' }} + name: ${{ matrix.display_name }} strategy: fail-fast: false matrix: @@ -24,36 +23,30 @@ jobs: # macos tests - display_name: macos-3.12 - vcpus: 12 os: blacksmith-12vcpu-macos-latest python-version: "3.12" shell: bash # linux tests - display_name: linux-3.10 - vcpus: 8 os: blacksmith-8vcpu-ubuntu-2404 python-version: "3.10" shell: bash - display_name: linux-3.11 - vcpus: 8 os: blacksmith-8vcpu-ubuntu-2404 python-version: "3.11" shell: bash - display_name: linux-3.12 - vcpus: 8 os: blacksmith-8vcpu-ubuntu-2404 python-version: "3.12" shell: bash - display_name: linux-3.13 - vcpus: 8 os: blacksmith-8vcpu-ubuntu-2404 python-version: "3.13" shell: bash # linux test with minimal dependencies - display_name: linux-3.11-mindeps - vcpus: 8 os: blacksmith-8vcpu-ubuntu-2404 python-version: "3.11" shell: bash @@ -62,7 +55,6 @@ jobs: # mac test with newest available allowed packages (will update lockfile, should not be committed if run locally) # TODO: revert to linux - display_name: macos-3.11-upgrade - vcpus: 12 os: blacksmith-12vcpu-macos-latest 
python-version: "3.11" shell: bash @@ -71,7 +63,6 @@ jobs: # windows tests - display_name: windows-3.12 - vcpus: 8 os: blacksmith-8vcpu-windows-2025 python-version: "3.12" shell: cmd diff --git a/.github/workflows/test_tools_dbt_runner.yml b/.github/workflows/test_tools_dbt_runner.yml index 875fa3bd42..b1dc5d7a02 100644 --- a/.github/workflows/test_tools_dbt_runner.yml +++ b/.github/workflows/test_tools_dbt_runner.yml @@ -12,17 +12,14 @@ env: jobs: run_dbt: - # Job title: matrix.display_name + optional "-{N}vcpu" (runner image is matrix.os only). - name: ${{ matrix.display_name }}${{ matrix.vcpus && format('-{0}vcpu', matrix.vcpus) || '' }} + name: ${{ matrix.display_name }} strategy: fail-fast: false matrix: include: - display_name: no-venv - vcpus: 8 suite: no-venv - display_name: venv - vcpus: 8 suite: venv defaults: run: From 45bc35a97b6744dc327f4ac57289249391c44fc6 Mon Sep 17 00:00:00 2001 From: Alberto Hernandez <22517436+tetelio@users.noreply.github.com> Date: Fri, 22 May 2026 18:16:02 +0200 Subject: [PATCH 12/25] Better labels v3 --- .github/workflows/fork_tests_with_secrets.yml | 2 +- .github/workflows/main.yml | 4 ++-- .github/workflows/test_destinations_local.yml | 2 +- .github/workflows/test_destinations_remote.yml | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/fork_tests_with_secrets.yml b/.github/workflows/fork_tests_with_secrets.yml index 6139730fb9..2d677ccb6a 100644 --- a/.github/workflows/fork_tests_with_secrets.yml +++ b/.github/workflows/fork_tests_with_secrets.yml @@ -25,7 +25,7 @@ jobs: - run: echo "Fork PR authorized — running secret-requiring tests against ${{ github.event.pull_request.head.sha }}" test_destinations_remote: - name: dest remote + name: remote needs: [authorize] uses: ./.github/workflows/test_destinations_remote.yml secrets: inherit diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 56503ab280..ee13fcd783 100644 --- a/.github/workflows/main.yml +++ 
b/.github/workflows/main.yml @@ -58,7 +58,7 @@ jobs: # Other tests that do not require remote connections # test_destinations_local: - name: dest local + name: local needs: test_common uses: ./.github/workflows/test_destinations_local.yml @@ -88,7 +88,7 @@ jobs: # test_destinations_remote: - name: dest remote + name: remote needs: [authorize_secrets, test_common] uses: ./.github/workflows/test_destinations_remote.yml secrets: inherit diff --git a/.github/workflows/test_destinations_local.yml b/.github/workflows/test_destinations_local.yml index b094f2f404..ff0904e469 100644 --- a/.github/workflows/test_destinations_local.yml +++ b/.github/workflows/test_destinations_local.yml @@ -1,7 +1,7 @@ # Tests destinations that can run without credentials. # i.e. local postgres, duckdb, filesystem (with local fs/memory bucket) -name: dest local +name: local on: workflow_call: diff --git a/.github/workflows/test_destinations_remote.yml b/.github/workflows/test_destinations_remote.yml index 2a557b97df..9ae2d11317 100644 --- a/.github/workflows/test_destinations_remote.yml +++ b/.github/workflows/test_destinations_remote.yml @@ -1,5 +1,5 @@ -name: dest remote +name: remote on: workflow_call: From df98a21868b041db3dd51b78ff19807b3f4d00ba Mon Sep 17 00:00:00 2001 From: Alberto Hernandez <22517436+tetelio@users.noreply.github.com> Date: Fri, 22 May 2026 21:49:52 +0200 Subject: [PATCH 13/25] loadgroup for remote essential --- Makefile | 1 + tests/load/test_read_interfaces.py | 3 +++ 2 files changed, 4 insertions(+) diff --git a/Makefile b/Makefile index f77ecd4203..137a6b3f82 100644 --- a/Makefile +++ b/Makefile @@ -337,6 +337,7 @@ test-dest-load: ) test-dest-remote-essential: PYTEST_MARKERS = essential +test-dest-remote-essential: PYTEST_XDIST_DIST = loadgroup test-dest-remote-essential: $(call RUN_XDIST_SAFE_SPLIT, \ tests/load \ diff --git a/tests/load/test_read_interfaces.py b/tests/load/test_read_interfaces.py index 7defcfee14..bf9044dc89 100644 --- 
a/tests/load/test_read_interfaces.py +++ b/tests/load/test_read_interfaces.py @@ -38,6 +38,9 @@ auto_module_test_storage, ) +# Same worker under pytest-xdist --dist=loadgroup (see make test-dest-remote-essential). +pytestmark = pytest.mark.xdist_group("read_interfaces") + EXPECTED_COLUMNS = ["id", "decimal", "other_decimal", "created_at", "_dlt_load_id", "_dlt_id"] # items.created_at is generated as `ITEMS_EPOCH + timedelta(seconds=i)` for i in range(total_records) From fd47d891611bad0042b3b9b04408c1a5732f2c16 Mon Sep 17 00:00:00 2001 From: Alberto Hernandez <22517436+tetelio@users.noreply.github.com> Date: Fri, 22 May 2026 22:05:11 +0200 Subject: [PATCH 14/25] Add reruns for dashboard and remote destination --- .github/workflows/test_tools_dashboard.yml | 3 +-- Makefile | 9 +++++++-- pyproject.toml | 4 +++- uv.lock | 15 +++++++++++++++ 4 files changed, 26 insertions(+), 5 deletions(-) diff --git a/.github/workflows/test_tools_dashboard.yml b/.github/workflows/test_tools_dashboard.yml index 3bb6ebe0c4..43e3e58339 100644 --- a/.github/workflows/test_tools_dashboard.yml +++ b/.github/workflows/test_tools_dashboard.yml @@ -114,8 +114,7 @@ jobs: # Run workspace dashboard e2e tests - name: Run dashboard e2e - run: | - pytest --browser chromium tests/e2e + run: make test-e2e-dashboard if: matrix.python-version != '3.14.0-beta.4' matrix_job_required_check: diff --git a/Makefile b/Makefile index 137a6b3f82..ed441bb477 100644 --- a/Makefile +++ b/Makefile @@ -90,6 +90,7 @@ PYTEST_ARGS ?= PYTEST_MARKERS ?= PYTEST_XDIST_N ?= PYTEST_XDIST_DIST ?= worksteal +PYTEST_RERUNS ?= PYTEST_TARGET_ARGS := # Internal marker model @@ -111,6 +112,7 @@ PYTEST_BASE = \ PYTHONHASHSEED=$(PYTHONHASHSEED) \ uv run pytest \ $(PYTEST_TARGET_ARGS) \ + $(if $(PYTEST_RERUNS),--reruns $(PYTEST_RERUNS)) \ $(PYTEST_ARGS) # Parallel execution with PYTEST_XDIST_N provided @@ -338,6 +340,7 @@ test-dest-load: test-dest-remote-essential: PYTEST_MARKERS = essential test-dest-remote-essential: 
PYTEST_XDIST_DIST = loadgroup +test-dest-remote-essential: PYTEST_RERUNS = 1 test-dest-remote-essential: $(call RUN_XDIST_SAFE_SPLIT, \ tests/load \ @@ -345,6 +348,7 @@ test-dest-remote-essential: ) test-dest-remote-nonessential: PYTEST_MARKERS = not essential +test-dest-remote-nonessential: PYTEST_RERUNS = 1 test-dest-remote-nonessential: $(call RUN_XDIST_SAFE_SPLIT, \ tests/load \ @@ -369,6 +373,7 @@ test-dbt-runner-venv: # CI: workspace dashboard & sources # ---------------------------------------------------------------------- +test-workspace-dashboard: PYTEST_RERUNS = 1 test-workspace-dashboard: $(call RUN_XDIST_SAFE_SPLIT, \ tests/workspace/helpers/dashboard \ @@ -426,10 +431,10 @@ check-cli-docs: ## Checks CLI reference docs are up to date (CI) uv run python docs/tools/check_cli_docs.py docs/website/docs/reference/command-line-interface.md --compare test-e2e-dashboard: ## Runs dashboard e2e tests with headless chromium - uv run pytest --browser chromium tests/e2e + uv run pytest --browser chromium --reruns 1 tests/e2e test-e2e-dashboard-headed: ## Runs dashboard e2e tests with visible browser - uv run pytest --headed --browser chromium tests/e2e + uv run pytest --headed --browser chromium --reruns 1 tests/e2e create-test-pipelines: ## Creates test pipelines for manual dashboard testing uv run python tests/workspace/helpers/dashboard/example_pipelines.py diff --git a/pyproject.toml b/pyproject.toml index dd7d6c5ee8..66b7859fbb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -245,6 +245,7 @@ dev = [ "pytest-order>=1.0.0", "pytest-cases>=3.8.6", "pytest-forked>=1.3.0", + "pytest-rerunfailures>=14.0,<15", "types-PyYAML>=6.0.7", "types-cachetools>=4.2.9", "types-protobuf>=3.19.8", @@ -530,7 +531,7 @@ ignore_missing_imports = true [tool.pytest.ini_options] pythonpath = ["dlt"] norecursedirs = [".direnv", ".eggs", "build", "dist"] -addopts = "-p no:xdist --showlocals --durations 200 -m 'not rfam'" +addopts = "-p no:xdist --showlocals --durations 10 -m 'not 
rfam'" xfail_strict = true timeout = 1800 faulthandler_timeout = 1500 @@ -540,6 +541,7 @@ python_files = ["test_*.py", "*_test.py"] python_functions = ["*_test", "test_*"] filterwarnings = ["ignore::DeprecationWarning"] markers = [ + "xdist_group(name): group tests on one xdist worker (requires --dist=loadgroup)", "essential: marks all essential tests", "forked: marks tests that must run in a separate process (forked), e.g. due to global state, crashes, or side effects", "no_load: marks tests that do not load anything", diff --git a/uv.lock b/uv.lock index 994f11e5cd..f559a63e88 100644 --- a/uv.lock +++ b/uv.lock @@ -2722,6 +2722,7 @@ dev = [ { name = "pytest-forked" }, { name = "pytest-mock" }, { name = "pytest-order" }, + { name = "pytest-rerunfailures" }, { name = "pytest-timeout" }, { name = "pytest-xdist" }, { name = "requests-mock" }, @@ -2960,6 +2961,7 @@ dev = [ { name = "pytest-forked", specifier = ">=1.3.0" }, { name = "pytest-mock", specifier = ">=3.14.0,<4" }, { name = "pytest-order", specifier = ">=1.0.0" }, + { name = "pytest-rerunfailures", specifier = ">=14.0,<15" }, { name = "pytest-timeout", specifier = ">=2.3.1,<3" }, { name = "pytest-xdist", specifier = ">=3.5,<4" }, { name = "requests-mock", specifier = ">=1.10.0,<2" }, @@ -9744,6 +9746,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/76/61/4d333d8354ea2bea2c2f01bad0a4aa3c1262de20e1241f78e73360e9b620/pytest_playwright-0.7.2-py3-none-any.whl", hash = "sha256:8084e015b2b3ecff483c2160f1c8219b38b66c0d4578b23c0f700d1b0240ea38", size = 16881, upload-time = "2025-11-24T03:43:24.423Z" }, ] +[[package]] +name = "pytest-rerunfailures" +version = "14.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "packaging" }, + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/cc/a4/6de45fe850759e94aa9a55cda807c76245af1941047294df26c851dfb4a9/pytest-rerunfailures-14.0.tar.gz", hash = 
"sha256:4a400bcbcd3c7a4ad151ab8afac123d90eca3abe27f98725dc4d9702887d2e92", size = 21350, upload-time = "2024-03-13T08:21:39.444Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/dc/e7/e75bd157331aecc190f5f8950d7ea3d2cf56c3c57fb44da70e60b221133f/pytest_rerunfailures-14.0-py3-none-any.whl", hash = "sha256:4197bdd2eaeffdbf50b5ea6e7236f47ff0e44d1def8dae08e409f536d84e7b32", size = 12709, upload-time = "2024-03-13T08:21:37.199Z" }, +] + [[package]] name = "pytest-timeout" version = "2.4.0" From 8544fa06608e6047b42d25cbda67a208c5a4784d Mon Sep 17 00:00:00 2001 From: Alberto Hernandez <22517436+tetelio@users.noreply.github.com> Date: Fri, 22 May 2026 22:06:32 +0200 Subject: [PATCH 15/25] Rerun for local dest --- Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Makefile b/Makefile index ed441bb477..eab3ee9883 100644 --- a/Makefile +++ b/Makefile @@ -168,6 +168,7 @@ install-load-local: dev TEST_LOAD_PATHS = tests/load +test-load-local: PYTEST_RERUNS = 1 test-load-local: ## Tests load with local destinations (duckdb + filesystem) ACTIVE_DESTINATIONS='["duckdb", "filesystem"]' \ ALL_FILESYSTEM_DRIVERS='["memory", "file"]' \ @@ -181,6 +182,7 @@ test-load-local: ## Tests load with local destinations (duckdb + filesystem) test-load-local-p: ## Tests load with local destinations in parallel $(MAKE) test-load-local PYTEST_XDIST_N=auto +test-load-local-postgres: PYTEST_RERUNS = 1 test-load-local-postgres: ## Tests load with local postgres (requires start-test-containers) DESTINATION__POSTGRES__CREDENTIALS=postgresql://loader:loader@localhost:5432/dlt_data \ ACTIVE_DESTINATIONS='["postgres"]' \ @@ -331,6 +333,7 @@ test-with-sqlalchemy-2: # CI: destination- and feature-specific # ---------------------------------------------------------------------- +test-dest-load: PYTEST_RERUNS = 1 test-dest-load: $(call RUN_XDIST_SAFE_SPLIT, \ tests/load \ From 952593d7eefeb67daf43529696c616a4f574711e Mon Sep 17 00:00:00 2001 From: Alberto Hernandez 
<22517436+tetelio@users.noreply.github.com> Date: Fri, 22 May 2026 22:16:08 +0200 Subject: [PATCH 16/25] Faster athena --- .github/workflows/test_destinations_remote.yml | 6 +++--- pyproject.toml | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test_destinations_remote.yml b/.github/workflows/test_destinations_remote.yml index 9ae2d11317..4c64470125 100644 --- a/.github/workflows/test_destinations_remote.yml +++ b/.github/workflows/test_destinations_remote.yml @@ -36,21 +36,21 @@ jobs: filesystem_drivers: "[\"memory\"]" excluded_destination_test_configuration_ids: "[\"athena-iceberg\", \"athena-s3-tables\"]" extras: "--extra athena" - xdist_workers: 12 + xdist_workers: 24 - display_name: athena iceberg destinations: "[\"athena\"]" filesystem_drivers: "[\"memory\"]" excluded_destination_test_configuration_ids: "[\"athena\", \"athena-s3-tables\"]" extras: "--extra athena" - xdist_workers: 12 + xdist_workers: 24 - display_name: athena s3 tables destinations: "[\"athena\"]" filesystem_drivers: "[\"memory\"]" excluded_destination_test_configuration_ids: "[\"athena\", \"athena-iceberg\"]" extras: "--extra athena" - xdist_workers: 12 + xdist_workers: 24 # BigQuery - display_name: bigquery diff --git a/pyproject.toml b/pyproject.toml index 66b7859fbb..3af7ac93b0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -531,7 +531,8 @@ ignore_missing_imports = true [tool.pytest.ini_options] pythonpath = ["dlt"] norecursedirs = [".direnv", ".eggs", "build", "dist"] -addopts = "-p no:xdist --showlocals --durations 10 -m 'not rfam'" +#TODO: change less durations +addopts = "-p no:xdist --showlocals --durations 200 -m 'not rfam'" xfail_strict = true timeout = 1800 faulthandler_timeout = 1500 From 61ce7ac059e798ce0d49b96acda49fd5c6deaa91 Mon Sep 17 00:00:00 2001 From: Alberto Hernandez <22517436+tetelio@users.noreply.github.com> Date: Fri, 22 May 2026 23:18:20 +0200 Subject: [PATCH 17/25] Bypass possible --- .github/workflows/main.yml | 41 
++++++++++++++++++++++++++++++++------ 1 file changed, 35 insertions(+), 6 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index ee13fcd783..3f87642daf 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -54,17 +54,29 @@ jobs: uses: ./.github/workflows/test_tools_dashboard.yml # - # Destination and Sources local tests, do not provide secrets - # Other tests that do not require remote connections + # Destination and Sources local tests, do not provide secrets. + # With `test-remote-early`, start without waiting for common (mutually exclusive pairs). # + test_destinations_local_early: + name: local + if: contains(github.event.pull_request.labels.*.name, 'test-remote-early') + uses: ./.github/workflows/test_destinations_local.yml + test_destinations_local: name: local needs: test_common + if: ${{ !contains(github.event.pull_request.labels.*.name, 'test-remote-early') }} uses: ./.github/workflows/test_destinations_local.yml + test_sources_local_early: + name: src local + if: contains(github.event.pull_request.labels.*.name, 'test-remote-early') + uses: ./.github/workflows/test_sources_local.yml + test_sources_local: name: src local needs: test_common + if: ${{ !contains(github.event.pull_request.labels.*.name, 'test-remote-early') }} uses: ./.github/workflows/test_sources_local.yml test_hub: @@ -83,24 +95,41 @@ jobs: uses: ./.github/workflows/test_tools_build_images.yml # - # Remote destination tests and docs examples, needs secrets, - # so we depend on authorize and forward secrets - # + # Remote destination tests need secrets. With `test-remote-early`, start after + # authorize only (mutually exclusive pairs, same check name for branch protection). 
+ + test_destinations_remote_early: + name: remote + needs: [authorize_secrets] + if: contains(github.event.pull_request.labels.*.name, 'test-remote-early') + uses: ./.github/workflows/test_destinations_remote.yml + secrets: inherit + with: + run_full_test_suite: ${{ contains(github.event.pull_request.labels.*.name, 'ci full') || github.event_name == 'schedule' }} test_destinations_remote: name: remote needs: [authorize_secrets, test_common] + if: ${{ !contains(github.event.pull_request.labels.*.name, 'test-remote-early') }} uses: ./.github/workflows/test_destinations_remote.yml secrets: inherit with: - run_full_test_suite: ${{ contains(github.event.pull_request.labels.*.name, 'ci full') || github.event_name == 'schedule'}} + run_full_test_suite: ${{ contains(github.event.pull_request.labels.*.name, 'ci full') || github.event_name == 'schedule' }} # # Other tools and tests that require secrets # + test_tools_dbt_runner_early: + name: dbt + needs: [authorize_secrets] + if: contains(github.event.pull_request.labels.*.name, 'test-remote-early') + uses: ./.github/workflows/test_tools_dbt_runner.yml + secrets: inherit + test_tools_dbt_runner: name: dbt needs: [test_common, authorize_secrets] + if: ${{ !contains(github.event.pull_request.labels.*.name, 'test-remote-early') }} uses: ./.github/workflows/test_tools_dbt_runner.yml secrets: inherit From 632e7cce25e90fd18a758dd1d80831360a6a8aa0 Mon Sep 17 00:00:00 2001 From: Alberto Hernandez <22517436+tetelio@users.noreply.github.com> Date: Fri, 22 May 2026 23:32:21 +0200 Subject: [PATCH 18/25] Fix chunk size flaky test --- tests/load/test_read_interfaces.py | 58 +++++++++++++++++++++--------- 1 file changed, 41 insertions(+), 17 deletions(-) diff --git a/tests/load/test_read_interfaces.py b/tests/load/test_read_interfaces.py index bf9044dc89..37ece4f312 100644 --- a/tests/load/test_read_interfaces.py +++ b/tests/load/test_read_interfaces.py @@ -76,6 +76,27 @@ def _expected_chunk_count(p: Pipeline) -> List[int]: ] +def 
_skip_chunk_size_check( + pipeline: Pipeline, destination_config: DestinationTestConfiguration +) -> bool: + destination_type = pipeline.destination.destination_type + if destination_type in ( + "dlt.destinations.filesystem", + "dlt.destinations.snowflake", # unpredictable chunk size + "dlt.destinations.ducklake", # vector size seems to not be consistent, typically 700 + "dlt.destinations.lancedb", # default is 200 + "dlt.destinations.lance", + ): + return True + # duckdb over parquet files is batched by FILE_MAX_ITEMS, not requested chunk_size + if ( + destination_type == "dlt.destinations.duckdb" + and destination_config.file_format == "parquet" + ): + return True + return False + + def create_test_source(destination_type: str, table_format: TTableFormat) -> DltSource: total_records = _total_records(destination_type) @@ -277,11 +298,14 @@ def test_fetchscalar(populated_pipeline: Pipeline) -> None: @pytest.mark.no_load @pytest.mark.essential -def test_arrow_access(populated_pipeline: Pipeline) -> None: +def test_arrow_access( + populated_pipeline: Pipeline, destination_config: DestinationTestConfiguration +) -> None: table_relationship = populated_pipeline.dataset().items total_records = _total_records(populated_pipeline.destination.destination_type) chunk_size = _chunk_size(populated_pipeline.destination.destination_type) expected_chunk_counts = _expected_chunk_count(populated_pipeline) + skip_chunk_size_check = _skip_chunk_size_check(populated_pipeline, destination_config) # full table table = table_relationship.arrow() @@ -291,13 +315,12 @@ def test_arrow_access(populated_pipeline: Pipeline) -> None: # chunk table = table_relationship.arrow(chunk_size=chunk_size) assert set(table.column_names) == set(EXPECTED_COLUMNS) - # NOTE: chunksize is unpredictable on snowflake - if populated_pipeline.destination.destination_type != "dlt.destinations.snowflake": + if not skip_chunk_size_check: assert table.num_rows == chunk_size # check frame amount and items counts 
tables = list(table_relationship.iter_arrow(chunk_size=chunk_size)) - if populated_pipeline.destination.destination_type != "dlt.destinations.snowflake": + if not skip_chunk_size_check: assert [t.num_rows for t in tables] == expected_chunk_counts # check all items are present, this MUST also be true for snowflake @@ -307,19 +330,15 @@ def test_arrow_access(populated_pipeline: Pipeline) -> None: @pytest.mark.no_load @pytest.mark.essential -def test_dataframe_access(populated_pipeline: Pipeline) -> None: +def test_dataframe_access( + populated_pipeline: Pipeline, destination_config: DestinationTestConfiguration +) -> None: # access via key table_relationship = populated_pipeline.dataset()["items"] total_records = _total_records(populated_pipeline.destination.destination_type) chunk_size = _chunk_size(populated_pipeline.destination.destination_type) expected_chunk_counts = _expected_chunk_count(populated_pipeline) - skip_df_chunk_size_check = populated_pipeline.destination.destination_type in [ - "dlt.destinations.filesystem", - "dlt.destinations.snowflake", - "dlt.destinations.ducklake", # vector size seems to not be consistent, typically 700 - "dlt.destinations.lancedb", # default is 200 - "dlt.destinations.lance", - ] + skip_chunk_size_check = _skip_chunk_size_check(populated_pipeline, destination_config) # full frame df = table_relationship.df() @@ -329,14 +348,14 @@ def test_dataframe_access(populated_pipeline: Pipeline) -> None: # TODO: snowflake does not follow a chunk size, make and exception (accept range), same for arrow # chunk df = table_relationship.df(chunk_size=chunk_size) - if not skip_df_chunk_size_check: + if not skip_chunk_size_check: assert len(df.index) == chunk_size assert set(df.columns.values) == set(EXPECTED_COLUMNS) # iterate all dataframes frames = list(table_relationship.iter_df(chunk_size=chunk_size)) - if not skip_df_chunk_size_check: + if not skip_chunk_size_check: assert [len(df.index) for df in frames] == expected_chunk_counts # check 
all items are present @@ -346,12 +365,15 @@ def test_dataframe_access(populated_pipeline: Pipeline) -> None: @pytest.mark.no_load @pytest.mark.essential -def test_db_cursor_access(populated_pipeline: Pipeline) -> None: +def test_db_cursor_access( + populated_pipeline: Pipeline, destination_config: DestinationTestConfiguration +) -> None: # check fetch accessors table_relationship = populated_pipeline.dataset().items total_records = _total_records(populated_pipeline.destination.destination_type) chunk_size = _chunk_size(populated_pipeline.destination.destination_type) expected_chunk_counts = _expected_chunk_count(populated_pipeline) + skip_chunk_size_check = _skip_chunk_size_check(populated_pipeline, destination_config) # check accessing one item one = table_relationship.fetchone() @@ -364,11 +386,13 @@ def test_db_cursor_access(populated_pipeline: Pipeline) -> None: # check fetchmany many = table_relationship.fetchmany(chunk_size) - assert len(many) == chunk_size + if not skip_chunk_size_check: + assert len(many) == chunk_size # check iterfetchmany chunks = list(table_relationship.iter_fetch(chunk_size=chunk_size)) - assert [len(chunk) for chunk in chunks] == expected_chunk_counts + if not skip_chunk_size_check: + assert [len(chunk) for chunk in chunks] == expected_chunk_counts ids = reduce(lambda a, b: a + b, [[item[0] for item in chunk] for chunk in chunks]) assert set(ids) == set(range(total_records)) From 9977d4b31f7f87ae1ff2c9da763b28dd2e69a589 Mon Sep 17 00:00:00 2001 From: Alberto Hernandez <22517436+tetelio@users.noreply.github.com> Date: Sat, 23 May 2026 00:03:45 +0200 Subject: [PATCH 19/25] Better defaults and remove nonessential tests for lance s3 --- .github/workflows/test_destinations_remote.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/test_destinations_remote.yml b/.github/workflows/test_destinations_remote.yml index 4c64470125..f9a5f367fe 100644 --- a/.github/workflows/test_destinations_remote.yml +++ 
b/.github/workflows/test_destinations_remote.yml @@ -36,21 +36,18 @@ jobs: filesystem_drivers: "[\"memory\"]" excluded_destination_test_configuration_ids: "[\"athena-iceberg\", \"athena-s3-tables\"]" extras: "--extra athena" - xdist_workers: 24 - display_name: athena iceberg destinations: "[\"athena\"]" filesystem_drivers: "[\"memory\"]" excluded_destination_test_configuration_ids: "[\"athena\", \"athena-s3-tables\"]" extras: "--extra athena" - xdist_workers: 24 - display_name: athena s3 tables destinations: "[\"athena\"]" filesystem_drivers: "[\"memory\"]" excluded_destination_test_configuration_ids: "[\"athena\", \"athena-iceberg\"]" extras: "--extra athena" - xdist_workers: 24 # BigQuery - display_name: bigquery @@ -123,7 +120,6 @@ jobs: filesystem_drivers: "[\"s3\"]" extras: "--extra lance --extra s3" post_install_commands: "uv run pip install openai" - always_run_all_tests: true # Motherduck - display_name: motherduck From 45af05f37b34c351972af693d3a19adb7cff7930 Mon Sep 17 00:00:00 2001 From: Alberto Hernandez <22517436+tetelio@users.noreply.github.com> Date: Sat, 23 May 2026 13:26:22 +0200 Subject: [PATCH 20/25] remoev loadgroup --- Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/Makefile b/Makefile index eab3ee9883..efe6d1e64f 100644 --- a/Makefile +++ b/Makefile @@ -342,7 +342,6 @@ test-dest-load: ) test-dest-remote-essential: PYTEST_MARKERS = essential -test-dest-remote-essential: PYTEST_XDIST_DIST = loadgroup test-dest-remote-essential: PYTEST_RERUNS = 1 test-dest-remote-essential: $(call RUN_XDIST_SAFE_SPLIT, \ From 3dc21c8bf3f5164519e008dfcba9ab8eb8a7184e Mon Sep 17 00:00:00 2001 From: Alberto Hernandez <22517436+tetelio@users.noreply.github.com> Date: Thu, 28 May 2026 19:19:58 +0200 Subject: [PATCH 21/25] Blacksmith workers for docs testing --- .github/workflows/test_docs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test_docs.yml b/.github/workflows/test_docs.yml index 41165a6c8f..d7ad225ba7 100644 
--- a/.github/workflows/test_docs.yml +++ b/.github/workflows/test_docs.yml @@ -26,7 +26,7 @@ jobs: run_lint: name: snippets - runs-on: ubuntu-latest + runs-on: blacksmith-8vcpu-ubuntu-2404 # Service containers to run with `container-job` services: From c141772247771d17dbbf1c6ed273cc7fe5d83eed Mon Sep 17 00:00:00 2001 From: Alberto Hernandez <22517436+tetelio@users.noreply.github.com> Date: Fri, 29 May 2026 11:35:29 +0200 Subject: [PATCH 22/25] Remove workspace from common tests locally and change label name to test-destinations-early --- .github/workflows/main.yml | 20 ++++++++++---------- Makefile | 1 - 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 3f87642daf..5244f725b1 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -55,28 +55,28 @@ jobs: # # Destination and Sources local tests, do not provide secrets. - # With `test-remote-early`, start without waiting for common (mutually exclusive pairs). + # With `test-destinations-early`, start without waiting for common (mutually exclusive pairs). 
# test_destinations_local_early: name: local - if: contains(github.event.pull_request.labels.*.name, 'test-remote-early') + if: contains(github.event.pull_request.labels.*.name, 'test-destinations-early') uses: ./.github/workflows/test_destinations_local.yml test_destinations_local: name: local needs: test_common - if: ${{ !contains(github.event.pull_request.labels.*.name, 'test-remote-early') }} + if: ${{ !contains(github.event.pull_request.labels.*.name, 'test-destinations-early') }} uses: ./.github/workflows/test_destinations_local.yml test_sources_local_early: name: src local - if: contains(github.event.pull_request.labels.*.name, 'test-remote-early') + if: contains(github.event.pull_request.labels.*.name, 'test-destinations-early') uses: ./.github/workflows/test_sources_local.yml test_sources_local: name: src local needs: test_common - if: ${{ !contains(github.event.pull_request.labels.*.name, 'test-remote-early') }} + if: ${{ !contains(github.event.pull_request.labels.*.name, 'test-destinations-early') }} uses: ./.github/workflows/test_sources_local.yml test_hub: @@ -95,13 +95,13 @@ jobs: uses: ./.github/workflows/test_tools_build_images.yml # - # Remote destination tests need secrets. With `test-remote-early`, start after + # Remote destination tests need secrets. With `test-destinations-early`, start after # authorize only (mutually exclusive pairs, same check name for branch protection). 
test_destinations_remote_early: name: remote needs: [authorize_secrets] - if: contains(github.event.pull_request.labels.*.name, 'test-remote-early') + if: contains(github.event.pull_request.labels.*.name, 'test-destinations-early') uses: ./.github/workflows/test_destinations_remote.yml secrets: inherit with: @@ -110,7 +110,7 @@ jobs: test_destinations_remote: name: remote needs: [authorize_secrets, test_common] - if: ${{ !contains(github.event.pull_request.labels.*.name, 'test-remote-early') }} + if: ${{ !contains(github.event.pull_request.labels.*.name, 'test-destinations-early') }} uses: ./.github/workflows/test_destinations_remote.yml secrets: inherit with: @@ -122,14 +122,14 @@ jobs: test_tools_dbt_runner_early: name: dbt needs: [authorize_secrets] - if: contains(github.event.pull_request.labels.*.name, 'test-remote-early') + if: contains(github.event.pull_request.labels.*.name, 'test-destinations-early') uses: ./.github/workflows/test_tools_dbt_runner.yml secrets: inherit test_tools_dbt_runner: name: dbt needs: [test_common, authorize_secrets] - if: ${{ !contains(github.event.pull_request.labels.*.name, 'test-remote-early') }} + if: ${{ !contains(github.event.pull_request.labels.*.name, 'test-destinations-early') }} uses: ./.github/workflows/test_tools_dbt_runner.yml secrets: inherit diff --git a/Makefile b/Makefile index efe6d1e64f..6188c6ecde 100644 --- a/Makefile +++ b/Makefile @@ -148,7 +148,6 @@ TEST_COMMON_PATHS = \ tests/pipeline \ tests/reflection \ tests/sources \ - tests/workspace \ tests/load/test_dummy_client.py \ tests/libs \ tests/destinations From 81d4bf198c90b516b92ac54116266d67fcfbbaaf Mon Sep 17 00:00:00 2001 From: Alberto Hernandez <22517436+tetelio@users.noreply.github.com> Date: Fri, 29 May 2026 11:39:29 +0200 Subject: [PATCH 23/25] Revert unncessary changes --- pyproject.toml | 4 +- tests/load/test_read_interfaces.py | 61 +++++++++--------------------- 2 files changed, 18 insertions(+), 47 deletions(-) diff --git a/pyproject.toml 
b/pyproject.toml index 3af7ac93b0..4316d2ea8d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -531,8 +531,7 @@ ignore_missing_imports = true [tool.pytest.ini_options] pythonpath = ["dlt"] norecursedirs = [".direnv", ".eggs", "build", "dist"] -#TODO: change less durations -addopts = "-p no:xdist --showlocals --durations 200 -m 'not rfam'" +addopts = "-p no:xdist --showlocals --durations 10 -m 'not rfam'" xfail_strict = true timeout = 1800 faulthandler_timeout = 1500 @@ -542,7 +541,6 @@ python_files = ["test_*.py", "*_test.py"] python_functions = ["*_test", "test_*"] filterwarnings = ["ignore::DeprecationWarning"] markers = [ - "xdist_group(name): group tests on one xdist worker (requires --dist=loadgroup)", "essential: marks all essential tests", "forked: marks tests that must run in a separate process (forked), e.g. due to global state, crashes, or side effects", "no_load: marks tests that do not load anything", diff --git a/tests/load/test_read_interfaces.py b/tests/load/test_read_interfaces.py index 37ece4f312..7defcfee14 100644 --- a/tests/load/test_read_interfaces.py +++ b/tests/load/test_read_interfaces.py @@ -38,9 +38,6 @@ auto_module_test_storage, ) -# Same worker under pytest-xdist --dist=loadgroup (see make test-dest-remote-essential). 
-pytestmark = pytest.mark.xdist_group("read_interfaces") - EXPECTED_COLUMNS = ["id", "decimal", "other_decimal", "created_at", "_dlt_load_id", "_dlt_id"] # items.created_at is generated as `ITEMS_EPOCH + timedelta(seconds=i)` for i in range(total_records) @@ -76,27 +73,6 @@ def _expected_chunk_count(p: Pipeline) -> List[int]: ] -def _skip_chunk_size_check( - pipeline: Pipeline, destination_config: DestinationTestConfiguration -) -> bool: - destination_type = pipeline.destination.destination_type - if destination_type in ( - "dlt.destinations.filesystem", - "dlt.destinations.snowflake", # unpredictable chunk size - "dlt.destinations.ducklake", # vector size seems to not be consistent, typically 700 - "dlt.destinations.lancedb", # default is 200 - "dlt.destinations.lance", - ): - return True - # duckdb over parquet files is batched by FILE_MAX_ITEMS, not requested chunk_size - if ( - destination_type == "dlt.destinations.duckdb" - and destination_config.file_format == "parquet" - ): - return True - return False - - def create_test_source(destination_type: str, table_format: TTableFormat) -> DltSource: total_records = _total_records(destination_type) @@ -298,14 +274,11 @@ def test_fetchscalar(populated_pipeline: Pipeline) -> None: @pytest.mark.no_load @pytest.mark.essential -def test_arrow_access( - populated_pipeline: Pipeline, destination_config: DestinationTestConfiguration -) -> None: +def test_arrow_access(populated_pipeline: Pipeline) -> None: table_relationship = populated_pipeline.dataset().items total_records = _total_records(populated_pipeline.destination.destination_type) chunk_size = _chunk_size(populated_pipeline.destination.destination_type) expected_chunk_counts = _expected_chunk_count(populated_pipeline) - skip_chunk_size_check = _skip_chunk_size_check(populated_pipeline, destination_config) # full table table = table_relationship.arrow() @@ -315,12 +288,13 @@ def test_arrow_access( # chunk table = table_relationship.arrow(chunk_size=chunk_size) assert 
set(table.column_names) == set(EXPECTED_COLUMNS) - if not skip_chunk_size_check: + # NOTE: chunksize is unpredictable on snowflake + if populated_pipeline.destination.destination_type != "dlt.destinations.snowflake": assert table.num_rows == chunk_size # check frame amount and items counts tables = list(table_relationship.iter_arrow(chunk_size=chunk_size)) - if not skip_chunk_size_check: + if populated_pipeline.destination.destination_type != "dlt.destinations.snowflake": assert [t.num_rows for t in tables] == expected_chunk_counts # check all items are present, this MUST also be true for snowflake @@ -330,15 +304,19 @@ def test_arrow_access( @pytest.mark.no_load @pytest.mark.essential -def test_dataframe_access( - populated_pipeline: Pipeline, destination_config: DestinationTestConfiguration -) -> None: +def test_dataframe_access(populated_pipeline: Pipeline) -> None: # access via key table_relationship = populated_pipeline.dataset()["items"] total_records = _total_records(populated_pipeline.destination.destination_type) chunk_size = _chunk_size(populated_pipeline.destination.destination_type) expected_chunk_counts = _expected_chunk_count(populated_pipeline) - skip_chunk_size_check = _skip_chunk_size_check(populated_pipeline, destination_config) + skip_df_chunk_size_check = populated_pipeline.destination.destination_type in [ + "dlt.destinations.filesystem", + "dlt.destinations.snowflake", + "dlt.destinations.ducklake", # vector size seems to not be consistent, typically 700 + "dlt.destinations.lancedb", # default is 200 + "dlt.destinations.lance", + ] # full frame df = table_relationship.df() @@ -348,14 +326,14 @@ def test_dataframe_access( # TODO: snowflake does not follow a chunk size, make and exception (accept range), same for arrow # chunk df = table_relationship.df(chunk_size=chunk_size) - if not skip_chunk_size_check: + if not skip_df_chunk_size_check: assert len(df.index) == chunk_size assert set(df.columns.values) == set(EXPECTED_COLUMNS) # iterate all 
dataframes frames = list(table_relationship.iter_df(chunk_size=chunk_size)) - if not skip_chunk_size_check: + if not skip_df_chunk_size_check: assert [len(df.index) for df in frames] == expected_chunk_counts # check all items are present @@ -365,15 +343,12 @@ def test_dataframe_access( @pytest.mark.no_load @pytest.mark.essential -def test_db_cursor_access( - populated_pipeline: Pipeline, destination_config: DestinationTestConfiguration -) -> None: +def test_db_cursor_access(populated_pipeline: Pipeline) -> None: # check fetch accessors table_relationship = populated_pipeline.dataset().items total_records = _total_records(populated_pipeline.destination.destination_type) chunk_size = _chunk_size(populated_pipeline.destination.destination_type) expected_chunk_counts = _expected_chunk_count(populated_pipeline) - skip_chunk_size_check = _skip_chunk_size_check(populated_pipeline, destination_config) # check accessing one item one = table_relationship.fetchone() @@ -386,13 +361,11 @@ def test_db_cursor_access( # check fetchmany many = table_relationship.fetchmany(chunk_size) - if not skip_chunk_size_check: - assert len(many) == chunk_size + assert len(many) == chunk_size # check iterfetchmany chunks = list(table_relationship.iter_fetch(chunk_size=chunk_size)) - if not skip_chunk_size_check: - assert [len(chunk) for chunk in chunks] == expected_chunk_counts + assert [len(chunk) for chunk in chunks] == expected_chunk_counts ids = reduce(lambda a, b: a + b, [[item[0] for item in chunk] for chunk in chunks]) assert set(ids) == set(range(total_records)) From 518000bf9a01a9d49d2932526bb8b513b0461ad2 Mon Sep 17 00:00:00 2001 From: Alberto Hernandez <22517436+tetelio@users.noreply.github.com> Date: Fri, 29 May 2026 11:42:48 +0200 Subject: [PATCH 24/25] Test From 101dfdd3eb73812c98069766a2718d1c5b2e4486 Mon Sep 17 00:00:00 2001 From: Alberto Hernandez <22517436+tetelio@users.noreply.github.com> Date: Fri, 29 May 2026 12:40:05 +0200 Subject: [PATCH 25/25] Fix flaky test --- 
tests/load/test_read_interfaces.py | 63 +++++++++++++++++++++--------- 1 file changed, 45 insertions(+), 18 deletions(-) diff --git a/tests/load/test_read_interfaces.py b/tests/load/test_read_interfaces.py index 7defcfee14..9e1de70643 100644 --- a/tests/load/test_read_interfaces.py +++ b/tests/load/test_read_interfaces.py @@ -38,6 +38,9 @@ auto_module_test_storage, ) +# Same worker under pytest-xdist --dist=loadgroup (see make test-dest-remote-essential). +pytestmark = pytest.mark.xdist_group("read_interfaces") + EXPECTED_COLUMNS = ["id", "decimal", "other_decimal", "created_at", "_dlt_load_id", "_dlt_id"] # items.created_at is generated as `ITEMS_EPOCH + timedelta(seconds=i)` for i in range(total_records) @@ -73,6 +76,27 @@ def _expected_chunk_count(p: Pipeline) -> List[int]: ] +def _skip_chunk_size_check( + pipeline: Pipeline, destination_config: DestinationTestConfiguration +) -> bool: + destination_type = pipeline.destination.destination_type + if destination_type in ( + "dlt.destinations.filesystem", + "dlt.destinations.snowflake", # unpredictable chunk size + "dlt.destinations.ducklake", # vector size seems to not be consistent, typically 700 + "dlt.destinations.lancedb", # default is 200 + "dlt.destinations.lance", + ): + return True + # duckdb over parquet files is batched by FILE_MAX_ITEMS, not requested chunk_size + if ( + destination_type == "dlt.destinations.duckdb" + and destination_config.file_format == "parquet" + ): + return True + return False + + def create_test_source(destination_type: str, table_format: TTableFormat) -> DltSource: total_records = _total_records(destination_type) @@ -274,11 +298,14 @@ def test_fetchscalar(populated_pipeline: Pipeline) -> None: @pytest.mark.no_load @pytest.mark.essential -def test_arrow_access(populated_pipeline: Pipeline) -> None: +def test_arrow_access( + populated_pipeline: Pipeline, destination_config: DestinationTestConfiguration +) -> None: table_relationship = populated_pipeline.dataset().items 
total_records = _total_records(populated_pipeline.destination.destination_type) chunk_size = _chunk_size(populated_pipeline.destination.destination_type) expected_chunk_counts = _expected_chunk_count(populated_pipeline) + skip_chunk_size_check = _skip_chunk_size_check(populated_pipeline, destination_config) # full table table = table_relationship.arrow() @@ -288,13 +315,12 @@ def test_arrow_access(populated_pipeline: Pipeline) -> None: # chunk table = table_relationship.arrow(chunk_size=chunk_size) assert set(table.column_names) == set(EXPECTED_COLUMNS) - # NOTE: chunksize is unpredictable on snowflake - if populated_pipeline.destination.destination_type != "dlt.destinations.snowflake": + if not skip_chunk_size_check: assert table.num_rows == chunk_size # check frame amount and items counts tables = list(table_relationship.iter_arrow(chunk_size=chunk_size)) - if populated_pipeline.destination.destination_type != "dlt.destinations.snowflake": + if not skip_chunk_size_check: assert [t.num_rows for t in tables] == expected_chunk_counts # check all items are present, this MUST also be true for snowflake @@ -304,19 +330,15 @@ def test_arrow_access(populated_pipeline: Pipeline) -> None: @pytest.mark.no_load @pytest.mark.essential -def test_dataframe_access(populated_pipeline: Pipeline) -> None: +def test_dataframe_access( + populated_pipeline: Pipeline, destination_config: DestinationTestConfiguration +) -> None: # access via key table_relationship = populated_pipeline.dataset()["items"] total_records = _total_records(populated_pipeline.destination.destination_type) chunk_size = _chunk_size(populated_pipeline.destination.destination_type) expected_chunk_counts = _expected_chunk_count(populated_pipeline) - skip_df_chunk_size_check = populated_pipeline.destination.destination_type in [ - "dlt.destinations.filesystem", - "dlt.destinations.snowflake", - "dlt.destinations.ducklake", # vector size seems to not be consistent, typically 700 - "dlt.destinations.lancedb", # 
default is 200 - "dlt.destinations.lance", - ] + skip_chunk_size_check = _skip_chunk_size_check(populated_pipeline, destination_config) # full frame df = table_relationship.df() @@ -326,14 +348,14 @@ def test_dataframe_access(populated_pipeline: Pipeline) -> None: # TODO: snowflake does not follow a chunk size, make and exception (accept range), same for arrow # chunk df = table_relationship.df(chunk_size=chunk_size) - if not skip_df_chunk_size_check: + if not skip_chunk_size_check: assert len(df.index) == chunk_size assert set(df.columns.values) == set(EXPECTED_COLUMNS) # iterate all dataframes frames = list(table_relationship.iter_df(chunk_size=chunk_size)) - if not skip_df_chunk_size_check: + if not skip_chunk_size_check: assert [len(df.index) for df in frames] == expected_chunk_counts # check all items are present @@ -343,12 +365,15 @@ def test_dataframe_access(populated_pipeline: Pipeline) -> None: @pytest.mark.no_load @pytest.mark.essential -def test_db_cursor_access(populated_pipeline: Pipeline) -> None: +def test_db_cursor_access( + populated_pipeline: Pipeline, destination_config: DestinationTestConfiguration +) -> None: # check fetch accessors table_relationship = populated_pipeline.dataset().items total_records = _total_records(populated_pipeline.destination.destination_type) chunk_size = _chunk_size(populated_pipeline.destination.destination_type) expected_chunk_counts = _expected_chunk_count(populated_pipeline) + skip_chunk_size_check = _skip_chunk_size_check(populated_pipeline, destination_config) # check accessing one item one = table_relationship.fetchone() @@ -361,11 +386,13 @@ def test_db_cursor_access(populated_pipeline: Pipeline) -> None: # check fetchmany many = table_relationship.fetchmany(chunk_size) - assert len(many) == chunk_size + if not skip_chunk_size_check: + assert len(many) == chunk_size # check iterfetchmany chunks = list(table_relationship.iter_fetch(chunk_size=chunk_size)) - assert [len(chunk) for chunk in chunks] == 
expected_chunk_counts + if not skip_chunk_size_check: + assert [len(chunk) for chunk in chunks] == expected_chunk_counts ids = reduce(lambda a, b: a + b, [[item[0] for item in chunk] for chunk in chunks]) assert set(ids) == set(range(total_records)) @@ -1791,4 +1818,4 @@ def test_multi_schema_ibis(overlap_pipeline: Pipeline) -> None: assert "alpha_detail" in cols assert "beta_metric" in cols assert "gamma_flag" in cols - assert events.count().execute() == 6 + assert events.count().execute() == 6 \ No newline at end of file