From 36420d071e2edc3581ae833cfe688860cd3b2031 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Thal=C3=A9n?= Date: Wed, 15 Apr 2026 09:22:33 +0000 Subject: [PATCH 1/5] fix: strip trailing slash from FilesystemClient.dataset_path OneLake (Microsoft Fabric) responds with 403 ClientAuthenticationError when BlobClient.exists targets a blob name ending in /. That kills FilesystemClient.initialize_storage at the very first fs.isdir call on self.dataset_path. Non-OneLake backends silently treat it as False and hit the same latent defect, just non-fatally. Strip the empty segment from the pathlib.join so dataset_path never ends in /. Refs #3866 --- dlt/destinations/impl/filesystem/filesystem.py | 2 +- tests/load/filesystem/test_filesystem_client.py | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/dlt/destinations/impl/filesystem/filesystem.py b/dlt/destinations/impl/filesystem/filesystem.py index 2e2ea61837..00033f4f1d 100644 --- a/dlt/destinations/impl/filesystem/filesystem.py +++ b/dlt/destinations/impl/filesystem/filesystem.py @@ -588,7 +588,7 @@ def dataset_path(self) -> str: """A path within a bucket to tables in a dataset NOTE: dataset_name changes if with_staging_dataset is active """ - return self.pathlib.join(self.bucket_path, self.dataset_name, "") # type: ignore[no-any-return] + return self.pathlib.join(self.bucket_path, self.dataset_name) # type: ignore[no-any-return] @contextmanager def with_staging_dataset(self) -> Iterator["FilesystemClient"]: diff --git a/tests/load/filesystem/test_filesystem_client.py b/tests/load/filesystem/test_filesystem_client.py index 9ff9a008be..d52c0fafad 100644 --- a/tests/load/filesystem/test_filesystem_client.py +++ b/tests/load/filesystem/test_filesystem_client.py @@ -555,3 +555,19 @@ def assert_hf_endpoint_set(*args, **kwargs): with patch("huggingface_hub.metadata_update", side_effect=assert_hf_endpoint_set): client.update_dataset_card_metadata(load_id="test") + + +def test_dataset_path_has_no_trailing_separator() -> None: + """`dataset_path` must not end with `/`. + + OneLake (Microsoft Fabric) responds with `403 ClientAuthenticationError` + when `BlobClient.exists` targets a blob name ending in `/`, instead of + the `404 ResourceNotFoundError` that other Azure backends return. That + makes `FilesystemClient.initialize_storage` blow up on its first + `fs.isdir(self.dataset_path)` call before any data is written. Non-OneLake + backends observe the same latent defect as a silent `False`. + """ + client = _client_factory(filesystem(bucket_url="file:///tmp/dlt-test-bucket")) + assert not client.dataset_path.endswith("/"), ( + f"dataset_path must not end with '/', got {client.dataset_path!r}" + ) From eebb842de838f297bac1c8c2905a1892c7b7cfb8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Thal=C3=A9n?= Date: Wed, 15 Apr 2026 11:33:20 +0000 Subject: [PATCH 2/5] style: apply black formatting to new regression test Black wants the multi-line assert in test_dataset_path_has_no_trailing_separator reformatted into a single-line assert. Apply the formatter's output so `make format-check` passes in CI. Refs #3866 --- tests/load/filesystem/test_filesystem_client.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/load/filesystem/test_filesystem_client.py b/tests/load/filesystem/test_filesystem_client.py index d52c0fafad..e960af8feb 100644 --- a/tests/load/filesystem/test_filesystem_client.py +++ b/tests/load/filesystem/test_filesystem_client.py @@ -568,6 +568,6 @@ def test_dataset_path_has_no_trailing_separator() -> None: backends observe the same latent defect as a silent `False`. """ client = _client_factory(filesystem(bucket_url="file:///tmp/dlt-test-bucket")) - assert not client.dataset_path.endswith("/"), ( - f"dataset_path must not end with '/', got {client.dataset_path!r}" - ) + assert not client.dataset_path.endswith( + "/" + ), f"dataset_path must not end with '/', got {client.dataset_path!r}" From f3fd2de9597916f858d3a80bfc851db902b01616 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Thal=C3=A9n?= Date: Wed, 15 Apr 2026 11:36:22 +0000 Subject: [PATCH 3/5] fix: strip trailing slash from FilesystemClient.get_table_dir Same OneLake 403 root cause as the previous commit on dataset_path, one level deeper. FilesystemClient.truncate_tables calls fs.exists(table_dir) for each entry from get_table_dirs(...), which on OneLake 403s on every table once dataset_path is already fixed. Drop the trailing pathlib.sep so get_table_dir returns a path shape that BlobClient.exists accepts. Refs #3866 --- dlt/destinations/impl/filesystem/filesystem.py | 2 +- tests/load/filesystem/test_filesystem_client.py | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/dlt/destinations/impl/filesystem/filesystem.py b/dlt/destinations/impl/filesystem/filesystem.py index 00033f4f1d..101773a1f2 100644 --- a/dlt/destinations/impl/filesystem/filesystem.py +++ b/dlt/destinations/impl/filesystem/filesystem.py @@ -879,7 +879,7 @@ def get_table_dir( """ # dlt tables do not respect layout (for now) table_prefix = self.get_table_prefix(table_name, schema_name=schema_name) - table_dir: str = self.pathlib.dirname(table_prefix) + self.pathlib.sep + table_dir: str = self.pathlib.dirname(table_prefix) if remote: table_dir = self.make_remote_url(table_dir) return table_dir diff --git a/tests/load/filesystem/test_filesystem_client.py b/tests/load/filesystem/test_filesystem_client.py index e960af8feb..99a3b0e3ec 100644 --- a/tests/load/filesystem/test_filesystem_client.py +++ b/tests/load/filesystem/test_filesystem_client.py @@ -571,3 +571,19 @@ def test_dataset_path_has_no_trailing_separator() -> None: assert not client.dataset_path.endswith( "/" ), f"dataset_path must not end with '/', got {client.dataset_path!r}" + + +def test_get_table_dir_has_no_trailing_separator() -> None: + """`get_table_dir` must not end with `/` or `\\`. + + `FilesystemClient.truncate_tables` iterates `get_table_dirs(...)` and + calls `self.fs_client.exists(table_dir)` for each one. On OneLake that + produces a `403 ClientAuthenticationError` on every truncated table + once the `dataset_path` trailing-slash bug (see previous test) is + already fixed. Same root cause, one level deeper. + """ + client = _client_factory(filesystem(bucket_url="file:///tmp/dlt-test-bucket")) + table_dir = client.get_table_dir("some_table") + assert not table_dir.endswith( + ("/", "\\") + ), f"get_table_dir must not end with a separator, got {table_dir!r}" From 93a2794ef40e59eae01c720d4e08c7e145b4fe01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Thal=C3=A9n?= Date: Wed, 15 Apr 2026 12:04:23 +0000 Subject: [PATCH 4/5] test: align trailing-slash expectations with corrected FilesystemClient paths MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tasks 2 and 3 of this PR (#3867) stripped the trailing separator from `FilesystemClient.dataset_path` and `FilesystemClient.get_table_dir`. The pre-existing `test_trailing_separators` hardcoded the old shape (trailing /) in its parameterized assertions. Flip those seven assertions to the corrected shape. Also drop the stale "ending with separator" phrase from `get_table_dir`'s docstring — same invariant flip, land together. `get_table_prefix` is untouched and still preserves its trailing separator for folder-style layouts; the two assertions on that method stay as-is. Refs #3866 --- dlt/destinations/impl/filesystem/filesystem.py | 5 +++-- tests/load/filesystem/test_filesystem_client.py | 16 ++++++++-------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/dlt/destinations/impl/filesystem/filesystem.py b/dlt/destinations/impl/filesystem/filesystem.py index 101773a1f2..17af7e28c1 100644 --- a/dlt/destinations/impl/filesystem/filesystem.py +++ b/dlt/destinations/impl/filesystem/filesystem.py @@ -874,8 +874,9 @@ def prepare_load_table(self, table_name: str) -> PreparedTableSchema: def get_table_dir( self, table_name: str, remote: bool = False, schema_name: Optional[str] = None ) -> str: - """Returns a directory containing table files, ending with separator. - Note that many tables can share the same table dir + """Returns a directory containing table files. + + Note that many tables can share the same table dir. """ # dlt tables do not respect layout (for now) table_prefix = self.get_table_prefix(table_name, schema_name=schema_name) diff --git a/tests/load/filesystem/test_filesystem_client.py b/tests/load/filesystem/test_filesystem_client.py index 99a3b0e3ec..21803afe7b 100644 --- a/tests/load/filesystem/test_filesystem_client.py +++ b/tests/load/filesystem/test_filesystem_client.py @@ -134,18 +134,18 @@ def test_trailing_separators(layout: str, with_gdrive_buckets_env: str) -> None: os.environ["DESTINATION__FILESYSTEM__LAYOUT"] = layout load = setup_loader("_data") client: FilesystemClient = load.get_destination_client(Schema("empty")) # type: ignore[assignment] - # assert separators - assert client.dataset_path.endswith("_data/") - assert client.get_table_dir("_dlt_versions").endswith("_dlt_versions/") - assert client.get_table_dir("_dlt_versions", remote=True).endswith("_dlt_versions/") + # assert paths no longer carry a trailing separator after the strip-trailing-slash fix + assert client.dataset_path.endswith("_data") + assert client.get_table_dir("_dlt_versions").endswith("_dlt_versions") + assert client.get_table_dir("_dlt_versions", remote=True).endswith("_dlt_versions") is_folder = layout.startswith("{table_name}/") if is_folder: - assert client.get_table_dir("letters").endswith("_data/letters/") - assert client.get_table_dir("letters", remote=True).endswith("_data/letters/") + assert client.get_table_dir("letters").endswith("_data/letters") + assert client.get_table_dir("letters", remote=True).endswith("_data/letters") else: # strip prefix - assert client.get_table_dir("letters").endswith("_data/") - assert client.get_table_dir("letters", remote=True).endswith("_data/") + assert client.get_table_dir("letters").endswith("_data") + assert client.get_table_dir("letters", remote=True).endswith("_data") if is_folder: assert client.get_table_prefix("letters").endswith("_data/letters/") else: From b5aedc023c808b5c70d52de6fabdfb969322e5d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Thal=C3=A9n?= Date: Wed, 15 Apr 2026 12:41:24 +0000 Subject: [PATCH 5/5] test: align test_destination_config_in_name with stripped dataset_path Task 2 of this PR (#3867) stripped the trailing separator from `FilesystemClient.dataset_path`. The pre-existing `test_destination_config_in_name` assertion at line 218 was `endswith(dataset_name + pathlib.sep)`, which encoded the old shape. Replace with `endswith(dataset_name)` and drop the now-unused `pathlib` local variable (and its `type: ignore` comment). Caught by `make test-common-p`, not surfaced by the filesystem test module run in Task 4 because this test lives under `tests/destinations/`. Refs #3866 --- tests/destinations/test_destination_name_and_config.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/destinations/test_destination_name_and_config.py b/tests/destinations/test_destination_name_and_config.py index 66e19c8362..0b15d399fd 100644 --- a/tests/destinations/test_destination_name_and_config.py +++ b/tests/destinations/test_destination_name_and_config.py @@ -214,5 +214,4 @@ def test_destination_config_in_name(environment: DictStrStr) -> None: environment["DESTINATION__FILESYSTEM-PROD__BUCKET_URL"] = FilesystemConfiguration.make_file_url( get_test_storage_root() ) - pathlib = p._fs_client().pathlib # type: ignore[attr-defined] - assert p._fs_client().dataset_path.endswith(p.dataset_name + pathlib.sep) + assert p._fs_client().dataset_path.endswith(p.dataset_name)