From 36420d071e2edc3581ae833cfe688860cd3b2031 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Thal=C3=A9n?= Date: Wed, 15 Apr 2026 09:22:33 +0000 Subject: [PATCH 01/16] fix: strip trailing slash from FilesystemClient.dataset_path OneLake (Microsoft Fabric) responds with 403 ClientAuthenticationError when BlobClient.exists targets a blob name ending in /. That kills FilesystemClient.initialize_storage at the very first fs.isdir call on self.dataset_path. Non-OneLake backends silently treat it as False and hit the same latent defect, just non-fatally. Strip the empty segment from the pathlib.join so dataset_path never ends in /. Refs #3866 --- dlt/destinations/impl/filesystem/filesystem.py | 2 +- tests/load/filesystem/test_filesystem_client.py | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/dlt/destinations/impl/filesystem/filesystem.py b/dlt/destinations/impl/filesystem/filesystem.py index 2e2ea61837..00033f4f1d 100644 --- a/dlt/destinations/impl/filesystem/filesystem.py +++ b/dlt/destinations/impl/filesystem/filesystem.py @@ -588,7 +588,7 @@ def dataset_path(self) -> str: """A path within a bucket to tables in a dataset NOTE: dataset_name changes if with_staging_dataset is active """ - return self.pathlib.join(self.bucket_path, self.dataset_name, "") # type: ignore[no-any-return] + return self.pathlib.join(self.bucket_path, self.dataset_name) # type: ignore[no-any-return] @contextmanager def with_staging_dataset(self) -> Iterator["FilesystemClient"]: diff --git a/tests/load/filesystem/test_filesystem_client.py b/tests/load/filesystem/test_filesystem_client.py index 9ff9a008be..d52c0fafad 100644 --- a/tests/load/filesystem/test_filesystem_client.py +++ b/tests/load/filesystem/test_filesystem_client.py @@ -555,3 +555,19 @@ def assert_hf_endpoint_set(*args, **kwargs): with patch("huggingface_hub.metadata_update", side_effect=assert_hf_endpoint_set): client.update_dataset_card_metadata(load_id="test") + + +def 
test_dataset_path_has_no_trailing_separator() -> None: + """`dataset_path` must not end with `/`. + + OneLake (Microsoft Fabric) responds with `403 ClientAuthenticationError` + when `BlobClient.exists` targets a blob name ending in `/`, instead of + the `404 ResourceNotFoundError` that other Azure backends return. That + makes `FilesystemClient.initialize_storage` blow up on its first + `fs.isdir(self.dataset_path)` call before any data is written. Non-OneLake + backends observe the same latent defect as a silent `False`. + """ + client = _client_factory(filesystem(bucket_url="file:///tmp/dlt-test-bucket")) + assert not client.dataset_path.endswith("/"), ( + f"dataset_path must not end with '/', got {client.dataset_path!r}" + ) From eebb842de838f297bac1c8c2905a1892c7b7cfb8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Thal=C3=A9n?= Date: Wed, 15 Apr 2026 11:33:20 +0000 Subject: [PATCH 02/16] style: apply black formatting to new regression test Black rewraps the multi-line assert in test_dataset_path_has_no_trailing_separator: instead of parenthesizing the message, it splits the `endswith` call across lines and puts the message after the closing parenthesis (the assert still spans three lines). Apply the formatter's output so `make format-check` passes in CI. Refs #3866 --- tests/load/filesystem/test_filesystem_client.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/load/filesystem/test_filesystem_client.py b/tests/load/filesystem/test_filesystem_client.py index d52c0fafad..e960af8feb 100644 --- a/tests/load/filesystem/test_filesystem_client.py +++ b/tests/load/filesystem/test_filesystem_client.py @@ -568,6 +568,6 @@ def test_dataset_path_has_no_trailing_separator() -> None: backends observe the same latent defect as a silent `False`. 
""" client = _client_factory(filesystem(bucket_url="file:///tmp/dlt-test-bucket")) - assert not client.dataset_path.endswith("/"), ( - f"dataset_path must not end with '/', got {client.dataset_path!r}" - ) + assert not client.dataset_path.endswith( + "/" + ), f"dataset_path must not end with '/', got {client.dataset_path!r}" From f3fd2de9597916f858d3a80bfc851db902b01616 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Thal=C3=A9n?= Date: Wed, 15 Apr 2026 11:36:22 +0000 Subject: [PATCH 03/16] fix: strip trailing slash from FilesystemClient.get_table_dir Same OneLake 403 root cause as the previous commit on dataset_path, one level deeper. FilesystemClient.truncate_tables calls fs.exists(table_dir) for each entry from get_table_dirs(...), which on OneLake 403s on every table once dataset_path is already fixed. Drop the trailing pathlib.sep so get_table_dir returns a path shape that BlobClient.exists accepts. Refs #3866 --- dlt/destinations/impl/filesystem/filesystem.py | 2 +- tests/load/filesystem/test_filesystem_client.py | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/dlt/destinations/impl/filesystem/filesystem.py b/dlt/destinations/impl/filesystem/filesystem.py index 00033f4f1d..101773a1f2 100644 --- a/dlt/destinations/impl/filesystem/filesystem.py +++ b/dlt/destinations/impl/filesystem/filesystem.py @@ -879,7 +879,7 @@ def get_table_dir( """ # dlt tables do not respect layout (for now) table_prefix = self.get_table_prefix(table_name, schema_name=schema_name) - table_dir: str = self.pathlib.dirname(table_prefix) + self.pathlib.sep + table_dir: str = self.pathlib.dirname(table_prefix) if remote: table_dir = self.make_remote_url(table_dir) return table_dir diff --git a/tests/load/filesystem/test_filesystem_client.py b/tests/load/filesystem/test_filesystem_client.py index e960af8feb..99a3b0e3ec 100644 --- a/tests/load/filesystem/test_filesystem_client.py +++ b/tests/load/filesystem/test_filesystem_client.py @@ -571,3 +571,19 @@ 
def test_dataset_path_has_no_trailing_separator() -> None: assert not client.dataset_path.endswith( "/" ), f"dataset_path must not end with '/', got {client.dataset_path!r}" + + +def test_get_table_dir_has_no_trailing_separator() -> None: + """`get_table_dir` must not end with `/` or `\\`. + + `FilesystemClient.truncate_tables` iterates `get_table_dirs(...)` and + calls `self.fs_client.exists(table_dir)` for each one. On OneLake that + produces a `403 ClientAuthenticationError` on every truncated table + once the `dataset_path` trailing-slash bug (see previous test) is + already fixed. Same root cause, one level deeper. + """ + client = _client_factory(filesystem(bucket_url="file:///tmp/dlt-test-bucket")) + table_dir = client.get_table_dir("some_table") + assert not table_dir.endswith( + ("/", "\\") + ), f"get_table_dir must not end with a separator, got {table_dir!r}" From 93a2794ef40e59eae01c720d4e08c7e145b4fe01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Thal=C3=A9n?= Date: Wed, 15 Apr 2026 12:04:23 +0000 Subject: [PATCH 04/16] test: align trailing-slash expectations with corrected FilesystemClient paths MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tasks 2 and 3 of this PR (#3867) stripped the trailing separator from `FilesystemClient.dataset_path` and `FilesystemClient.get_table_dir`. The pre-existing `test_trailing_separators` hardcoded the old shape (trailing /) in its parameterized assertions. Flip those seven assertions to the corrected shape. Also drop the stale "ending with separator" phrase from `get_table_dir`'s docstring — same invariant flip, land together. `get_table_prefix` is untouched and still preserves its trailing separator for folder-style layouts; the two assertions on that method stay as-is. 
Refs #3866 --- dlt/destinations/impl/filesystem/filesystem.py | 5 +++-- tests/load/filesystem/test_filesystem_client.py | 16 ++++++++-------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/dlt/destinations/impl/filesystem/filesystem.py b/dlt/destinations/impl/filesystem/filesystem.py index 101773a1f2..17af7e28c1 100644 --- a/dlt/destinations/impl/filesystem/filesystem.py +++ b/dlt/destinations/impl/filesystem/filesystem.py @@ -874,8 +874,9 @@ def prepare_load_table(self, table_name: str) -> PreparedTableSchema: def get_table_dir( self, table_name: str, remote: bool = False, schema_name: Optional[str] = None ) -> str: - """Returns a directory containing table files, ending with separator. - Note that many tables can share the same table dir + """Returns a directory containing table files. + + Note that many tables can share the same table dir. """ # dlt tables do not respect layout (for now) table_prefix = self.get_table_prefix(table_name, schema_name=schema_name) diff --git a/tests/load/filesystem/test_filesystem_client.py b/tests/load/filesystem/test_filesystem_client.py index 99a3b0e3ec..21803afe7b 100644 --- a/tests/load/filesystem/test_filesystem_client.py +++ b/tests/load/filesystem/test_filesystem_client.py @@ -134,18 +134,18 @@ def test_trailing_separators(layout: str, with_gdrive_buckets_env: str) -> None: os.environ["DESTINATION__FILESYSTEM__LAYOUT"] = layout load = setup_loader("_data") client: FilesystemClient = load.get_destination_client(Schema("empty")) # type: ignore[assignment] - # assert separators - assert client.dataset_path.endswith("_data/") - assert client.get_table_dir("_dlt_versions").endswith("_dlt_versions/") - assert client.get_table_dir("_dlt_versions", remote=True).endswith("_dlt_versions/") + # assert paths no longer carry a trailing separator after the strip-trailing-slash fix + assert client.dataset_path.endswith("_data") + assert client.get_table_dir("_dlt_versions").endswith("_dlt_versions") + assert 
client.get_table_dir("_dlt_versions", remote=True).endswith("_dlt_versions") is_folder = layout.startswith("{table_name}/") if is_folder: - assert client.get_table_dir("letters").endswith("_data/letters/") - assert client.get_table_dir("letters", remote=True).endswith("_data/letters/") + assert client.get_table_dir("letters").endswith("_data/letters") + assert client.get_table_dir("letters", remote=True).endswith("_data/letters") else: # strip prefix - assert client.get_table_dir("letters").endswith("_data/") - assert client.get_table_dir("letters", remote=True).endswith("_data/") + assert client.get_table_dir("letters").endswith("_data") + assert client.get_table_dir("letters", remote=True).endswith("_data") if is_folder: assert client.get_table_prefix("letters").endswith("_data/letters/") else: From b5aedc023c808b5c70d52de6fabdfb969322e5d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Thal=C3=A9n?= Date: Wed, 15 Apr 2026 12:41:24 +0000 Subject: [PATCH 05/16] test: align test_destination_config_in_name with stripped dataset_path Task 2 of this PR (#3867) stripped the trailing separator from `FilesystemClient.dataset_path`. The pre-existing `test_destination_config_in_name` assertion at line 218 was `endswith(dataset_name + pathlib.sep)`, which encoded the old shape. Replace with `endswith(dataset_name)` and drop the now-unused `pathlib` local variable (and its `type: ignore` comment). Caught by `make test-common-p`, not surfaced by the filesystem test module run in Task 4 because this test lives under `tests/destinations/`. 
Refs #3866 --- tests/destinations/test_destination_name_and_config.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/destinations/test_destination_name_and_config.py b/tests/destinations/test_destination_name_and_config.py index 66e19c8362..0b15d399fd 100644 --- a/tests/destinations/test_destination_name_and_config.py +++ b/tests/destinations/test_destination_name_and_config.py @@ -214,5 +214,4 @@ def test_destination_config_in_name(environment: DictStrStr) -> None: environment["DESTINATION__FILESYSTEM-PROD__BUCKET_URL"] = FilesystemConfiguration.make_file_url( get_test_storage_root() ) - pathlib = p._fs_client().pathlib # type: ignore[attr-defined] - assert p._fs_client().dataset_path.endswith(p.dataset_name + pathlib.sep) + assert p._fs_client().dataset_path.endswith(p.dataset_name) From 26ee8ffefeae8733f93e679e05f3f2e39d6c37ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Thal=C3=A9n?= Date: Thu, 16 Apr 2026 11:31:13 +0000 Subject: [PATCH 06/16] feat(fabric): add access_token field to FabricCredentials MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduces an optional `access_token` field on `FabricCredentials` that holds a pre-fetched AAD bearer token, and a `get_access_token()` helper that returns it as a raw string or `None`. This is the first piece of notebook-user identity support — a subsequent commit will add an injectable `TokenCredential` path, and the DSN builder and `FabricSqlClient.open_connection` will start branching on `get_access_token()` later in the PR. 
Refs #3865 --- dlt/destinations/impl/fabric/configuration.py | 10 ++++++++++ .../load/fabric/test_fabric_configuration.py | 19 +++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/dlt/destinations/impl/fabric/configuration.py b/dlt/destinations/impl/fabric/configuration.py index e7cfcc2ccf..c7c031a4b4 100644 --- a/dlt/destinations/impl/fabric/configuration.py +++ b/dlt/destinations/impl/fabric/configuration.py @@ -5,6 +5,7 @@ from dlt.common.configuration.specs import AzureServicePrincipalCredentials from dlt.common.destination.client import DestinationClientDwhWithStagingConfiguration from dlt.common.exceptions import MissingDependencyException +from dlt.common.typing import TSecretStrValue from dlt import version _AZURE_STORAGE_EXTRA = f"{version.DLT_PKG_NAME}[az]" @@ -38,6 +39,15 @@ class FabricCredentials(AzureServicePrincipalCredentials): azure_storage_account_name: Optional[str] = None """Not used for Fabric Warehouse credentials (only staging credentials need this)""" + access_token: Optional[TSecretStrValue] = None + """Pre-fetched AAD bearer token for Fabric Warehouse.""" + + def get_access_token(self) -> Optional[str]: + """Return an AAD bearer token for Fabric Warehouse, or `None`.""" + if self.access_token is not None: + return str(self.access_token) + return None + def on_partial(self) -> None: """Enable fallback to DefaultAzureCredential if explicit credentials not provided.""" try: diff --git a/tests/load/fabric/test_fabric_configuration.py b/tests/load/fabric/test_fabric_configuration.py index 329d72c5ac..09f0aeeae6 100644 --- a/tests/load/fabric/test_fabric_configuration.py +++ b/tests/load/fabric/test_fabric_configuration.py @@ -194,3 +194,22 @@ def test_fabric_credentials_authentication_method() -> None: # Verify ActiveDirectoryServicePrincipal is set dsn_dict = creds.get_odbc_dsn_dict() assert dsn_dict["AUTHENTICATION"] == "ActiveDirectoryServicePrincipal" + + +def test_get_access_token_returns_raw_string_when_set() -> None: + 
"""`get_access_token()` returns the raw `access_token` when it is set.""" + creds = FabricCredentials() + creds.host = "test.datawarehouse.fabric.microsoft.com" + creds.database = "testdb" + creds.access_token = "abc123" + + assert creds.get_access_token() == "abc123" + + +def test_get_access_token_returns_none_when_no_token_configured() -> None: + """`get_access_token()` returns None when neither access_token nor azure_credential is set.""" + creds = FabricCredentials() + creds.host = "test.datawarehouse.fabric.microsoft.com" + creds.database = "testdb" + + assert creds.get_access_token() is None From 9d572f18f10d54f7b77bfbac0366bef78361e91d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Thal=C3=A9n?= Date: Thu, 16 Apr 2026 11:34:14 +0000 Subject: [PATCH 07/16] feat(fabric): add azure_credential field and extend get_access_token Adds an optional `azure_credential: TokenCredential` field on `FabricCredentials` and teaches `get_access_token()` to call `get_token("https://database.windows.net/.default")` on it when the raw `access_token` is not set. This gives long-running notebook sessions a refreshing credential path while keeping the one-shot `access_token` string path for simple cases. The field uses `Optional[Any]` at runtime because dlt's `configspec` decorator does not support forward-referenced types; the docstring documents the expected `TokenCredential` protocol. 
Refs #3865 --- dlt/destinations/impl/fabric/configuration.py | 5 +++++ tests/load/fabric/test_fabric_configuration.py | 18 ++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/dlt/destinations/impl/fabric/configuration.py b/dlt/destinations/impl/fabric/configuration.py index c7c031a4b4..11766e2780 100644 --- a/dlt/destinations/impl/fabric/configuration.py +++ b/dlt/destinations/impl/fabric/configuration.py @@ -42,10 +42,15 @@ class FabricCredentials(AzureServicePrincipalCredentials): access_token: Optional[TSecretStrValue] = None """Pre-fetched AAD bearer token for Fabric Warehouse.""" + azure_credential: Optional[Any] = None + """Injectable `azure.core.credentials.TokenCredential` for Fabric Warehouse.""" + def get_access_token(self) -> Optional[str]: """Return an AAD bearer token for Fabric Warehouse, or `None`.""" if self.access_token is not None: return str(self.access_token) + if self.azure_credential is not None: + return self.azure_credential.get_token("https://database.windows.net/.default").token return None def on_partial(self) -> None: diff --git a/tests/load/fabric/test_fabric_configuration.py b/tests/load/fabric/test_fabric_configuration.py index 09f0aeeae6..bdd49e2390 100644 --- a/tests/load/fabric/test_fabric_configuration.py +++ b/tests/load/fabric/test_fabric_configuration.py @@ -213,3 +213,21 @@ def test_get_access_token_returns_none_when_no_token_configured() -> None: creds.database = "testdb" assert creds.get_access_token() is None + + +def test_get_access_token_calls_injected_credential_when_set() -> None: + """`get_access_token()` delegates to an injected TokenCredential when + `access_token` is not set.""" + from unittest.mock import MagicMock + from azure.core.credentials import AccessToken + + fake_credential = MagicMock() + fake_credential.get_token.return_value = AccessToken("injected-token", 1234567890) + + creds = FabricCredentials() + creds.host = "test.datawarehouse.fabric.microsoft.com" + creds.database = "testdb" + 
creds.azure_credential = fake_credential + + assert creds.get_access_token() == "injected-token" + fake_credential.get_token.assert_called_once_with("https://database.windows.net/.default") From 0d0f864b742d7f5b2305985770b5df0527431441 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Thal=C3=A9n?= Date: Thu, 16 Apr 2026 11:36:20 +0000 Subject: [PATCH 08/16] feat(fabric): omit ODBC auth fields in DSN when token-auth mode is active MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `get_odbc_dsn_dict` now checks `get_access_token()` and skips `AUTHENTICATION`/`UID`/`PWD` when a bearer token is available. SP path is unchanged — the existing regression tests for `ActiveDirectoryServicePrincipal` and SP credential derivation remain green. Refs #3865 --- dlt/destinations/impl/fabric/configuration.py | 12 +++++++----- .../load/fabric/test_fabric_configuration.py | 19 +++++++++++++++++++ 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/dlt/destinations/impl/fabric/configuration.py b/dlt/destinations/impl/fabric/configuration.py index 11766e2780..9db1eac821 100644 --- a/dlt/destinations/impl/fabric/configuration.py +++ b/dlt/destinations/impl/fabric/configuration.py @@ -68,18 +68,20 @@ def on_partial(self) -> None: self.resolve() def get_odbc_dsn_dict(self) -> Dict[str, Any]: - """Build ODBC DSN dictionary with Fabric-specific settings.""" - params = { + """Build the ODBC DSN dictionary with Fabric-specific settings.""" + params: Dict[str, Any] = { "DRIVER": "{ODBC Driver 18 for SQL Server}", "SERVER": f"{self.host},{self.port}", "DATABASE": self.database, - "AUTHENTICATION": "ActiveDirectoryServicePrincipal", - "LongAsMax": "yes", # Required for UTF-8 collation support + "LongAsMax": "yes", "Encrypt": "yes", "TrustServerCertificate": "no", } - # Add Service Principal credentials if provided + if self.get_access_token() is not None: + return params + + params["AUTHENTICATION"] = "ActiveDirectoryServicePrincipal" if 
self.azure_client_id and self.azure_tenant_id and self.azure_client_secret: params["UID"] = f"{self.azure_client_id}@{self.azure_tenant_id}" params["PWD"] = str(self.azure_client_secret) diff --git a/tests/load/fabric/test_fabric_configuration.py b/tests/load/fabric/test_fabric_configuration.py index bdd49e2390..8c40a07967 100644 --- a/tests/load/fabric/test_fabric_configuration.py +++ b/tests/load/fabric/test_fabric_configuration.py @@ -231,3 +231,22 @@ def test_get_access_token_calls_injected_credential_when_set() -> None: assert creds.get_access_token() == "injected-token" fake_credential.get_token.assert_called_once_with("https://database.windows.net/.default") + + +def test_get_odbc_dsn_dict_omits_auth_fields_in_token_mode() -> None: + """When `access_token` is set, the DSN dict must not include + `AUTHENTICATION`/`UID`/`PWD`.""" + creds = FabricCredentials() + creds.host = "test.datawarehouse.fabric.microsoft.com" + creds.database = "testdb" + creds.access_token = "abc123" + + dsn_dict = creds.get_odbc_dsn_dict() + + assert "AUTHENTICATION" not in dsn_dict + assert "UID" not in dsn_dict + assert "PWD" not in dsn_dict + assert dsn_dict["DRIVER"] == "{ODBC Driver 18 for SQL Server}" + assert dsn_dict["SERVER"] == "test.datawarehouse.fabric.microsoft.com,1433" + assert dsn_dict["DATABASE"] == "testdb" + assert dsn_dict["LongAsMax"] == "yes" From 75b0ee04e6d9f35bfd5a588c2c9a979cda635f0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Thal=C3=A9n?= Date: Thu, 16 Apr 2026 11:38:26 +0000 Subject: [PATCH 09/16] feat(fabric): skip DefaultAzureCredential fallback in token-auth mode `FabricCredentials.on_partial` previously attempted to fall back to `DefaultAzureCredential` when explicit SP credentials were missing. That fallback is not supported inside Fabric notebooks. Skip it when either `access_token` or `azure_credential` is set. SP path unchanged. 
Refs #3865 --- dlt/destinations/impl/fabric/configuration.py | 11 ++++++++--- tests/load/fabric/test_fabric_configuration.py | 18 ++++++++++++++++++ 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/dlt/destinations/impl/fabric/configuration.py b/dlt/destinations/impl/fabric/configuration.py index 9db1eac821..91a389f45d 100644 --- a/dlt/destinations/impl/fabric/configuration.py +++ b/dlt/destinations/impl/fabric/configuration.py @@ -54,16 +54,21 @@ def get_access_token(self) -> Optional[str]: return None def on_partial(self) -> None: - """Enable fallback to DefaultAzureCredential if explicit credentials not provided.""" + """Resolve partial credentials. + + When `access_token` or `azure_credential` is set, skip the + `DefaultAzureCredential` fallback -- the user has already provided auth. + """ + if self.access_token is not None or self.azure_credential is not None: + return + try: from azure.identity import DefaultAzureCredential except ModuleNotFoundError: raise MissingDependencyException(self.__class__.__name__, [_AZURE_STORAGE_EXTRA]) - # If no explicit Service Principal credentials, use default credentials if not self.azure_client_id or not self.azure_client_secret or not self.azure_tenant_id: self._set_default_credentials(DefaultAzureCredential()) - # Resolve if we have warehouse connection details (not storage account name) if self.host and self.database: self.resolve() diff --git a/tests/load/fabric/test_fabric_configuration.py b/tests/load/fabric/test_fabric_configuration.py index 8c40a07967..fcfefa7aa1 100644 --- a/tests/load/fabric/test_fabric_configuration.py +++ b/tests/load/fabric/test_fabric_configuration.py @@ -250,3 +250,21 @@ def test_get_odbc_dsn_dict_omits_auth_fields_in_token_mode() -> None: assert dsn_dict["SERVER"] == "test.datawarehouse.fabric.microsoft.com,1433" assert dsn_dict["DATABASE"] == "testdb" assert dsn_dict["LongAsMax"] == "yes" + + +def test_on_partial_skips_default_azure_credential_in_token_mode() -> None: + """When 
`access_token` is set, `on_partial` must not attempt to + import or instantiate `DefaultAzureCredential`.""" + from unittest.mock import patch + + creds = FabricCredentials() + creds.host = "test.datawarehouse.fabric.microsoft.com" + creds.database = "testdb" + creds.access_token = "abc123" + + with patch( + "dlt.destinations.impl.fabric.configuration.FabricCredentials._set_default_credentials", + ) as mock_set_default: + creds.on_partial() + + mock_set_default.assert_not_called() From 225fef383ee110b932d0493e50e4f561cf809bac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Thal=C3=A9n?= Date: Thu, 16 Apr 2026 11:42:40 +0000 Subject: [PATCH 10/16] feat(fabric): override open_connection for notebook-token auth `FabricSqlClient.open_connection` now branches on `credentials.get_access_token()`. When a bearer token is available, it packs the token into the little-endian UTF-16 struct ODBC Driver 18 expects for `SQL_COPT_SS_ACCESS_TOKEN` (1256) and passes it via `pyodbc.connect(..., attrs_before={1256: ...})`. The datetimeoffset output converter and autocommit-on behavior are preserved. When no token is available, the call falls through to the parent path. Six mocked tests cover the struct layout, attrs_before kwarg, fall-through, autocommit, output converter, and _conn caching. 
Refs #3865 --- dlt/destinations/impl/fabric/sql_client.py | 33 ++++- tests/load/fabric/test_fabric_sql_client.py | 138 ++++++++++++++++++++ 2 files changed, 167 insertions(+), 4 deletions(-) create mode 100644 tests/load/fabric/test_fabric_sql_client.py diff --git a/dlt/destinations/impl/fabric/sql_client.py b/dlt/destinations/impl/fabric/sql_client.py index c7309eab95..174fe6d5db 100644 --- a/dlt/destinations/impl/fabric/sql_client.py +++ b/dlt/destinations/impl/fabric/sql_client.py @@ -1,18 +1,25 @@ """SQL client for Fabric Warehouse - extends Synapse SQL client""" -from typing import TYPE_CHECKING +import struct +from typing import TYPE_CHECKING, Any from dlt.common.destination import DestinationCapabilitiesContext from dlt.destinations.impl.synapse.sql_client import SynapseSqlClient +from dlt.destinations.impl.mssql.sql_client import handle_datetimeoffset if TYPE_CHECKING: from dlt.destinations.impl.fabric.configuration import FabricCredentials +SQL_COPT_SS_ACCESS_TOKEN = 1256 + + class FabricSqlClient(SynapseSqlClient): - """SQL client for Microsoft Fabric Warehouse + """SQL client for Microsoft Fabric Warehouse. - Inherits all behavior from Synapse since Fabric Warehouse is built on Synapse technology. + Overrides `open_connection` to support passing a pre-fetched AAD bearer + token via `attrs_before={SQL_COPT_SS_ACCESS_TOKEN: ...}` when the + credentials object is in notebook-token mode. 
""" def __init__( @@ -22,6 +29,24 @@ def __init__( credentials: "FabricCredentials", capabilities: DestinationCapabilitiesContext, ) -> None: - # FabricCredentials has all required attributes: database, to_odbc_dsn(), connect_timeout super().__init__(dataset_name, staging_dataset_name, credentials, capabilities) # type: ignore[arg-type] self.credentials: "FabricCredentials" = credentials # type: ignore[assignment] + + def open_connection(self) -> Any: + """Open a pyodbc connection, passing an AAD bearer token when available.""" + import pyodbc + + token_str = self.credentials.get_access_token() + if token_str is None: + return super().open_connection() + + raw = token_str.encode("utf-16-le") + token_struct = struct.pack(f" None: + """Install a fake pyodbc module in sys.modules.""" + fake_pyodbc = MagicMock(name="pyodbc_module") + monkeypatch.setitem(sys.modules, "pyodbc", fake_pyodbc) + yield + + +def _fake_sql_client() -> SimpleNamespace: + """Stand-in FabricSqlClient with attributes open_connection touches.""" + creds = SimpleNamespace( + host="test.datawarehouse.fabric.microsoft.com", + port=1433, + database="testdb", + connect_timeout=15, + access_token=None, + azure_credential=None, + azure_client_id=None, + azure_tenant_id=None, + azure_client_secret=None, + ) + creds.get_access_token = lambda: ( + str(creds.access_token) if creds.access_token is not None else None + ) + creds.to_odbc_dsn = MagicMock( + return_value=( + "DRIVER={ODBC Driver 18 for SQL Server};" + "SERVER=test.datawarehouse.fabric.microsoft.com,1433;" + "DATABASE=testdb;" + "LongAsMax=yes;Encrypt=yes;TrustServerCertificate=no;" + ) + ) + return SimpleNamespace(credentials=creds, _conn=None) + + +def test_open_connection_passes_token_via_attrs_before_1256() -> None: + from dlt.destinations.impl.fabric.sql_client import FabricSqlClient + + client = _fake_sql_client() + client.credentials.access_token = "FAKE_TOKEN" + + FabricSqlClient.open_connection(client) + + pyodbc = sys.modules["pyodbc"] + 
assert pyodbc.connect.called + attrs_before = pyodbc.connect.call_args.kwargs["attrs_before"] + assert 1256 in attrs_before + raw = "FAKE_TOKEN".encode("utf-16-le") + expected = struct.pack(f" None: + from dlt.destinations.impl.fabric.sql_client import FabricSqlClient + + client = _fake_sql_client() + client.credentials.access_token = "FAKE_TOKEN" + client.credentials.connect_timeout = 42 + + FabricSqlClient.open_connection(client) + + pyodbc = sys.modules["pyodbc"] + assert pyodbc.connect.call_args.kwargs["timeout"] == 42 + + +def test_open_connection_uses_sp_path_when_no_access_token() -> None: + from unittest.mock import patch + + from dlt.destinations.impl.fabric.sql_client import FabricSqlClient + from dlt.destinations.impl.mssql.sql_client import PyOdbcMsSqlClient + + # super() requires a real FabricSqlClient instance, so build one + # with __init__ bypassed + client = FabricSqlClient.__new__(FabricSqlClient) + base = _fake_sql_client() + client.credentials = base.credentials + client._conn = None + client.credentials.access_token = None + + sentinel = object() + with patch.object(PyOdbcMsSqlClient, "open_connection", return_value=sentinel) as mock_super: + result = FabricSqlClient.open_connection(client) + + mock_super.assert_called_once() + assert result is sentinel + + +def test_open_connection_sets_autocommit_true_in_token_mode() -> None: + from dlt.destinations.impl.fabric.sql_client import FabricSqlClient + + client = _fake_sql_client() + client.credentials.access_token = "FAKE_TOKEN" + + FabricSqlClient.open_connection(client) + + pyodbc = sys.modules["pyodbc"] + returned_conn = pyodbc.connect.return_value + assert returned_conn.autocommit is True + + +def test_open_connection_installs_datetimeoffset_converter_in_token_mode() -> None: + from dlt.destinations.impl.fabric.sql_client import FabricSqlClient + + client = _fake_sql_client() + client.credentials.access_token = "FAKE_TOKEN" + + FabricSqlClient.open_connection(client) + + pyodbc = 
sys.modules["pyodbc"] + returned_conn = pyodbc.connect.return_value + returned_conn.add_output_converter.assert_called() + converter_args = returned_conn.add_output_converter.call_args.args + assert converter_args[0] == -155 + + +def test_open_connection_caches_conn_on_self() -> None: + from dlt.destinations.impl.fabric.sql_client import FabricSqlClient + + client = _fake_sql_client() + client.credentials.access_token = "FAKE_TOKEN" + + returned = FabricSqlClient.open_connection(client) + + assert client._conn is returned From 57f1061f0668a4ce2ccfa53134df8e7c1e97cc98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Thal=C3=A9n?= Date: Thu, 16 Apr 2026 11:45:16 +0000 Subject: [PATCH 11/16] feat(azure): add OneLakeNotebookIdentityCredentials for Fabric staging MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New credential class that returns adlfs kwargs with `account_name` and `account_host` only — no `credential` key. Lets Fabric's registered `OnelakeFileSystem.__init__` fall through to its built-in `make_credential()` helper for notebook-user identity. Only usable inside a Fabric notebook kernel. Pairs with `FabricCredentials.access_token` on the warehouse side. Refs #3865 --- .../configuration/specs/azure_credentials.py | 27 +++++++++++++++ .../load/filesystem/test_azure_credentials.py | 34 +++++++++++++++++++ 2 files changed, 61 insertions(+) diff --git a/dlt/common/configuration/specs/azure_credentials.py b/dlt/common/configuration/specs/azure_credentials.py index d31f902433..2bc280070b 100644 --- a/dlt/common/configuration/specs/azure_credentials.py +++ b/dlt/common/configuration/specs/azure_credentials.py @@ -187,6 +187,33 @@ def to_adlfs_credentials(self) -> Dict[str, Any]: return base_kwargs +@configspec +class OneLakeNotebookIdentityCredentials(CredentialsConfiguration): + """Azure credentials for OneLake filesystem staging under Fabric notebook identity. 
+ + Returns adlfs kwargs with `account_name` and `account_host` only -- no + `credential` key. Fabric Python notebooks register a custom + `OnelakeFileSystem` as the `abfss://` handler, and its `__init__` falls + through to a built-in `make_credential()` helper when no credential is + supplied. + + Warning: only usable inside a Fabric notebook kernel. + """ + + azure_storage_account_name: str = "onelake" + """OneLake logical storage account name.""" + + azure_account_host: str = "onelake.blob.fabric.microsoft.com" + """OneLake blob DFS endpoint.""" + + def to_adlfs_credentials(self) -> Dict[str, Any]: + """Return adlfs kwargs with `account_name` and `account_host` only.""" + return { + "account_name": self.azure_storage_account_name, + "account_host": self.azure_account_host, + } + + AnyAzureCredentials = Union[ # Credentials without defaults come first because union types are attempted in order # and explicit config should supersede system defaults diff --git a/tests/load/filesystem/test_azure_credentials.py b/tests/load/filesystem/test_azure_credentials.py index e9138c5ec6..4d1488c72c 100644 --- a/tests/load/filesystem/test_azure_credentials.py +++ b/tests/load/filesystem/test_azure_credentials.py @@ -344,3 +344,37 @@ def test_azure_service_principal_pyiceberg_export_import() -> None: # test connection using imported credentials assert can_connect_pyiceberg_fileio_config(ABFS_BUCKET, pyiceberg_config) + + +def test_onelake_notebook_identity_credentials_defaults() -> None: + from dlt.common.configuration.specs.azure_credentials import ( + OneLakeNotebookIdentityCredentials, + ) + + creds = OneLakeNotebookIdentityCredentials() + assert creds.azure_storage_account_name == "onelake" + assert creds.azure_account_host == "onelake.blob.fabric.microsoft.com" + + +def test_onelake_notebook_identity_to_adlfs_credentials_returns_account_only() -> None: + from dlt.common.configuration.specs.azure_credentials import ( + OneLakeNotebookIdentityCredentials, + ) + + creds = 
OneLakeNotebookIdentityCredentials() + result = creds.to_adlfs_credentials() + + assert set(result.keys()) == {"account_name", "account_host"} + assert result["account_name"] == "onelake" + assert result["account_host"] == "onelake.blob.fabric.microsoft.com" + assert "credential" not in result + + +def test_onelake_notebook_identity_is_not_service_principal_subclass() -> None: + from dlt.common.configuration.specs.azure_credentials import ( + AzureServicePrincipalCredentialsWithoutDefaults, + OneLakeNotebookIdentityCredentials, + ) + + creds = OneLakeNotebookIdentityCredentials() + assert not isinstance(creds, AzureServicePrincipalCredentialsWithoutDefaults) From 5d898460066c682414ffc586cd47eb46545d9b41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Thal=C3=A9n?= Date: Thu, 16 Apr 2026 11:48:25 +0000 Subject: [PATCH 12/16] feat(fabric): short-circuit _ensure_fabric_token_initialized when SP secret is empty The Fabric API token warmup builds a `ClientSecretCredential` from `credentials.azure_client_secret` and hits `https://api.fabric.microsoft.com/.default` before every OneLake load. When the SP secret is empty or None, this fails with `ClientAuthenticationError`. Return early when the secret is falsy. The real-SP happy path is unchanged. Refs #3865 --- dlt/destinations/impl/fabric/fabric.py | 2 + tests/load/fabric/test_fabric_warmup_gate.py | 96 ++++++++++++++++++++ 2 files changed, 98 insertions(+) create mode 100644 tests/load/fabric/test_fabric_warmup_gate.py diff --git a/dlt/destinations/impl/fabric/fabric.py b/dlt/destinations/impl/fabric/fabric.py index c0e4332ffe..e5f66470ad 100644 --- a/dlt/destinations/impl/fabric/fabric.py +++ b/dlt/destinations/impl/fabric/fabric.py @@ -101,6 +101,8 @@ def _ensure_fabric_token_initialized( Token initialization is cached per client_id to prevent excessive API calls during bulk loads. 
""" + if not credentials.azure_client_secret: + return cache_key = credentials.azure_client_id # Check if we've already initialized the token for this client diff --git a/tests/load/fabric/test_fabric_warmup_gate.py b/tests/load/fabric/test_fabric_warmup_gate.py new file mode 100644 index 0000000000..13a5bee1c7 --- /dev/null +++ b/tests/load/fabric/test_fabric_warmup_gate.py @@ -0,0 +1,96 @@ +"""Tests for the defensive short-circuit on +`FabricCopyFileLoadJob._ensure_fabric_token_initialized`.""" +from __future__ import annotations + +import sys +from types import SimpleNamespace +from unittest.mock import MagicMock + +import pytest + + +pytestmark = pytest.mark.essential + + +@pytest.fixture(autouse=True) +def _mock_pyodbc(monkeypatch: pytest.MonkeyPatch) -> None: + """Install a fake pyodbc module so the mssql import chain succeeds.""" + fake_pyodbc = MagicMock(name="pyodbc_module") + monkeypatch.setitem(sys.modules, "pyodbc", fake_pyodbc) + + +@pytest.fixture(autouse=True) +def _mock_azure_identity(monkeypatch: pytest.MonkeyPatch) -> MagicMock: + """Install a fake azure.identity.ClientSecretCredential.""" + fake_credential_cls = MagicMock(name="ClientSecretCredential") + fake_module = MagicMock(name="azure.identity") + fake_module.ClientSecretCredential = fake_credential_cls + monkeypatch.setitem(sys.modules, "azure.identity", fake_module) + return fake_credential_cls + + +def _fake_load_job() -> SimpleNamespace: + return SimpleNamespace(_token_initialized_cache={}) + + +def test_warmup_short_circuits_when_staging_secret_is_none( + _mock_azure_identity: MagicMock, +) -> None: + from dlt.destinations.impl.fabric.fabric import FabricCopyFileLoadJob + + job = _fake_load_job() + staging_credentials = SimpleNamespace( + azure_client_id="client-id", + azure_tenant_id="tenant-id", + azure_client_secret=None, + ) + + result = FabricCopyFileLoadJob._ensure_fabric_token_initialized(job, staging_credentials) + + assert result is None + 
_mock_azure_identity.assert_not_called() + + +def test_warmup_short_circuits_when_staging_secret_is_empty_string( + _mock_azure_identity: MagicMock, +) -> None: + from dlt.destinations.impl.fabric.fabric import FabricCopyFileLoadJob + + job = _fake_load_job() + staging_credentials = SimpleNamespace( + azure_client_id="client-id", + azure_tenant_id="tenant-id", + azure_client_secret="", + ) + + result = FabricCopyFileLoadJob._ensure_fabric_token_initialized(job, staging_credentials) + + assert result is None + _mock_azure_identity.assert_not_called() + + +def test_warmup_still_runs_when_staging_secret_is_real_value( + _mock_azure_identity: MagicMock, monkeypatch: pytest.MonkeyPatch +) -> None: + """The short-circuit must not break the SP happy path.""" + fake_requests = MagicMock(name="requests_module") + fake_requests.get.return_value = SimpleNamespace(status_code=200, text="ok") + monkeypatch.setitem(sys.modules, "requests", fake_requests) + + from dlt.destinations.impl.fabric.fabric import FabricCopyFileLoadJob + + job = _fake_load_job() + staging_credentials = SimpleNamespace( + azure_client_id="client-id", + azure_tenant_id="tenant-id", + azure_client_secret="real-secret", + ) + + FabricCopyFileLoadJob._ensure_fabric_token_initialized(job, staging_credentials) + + _mock_azure_identity.assert_called_once_with( + tenant_id="tenant-id", + client_id="client-id", + client_secret="real-secret", + ) + assert job._token_initialized_cache["client-id"] is True From 5163ca3ab51088740a8255dd8d5bfbc60752a39c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Thal=C3=A9n?= Date: Thu, 16 Apr 2026 11:50:46 +0000 Subject: [PATCH 13/16] docs(fabric): document notebook user identity auth patterns Adds a "Notebook user identity" section under the Fabric destination docs with raw `access_token` (one-shot) and injectable `TokenCredential` (refreshing) patterns. Includes copy-pasteable examples using `notebookutils.credentials.getToken("pbi")`. 
Cross-links to the filesystem staging OneLake section. Refs #3865 Co-Authored-By: Claude Opus 4.6 (1M context) --- .../docs/dlt-ecosystem/destinations/fabric.md | 71 ++++++++++++++++++- 1 file changed, 70 insertions(+), 1 deletion(-) diff --git a/docs/website/docs/dlt-ecosystem/destinations/fabric.md b/docs/website/docs/dlt-ecosystem/destinations/fabric.md index 75ed4a1361..9bebea8867 100644 --- a/docs/website/docs/dlt-ecosystem/destinations/fabric.md +++ b/docs/website/docs/dlt-ecosystem/destinations/fabric.md @@ -44,6 +44,75 @@ Fabric Warehouse requires Azure Active Directory Service Principal authenticatio - Select **SQL endpoint** - Copy the **SQL connection string** - it should be in the format: `.datawarehouse.fabric.microsoft.com` +### Notebook user identity (Microsoft Fabric notebooks) + +When running dlt from inside a Microsoft Fabric Python notebook, a Service Principal is typically not available -- the canonical auth source is `notebookutils.credentials.getToken(...)`. The Fabric destination supports two additional credential shapes for this scenario. + +#### Option A: raw `access_token` string + +Pass a pre-fetched AAD bearer token as the `access_token` field on `FabricCredentials`. Simplest pattern, suitable for pipelines that complete before the token expires (typically ~50 minutes): + +```py +import os + +import dlt + +os.environ["DESTINATION__FABRIC__CREDENTIALS__ACCESS_TOKEN"] = ( + notebookutils.credentials.getToken("pbi") +) +os.environ["DESTINATION__FABRIC__CREDENTIALS__HOST"] = ( + ".datawarehouse.fabric.microsoft.com" +) +os.environ["DESTINATION__FABRIC__CREDENTIALS__DATABASE"] = "" + +pipeline = dlt.pipeline( + pipeline_name="fabric_notebook_demo", + destination="fabric", + staging="filesystem", + dataset_name="demo", +) +``` + +The bearer token is consumed via `pyodbc.connect(..., attrs_before={1256: token_struct})` (`SQL_COPT_SS_ACCESS_TOKEN`) and the ODBC DSN omits `AUTHENTICATION`, `UID`, and `PWD`. 
+ +**Token refresh:** the `access_token` string is static. For pipelines that may run longer than the token's validity window, use Option B. + +#### Option B: injectable `TokenCredential` (refreshing) + +Pass an `azure.core.credentials.TokenCredential` instance as the `azure_credential` field. The Fabric destination will call `get_token("https://database.windows.net/.default")` on each connection, delegating token caching and refresh to the credential implementation: + +```py +import time + +import dlt +from azure.core.credentials import AccessToken, TokenCredential + +from dlt.destinations.impl.fabric.configuration import FabricCredentials + + +class NotebookTokenCredential(TokenCredential): + def get_token(self, *scopes, **kwargs) -> AccessToken: + token = notebookutils.credentials.getToken("pbi") + return AccessToken(token, int(time.time()) + 3000) + + +creds = FabricCredentials() +creds.host = ".datawarehouse.fabric.microsoft.com" +creds.database = "" +creds.azure_credential = NotebookTokenCredential() + +pipeline = dlt.pipeline( + pipeline_name="fabric_notebook_demo_long", + destination=dlt.destinations.fabric(credentials=creds), + staging="filesystem", + dataset_name="demo", +) +``` + +#### Pairing with OneLake staging + +Under notebook user identity the filesystem staging side must also skip the Service Principal auth path. Use `OneLakeNotebookIdentityCredentials` on the filesystem staging config -- see the [filesystem destination OneLake section](filesystem.md#onelake-under-notebook-identity) for details. + ### Create a pipeline **1. Initialize a project with a pipeline that loads to Fabric by running:** @@ -205,7 +274,7 @@ driver="ODBC Driver 18 for SQL Server" While Fabric Warehouse is based on SQL Server, there are key differences: -1. **Authentication**: Fabric requires Service Principal; username/password auth is not supported +1. 
**Authentication**: Fabric supports Service Principal, raw `access_token`, and injectable `TokenCredential`; username/password auth is not supported 2. **Type System**: Uses `varchar` and `datetime2` instead of `nvarchar` and `datetimeoffset` 3. **Collation**: Optimized for UTF-8 collations with automatic `LongAsMax` configuration 4. **SQL Dialect**: Uses `fabric` SQLglot dialect for proper SQL generation From 14f223c0b184fc4ee5a28bf95168190beb3d08cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Thal=C3=A9n?= Date: Thu, 16 Apr 2026 11:52:27 +0000 Subject: [PATCH 14/16] docs(filesystem): document OneLakeNotebookIdentityCredentials for Fabric notebooks Adds an "OneLake under notebook identity" subsection with TOML and Python config examples, a caution that the class is Fabric-notebook-only, and a cross-link back to the Fabric destination notebook identity section. Refs #3865 --- .../dlt-ecosystem/destinations/filesystem.md | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/docs/website/docs/dlt-ecosystem/destinations/filesystem.md b/docs/website/docs/dlt-ecosystem/destinations/filesystem.md index 1e915aed9d..f01febe5d9 100644 --- a/docs/website/docs/dlt-ecosystem/destinations/filesystem.md +++ b/docs/website/docs/dlt-ecosystem/destinations/filesystem.md @@ -258,6 +258,38 @@ max_concurrency=3 ``` ::: +#### OneLake under notebook identity + +When using dlt from inside a Microsoft Fabric Python notebook with `staging="filesystem"` pointing at a OneLake bucket, the standard Azure credential classes are not applicable -- the notebook user has no Service Principal, and the Fabric-registered `OnelakeFileSystem` handler authenticates the current notebook user only when no explicit credential is supplied. 
+ +Use `OneLakeNotebookIdentityCredentials` on the filesystem staging credentials: + +```toml +[destination.filesystem] +bucket_url = "abfss://@onelake.dfs.fabric.microsoft.com//Files/_dlt_stage" + +[destination.filesystem.credentials] +type = "OneLakeNotebookIdentityCredentials" +``` + +Or in Python: + +```py +from dlt.common.configuration.specs.azure_credentials import ( + OneLakeNotebookIdentityCredentials, +) + +filesystem_credentials = OneLakeNotebookIdentityCredentials() +``` + +This class returns adlfs kwargs with `account_name` and `account_host` only -- no `credential` key. Inside a Fabric notebook kernel, the registered `OnelakeFileSystem.__init__` falls through to its built-in `make_credential()` helper, producing a credential tied to the notebook user identity. + +:::caution +`OneLakeNotebookIdentityCredentials` only works inside a Fabric notebook kernel. Outside of Fabric, the `abfss://` protocol handler is plain adlfs `AzureBlobFileSystem`, which has no built-in credential fallback and will fail authentication at the first read. +::: + +Pair with `FabricCredentials.access_token` or `azure_credential` on the Fabric destination side -- see the [Fabric destination notebook identity section](fabric.md#notebook-user-identity-microsoft-fabric-notebooks). + ### Hugging Face The filesystem destination supports loading into [Hugging Face Datasets](https://huggingface.co/docs/datasets/index) using the `hf://` protocol. See the [Hugging Face destination](huggingface) page for setup and configuration details. From fe97d2c4c2eb477b5a3207ff141b69bee82f104b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Thal=C3=A9n?= Date: Thu, 16 Apr 2026 12:20:39 +0000 Subject: [PATCH 15/16] fix(fabric): resolve mypy errors on test mocks and credential return type Add `# type: ignore[arg-type]` on SimpleNamespace->typed-class calls in test_fabric_sql_client.py and test_fabric_warmup_gate.py (standard dlt pattern for test mocks). 
Add `# type: ignore[no-any-return]` on the azure_credential.get_token().token return in configuration.py. Drop void-function return-value captures in warmup gate tests. Refs #3865 Co-Authored-By: Claude Opus 4.6 (1M context) --- dlt/destinations/impl/fabric/configuration.py | 2 +- tests/load/fabric/test_fabric_sql_client.py | 12 ++++++------ tests/load/fabric/test_fabric_warmup_gate.py | 8 +++----- 3 files changed, 10 insertions(+), 12 deletions(-) diff --git a/dlt/destinations/impl/fabric/configuration.py b/dlt/destinations/impl/fabric/configuration.py index 91a389f45d..8955a90172 100644 --- a/dlt/destinations/impl/fabric/configuration.py +++ b/dlt/destinations/impl/fabric/configuration.py @@ -50,7 +50,7 @@ def get_access_token(self) -> Optional[str]: if self.access_token is not None: return str(self.access_token) if self.azure_credential is not None: - return self.azure_credential.get_token("https://database.windows.net/.default").token + return self.azure_credential.get_token("https://database.windows.net/.default").token # type: ignore[no-any-return] return None def on_partial(self) -> None: diff --git a/tests/load/fabric/test_fabric_sql_client.py b/tests/load/fabric/test_fabric_sql_client.py index 5aa5d3a32c..fb1da91d51 100644 --- a/tests/load/fabric/test_fabric_sql_client.py +++ b/tests/load/fabric/test_fabric_sql_client.py @@ -13,7 +13,7 @@ @pytest.fixture(autouse=True) -def _mock_pyodbc(monkeypatch: pytest.MonkeyPatch) -> None: +def _mock_pyodbc(monkeypatch: pytest.MonkeyPatch) -> None: # type: ignore[misc] """Install a fake pyodbc module in sys.modules.""" fake_pyodbc = MagicMock(name="pyodbc_module") monkeypatch.setitem(sys.modules, "pyodbc", fake_pyodbc) @@ -53,7 +53,7 @@ def test_open_connection_passes_token_via_attrs_before_1256() -> None: client = _fake_sql_client() client.credentials.access_token = "FAKE_TOKEN" - FabricSqlClient.open_connection(client) + FabricSqlClient.open_connection(client) # type: ignore[arg-type] pyodbc = sys.modules["pyodbc"] 
assert pyodbc.connect.called @@ -71,7 +71,7 @@ def test_open_connection_passes_connect_timeout_in_token_mode() -> None: client.credentials.access_token = "FAKE_TOKEN" client.credentials.connect_timeout = 42 - FabricSqlClient.open_connection(client) + FabricSqlClient.open_connection(client) # type: ignore[arg-type] pyodbc = sys.modules["pyodbc"] assert pyodbc.connect.call_args.kwargs["timeout"] == 42 @@ -105,7 +105,7 @@ def test_open_connection_sets_autocommit_true_in_token_mode() -> None: client = _fake_sql_client() client.credentials.access_token = "FAKE_TOKEN" - FabricSqlClient.open_connection(client) + FabricSqlClient.open_connection(client) # type: ignore[arg-type] pyodbc = sys.modules["pyodbc"] returned_conn = pyodbc.connect.return_value @@ -118,7 +118,7 @@ def test_open_connection_installs_datetimeoffset_converter_in_token_mode() -> No client = _fake_sql_client() client.credentials.access_token = "FAKE_TOKEN" - FabricSqlClient.open_connection(client) + FabricSqlClient.open_connection(client) # type: ignore[arg-type] pyodbc = sys.modules["pyodbc"] returned_conn = pyodbc.connect.return_value @@ -133,6 +133,6 @@ def test_open_connection_caches_conn_on_self() -> None: client = _fake_sql_client() client.credentials.access_token = "FAKE_TOKEN" - returned = FabricSqlClient.open_connection(client) + returned = FabricSqlClient.open_connection(client) # type: ignore[arg-type] assert client._conn is returned diff --git a/tests/load/fabric/test_fabric_warmup_gate.py b/tests/load/fabric/test_fabric_warmup_gate.py index 13a5bee1c7..60e98c7b02 100644 --- a/tests/load/fabric/test_fabric_warmup_gate.py +++ b/tests/load/fabric/test_fabric_warmup_gate.py @@ -45,9 +45,8 @@ def test_warmup_short_circuits_when_staging_secret_is_none( azure_client_secret=None, ) - result = FabricCopyFileLoadJob._ensure_fabric_token_initialized(job, staging_credentials) + FabricCopyFileLoadJob._ensure_fabric_token_initialized(job, staging_credentials) # type: ignore[arg-type] - assert result is None 
_mock_azure_identity.assert_not_called() @@ -63,9 +62,8 @@ def test_warmup_short_circuits_when_staging_secret_is_empty_string( azure_client_secret="", ) - result = FabricCopyFileLoadJob._ensure_fabric_token_initialized(job, staging_credentials) + FabricCopyFileLoadJob._ensure_fabric_token_initialized(job, staging_credentials) # type: ignore[arg-type] - assert result is None _mock_azure_identity.assert_not_called() @@ -86,7 +84,7 @@ def test_warmup_still_runs_when_staging_secret_is_real_value( azure_client_secret="real-secret", ) - FabricCopyFileLoadJob._ensure_fabric_token_initialized(job, staging_credentials) + FabricCopyFileLoadJob._ensure_fabric_token_initialized(job, staging_credentials) # type: ignore[arg-type] _mock_azure_identity.assert_called_once_with( tenant_id="tenant-id", From 65f383c263adae9ffbd303bd4ca28aef6881d0f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Thal=C3=A9n?= Date: Thu, 16 Apr 2026 13:14:20 +0000 Subject: [PATCH 16/16] fix(fabric): resolve credentials in on_partial when token-auth mode is active `on_partial` returned early when `access_token` was set but did not call `self.resolve()`, leaving the credentials in a partial state. The pipeline then received `None` for credentials and crashed with `AttributeError: 'NoneType' object has no attribute 'database'`. Mirror the existing SP fallback pattern: check `self.host and self.database` and call `self.resolve()` before returning. Caught during live Fabric tenant validation. 
Refs #3865 Co-Authored-By: Claude Opus 4.6 (1M context) --- dlt/destinations/impl/fabric/configuration.py | 2 ++ tests/load/fabric/test_fabric_configuration.py | 13 +++++++++++++ 2 files changed, 15 insertions(+) diff --git a/dlt/destinations/impl/fabric/configuration.py b/dlt/destinations/impl/fabric/configuration.py index 8955a90172..785c2d6fb3 100644 --- a/dlt/destinations/impl/fabric/configuration.py +++ b/dlt/destinations/impl/fabric/configuration.py @@ -60,6 +60,8 @@ def on_partial(self) -> None: `DefaultAzureCredential` fallback -- the user has already provided auth. """ if self.access_token is not None or self.azure_credential is not None: + if self.host and self.database: + self.resolve() return try: diff --git a/tests/load/fabric/test_fabric_configuration.py b/tests/load/fabric/test_fabric_configuration.py index fcfefa7aa1..ccf77af9c6 100644 --- a/tests/load/fabric/test_fabric_configuration.py +++ b/tests/load/fabric/test_fabric_configuration.py @@ -268,3 +268,16 @@ def test_on_partial_skips_default_azure_credential_in_token_mode() -> None: creds.on_partial() mock_set_default.assert_not_called() + + +def test_on_partial_resolves_when_access_token_and_host_set() -> None: + """When `access_token`, `host`, and `database` are set, `on_partial` must + call `self.resolve()` so the credentials are not left in a partial state.""" + creds = FabricCredentials() + creds.host = "test.datawarehouse.fabric.microsoft.com" + creds.database = "testdb" + creds.access_token = "abc123" + + creds.on_partial() + + assert creds.is_resolved()