Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 17 additions & 3 deletions cloudpathlib/azure/azblobclient.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,13 @@
except ModuleNotFoundError:
implementation_registry["azure"].dependencies_loaded = False

# azure-identity is an additional optional dependency; users can use cloudpathlib's
# Azure functionality without it, but will not get automatic DefaultAzureCredential support.
try:
from azure.identity import DefaultAzureCredential
except ImportError:
DefaultAzureCredential = None
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please follow the existing convention and add this to the import block of Azure dependencies above.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The azure.identity import was intentionally left out of the big import block above, as azure-identity is an optional azure dependency, meaning you can use the azure blob client without it (you will just not be able to use identity-based authentication).

I've left a comment to make this clearer. Let me know if you prefer a different approach.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please put it in with the other dependencies; we don't want multiple dependency fallback paths. Either a user has all the backend dependencies, or they don't, which is how you implemented it in the pyproject.toml.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds good to me! I've addressed this comment. Feel free to resolve.



@register_client_class("azure")
class AzureBlobClient(Client):
Expand All @@ -66,20 +73,23 @@ def __init__(
https://docs.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob.blobserviceclient?view=azure-python).
Supports the following authentication methods of `BlobServiceClient`.

- Environment variable `""AZURE_STORAGE_CONNECTION_STRING"` containing connecting string
- Environment variable `AZURE_STORAGE_CONNECTION_STRING` containing connecting string
with account credentials. See [Azure Storage SDK documentation](
https://docs.microsoft.com/en-us/azure/storage/blobs/storage-quickstart-blobs-python#copy-your-credentials-from-the-azure-portal).
- Environment variable `AZURE_STORAGE_ACCOUNT_URL` containing the account URL. If
`azure-identity` is installed, `DefaultAzureCredential` will be used automatically.
- Connection string via `connection_string`, authenticated either with an embedded SAS
token or with credentials passed to `credentials`.
- Account URL via `account_url`, authenticated either with an embedded SAS token, or with
credentials passed to `credentials`.
credentials passed to `credentials`. If `credential` is not provided and `azure-identity`
is installed, `DefaultAzureCredential` will be used automatically.
- Instantiated and already authenticated [`BlobServiceClient`](
https://docs.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob.blobserviceclient?view=azure-python) or
[`DataLakeServiceClient`](https://learn.microsoft.com/en-us/python/api/azure-storage-file-datalake/azure.storage.filedatalake.datalakeserviceclient).

If multiple methods are used, priority order is reverse of list above (later in list takes
priority). If no methods are used, a [`MissingCredentialsError`][cloudpathlib.exceptions.MissingCredentialsError]
exception will be raised raised.
exception will be raised.

Args:
account_url (Optional[str]): The URL to the blob storage account, optionally
Expand Down Expand Up @@ -117,6 +127,8 @@ def __init__(

if connection_string is None:
connection_string = os.getenv("AZURE_STORAGE_CONNECTION_STRING", None)
if account_url is None:
account_url = os.getenv("AZURE_STORAGE_ACCOUNT_URL", None)

self.data_lake_client: Optional[DataLakeServiceClient] = (
None # only needs to end up being set if HNS is enabled
Expand Down Expand Up @@ -174,6 +186,8 @@ def __init__(
conn_str=connection_string, credential=credential
)
elif account_url is not None:
if credential is None and DefaultAzureCredential is not None:
credential = DefaultAzureCredential()
if ".dfs." in account_url:
self.service_client = BlobServiceClient(
account_url=account_url.replace(".dfs.", ".blob."), credential=credential
Expand Down
1 change: 1 addition & 0 deletions cloudpathlib/local/implementations/azure.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ def __init__(self, *args, **kwargs):
kwargs.get("connection_string", None),
kwargs.get("account_url", None),
os.getenv("AZURE_STORAGE_CONNECTION_STRING", None),
os.getenv("AZURE_STORAGE_ACCOUNT_URL", None),
]
super().__init__(*args, **kwargs)

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ dependencies = [
]

[project.optional-dependencies]
azure = ["azure-storage-blob>=12", "azure-storage-file-datalake>=12"]
azure = ["azure-storage-blob>=12", "azure-storage-file-datalake>=12", "azure-identity>=1"]
Comment thread
janjagusch marked this conversation as resolved.
gs = ["google-cloud-storage"]
s3 = ["boto3>=1.34.0"]
all = ["cloudpathlib[azure]", "cloudpathlib[gs]", "cloudpathlib[s3]"]
Expand Down
8 changes: 7 additions & 1 deletion tests/mock_clients/mock_adls_gen2.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,13 @@


class MockedDataLakeServiceClient:
def __init__(self, test_dir, adls):
def __init__(self, test_dir=None, adls=None, account_url=None, credential=None):
if account_url is not None:
# account_url-based construction: store url and credential for verification
self._account_url = account_url
self._credential = credential
return

# root is parent of the test specific directory
self.root = test_dir.parent
self.test_dir = test_dir
Expand Down
8 changes: 7 additions & 1 deletion tests/mock_clients/mock_azureblob.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,13 @@ def get(self, key, default=None):


class MockBlobServiceClient:
def __init__(self, test_dir, adls):
def __init__(self, test_dir=None, adls=None, account_url=None, credential=None):
if account_url is not None:
# account_url-based construction: store url and credential for verification
self._account_url = account_url
self._credential = credential
return
Comment on lines +52 to +57
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As per request, I added the account and credential settings to the mock classes:

Also, please follow existing test patterns, not the MagicMock patterns in this PR. You should add azure mocks if you need them to the mock we already have and actually ensure the AzureClient object gets properties set on it correctly.

But honestly, this doesn't feel right to me. If I understand correctly, the MockedDataLakeServiceClient and MockBlobServiceClient are used to test file-system operations (cp, rm, mv, ls, etc.). For the unit tests I added, I only want to test the authentication step, without running file-system operations.

Extending the constructor feels unnatural. For example, we need a conditional block and an early return to avoid running into the shutil.copytree, which would fail (since we don't specify a test directory).

Happy to incorporate any feedback from the maintainers on this topic. Also fine with leaving it as-is, if that's the preferred solution.


# copy test assets for reference in tests without affecting assets
shutil.copytree(TEST_ASSETS, test_dir, dirs_exist_ok=True)

Expand Down
109 changes: 108 additions & 1 deletion tests/test_azure_specific.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from azure.storage.filedatalake import DataLakeServiceClient
import pytest

import cloudpathlib.azure.azblobclient
from urllib.parse import urlparse, parse_qs
from cloudpathlib import AzureBlobClient, AzureBlobPath
from cloudpathlib.exceptions import (
Expand All @@ -19,7 +20,8 @@
)
from cloudpathlib.local import LocalAzureBlobClient, LocalAzureBlobPath

from .mock_clients.mock_azureblob import MockStorageStreamDownloader
from .mock_clients.mock_azureblob import MockBlobServiceClient, MockStorageStreamDownloader
from .mock_clients.mock_adls_gen2 import MockedDataLakeServiceClient


@pytest.mark.parametrize("path_class", [AzureBlobPath, LocalAzureBlobPath])
Expand All @@ -39,10 +41,115 @@ def test_azureblobpath_properties(path_class, monkeypatch):
@pytest.mark.parametrize("client_class", [AzureBlobClient, LocalAzureBlobClient])
def test_azureblobpath_nocreds(client_class, monkeypatch):
    """Each client class raises MissingCredentialsError when nothing is configured."""
    # Clear both environment variables the clients read for configuration.
    for env_var in ("AZURE_STORAGE_CONNECTION_STRING", "AZURE_STORAGE_ACCOUNT_URL"):
        monkeypatch.delenv(env_var, raising=False)

    # Replace the module-level DefaultAzureCredential with None for this test.
    monkeypatch.setattr("cloudpathlib.azure.azblobclient.DefaultAzureCredential", None)

    with pytest.raises(MissingCredentialsError):
        client_class()


def _mock_azure_clients(monkeypatch):
    """Swap the service client classes in ``azblobclient`` for the test mocks."""
    azblob_module = cloudpathlib.azure.azblobclient
    replacements = {
        "BlobServiceClient": MockBlobServiceClient,
        "DataLakeServiceClient": MockedDataLakeServiceClient,
    }
    # monkeypatch restores the original attributes when the test finishes.
    for attr_name, mock_class in replacements.items():
        monkeypatch.setattr(azblob_module, attr_name, mock_class)


def test_default_credential_used_with_account_url(monkeypatch):
    """An account_url without a credential yields DefaultAzureCredential on both clients."""
    for env_var in ("AZURE_STORAGE_CONNECTION_STRING", "AZURE_STORAGE_ACCOUNT_URL"):
        monkeypatch.delenv(env_var, raising=False)
    _mock_azure_clients(monkeypatch)

    blob_url = "https://myaccount.blob.core.windows.net"
    client = AzureBlobClient(account_url=blob_url)

    # Blob service client keeps the URL as given.
    blob_service = client.service_client
    assert isinstance(blob_service, MockBlobServiceClient)
    assert blob_service._account_url == blob_url
    assert isinstance(blob_service._credential, DefaultAzureCredential)

    # Data lake client is expected at the matching .dfs. endpoint.
    data_lake = client.data_lake_client
    assert isinstance(data_lake, MockedDataLakeServiceClient)
    assert data_lake._account_url == "https://myaccount.dfs.core.windows.net"
    assert isinstance(data_lake._credential, DefaultAzureCredential)


def test_no_default_credential_when_explicit_credential(monkeypatch):
    """An explicitly passed credential is forwarded instead of DefaultAzureCredential."""
    for env_var in ("AZURE_STORAGE_CONNECTION_STRING", "AZURE_STORAGE_ACCOUNT_URL"):
        monkeypatch.delenv(env_var, raising=False)
    _mock_azure_clients(monkeypatch)

    explicit_cred = "my-explicit-credential"
    init_kwargs = {
        "account_url": "https://myaccount.blob.core.windows.net",
        "credential": explicit_cred,
    }
    client = AzureBlobClient(**init_kwargs)

    forwarded = client.service_client._credential
    assert forwarded == explicit_cred
    assert not isinstance(forwarded, DefaultAzureCredential)


def test_fallback_when_azure_identity_not_installed(monkeypatch):
    """With DefaultAzureCredential patched to None, credential=None reaches the client."""
    for env_var in ("AZURE_STORAGE_CONNECTION_STRING", "AZURE_STORAGE_ACCOUNT_URL"):
        monkeypatch.delenv(env_var, raising=False)

    # Stand in for the "azure-identity not importable" state of the module.
    azblob_module = cloudpathlib.azure.azblobclient
    monkeypatch.setattr(azblob_module, "DefaultAzureCredential", None)
    _mock_azure_clients(monkeypatch)

    client = AzureBlobClient(account_url="https://myaccount.blob.core.windows.net")

    assert client.service_client._credential is None
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can remove this per the comments above.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've addressed this comment. Feel free to resolve.



def test_account_url_env_var_blob(monkeypatch):
    """A .blob. URL in AZURE_STORAGE_ACCOUNT_URL configures both service clients."""
    blob_url = "https://myaccount.blob.core.windows.net"
    monkeypatch.delenv("AZURE_STORAGE_CONNECTION_STRING", raising=False)
    monkeypatch.setenv("AZURE_STORAGE_ACCOUNT_URL", blob_url)
    _mock_azure_clients(monkeypatch)

    # No arguments: the account URL must come from the environment.
    client = AzureBlobClient()

    blob_service = client.service_client
    assert isinstance(blob_service, MockBlobServiceClient)
    assert blob_service._account_url == blob_url
    assert isinstance(blob_service._credential, DefaultAzureCredential)

    data_lake = client.data_lake_client
    assert isinstance(data_lake, MockedDataLakeServiceClient)
    assert data_lake._account_url == "https://myaccount.dfs.core.windows.net"
    assert isinstance(data_lake._credential, DefaultAzureCredential)


def test_account_url_env_var_dfs(monkeypatch):
    """A .dfs. URL in AZURE_STORAGE_ACCOUNT_URL configures both service clients."""
    dfs_url = "https://myaccount.dfs.core.windows.net"
    monkeypatch.delenv("AZURE_STORAGE_CONNECTION_STRING", raising=False)
    monkeypatch.setenv("AZURE_STORAGE_ACCOUNT_URL", dfs_url)
    _mock_azure_clients(monkeypatch)

    # No arguments: the account URL must come from the environment.
    client = AzureBlobClient()

    # The blob client is expected at the .blob. endpoint, the data lake client at .dfs.
    assert client.service_client._account_url == "https://myaccount.blob.core.windows.net"
    assert client.data_lake_client._account_url == dfs_url


def test_missing_creds_error_no_env_vars(monkeypatch):
    """AzureBlobClient() raises MissingCredentialsError with no configuration at all."""
    for env_var in ("AZURE_STORAGE_CONNECTION_STRING", "AZURE_STORAGE_ACCOUNT_URL"):
        monkeypatch.delenv(env_var, raising=False)

    # Replace the module-level DefaultAzureCredential with None for this test.
    azblob_module = cloudpathlib.azure.azblobclient
    monkeypatch.setattr(azblob_module, "DefaultAzureCredential", None)

    with pytest.raises(MissingCredentialsError):
        AzureBlobClient()


def test_as_url(azure_rigs):
p: AzureBlobPath = azure_rigs.create_cloud_path("dir_0/file0_0.txt")

Expand Down