diff --git a/dandiapi/api/services/search/filters.py b/dandiapi/api/services/search/filters.py index db3c0cf7c..d31a74e17 100644 --- a/dandiapi/api/services/search/filters.py +++ b/dandiapi/api/services/search/filters.py @@ -6,14 +6,17 @@ import re from typing import TYPE_CHECKING -from django.db.models import OuterRef, Subquery +from django.contrib.auth.models import User +from django.db.models import OuterRef, Q, Subquery, Value +from django.db.models.functions import Concat from dandiapi.api.models import Version +from dandiapi.api.models.dandiset import DandisetUserObjectPermission from dandiapi.api.services.search.parser import SearchSyntaxError from dandiapi.search.models import AssetSearch if TYPE_CHECKING: - from django.contrib.auth.models import AnonymousUser, User + from django.contrib.auth.models import AnonymousUser from django.db.models import QuerySet from dandiapi.api.models import Dandiset @@ -39,6 +42,7 @@ } ) _ASSET_OPS = frozenset({'species', 'approach', 'technique', 'standard', 'file_type'}) +_OWNER_OPS = frozenset({'owner'}) def _annotate_latest_version_modified(queryset): @@ -104,6 +108,31 @@ def _apply_asset_filter(queryset, operator: str, value: str): raise ValueError(f'unknown asset operator: {operator}') # pragma: no cover +def _apply_owner_filter(queryset: QuerySet[Dandiset], value: str) -> QuerySet[Dandiset]: + """Filter dandisets to those owned by the given user identifier. + + `value` is matched case-insensitively against `User.username`, `User.email`, + `User.first_name`, `User.last_name`, or `"first_name last_name"` (so the + display name shown in the UI works). Multiple users may match; we union + dandisets owned by any of them. Unknown user → empty result. 
+ """ + matched_user_pks = ( + User.objects.annotate(_full_name=Concat('first_name', Value(' '), 'last_name')) + .filter( + Q(username__iexact=value) + | Q(email__iexact=value) + | Q(first_name__iexact=value) + | Q(last_name__iexact=value) + | Q(_full_name__iexact=value) + ) + .values_list('pk', flat=True) + ) + owned_pks = DandisetUserObjectPermission.objects.filter( + user__in=matched_user_pks, permission__codename='owner' + ).values('content_object') + return queryset.filter(pk__in=owned_pks) + + _MODIFIED_ALIAS = '_search_latest_version_modified' _PUBLISHED_ALIAS = '_search_latest_published_created' @@ -157,8 +186,9 @@ def apply_search_filters( asset_qs = None annotated: set[str] = set() - for key, raw_value in parsed.operators: - value = raw_value.strip() + for op in parsed.operators: + key = op.key + value = op.value.strip() if not value: raise SearchSyntaxError(f'Operator "{key}" requires a value (e.g. {key}:something).') @@ -174,6 +204,8 @@ def apply_search_filters( if asset_qs is None: asset_qs = AssetSearch.objects.visible_to(user) asset_qs = _apply_asset_filter(asset_qs, key, value) + elif key in _OWNER_OPS: + queryset = _apply_owner_filter(queryset, value) if asset_qs is not None: # NOTE perf: jsonb_path_exists with a runtime-built jsonpath cannot diff --git a/dandiapi/api/services/search/parser.py b/dandiapi/api/services/search/parser.py index 817642341..c3b688222 100644 --- a/dandiapi/api/services/search/parser.py +++ b/dandiapi/api/services/search/parser.py @@ -31,6 +31,7 @@ 'technique', 'standard', 'file_type', + 'owner', } ) @@ -60,10 +61,18 @@ class SearchSyntaxError(ValueError): """Raised when a search query can't be parsed.""" +@dataclass +class Operator: + """One parsed `key:value` operator.""" + + key: str + value: str + + @dataclass class ParsedSearch: free_text: list[str] = field(default_factory=list) - operators: list[tuple[str, str]] = field(default_factory=list) + operators: list[Operator] = field(default_factory=list) def 
_check_balanced_quotes(query: str) -> None: @@ -99,7 +108,7 @@ def parse_search(query: str) -> ParsedSearch: for match in _TOKEN_RE.finditer(query): if (key := match.group('op_key')) is not None: _validate_operator_key(key) - parsed.operators.append((key, match.group('op_qval'))) + parsed.operators.append(Operator(key, match.group('op_qval'))) elif (free := match.group('free_quoted')) is not None: parsed.free_text.append(free) else: @@ -107,7 +116,7 @@ def parse_search(query: str) -> ParsedSearch: if op_match := _BARE_OP_RE.match(bare): key = op_match.group(1) _validate_operator_key(key) - parsed.operators.append((key, op_match.group(2))) + parsed.operators.append(Operator(key, op_match.group(2))) else: parsed.free_text.append(bare) return parsed diff --git a/dandiapi/api/tests/test_dandiset.py b/dandiapi/api/tests/test_dandiset.py index 712399fc3..871c1b062 100644 --- a/dandiapi/api/tests/test_dandiset.py +++ b/dandiapi/api/tests/test_dandiset.py @@ -2086,3 +2086,83 @@ def test_advanced_search_species_respects_embargo_visibility(api_client): # Anonymous request: embargoed must be filtered out. assert _search_ids(api_client, 'species:mouse') == {open_ds.identifier} + + +# --- owner: operator ----------------------------------------------------------------------------- + + +@pytest.mark.ai_generated +@pytest.mark.django_db +def test_advanced_search_owner_lookup_paths_and_combinations(api_client): + """One setup, many assertions for the owner: operator. + + Resolves users by every documented lookup path, unions across multiple + matched users, returns 0 for unknown values, is case-insensitive, and + combines correctly with other operators (cross-key AND on the same + dandiset). + """ + # Three users with overlapping last names so we can exercise every lookup + # path AND the multi-user union in a single setup. 
+ alice = UserFactory.create( + username='Alice', email='Alice@Example.com', first_name='Alice', last_name='Smith' + ) + bob = UserFactory.create( + username='bob', email='bob@example.com', first_name='Bob', last_name='Smith' + ) + carol = UserFactory.create( + username='carol', email='carol@example.com', first_name='Carol', last_name='Jones' + ) + alice_old = DandisetFactory.create(owners=[alice]) + alice_new = DandisetFactory.create(owners=[alice]) + bob_ds = DandisetFactory.create(owners=[bob]) + carol_ds = DandisetFactory.create(owners=[carol]) + for ds in (alice_old, alice_new, bob_ds, carol_ds): + DraftVersionFactory.create(dandiset=ds) + + # Backdate alice_old so we can intersect with a date operator below. + cutoff = timezone.now() - datetime.timedelta(days=1) + Dandiset.objects.filter(pk=alice_old.pk).update(created=cutoff - datetime.timedelta(days=30)) + after_str = (cutoff + datetime.timedelta(seconds=1)).date().isoformat() + + alice_dsets = {alice_old.identifier, alice_new.identifier} + + # username (case-insensitive) + assert _search_ids(api_client, 'owner:alice') == alice_dsets + assert _search_ids(api_client, 'owner:ALICE') == alice_dsets + + # email (case-insensitive) + assert _search_ids(api_client, 'owner:alice@example.com') == alice_dsets + assert _search_ids(api_client, 'owner:ALICE@Example.com') == alice_dsets + + # first / last / full name + assert _search_ids(api_client, 'owner:Bob') == {bob_ds.identifier} + assert _search_ids(api_client, 'owner:Jones') == {carol_ds.identifier} + assert _search_ids(api_client, 'owner:"Carol Jones"') == {carol_ds.identifier} + + # union: shared last name returns dandisets from both users + assert _search_ids(api_client, 'owner:Smith') == alice_dsets | {bob_ds.identifier} + + # unknown user → 0 results, not 400 (a valid 0-hit query) + assert _search_ids(api_client, 'owner:no_such_user_anywhere') == set() + + # combines with other operators: cross-key AND on the same dandiset. 
+ # Only alice_new satisfies BOTH owner:alice AND created_after. + assert _search_ids(api_client, f'owner:alice created_after:{after_str}') == { + alice_new.identifier + } + + +@pytest.mark.ai_generated +@pytest.mark.django_db +def test_advanced_search_owner_does_not_inflate_to_superuser_archive(api_client): + # Guardian's get_objects_for_user(with_superuser=True) returns ALL objects + # for superusers — wrong semantics for owner: searches. The owner filter + # instead matches explicit `owner` permission rows, so `owner:admin` + # returns only what admin explicitly owns, not the entire archive. + admin = UserFactory.create(username='admin', is_superuser=True) + other = UserFactory.create() + DraftVersionFactory.create(dandiset=DandisetFactory.create(owners=[other])) + admin_owned = DandisetFactory.create(owners=[admin]) + DraftVersionFactory.create(dandiset=admin_owned) + + assert _search_ids(api_client, 'owner:admin') == {admin_owned.identifier} diff --git a/dandiapi/api/tests/test_search_parser.py b/dandiapi/api/tests/test_search_parser.py index 2ab5b42f8..2498a0b20 100644 --- a/dandiapi/api/tests/test_search_parser.py +++ b/dandiapi/api/tests/test_search_parser.py @@ -3,6 +3,7 @@ import pytest from dandiapi.api.services.search.parser import ( + Operator, SearchSyntaxError, parse_search, ) @@ -22,29 +23,32 @@ ( 'species:mouse created_after:2024-01-01', [], - [('species', 'mouse'), ('created_after', '2024-01-01')], + [Operator('species', 'mouse'), Operator('created_after', '2024-01-01')], ), # Mixed ( 'place cells species:mouse created_after:2024-01-01 ca1', ['place', 'cells', 'ca1'], - [('species', 'mouse'), ('created_after', '2024-01-01')], + [Operator('species', 'mouse'), Operator('created_after', '2024-01-01')], ), # Quoted phrase as free text ('"place cells" hippocampus', ['place cells', 'hippocampus'], []), # Quoted operator value (multi-word) - ('technique:"patch clamp"', [], [('technique', 'patch clamp')]), + ('technique:"patch clamp"', [], [Operator('technique', 'patch clamp')]), # Repeated
operator keeps every entry (AND'd downstream) ( 'species:mouse species:rat', [], - [('species', 'mouse'), ('species', 'rat')], + [Operator('species', 'mouse'), Operator('species', 'rat')], ), # Special characters preserved inside quoted operator value - ('species:"C57BL/6"', [], [('species', 'C57BL/6')]), + ('species:"C57BL/6"', [], [Operator('species', 'C57BL/6')]), # Quoted token that *looks* like an operator is treated as free text — - # this is the documented escape hatch for searching for a literal colon. + # documented escape hatch for searching for a literal colon. ('"foo:bar" hippocampus', ['foo:bar', 'hippocampus'], []), + # Owner operator + ('owner:jdoe', [], [Operator('owner', 'jdoe')]), + ('owner:user@example.com', [], [Operator('owner', 'user@example.com')]), ], ids=[ 'empty', @@ -57,6 +61,8 @@ 'repeated-operator-key', 'special-chars-in-quoted-value', 'quoted-operator-like-token-is-free-text', + 'owner-username', + 'owner-email', ], ) def test_parse_search(query, expected_free_text, expected_operators): diff --git a/dandiapi/api/views/serializers.py b/dandiapi/api/views/serializers.py index 0d64477c8..fb032a157 100644 --- a/dandiapi/api/views/serializers.py +++ b/dandiapi/api/views/serializers.py @@ -311,7 +311,9 @@ class DandisetQueryParameterSerializer(serializers.Serializer): 'published_before, published_after (all take YYYY-MM-DD); ' 'species, approach, technique, standard (case-insensitive ' 'substring against the corresponding asset_metadata array); ' - 'file_type (nwb, image, text, video — or any MIME prefix). ' + 'file_type (nwb, image, text, video — or any MIME prefix); ' + 'owner (case-insensitive match against username, email, first ' + 'name, last name, or "first last"). ' 'Invalid syntax returns HTTP 400 with the offending token; ' 'unknown operators get a "Did you mean?" suggestion.' 
), diff --git a/web/src/components/DandisetSearchField.vue b/web/src/components/DandisetSearchField.vue index 5b3b5a6db..30f6738dc 100644 --- a/web/src/components/DandisetSearchField.vue +++ b/web/src/components/DandisetSearchField.vue @@ -95,6 +95,7 @@ const operatorHelp = [ { example: 'technique:"patch clamp"', description: 'Has assets using a measurement technique' }, { example: 'standard:nwb', description: 'Has assets in a data standard' }, { example: 'file_type:nwb', description: 'Has assets of a file type (nwb, image, text, video)' }, + { example: 'owner:"Jane Doe"', description: 'Owned by a user (name, username, or email)' }, ]; function updateSearch(search: string) {