Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 36 additions & 4 deletions dandiapi/api/services/search/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,17 @@
import re
from typing import TYPE_CHECKING

from django.db.models import OuterRef, Subquery
from django.contrib.auth.models import User
from django.db.models import OuterRef, Q, Subquery, Value
from django.db.models.functions import Concat

from dandiapi.api.models import Version
from dandiapi.api.models.dandiset import DandisetUserObjectPermission
from dandiapi.api.services.search.parser import SearchSyntaxError
from dandiapi.search.models import AssetSearch

if TYPE_CHECKING:
from django.contrib.auth.models import AnonymousUser, User
from django.contrib.auth.models import AnonymousUser
from django.db.models import QuerySet

from dandiapi.api.models import Dandiset
Expand All @@ -39,6 +42,7 @@
}
)
# Operator keys answered from asset-level data: `apply_search_filters` routes
# these through `_apply_asset_filter` on an `AssetSearch` queryset.
_ASSET_OPS = frozenset({'species', 'approach', 'technique', 'standard', 'file_type'})
# Operator keys that select dandisets by owning user: `apply_search_filters`
# dispatches these to `_apply_owner_filter`.
_OWNER_OPS = frozenset({'owner'})


def _annotate_latest_version_modified(queryset):
Expand Down Expand Up @@ -104,6 +108,31 @@ def _apply_asset_filter(queryset, operator: str, value: str):
raise ValueError(f'unknown asset operator: {operator}') # pragma: no cover


def _apply_owner_filter(queryset: QuerySet[Dandiset], value: str) -> QuerySet[Dandiset]:
    """Restrict `queryset` to dandisets owned by a user identified by `value`.

    A user is selected when `value` equals, ignoring case, any one of their
    `username`, `email`, `first_name`, `last_name`, or the string
    `"<first_name> <last_name>"` (the display name shown in the UI). Every
    selected user contributes the dandisets on which they hold the `owner`
    permission, so several matching users yield the union of their dandisets.
    A value that matches no user produces an empty queryset.
    """
    # Any single case-insensitive exact match is enough to select a user.
    identifies_user = (
        Q(username__iexact=value)
        | Q(email__iexact=value)
        | Q(first_name__iexact=value)
        | Q(last_name__iexact=value)
        | Q(_full_name__iexact=value)
    )
    # `_full_name` exists only as an annotation, so it must be attached before
    # the filter that refers to it.
    users_with_full_name = User.objects.annotate(
        _full_name=Concat('first_name', Value(' '), 'last_name')
    )
    owner_user_pks = users_with_full_name.filter(identifies_user).values_list('pk', flat=True)

    # Ownership is read from explicit per-object permission rows.
    owner_grants = DandisetUserObjectPermission.objects.filter(
        user__in=owner_user_pks, permission__codename='owner'
    )
    return queryset.filter(pk__in=owner_grants.values('content_object'))


# NOTE(review): the use sites are outside this view — presumably these are the
# queryset annotation names for the latest version's `modified` timestamp and
# the latest published version's `created` timestamp; confirm against the
# annotate helpers before relying on this description.
_MODIFIED_ALIAS = '_search_latest_version_modified'
_PUBLISHED_ALIAS = '_search_latest_published_created'

Expand Down Expand Up @@ -157,8 +186,9 @@ def apply_search_filters(
asset_qs = None
annotated: set[str] = set()

for key, raw_value in parsed.operators:
value = raw_value.strip()
for op in parsed.operators:
key = op.key
value = op.value.strip()
if not value:
raise SearchSyntaxError(f'Operator "{key}" requires a value (e.g. {key}:something).')

Expand All @@ -174,6 +204,8 @@ def apply_search_filters(
if asset_qs is None:
asset_qs = AssetSearch.objects.visible_to(user)
asset_qs = _apply_asset_filter(asset_qs, key, value)
elif key in _OWNER_OPS:
queryset = _apply_owner_filter(queryset, value)

if asset_qs is not None:
# NOTE perf: jsonb_path_exists with a runtime-built jsonpath cannot
Expand Down
15 changes: 12 additions & 3 deletions dandiapi/api/services/search/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
'technique',
'standard',
'file_type',
'owner',
}
)

Expand Down Expand Up @@ -60,10 +61,18 @@ class SearchSyntaxError(ValueError):
"""Raised when a search query can't be parsed."""


@dataclass
class Operator:
    """A single `key:value` search operator produced by the parser."""

    key: str  # operator name, e.g. the `species` in `species:mouse`
    value: str  # operator argument; for a quoted value, the text inside the quotes


@dataclass
class ParsedSearch:
    """Outcome of parsing one search query string.

    The parser sorts each token into one of two lists, appending in the order
    the tokens are encountered.
    """

    # Bare words and quoted phrases that are not operators.
    free_text: list[str] = field(default_factory=list)
    # Parsed `key:value` operators; a repeated key is kept as separate entries.
    operators: list[Operator] = field(default_factory=list)


def _check_balanced_quotes(query: str) -> None:
Expand Down Expand Up @@ -99,15 +108,15 @@ def parse_search(query: str) -> ParsedSearch:
for match in _TOKEN_RE.finditer(query):
if (key := match.group('op_key')) is not None:
_validate_operator_key(key)
parsed.operators.append((key, match.group('op_qval')))
parsed.operators.append(Operator(key, match.group('op_qval')))
elif (free := match.group('free_quoted')) is not None:
parsed.free_text.append(free)
else:
bare = match.group('bare')
if op_match := _BARE_OP_RE.match(bare):
key = op_match.group(1)
_validate_operator_key(key)
parsed.operators.append((key, op_match.group(2)))
parsed.operators.append(Operator(key, op_match.group(2)))
else:
parsed.free_text.append(bare)
return parsed
80 changes: 80 additions & 0 deletions dandiapi/api/tests/test_dandiset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2086,3 +2086,83 @@ def test_advanced_search_species_respects_embargo_visibility(api_client):

# Anonymous request: embargoed must be filtered out.
assert _search_ids(api_client, 'species:mouse') == {open_ds.identifier}


# --- owner: operator -----------------------------------------------------------------------------


@pytest.mark.ai_generated
@pytest.mark.django_db
def test_advanced_search_owner_lookup_paths_and_combinations(api_client):
    """One setup, many assertions for the owner: operator.

    Resolves users by every documented lookup path, unions across multiple
    matched users, returns 0 for unknown values, is case-insensitive, and
    combines correctly with other operators (cross-key AND on the same
    dandiset).

    Each user's username, email, first name and last name are pairwise
    distinct (ignoring case), so an assertion can only pass through the one
    lookup path it names.
    """
    # Two users share a last name so the multi-user union is exercised too.
    alice = UserFactory.create(
        username='A_Wonder', email='Alice@Example.com', first_name='Alice', last_name='Smith'
    )
    bob = UserFactory.create(
        username='b_builder', email='bob@example.com', first_name='Bob', last_name='Smith'
    )
    carol = UserFactory.create(
        username='c_singer', email='carol@example.com', first_name='Carol', last_name='Jones'
    )
    alice_old = DandisetFactory.create(owners=[alice])
    alice_new = DandisetFactory.create(owners=[alice])
    bob_ds = DandisetFactory.create(owners=[bob])
    carol_ds = DandisetFactory.create(owners=[carol])
    for ds in (alice_old, alice_new, bob_ds, carol_ds):
        DraftVersionFactory.create(dandiset=ds)

    # Backdate alice_old so we can intersect with a date operator below.
    cutoff = timezone.now() - datetime.timedelta(days=1)
    Dandiset.objects.filter(pk=alice_old.pk).update(created=cutoff - datetime.timedelta(days=30))
    after_str = (cutoff + datetime.timedelta(seconds=1)).date().isoformat()

    alice_dsets = {alice_old.identifier, alice_new.identifier}

    # username (case-insensitive); matches no name or email field
    assert _search_ids(api_client, 'owner:a_wonder') == alice_dsets
    assert _search_ids(api_client, 'owner:A_WONDER') == alice_dsets
    assert _search_ids(api_client, 'owner:b_builder') == {bob_ds.identifier}

    # email (case-insensitive)
    assert _search_ids(api_client, 'owner:alice@example.com') == alice_dsets
    assert _search_ids(api_client, 'owner:ALICE@Example.com') == alice_dsets

    # first name (case-insensitive); no longer doubles as the username
    assert _search_ids(api_client, 'owner:alice') == alice_dsets
    assert _search_ids(api_client, 'owner:Bob') == {bob_ds.identifier}

    # last / full name
    assert _search_ids(api_client, 'owner:Jones') == {carol_ds.identifier}
    assert _search_ids(api_client, 'owner:"Carol Jones"') == {carol_ds.identifier}

    # union: shared last name returns dandisets from both users
    assert _search_ids(api_client, 'owner:Smith') == alice_dsets | {bob_ds.identifier}

    # unknown user → 0 results, not 400 (a valid 0-hit query)
    assert _search_ids(api_client, 'owner:no_such_user_anywhere') == set()

    # combines with other operators: cross-key AND on the same dandiset.
    # Only alice_new satisfies BOTH owner:a_wonder AND created_after.
    assert _search_ids(api_client, f'owner:a_wonder created_after:{after_str}') == {
        alice_new.identifier
    }


@pytest.mark.ai_generated
@pytest.mark.django_db
def test_advanced_search_owner_does_not_inflate_to_superuser_archive(api_client):
    # Guardian's get_objects_for_user(with_superuser=True) returns ALL objects
    # for superusers — wrong semantics for owner: searches. The owner filter
    # avoids that helper and reads explicit 'owner' rows from
    # DandisetUserObjectPermission, so `owner:admin` must return only what
    # admin explicitly owns, not the entire archive. This test guards against
    # a regression to the superuser-expanding behavior.
    admin = UserFactory.create(username='admin', is_superuser=True)
    other = UserFactory.create()
    # A dandiset the superuser does NOT own; it must stay out of the results.
    DraftVersionFactory.create(dandiset=DandisetFactory.create(owners=[other]))
    admin_owned = DandisetFactory.create(owners=[admin])
    DraftVersionFactory.create(dandiset=admin_owned)

    assert _search_ids(api_client, 'owner:admin') == {admin_owned.identifier}
18 changes: 12 additions & 6 deletions dandiapi/api/tests/test_search_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import pytest

from dandiapi.api.services.search.parser import (
Operator,
SearchSyntaxError,
parse_search,
)
Expand All @@ -22,29 +23,32 @@
(
'species:mouse created_after:2024-01-01',
[],
[('species', 'mouse'), ('created_after', '2024-01-01')],
[Operator('species', 'mouse'), Operator('created_after', '2024-01-01')],
),
# Mixed
(
'place cells species:mouse created_after:2024-01-01 ca1',
['place', 'cells', 'ca1'],
[('species', 'mouse'), ('created_after', '2024-01-01')],
[Operator('species', 'mouse'), Operator('created_after', '2024-01-01')],
),
# Quoted phrase as free text
('"place cells" hippocampus', ['place cells', 'hippocampus'], []),
# Quoted operator value (multi-word)
('technique:"patch clamp"', [], [('technique', 'patch clamp')]),
('technique:"patch clamp"', [], [Operator('technique', 'patch clamp')]),
# Repeated operator keeps every entry (AND'd downstream)
(
'species:mouse species:rat',
[],
[('species', 'mouse'), ('species', 'rat')],
[Operator('species', 'mouse'), Operator('species', 'rat')],
),
# Special characters preserved inside quoted operator value
('species:"C57BL/6"', [], [('species', 'C57BL/6')]),
('species:"C57BL/6"', [], [Operator('species', 'C57BL/6')]),
# Quoted token that *looks* like an operator is treated as free text —
# this is the documented escape hatch for searching for a literal colon.
# documented escape hatch for searching for a literal colon.
('"foo:bar" hippocampus', ['foo:bar', 'hippocampus'], []),
# Owner operator
('owner:jdoe', [], [Operator('owner', 'jdoe')]),
('owner:user@example.com', [], [Operator('owner', 'user@example.com')]),
],
ids=[
'empty',
Expand All @@ -57,6 +61,8 @@
'repeated-operator-key',
'special-chars-in-quoted-value',
'quoted-operator-like-token-is-free-text',
'owner-username',
'owner-email',
],
)
def test_parse_search(query, expected_free_text, expected_operators):
Expand Down
4 changes: 3 additions & 1 deletion dandiapi/api/views/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,9 @@ class DandisetQueryParameterSerializer(serializers.Serializer):
'published_before, published_after (all take YYYY-MM-DD); '
'species, approach, technique, standard (case-insensitive '
'substring against the corresponding asset_metadata array); '
'file_type (nwb, image, text, video — or any MIME prefix). '
'file_type (nwb, image, text, video — or any MIME prefix); '
'owner (case-insensitive match against username, email, first '
'name, last name, or "first last"). '
'Invalid syntax returns HTTP 400 with the offending token; '
'unknown operators get a "Did you mean?" suggestion.'
),
Expand Down
1 change: 1 addition & 0 deletions web/src/components/DandisetSearchField.vue
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ const operatorHelp = [
{ example: 'technique:"patch clamp"', description: 'Has assets using a measurement technique' },
{ example: 'standard:nwb', description: 'Has assets in a data standard' },
{ example: 'file_type:nwb', description: 'Has assets of a file type (nwb, image, text, video)' },
{ example: 'owner:"Jane Doe"', description: 'Owned by a user (name, username, or email)' },
];

function updateSearch(search: string) {
Expand Down
Loading