Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
cf6e41b
feat: add DOI service layer from rf-enh-dandiset-dois (Jake Nesbitt)
satra Apr 28, 2026
695ecd4
feat: integrate concept DOI at creation, version DOI at publish
satra Apr 28, 2026
afa8a10
fix: address review findings — retry, timeout, session, concept_doi
satra Apr 28, 2026
0652808
fix: wire doi_state lifecycle, add doi_configured guard, remove old d…
satra Apr 28, 2026
c6e03c8
feat: Phases 5-7 — citation fix, DOI prefix verification, remediation
satra Apr 28, 2026
e2fc672
fix: remove test_datacite.py — references nonexistent DataCiteClient
satra Apr 28, 2026
663bb8b
fix: add doi_configured guards, defensive concept_doi, remove broken …
satra Apr 28, 2026
d32a25e
fix: address all review findings from tech/devils-advocate/systems re…
satra Apr 28, 2026
dbc692d
fix: address all remaining review items
satra Apr 28, 2026
9013013
fix: final review — Procfile, retry logic, lint, doi_configured guards
satra Apr 28, 2026
7b0d9db
fix: TypeScript error in HowToCiteTab.vue — cast doi/version as string
satra Apr 29, 2026
aa59a2a
feat: close design doc gaps — metadata sync, unembargo DOI, hide vs d…
satra Apr 29, 2026
7771e20
docs: mark AI-generated management commands
satra Apr 29, 2026
1fff4e5
fix: final review fixes — on_commit, 404 handling, doi_configured gates
satra Apr 29, 2026
17314de
fix: address all reviewer feedback — schema dep, state machine, cleanup
satra Apr 29, 2026
bf2d8a3
feat: add 20 parametrized DOI lifecycle tests + design doc update + t…
satra Apr 29, 2026
c1b7cbd
fix: address second-pass review — async hide, inline import, logging
satra Apr 29, 2026
baa3c37
fix: address review — idempotent concept-DOI create, drop unique cons…
satra May 4, 2026
44ce822
fix: renumber DOI migrations 0032/0033 → 0033/0034 after rebase
satra May 7, 2026
acc9ccc
fix: keep draft citation as dandiset URL (not DOI)
satra May 7, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Procfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,6 @@ web: gunicorn --config gunicorn.conf.py dandiapi.wsgi
# This is OK for now because of how lightweight all high priority tasks currently are,
# but we may need to switch back to a dedicated worker in the future.
# The queue `celery` is the default queue.
worker: REMAP_SIGTERM=SIGQUIT celery --app dandiapi.celery worker --loglevel INFO --without-mingle --without-heartbeat --without-gossip --queues celery --beat
worker: REMAP_SIGTERM=SIGQUIT celery --app dandiapi.celery worker --loglevel INFO --without-mingle --without-heartbeat --without-gossip --queues celery,doi --beat
# The checksum-worker calculates blob checksums and updates zarr checksum files
checksum-worker: REMAP_SIGTERM=SIGQUIT celery --app dandiapi.celery worker --loglevel INFO --without-mingle --without-heartbeat --without-gossip --queues calculate_sha256,ingest_zarr_archive
88 changes: 0 additions & 88 deletions dandiapi/api/doi.py

This file was deleted.

88 changes: 88 additions & 0 deletions dandiapi/api/management/commands/check_doi_health.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
"""
Management command to check DOI health — find stuck or failed DOI states.

Run periodically (e.g., via cron or Celery beat) to detect DOIs that
are stuck in 'pending' or 'failed' state.

Note: This command was AI-generated (Claude Code).
"""

from __future__ import annotations

import datetime

from django.core.management.base import BaseCommand
from django.utils import timezone

from dandiapi.api.models import Version
from dandiapi.api.models.dandiset import Dandiset


class Command(BaseCommand):
    """
    Print a report of DOI records that need attention.

    Three categories are reported:

    * versions whose ``doi_state`` is ``'pending'`` and whose ``modified``
      timestamp is older than the configured threshold,
    * versions whose ``doi_state`` is ``'failed'``,
    * dandisets with ``embargo_status`` OPEN and no ``concept_doi``.

    The command only reads from the database; it never modifies records.
    """

    help = 'Check for DOIs stuck in pending or failed state.'

    # Maximum number of offending records listed per category.
    _MAX_LISTED = 20

    def add_arguments(self, parser):
        """Register the ``--threshold-minutes`` option (integer, default 30)."""
        parser.add_argument(
            '--threshold-minutes',
            type=int,
            default=30,
            help='Consider pending DOIs stuck after this many minutes (default: 30).',
        )

    def handle(self, *args, **options):
        """Run the three health queries and write the report to stdout."""
        threshold = timezone.now() - datetime.timedelta(minutes=options['threshold_minutes'])

        # Find versions stuck in 'pending' longer than threshold
        stuck_pending = Version.objects.filter(
            doi_state='pending',
            modified__lt=threshold,
        ).select_related('dandiset')

        # Find versions in 'failed' state
        failed = Version.objects.filter(
            doi_state='failed',
        ).select_related('dandiset')

        # Find dandisets without concept_doi
        missing_concept = Dandiset.objects.filter(
            concept_doi__isnull=True,
            embargo_status=Dandiset.EmbargoStatus.OPEN,
        )

        # Count each category exactly once. The same numbers feed both the
        # per-section output and the final total, so the report is internally
        # consistent and avoids repeated exists()/count() round trips.
        stuck_count = stuck_pending.count()
        failed_count = failed.count()
        missing_count = missing_concept.count()

        self.stdout.write('\n--- DOI Health Check ---')
        self.stdout.write(f'Threshold: {options["threshold_minutes"]} minutes\n')

        if stuck_count:
            self.stdout.write(self.style.WARNING(f'STUCK PENDING: {stuck_count} versions'))
            self._write_versions(stuck_pending)
        else:
            self.stdout.write(self.style.SUCCESS('No stuck pending DOIs'))

        if failed_count:
            self.stdout.write(self.style.WARNING(f'FAILED: {failed_count} versions'))
            self._write_versions(failed)
        else:
            self.stdout.write(self.style.SUCCESS('No failed DOIs'))

        if missing_count:
            self.stdout.write(
                self.style.WARNING(f'MISSING CONCEPT DOI: {missing_count} open dandisets')
            )
            for dandiset in missing_concept[: self._MAX_LISTED]:
                self.stdout.write(f'  {dandiset.identifier}')
        else:
            self.stdout.write(self.style.SUCCESS('All open dandisets have concept DOIs'))

        total_issues = stuck_count + failed_count + missing_count
        if total_issues > 0:
            self.stdout.write(self.style.ERROR(f'\nTotal issues: {total_issues}'))
        else:
            self.stdout.write(self.style.SUCCESS('\nAll DOIs healthy'))

    def _write_versions(self, versions):
        """Write one line per version, listing at most ``_MAX_LISTED`` of them."""
        for v in versions[: self._MAX_LISTED]:
            self.stdout.write(
                f'  {v.dandiset.identifier}/{v.version} doi={v.doi} modified={v.modified}'
            )
150 changes: 150 additions & 0 deletions dandiapi/api/management/commands/remediate_dois.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
"""
Management command to remediate historical fake/null DOIs.

Finds published versions with missing, null, or fake DOIs and
registers correct DOIs on DataCite. Also backfills concept DOIs
for dandisets that don't have them.

Note: This command was AI-generated (Claude Code).
"""

from __future__ import annotations

import logging
import time

from django.core.management.base import BaseCommand
import requests

from dandiapi.api.models import Version
from dandiapi.api.models.dandiset import Dandiset
from dandiapi.api.services.doi import create_dandiset_doi, create_published_version_doi
from dandiapi.api.services.doi.exceptions import DataCiteAPIError
from dandiapi.api.services.doi.utils import doi_configured, format_doi
from dandiapi.api.tasks import write_manifest_files

logger = logging.getLogger(__name__)

FAKE_DOI_PATTERN = '.123456/0.123456.1234'


class Command(BaseCommand):
    """
    Repair DOI records on non-draft versions and backfill concept DOIs.

    Phase 1 selects every non-draft version whose ``doi`` is NULL or contains
    ``FAKE_DOI_PATTERN``, stores the DOI produced by ``format_doi`` on the
    version and calls ``create_published_version_doi``.

    Phase 2 selects every dandiset whose ``concept_doi`` is NULL, stores the
    DOI produced by ``format_doi`` on the dandiset (and its draft version, if
    any) and calls ``create_dandiset_doi``.

    With ``--dry-run`` both phases only print what they would do.
    """

    help = 'Remediate published versions with fake/null DOIs and backfill concept DOIs.'

    def add_arguments(self, parser):
        """Register the ``--dry-run`` flag and the ``--delay`` option (seconds)."""
        parser.add_argument(
            '--dry-run',
            action='store_true',
            help='Report what would be remediated without making changes.',
        )
        parser.add_argument(
            '--delay',
            type=float,
            default=2.0,
            help='Seconds to wait between DataCite API calls (default: 2.0).',
        )

    def handle(self, *args, **options):
        """Run both remediation phases, or abort if DOI settings are missing."""
        dry_run = options['dry_run']

        if not doi_configured():
            self.stderr.write('DOI settings are not configured. Aborting.')
            return

        if dry_run:
            self.stdout.write('=== DRY RUN — no changes will be made ===\n')

        # time.sleep() raises ValueError on negative input; clamp so a bad
        # --delay cannot abort the run after records were already modified.
        delay = max(options['delay'], 0.0)

        # Phase 1: Fix published versions with fake or null DOIs
        self._remediate_version_dois(dry_run=dry_run, delay=delay)

        # Phase 2: Backfill concept DOIs for dandisets
        self._backfill_concept_dois(dry_run=dry_run, delay=delay)

        self.stdout.write('\nRemediation complete.')

    def _remediate_version_dois(self, *, dry_run: bool, delay: float = 2.0):
        """
        Find and fix published versions with bad DOIs.

        :param dry_run: when true, only print the planned DOI for each version.
        :param delay: seconds to sleep after each processed version.
        """
        self.stdout.write('\n--- Remediating version DOIs ---')

        # select_related avoids one extra query per version when the loop
        # below reads version.dandiset.identifier.
        null_doi_versions = (
            Version.objects.filter(doi__isnull=True)
            .exclude(version='draft')
            .select_related('dandiset')
        )
        fake_doi_versions = (
            Version.objects.filter(doi__contains=FAKE_DOI_PATTERN)
            .exclude(version='draft')
            .select_related('dandiset')
        )

        # The two filters cannot match the same row (NULL vs. a substring
        # match), so simple concatenation yields no duplicates.
        affected = [*null_doi_versions, *fake_doi_versions]
        self.stdout.write(f'Found {len(affected)} versions with null or fake DOIs')

        for version in affected:
            real_doi = format_doi(version.dandiset.identifier, version.version)
            self.stdout.write(
                f'  {version.dandiset.identifier}/{version.version}: {version.doi!r} -> {real_doi}'
            )

            if dry_run:
                # Nothing was sent to DataCite, so no rate limiting is needed.
                continue

            try:
                # Persist the new DOI in 'pending' state before the remote call.
                version.metadata['doi'] = real_doi
                version.doi = real_doi
                version.doi_state = 'pending'
                version.save()

                create_published_version_doi(version)

                version.doi_state = 'findable'
                version.save(update_fields=['doi_state'])

                # Regenerate manifests
                write_manifest_files.delay(version.id)

                self.stdout.write('    OK — DOI minted and manifests queued')
            except (DataCiteAPIError, requests.exceptions.RequestException) as e:
                # NOTE(review): the version keeps real_doi with state 'failed',
                # so the queries above will not select it again on a re-run.
                # Confirm that failed versions are retried elsewhere.
                version.doi_state = 'failed'
                version.save(update_fields=['doi_state'])
                self.stderr.write(f'    FAILED — {e}')

            # Rate limit between DataCite API calls
            time.sleep(delay)

    def _backfill_concept_dois(self, *, dry_run: bool, delay: float = 2.0):
        """
        Backfill concept DOIs for dandisets that don't have them.

        :param dry_run: when true, only print the planned concept DOI.
        :param delay: seconds to sleep after each processed dandiset.
        """
        self.stdout.write('\n--- Backfilling concept DOIs ---')

        dandisets_without_concept_doi = Dandiset.objects.filter(concept_doi__isnull=True)
        self.stdout.write(
            f'Found {dandisets_without_concept_doi.count()} dandisets without concept DOI'
        )

        for dandiset in dandisets_without_concept_doi:
            concept_doi = format_doi(dandiset.identifier)
            has_published = dandiset.versions.exclude(version='draft').exists()

            self.stdout.write(
                f'  {dandiset.identifier}: concept_doi={concept_doi} '
                f'({"published" if has_published else "draft only"})'
            )

            if dry_run:
                # Nothing was sent to DataCite, so no rate limiting is needed.
                continue

            try:
                dandiset.concept_doi = concept_doi
                dandiset.save(update_fields=['concept_doi'])

                # Set concept DOI on draft version too
                draft = dandiset.versions.filter(version='draft').first()
                if draft:
                    draft.doi = concept_doi
                    draft.save(update_fields=['doi'])

                # Register on DataCite
                create_dandiset_doi(dandiset)

                self.stdout.write('    OK — Draft concept DOI registered')
            except (DataCiteAPIError, requests.exceptions.RequestException) as e:
                # NOTE(review): concept_doi is already saved at this point, so
                # a failed registration is not selected again on a re-run.
                # Confirm whether it should be reset or retried elsewhere.
                self.stderr.write(f'    FAILED — {e}')

            # Rate limit between DataCite API calls
            time.sleep(delay)
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Generated by Django 5.2.7 on 2026-04-28 14:06
from __future__ import annotations

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the nullable ``Dandiset.concept_doi`` and ``Version.doi_state`` columns."""

    dependencies = [('api', '0032_remove_upload_embargoed_upload_zarr_and_more')]

    operations = [
        # Optional concept DOI stored on the dandiset; NULL by default.
        migrations.AddField(
            model_name='dandiset',
            name='concept_doi',
            field=models.CharField(
                max_length=64,
                null=True,
                blank=True,
                default=None,
            ),
        ),
        # Optional DOI state stored on the version; NULL by default.
        migrations.AddField(
            model_name='version',
            name='doi_state',
            field=models.CharField(
                max_length=20,
                null=True,
                blank=True,
                default=None,
                choices=[
                    ('draft', 'Draft'),
                    ('findable', 'Findable'),
                    ('pending', 'Pending'),
                    ('failed', 'Failed'),
                ],
            ),
        ),
    ]
Loading
Loading