Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 43 additions & 9 deletions learning_resources/etl/loaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from django.contrib.auth import get_user_model
from django.core.cache import caches
from django.db import transaction
from django.db.models import Q
from django.db.models import Max, Q

from learning_resources.constants import (
CONTENT_TYPE_PAGE,
Expand Down Expand Up @@ -657,6 +657,15 @@ class ProgramLoadResult(NamedTuple):
child_programs_data: list[dict]


class LoadedProgramCourse(NamedTuple):
"""A loaded course paired with the explicit child position to assign,
or None to fall back to the sequential index in load_program.
"""

position: int | None
resource: LearningResource


def load_program(
program_data: dict,
blocklist: list[str],
Expand Down Expand Up @@ -695,7 +704,6 @@ def load_program(
program_data.setdefault("delivery", [LearningResourceDelivery.online.name])
runs_data = program_data.get("runs", [])

course_resources = []
with transaction.atomic():
learning_resource, created = upsert_course_or_program(
program_data, [], [], LearningResourceType.program.name
Expand Down Expand Up @@ -729,16 +737,22 @@ def load_program(

load_run_dependent_values(learning_resource)

loaded_courses: list[LoadedProgramCourse] = []
for course_data in courses_data:
# skip courses that don't define a readable_id
if not course_data.get("readable_id", None):
continue

explicit_position = course_data.pop("position", None)
course_resource = load_course(
course_data, blocklist, duplicates, config=config.courses
)
if course_resource:
course_resources.append(course_resource)
loaded_courses.append(
LoadedProgramCourse(
position=explicit_position, resource=course_resource
)
)
# Replace all children with position-ordered course relationships.
# Pass 2 in load_programs() will re-create child-program
# relationships after all programs exist.
Expand All @@ -747,11 +761,13 @@ def load_program(
[
LearningResourceRelationship(
parent=learning_resource,
child=course_resource,
child=loaded.resource,
relation_type=LearningResourceRelationTypes.PROGRAM_COURSES,
position=position,
position=(
loaded.position if loaded.position is not None else fallback_idx
),
)
for position, course_resource in enumerate(course_resources)
for fallback_idx, loaded in enumerate(loaded_courses)
]
)

Expand Down Expand Up @@ -786,7 +802,20 @@ def _create_child_program_relationships(
)
}

next_position = parent_resource.children.count()
existing_max = parent_resource.children.aggregate(Max("position")).get(
"position__max"
)
explicit_max = max(
(
cpd["position"]
for cpd in child_programs_data
if cpd.get("position") is not None
),
default=-1,
)
next_position = (
max(existing_max if existing_max is not None else -1, explicit_max) + 1
)
Comment on lines +805 to +818
Copy link
Copy Markdown
Member Author

@mbertrand mbertrand May 20, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe this comment is incorrect because of the two-pass design:

In pass 1, load_program calls learning_resource.children.all().delete(), which wipes out all prior children — both PROGRAM_COURSES and PROGRAM_PROGRAMS — and then bulk-creates fresh PROGRAM_COURSES with assigned positions.

Pass 2 only runs after pass 1 has committed. So when it computes existing_max = parent_resource.children.aggregate(Max("position")), the only children present are the freshly-created PROGRAM_COURSES from this run. Stale PROGRAM_PROGRAMS from prior runs are already gone.

kept_child_ids = set()
for child_program_data in child_programs_data:
readable_id = child_program_data.get("readable_id")
Expand All @@ -811,16 +840,21 @@ def _create_child_program_relationships(
if child_program_data.get("display_mode") == "course"
else LearningResourceRelationTypes.PROGRAM_PROGRAMS
)
explicit_position = child_program_data.get("position")
if explicit_position is not None:
position = explicit_position
else:
position = next_position
next_position += 1
_, _created = LearningResourceRelationship.objects.update_or_create(
parent=parent_resource,
child=child_resource,
defaults={
"relation_type": relation_type,
"position": next_position,
"position": position,
},
)
kept_child_ids.add(child_resource.id)
next_position += 1

# Remove stale child-program relationships no longer in req_tree.
# Only delete relationships whose child is a program resource;
Expand Down
205 changes: 174 additions & 31 deletions learning_resources/etl/loaders_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -1406,28 +1406,37 @@ def test_load_programs(mocker, mock_blocklist, mock_duplicates):
mock_duplicates.assert_called_once_with("mitx")


def test_load_programs_with_child_program_relationships(mocker, settings):
"""End-to-end loader test for mixed child course/program relationships."""
@pytest.fixture
def mitxonline_program_children_fixture(mocker, settings):
"""
Set up topics/platform, patch _fetch_courses_by_ids, and return the parsed
mitxonline_program_children_loader JSON fixture.
"""
set_up_topics(is_mitx=True)
LearningResourcePlatformFactory.create(code=PlatformType.mitxonline.name)

with open("./test_json/mitxonline_program_children_loader.json") as f: # noqa: PTH123
fixture_data = json.load(f)

def _mock_fetch_courses_by_ids(course_ids):
return [
mocker.patch(
"learning_resources.etl.mitxonline._fetch_courses_by_ids",
side_effect=lambda course_ids: [
course
for course in fixture_data["courses"]
if course["id"] in set(course_ids)
]

mocker.patch(
"learning_resources.etl.mitxonline._fetch_courses_by_ids",
side_effect=_mock_fetch_courses_by_ids,
],
)
settings.MITX_ONLINE_BASE_URL = "https://mitxonline.mit.edu"
return fixture_data


transformed_programs = list(transform_programs(fixture_data["programs"]))
def test_load_programs_with_child_program_relationships(
mitxonline_program_children_fixture,
):
"""End-to-end loader test for mixed child course/program relationships."""
transformed_programs = list(
transform_programs(mitxonline_program_children_fixture["programs"])
)
load_programs(
ETLSource.mitxonline.name,
transformed_programs,
Expand Down Expand Up @@ -1470,43 +1479,177 @@ def _mock_fetch_courses_by_ids(course_ids):
assert child_program_positions == sorted(set(child_program_positions))


def test_load_programs_idempotent_child_relationships(mocker, settings):
"""Running load_programs twice should not duplicate child program relationships."""
set_up_topics(is_mitx=True)
LearningResourcePlatformFactory.create(code=PlatformType.mitxonline.name)

with open("./test_json/mitxonline_program_children_loader.json") as f: # noqa: PTH123
fixture_data = json.load(f)
def test_load_program_honors_explicit_course_position(mock_upsert_tasks):
"""An explicit `position` on each course_data entry should be honored, preserving gaps for pass 2 to fill."""
platform = LearningResourcePlatformFactory.create()
program = ProgramFactory.build(courses=[], platform=platform.code)
courses = CourseFactory.create_batch(3, platform=platform.code)

def _mock_fetch_courses_by_ids(course_ids):
return [
course
for course in fixture_data["courses"]
if course["id"] in set(course_ids)
]
# Simulate gaps where pass 2 would later insert display_mode="course"
# child programs at positions 0 and 2.
program_courses = [
{
"readable_id": courses[0].learning_resource.readable_id,
"platform": platform.code,
"availability": courses[0].learning_resource.availability,
"position": 1,
},
{
"readable_id": courses[1].learning_resource.readable_id,
"platform": platform.code,
"availability": courses[1].learning_resource.availability,
"position": 3,
},
{
"readable_id": courses[2].learning_resource.readable_id,
"platform": platform.code,
"availability": courses[2].learning_resource.availability,
"position": 4,
},
]

mocker.patch(
"learning_resources.etl.mitxonline._fetch_courses_by_ids",
side_effect=_mock_fetch_courses_by_ids,
result, _, _ = load_program(
{
"platform": platform.code,
"readable_id": program.learning_resource.readable_id,
"professional": False,
"title": program.learning_resource.title,
"url": program.learning_resource.url,
"image": {"url": program.learning_resource.image.url},
"published": True,
"runs": [
{
"run_id": program.learning_resource.readable_id,
"start_date": "2024-01-01T00:00:00Z",
"enrollment_start": "2023-12-01T00:00:00Z",
"end_date": "2024-06-01T00:00:00Z",
}
],
"availability": program.learning_resource.availability,
"courses": program_courses,
},
[],
[],
)
settings.MITX_ONLINE_BASE_URL = "https://mitxonline.mit.edu"

transformed = list(transform_programs(fixture_data["programs"]))
positions_by_readable = {
rel.child.readable_id: rel.position for rel in result.children.all()
}
assert positions_by_readable == {
courses[0].learning_resource.readable_id: 1,
courses[1].learning_resource.readable_id: 3,
courses[2].learning_resource.readable_id: 4,
}


# Run twice
def test_load_programs_orders_courses_by_req_tree_with_display_mode_course_children(
mitxonline_program_children_fixture,
):
"""
PROGRAM_COURSES children should land in req_tree order, interleaving
display_mode="course" sub-programs among regular courses.
"""
transformed = list(
transform_programs(mitxonline_program_children_fixture["programs"])
)
load_programs(
ETLSource.mitxonline.name,
transformed,
config=ProgramLoaderConfig(prune=False),
)
# Re-transform since transform_programs pops keys
transformed = list(transform_programs(fixture_data["programs"]))

parent_resource = LearningResource.objects.get(readable_id="mitx-parent-program")
program_courses = (
parent_resource.children.filter(
relation_type=LearningResourceRelationTypes.PROGRAM_COURSES.value
)
.order_by("position")
.values_list("child__readable_id", flat=True)
)
assert list(program_courses) == [
"course-10",
"course-70",
"mitx-child-program-displayed-as-course",
]


def test_load_programs_appends_program_program_children_after_courses(
mitxonline_program_children_fixture,
):
"""PROGRAM_PROGRAMS children must sit at positions strictly greater than every PROGRAM_COURSES position so pass 2's display_mode="course" children don't collide with them."""
transformed = list(
transform_programs(mitxonline_program_children_fixture["programs"])
)
load_programs(
ETLSource.mitxonline.name,
transformed,
config=ProgramLoaderConfig(prune=False),
)

parent_resource = LearningResource.objects.get(readable_id="mitx-parent-program")
program_courses_max = max(
parent_resource.children.filter(
relation_type=LearningResourceRelationTypes.PROGRAM_COURSES.value
).values_list("position", flat=True)
)
program_programs_positions = list(
parent_resource.children.filter(
relation_type=LearningResourceRelationTypes.PROGRAM_PROGRAMS.value
).values_list("position", flat=True)
)
assert program_programs_positions, "expected at least one PROGRAM_PROGRAMS child"
assert min(program_programs_positions) > program_courses_max


def test_create_child_program_relationships_uses_existing_max_position():
"""New child without explicit position should land at existing_max + 1."""
platform = LearningResourcePlatformFactory.create(code=PlatformType.mitxonline.name)
parent = ProgramFactory.create(
platform=platform.code,
learning_resource__readable_id="parent-with-existing-children",
).learning_resource
existing_child = CourseFactory.create(platform=platform.code).learning_resource
LearningResourceRelationship.objects.create(
parent=parent,
child=existing_child,
relation_type=LearningResourceRelationTypes.PROGRAM_COURSES.value,
position=5,
)
new_child_program = ProgramFactory.create(
platform=platform.code,
learning_resource__readable_id="new-child-program",
).learning_resource

loaders._create_child_program_relationships( # noqa: SLF001
[
(
parent,
[{"readable_id": new_child_program.readable_id}],
)
]
)

new_rel = LearningResourceRelationship.objects.get(
parent=parent, child=new_child_program
)
assert new_rel.position == 6


def test_load_programs_idempotent_child_relationships(
mitxonline_program_children_fixture,
):
"""Running load_programs twice should not duplicate child program relationships."""
for _ in range(2):
# Re-transform each iteration since transform_programs pops keys.
transformed = list(
transform_programs(mitxonline_program_children_fixture["programs"])
)
load_programs(
ETLSource.mitxonline.name,
transformed,
config=ProgramLoaderConfig(prune=False),
)

parent_resource = LearningResource.objects.get(readable_id="mitx-parent-program")
# Should have exactly 2 child-program relationships, not 4
program_child_count = parent_resource.children.filter(
Expand Down
Loading
Loading