From ba7546a200849714bfbe30449b7405af50080001 Mon Sep 17 00:00:00 2001 From: Matt Bertrand Date: Wed, 20 May 2026 10:29:12 -0400 Subject: [PATCH 1/5] Fix ordering for mitxonline programs --- learning_resources/etl/loaders.py | 50 ++++++- learning_resources/etl/loaders_test.py | 168 ++++++++++++++++++++++ learning_resources/etl/mitxonline.py | 79 +++++++++- learning_resources/etl/mitxonline_test.py | 144 ++++++++++++++++++- 4 files changed, 421 insertions(+), 20 deletions(-) diff --git a/learning_resources/etl/loaders.py b/learning_resources/etl/loaders.py index c25c1610b4..5bdc1b1f6d 100644 --- a/learning_resources/etl/loaders.py +++ b/learning_resources/etl/loaders.py @@ -8,7 +8,7 @@ from django.contrib.auth import get_user_model from django.core.cache import caches from django.db import transaction -from django.db.models import Q +from django.db.models import Max, Q from learning_resources.constants import ( CONTENT_TYPE_PAGE, @@ -657,6 +657,15 @@ class ProgramLoadResult(NamedTuple): child_programs_data: list[dict] +class LoadedProgramCourse(NamedTuple): + """A loaded course paired with the explicit child position to assign, + or None to fall back to the sequential index in load_program. + """ + + position: int | None + resource: LearningResource + + def load_program( program_data: dict, blocklist: list[str], @@ -729,16 +738,23 @@ def load_program( load_run_dependent_values(learning_resource) + loaded_courses: list[LoadedProgramCourse] = [] for course_data in courses_data: # skip courses that don't define a readable_id if not course_data.get("readable_id", None): continue + explicit_position = course_data.pop("position", None) course_resource = load_course( course_data, blocklist, duplicates, config=config.courses ) if course_resource: course_resources.append(course_resource) + loaded_courses.append( + LoadedProgramCourse( + position=explicit_position, resource=course_resource + ) + ) # Replace all children with position-ordered course relationships. # Pass 2 in load_programs() will re-create child-program # relationships after all programs exist. @@ -747,11 +763,13 @@ def load_program( [ LearningResourceRelationship( parent=learning_resource, - child=course_resource, + child=loaded.resource, relation_type=LearningResourceRelationTypes.PROGRAM_COURSES, - position=position, + position=( + loaded.position if loaded.position is not None else fallback_idx + ), ) - for position, course_resource in enumerate(course_resources) + for fallback_idx, loaded in enumerate(loaded_courses) ] ) @@ -786,7 +804,20 @@ def _create_child_program_relationships( ) } - next_position = parent_resource.children.count() + existing_max = parent_resource.children.aggregate(Max("position")).get( + "position__max" + ) + explicit_max = max( + ( + cpd["position"] + for cpd in child_programs_data + if cpd.get("position") is not None + ), + default=-1, + ) + next_position = ( + max(existing_max if existing_max is not None else -1, explicit_max) + 1 + ) kept_child_ids = set() for child_program_data in child_programs_data: readable_id = child_program_data.get("readable_id") @@ -811,16 +842,21 @@ def _create_child_program_relationships( if child_program_data.get("display_mode") == "course" else LearningResourceRelationTypes.PROGRAM_PROGRAMS ) + explicit_position = child_program_data.get("position") + if explicit_position is not None: + position = explicit_position + else: + position = next_position + next_position += 1 _, _created = LearningResourceRelationship.objects.update_or_create( parent=parent_resource, child=child_resource, defaults={ "relation_type": relation_type, - "position": next_position, + "position": position, }, ) kept_child_ids.add(child_resource.id) - next_position += 1 # Remove stale child-program relationships no longer in req_tree. # Only delete relationships whose child is a program resource; diff --git a/learning_resources/etl/loaders_test.py b/learning_resources/etl/loaders_test.py index be089ddfc7..e7cbced375 100644 --- a/learning_resources/etl/loaders_test.py +++ b/learning_resources/etl/loaders_test.py @@ -1470,6 +1470,174 @@ def _mock_fetch_courses_by_ids(course_ids): assert child_program_positions == sorted(set(child_program_positions)) +def test_load_program_honors_explicit_course_position(mock_upsert_tasks): + """ + load_program should honor an explicit `position` field on each + course_data entry, leaving positional gaps for items pass 2 will fill. + """ + platform = LearningResourcePlatformFactory.create() + program = ProgramFactory.build(courses=[], platform=platform.code) + courses = CourseFactory.create_batch(3, platform=platform.code) + + # Simulate gaps where pass 2 would later insert display_mode="course" + # child programs at positions 0 and 2. + program_courses = [ + { + "readable_id": courses[0].learning_resource.readable_id, + "platform": platform.code, + "availability": courses[0].learning_resource.availability, + "position": 1, + }, + { + "readable_id": courses[1].learning_resource.readable_id, + "platform": platform.code, + "availability": courses[1].learning_resource.availability, + "position": 3, + }, + { + "readable_id": courses[2].learning_resource.readable_id, + "platform": platform.code, + "availability": courses[2].learning_resource.availability, + "position": 4, + }, + ] + + result, _, _ = load_program( + { + "platform": platform.code, + "readable_id": program.learning_resource.readable_id, + "professional": False, + "title": program.learning_resource.title, + "url": program.learning_resource.url, + "image": {"url": program.learning_resource.image.url}, + "published": True, + "runs": [ + { + "run_id": program.learning_resource.readable_id, + "start_date": "2024-01-01T00:00:00Z", + "enrollment_start": "2023-12-01T00:00:00Z", + "end_date": "2024-06-01T00:00:00Z", + } + ], + "availability": program.learning_resource.availability, + "courses": program_courses, + }, + [], + [], + ) + + positions_by_readable = { + rel.child.readable_id: rel.position for rel in result.children.all() + } + assert positions_by_readable == { + courses[0].learning_resource.readable_id: 1, + courses[1].learning_resource.readable_id: 3, + courses[2].learning_resource.readable_id: 4, + } + + +def test_load_programs_orders_courses_by_req_tree_with_display_mode_course_children( + mocker, settings +): + """ + A parent program whose req_tree mixes courses and display_mode="course" + sub-programs (the Universal AI shape) must end up with PROGRAM_COURSES + children in req_tree order — interleaving the display_mode="course" + sub-program among the courses according to the req_tree, not appending + it after. + """ + set_up_topics(is_mitx=True) + LearningResourcePlatformFactory.create(code=PlatformType.mitxonline.name) + + with open("./test_json/mitxonline_program_children_loader.json") as f: # noqa: PTH123 + fixture_data = json.load(f) + + def _mock_fetch_courses_by_ids(course_ids): + return [ + course + for course in fixture_data["courses"] + if course["id"] in set(course_ids) + ] + + mocker.patch( + "learning_resources.etl.mitxonline._fetch_courses_by_ids", + side_effect=_mock_fetch_courses_by_ids, + ) + settings.MITX_ONLINE_BASE_URL = "https://mitxonline.mit.edu" + + transformed = list(transform_programs(fixture_data["programs"])) + load_programs( + ETLSource.mitxonline.name, + transformed, + config=ProgramLoaderConfig(prune=False), + ) + + parent_resource = LearningResource.objects.get(readable_id="mitx-parent-program") + # The parent's req_tree is: course 10, program 1001 (display_mode=program, + # expands to course 70), program 1002 (display_mode=course → PROGRAM_COURSES child). + # Expected PROGRAM_COURSES order: course-10, course-70, mitx-child-program-displayed-as-course + program_courses = ( + parent_resource.children.filter( + relation_type=LearningResourceRelationTypes.PROGRAM_COURSES.value + ) + .order_by("position") + .values_list("child__readable_id", flat=True) + ) + assert list(program_courses) == [ + "course-10", + "course-70", + "mitx-child-program-displayed-as-course", + ] + + +def test_load_programs_appends_program_program_children_after_courses(mocker, settings): + """ + PROGRAM_PROGRAMS children (display_mode != "course") have no explicit + position; they must land at positions strictly greater than every + PROGRAM_COURSES position to avoid colliding with display_mode="course" + children inserted in pass 2. + """ + set_up_topics(is_mitx=True) + LearningResourcePlatformFactory.create(code=PlatformType.mitxonline.name) + + with open("./test_json/mitxonline_program_children_loader.json") as f: # noqa: PTH123 + fixture_data = json.load(f) + + def _mock_fetch_courses_by_ids(course_ids): + return [ + course + for course in fixture_data["courses"] + if course["id"] in set(course_ids) + ] + + mocker.patch( + "learning_resources.etl.mitxonline._fetch_courses_by_ids", + side_effect=_mock_fetch_courses_by_ids, + ) + settings.MITX_ONLINE_BASE_URL = "https://mitxonline.mit.edu" + + transformed = list(transform_programs(fixture_data["programs"])) + load_programs( + ETLSource.mitxonline.name, + transformed, + config=ProgramLoaderConfig(prune=False), + ) + + parent_resource = LearningResource.objects.get(readable_id="mitx-parent-program") + program_courses_max = max( + parent_resource.children.filter( + relation_type=LearningResourceRelationTypes.PROGRAM_COURSES.value + ).values_list("position", flat=True) + ) + program_programs_positions = list( + parent_resource.children.filter( + relation_type=LearningResourceRelationTypes.PROGRAM_PROGRAMS.value + ).values_list("position", flat=True) + ) + assert program_programs_positions, "expected at least one PROGRAM_PROGRAMS child" + assert min(program_programs_positions) > program_courses_max + + def test_load_programs_idempotent_child_relationships(mocker, settings): """Running load_programs twice should not duplicate child program relationships.""" set_up_topics(is_mitx=True) diff --git a/learning_resources/etl/mitxonline.py b/learning_resources/etl/mitxonline.py index f4a69d2c28..9e207b920c 100644 --- a/learning_resources/etl/mitxonline.py +++ b/learning_resources/etl/mitxonline.py @@ -516,11 +516,69 @@ def _collect_program_ids( _collect_program_ids(node.get("children", []), seen_ids, program_ids) +def get_child_positions( + nodes: list[dict], + programs_by_id: dict[int, dict] | None = None, +) -> dict[tuple[str, int], int]: + """ + Return a {(resource_type, id): position} map for items that appear as + PROGRAM_COURSES children of a program, in req_tree traversal order. + + resource_type is "course" for course nodes and "program" for program nodes + whose program has display_mode="course". + + Positions are stamped onto transformed courses and child_programs in + transform_programs so the loader can persist req_tree order across + pass 1 (courses) and pass 2 (display_mode="course" sub-programs). + """ + positions: dict[tuple[str, int], int] = {} + visited_programs: set[int] = set() + _collect_child_positions(nodes, programs_by_id, visited_programs, positions) + return positions + + +def _collect_child_positions( + nodes: list[dict], + programs_by_id: dict[int, dict] | None, + visited_programs: set[int], + positions: dict[tuple[str, int], int], +) -> None: + """Recursive helper for get_child_positions.""" + for node in nodes: + data = node.get("data", {}) + node_type = data.get("node_type") + if node_type == "course": + cid = data.get("course") + if isinstance(cid, int) and ("course", cid) not in positions: + positions[("course", cid)] = len(positions) + elif node_type == "program" and programs_by_id: + pid = data.get("required_program") + if isinstance(pid, int) and pid not in visited_programs: + visited_programs.add(pid) + child = programs_by_id.get(pid) + if child and child.get("display_mode") == "course": + if ("program", pid) not in positions: + positions[("program", pid)] = len(positions) + elif child: + _collect_child_positions( + child.get("req_tree", []), + programs_by_id, + visited_programs, + positions, + ) + _collect_child_positions( + node.get("children", []), + programs_by_id, + visited_programs, + positions, + ) + + def _fetch_courses_by_ids(course_ids): if not course_ids: return [] if settings.MITX_ONLINE_COURSES_API_URL: - return list( + fetched = list( _fetch_data( settings.MITX_ONLINE_COURSES_API_URL, params={ @@ -529,6 +587,8 @@ def _fetch_courses_by_ids(course_ids): }, ) ) + courses_by_id = {course["id"]: course for course in fetched} + return [courses_by_id[cid] for cid in course_ids if cid in courses_by_id] log.warning("Missing required setting MITX_ONLINE_COURSES_API_URL") return [] @@ -548,17 +608,23 @@ def transform_programs(programs: list[dict]) -> Iterator[dict]: # normalize the MITx Online data programs_by_id = {p["id"]: p for p in programs} for program in programs: + child_positions = get_child_positions( + program.get("req_tree", []), programs_by_id + ) + fetched_courses = _fetch_courses_by_ids( + get_course_ids_from_req_tree(program.get("req_tree", []), programs_by_id) + ) + mitx_id_by_readable = {c["readable_id"]: c["id"] for c in fetched_courses} courses = transform_courses( [ course - for course in _fetch_courses_by_ids( - get_course_ids_from_req_tree( - program.get("req_tree", []), programs_by_id - ) - ) + for course in fetched_courses if not re.search(EXCLUDE_REGEX, course["title"], re.IGNORECASE) ] ) + for course in courses: + mitx_id = mitx_id_by_readable.get(course["readable_id"]) + course["position"] = child_positions.get(("course", mitx_id)) pace = sorted( {course_pace for course in courses for course_pace in course["pace"]} ) @@ -616,6 +682,7 @@ def transform_programs(programs: list[dict]) -> Iterator[dict]: { "readable_id": programs_by_id[pid]["readable_id"], "display_mode": programs_by_id[pid].get("display_mode"), + "position": child_positions.get(("program", pid)), } ) has_certification = parse_certification(OFFERED_BY["code"], [run]) diff --git a/learning_resources/etl/mitxonline_test.py b/learning_resources/etl/mitxonline_test.py index ebbe5320a2..947009425c 100644 --- a/learning_resources/etl/mitxonline_test.py +++ b/learning_resources/etl/mitxonline_test.py @@ -30,6 +30,7 @@ _transform_run, extract_courses, extract_programs, + get_child_positions, get_course_ids_from_req_tree, get_program_ids_from_req_tree, is_fully_enrollable, @@ -369,6 +370,106 @@ def test_get_course_ids_from_req_tree_display_mode_course(): assert result == [10, 500] +def test_get_child_positions_interleaves_courses_and_display_mode_course_programs(): + """ + Courses and display_mode="course" sub-programs share the PROGRAM_COURSES + children of a parent and must be positioned in req_tree traversal order. + """ + programs_by_id = { + 36: {"display_mode": "course", "req_tree": []}, + 37: {"display_mode": "course", "req_tree": []}, + } + req_tree = [ + { + "data": {"node_type": "operator", "operator": "all_of"}, + "id": 1, + "children": [ + { + "data": {"node_type": "program", "required_program": 36}, + "id": 2, + }, + { + "data": {"node_type": "program", "required_program": 37}, + "id": 3, + }, + {"data": {"node_type": "course", "course": 159}, "id": 4}, + {"data": {"node_type": "course", "course": 152}, "id": 5}, + ], + } + ] + assert get_child_positions(req_tree, programs_by_id) == { + ("program", 36): 0, + ("program", 37): 1, + ("course", 159): 2, + ("course", 152): 3, + } + + +def test_get_child_positions_recurses_through_display_mode_program(): + """ + A display_mode="program" child has its req_tree expanded inline; the + program itself does not occupy a PROGRAM_COURSES position. + """ + programs_by_id = { + 50: { + "display_mode": "program", + "req_tree": [ + {"data": {"node_type": "course", "course": 600}, "id": 100}, + {"data": {"node_type": "course", "course": 601}, "id": 101}, + ], + }, + } + req_tree = [ + {"data": {"node_type": "course", "course": 10}, "id": 1}, + { + "data": {"node_type": "program", "required_program": 50}, + "id": 2, + }, + {"data": {"node_type": "course", "course": 20}, "id": 3}, + ] + # Program 50's courses (600, 601) take the slot between courses 10 and 20. + # Program 50 itself gets no PROGRAM_COURSES position. + assert get_child_positions(req_tree, programs_by_id) == { + ("course", 10): 0, + ("course", 600): 1, + ("course", 601): 2, + ("course", 20): 3, + } + + +def test_get_child_positions_handles_circular_reference(): + """Circular program references must not loop infinitely.""" + programs_by_id = { + 1: { + "display_mode": "program", + "req_tree": [ + {"data": {"node_type": "program", "required_program": 2}, "id": 10}, + {"data": {"node_type": "course", "course": 100}, "id": 11}, + ], + }, + 2: { + "display_mode": "program", + "req_tree": [ + {"data": {"node_type": "program", "required_program": 1}, "id": 20}, + {"data": {"node_type": "course", "course": 200}, "id": 21}, + ], + }, + } + # Start from program 1's req_tree (as transform_programs would). + positions = get_child_positions(programs_by_id[1]["req_tree"], programs_by_id) + assert set(positions.keys()) == {("course", 100), ("course", 200)} + + +def test_get_child_positions_skips_program_nodes_without_programs_by_id(): + """Program nodes are ignored if programs_by_id is None or empty.""" + req_tree = [ + {"data": {"node_type": "course", "course": 10}, "id": 1}, + {"data": {"node_type": "program", "required_program": 999}, "id": 2}, + ] + assert get_child_positions(req_tree, None) == {("course", 10): 0} + assert get_child_positions(req_tree, {}) == {("course", 10): 0} + + @pytest.mark.parametrize( ("req_tree", "expected_ids"), [ @@ -533,13 +634,19 @@ def test_mitxonline_transform_programs( result = transform_programs(mock_mitxonline_programs_data["results"]) expected = [] + programs_by_id = {p["id"]: p for p in mock_mitxonline_programs_data["results"]} + courses_by_id = {c["id"]: c for c in mock_mitxonline_courses_data["results"]} for program_data in mock_mitxonline_programs_data["results"]: + program_course_ids = get_course_ids_from_req_tree( + program_data.get("req_tree", []), programs_by_id + ) + child_positions = get_child_positions( + program_data.get("req_tree", []), programs_by_id + ) expected_courses = [] - for course_data in sorted( - mock_mitxonline_courses_data["results"], - key=lambda x: x["readable_id"], - ): - if "PROCTORED EXAM" in course_data["title"]: + for cid in program_course_ids: + course_data = courses_by_id.get(cid) + if course_data is None or "PROCTORED EXAM" in course_data["title"]: continue has_course_product_page = bool( parse_page_attribute(course_data, "page_url") @@ -611,6 +718,7 @@ def test_mitxonline_transform_programs( } ] }, + "position": child_positions.get(("course", cid)), } ) has_program_product_page = bool(parse_page_attribute(program_data, "page_url")) @@ -626,6 +734,10 @@ def test_mitxonline_transform_programs( else None ) ) + # Pace is derived from the program's courses, so it depends on which + # courses survived the req_tree filter — programs whose req_tree + # references courses absent from the courses fixture get an empty pace. + expected_pace = sorted({p for c in expected_courses for p in c["pace"]}) expected.append( { "readable_id": program_data["readable_id"], @@ -662,7 +774,7 @@ def test_mitxonline_transform_programs( "availability": program_data["availability"], "topics": transform_topics(program_data["topics"], OFFERED_BY["code"]), "format": [Format.asynchronous.name], - "pace": [Pace.instructor_paced.name], + "pace": expected_pace, "runs": [ { "run_id": program_data["readable_id"], @@ -690,7 +802,7 @@ def test_mitxonline_transform_programs( else RunStatus.archived.value, "availability": program_data["availability"], "format": [Format.asynchronous.name], - "pace": [Pace.instructor_paced.name], + "pace": expected_pace, "duration": program_data.get("duration") or "", "time_commitment": program_data.get("time_commitment") or "", "min_weeks": program_data.get("min_weeks"), @@ -1386,3 +1498,21 @@ def test_fetch_courses_by_ids_empty_list(mocker, settings): result = _fetch_courses_by_ids([]) assert result == [] mock_fetch.assert_not_called() + + +def test_fetch_courses_by_ids_preserves_requested_order(mocker, settings): + """ + The MITx Online API returns courses in its own order; _fetch_courses_by_ids + must restore the requested ID order. Missing courses are silently dropped. + """ + settings.MITX_ONLINE_COURSES_API_URL = "http://localhost/test/courses/api" + # API returns reversed order, and omits id=30 (e.g. no longer live). + mocker.patch( + "learning_resources.etl.mitxonline._fetch_data", + return_value=[ + {"id": 20, "readable_id": "course-v1:T+B"}, + {"id": 10, "readable_id": "course-v1:T+A"}, + ], + ) + result = _fetch_courses_by_ids([10, 20, 30]) + assert [c["id"] for c in result] == [10, 20] From f7dc7ad42cfd9d888094d7fb7b72a97f62723521 Mon Sep 17 00:00:00 2001 From: Matt Bertrand Date: Wed, 20 May 2026 11:35:36 -0400 Subject: [PATCH 2/5] feedback --- learning_resources/etl/loaders.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/learning_resources/etl/loaders.py b/learning_resources/etl/loaders.py index 5bdc1b1f6d..08ac72e60a 100644 --- a/learning_resources/etl/loaders.py +++ b/learning_resources/etl/loaders.py @@ -704,7 +704,6 @@ def load_program( program_data.setdefault("delivery", [LearningResourceDelivery.online.name]) runs_data = program_data.get("runs", []) - course_resources = [] with transaction.atomic(): learning_resource, created = upsert_course_or_program( program_data, [], [], LearningResourceType.program.name @@ -749,7 +748,6 @@ def load_program( course_data, blocklist, duplicates, config=config.courses ) if course_resource: - course_resources.append(course_resource) loaded_courses.append( LoadedProgramCourse( position=explicit_position, resource=course_resource From ed214ffbf3cac58ec35dd9c8d541e9db060d3434 Mon Sep 17 00:00:00 2001 From: Matt Bertrand Date: Wed, 20 May 2026 12:08:04 -0400 Subject: [PATCH 3/5] Simplify tests --- learning_resources/etl/loaders_test.py | 149 +++++++--------------- learning_resources/etl/mitxonline_test.py | 15 +-- 2 files changed, 49 insertions(+), 115 deletions(-) diff --git a/learning_resources/etl/loaders_test.py b/learning_resources/etl/loaders_test.py index e7cbced375..5b8ec4cde8 100644 --- a/learning_resources/etl/loaders_test.py +++ b/learning_resources/etl/loaders_test.py @@ -1406,28 +1406,37 @@ def test_load_programs(mocker, mock_blocklist, mock_duplicates): mock_duplicates.assert_called_once_with("mitx") -def test_load_programs_with_child_program_relationships(mocker, settings): - """End-to-end loader test for mixed child course/program relationships.""" +@pytest.fixture +def mitxonline_program_children_fixture(mocker, settings): + """ + Set up topics/platform, patch _fetch_courses_by_ids, and return the parsed + mitxonline_program_children_loader JSON fixture. + """ set_up_topics(is_mitx=True) LearningResourcePlatformFactory.create(code=PlatformType.mitxonline.name) with open("./test_json/mitxonline_program_children_loader.json") as f: # noqa: PTH123 fixture_data = json.load(f) - def _mock_fetch_courses_by_ids(course_ids): - return [ + mocker.patch( + "learning_resources.etl.mitxonline._fetch_courses_by_ids", + side_effect=lambda course_ids: [ course for course in fixture_data["courses"] if course["id"] in set(course_ids) - ] - - mocker.patch( - "learning_resources.etl.mitxonline._fetch_courses_by_ids", - side_effect=_mock_fetch_courses_by_ids, + ], ) settings.MITX_ONLINE_BASE_URL = "https://mitxonline.mit.edu" + return fixture_data + - transformed_programs = list(transform_programs(fixture_data["programs"])) +def test_load_programs_with_child_program_relationships( + mitxonline_program_children_fixture, +): + """End-to-end loader test for mixed child course/program relationships.""" + transformed_programs = list( + transform_programs(mitxonline_program_children_fixture["programs"]) + ) load_programs( ETLSource.mitxonline.name, transformed_programs, @@ -1471,10 +1480,7 @@ def _mock_fetch_courses_by_ids(course_ids): def test_load_program_honors_explicit_course_position(mock_upsert_tasks): - """ - load_program should honor an explicit `position` field on each - course_data entry, leaving positional gaps for items pass 2 will fill. - """ + """An explicit `position` on each course_data entry should be honored, preserving gaps for pass 2 to fill.""" platform = LearningResourcePlatformFactory.create() program = ProgramFactory.build(courses=[], platform=platform.code) courses = CourseFactory.create_batch(3, platform=platform.code) @@ -1537,35 +1543,15 @@ def test_load_program_honors_explicit_course_position(mock_upsert_tasks): def test_load_programs_orders_courses_by_req_tree_with_display_mode_course_children( - mocker, settings + mitxonline_program_children_fixture, ): """ - A parent program whose req_tree mixes courses and display_mode="course" - sub-programs (the Universal AI shape) must end up with PROGRAM_COURSES - children in req_tree order — interleaving the display_mode="course" - sub-program among the courses according to the req_tree, not appending - it after. + PROGRAM_COURSES children should land in req_tree order, interleaving + display_mode="course" sub-programs among regular courses. """ - set_up_topics(is_mitx=True) - LearningResourcePlatformFactory.create(code=PlatformType.mitxonline.name) - - with open("./test_json/mitxonline_program_children_loader.json") as f: # noqa: PTH123 - fixture_data = json.load(f) - - def _mock_fetch_courses_by_ids(course_ids): - return [ - course - for course in fixture_data["courses"] - if course["id"] in set(course_ids) - ] - - mocker.patch( - "learning_resources.etl.mitxonline._fetch_courses_by_ids", - side_effect=_mock_fetch_courses_by_ids, + transformed = list( + transform_programs(mitxonline_program_children_fixture["programs"]) ) - settings.MITX_ONLINE_BASE_URL = "https://mitxonline.mit.edu" - - transformed = list(transform_programs(fixture_data["programs"])) load_programs( ETLSource.mitxonline.name, transformed, @@ -1573,9 +1559,8 @@ def _mock_fetch_courses_by_ids(course_ids): ) parent_resource = LearningResource.objects.get(readable_id="mitx-parent-program") - # The parent's req_tree is: course 10, program 1001 (display_mode=program, - # expands to course 70), program 1002 (display_mode=course → PROGRAM_COURSES child). - # Expected PROGRAM_COURSES order: course-10, course-70, mitx-child-program-displayed-as-course + # Parent req_tree: course 10, program 1001 (display_mode=program → expands to + # course 70), program 1002 (display_mode=course → PROGRAM_COURSES child). program_courses = ( parent_resource.children.filter( relation_type=LearningResourceRelationTypes.PROGRAM_COURSES.value @@ -1590,33 +1575,13 @@ def _mock_fetch_courses_by_ids(course_ids): ] -def test_load_programs_appends_program_program_children_after_courses(mocker, settings): - """ - PROGRAM_PROGRAMS children (display_mode != "course") have no explicit - position; they must land at positions strictly greater than every - PROGRAM_COURSES position to avoid colliding with display_mode="course" - children inserted in pass 2. - """ - set_up_topics(is_mitx=True) - LearningResourcePlatformFactory.create(code=PlatformType.mitxonline.name) - - with open("./test_json/mitxonline_program_children_loader.json") as f: # noqa: PTH123 - fixture_data = json.load(f) - - def _mock_fetch_courses_by_ids(course_ids): - return [ - course - for course in fixture_data["courses"] - if course["id"] in set(course_ids) - ] - - mocker.patch( - "learning_resources.etl.mitxonline._fetch_courses_by_ids", - side_effect=_mock_fetch_courses_by_ids, +def test_load_programs_appends_program_program_children_after_courses( + mitxonline_program_children_fixture, +): + """PROGRAM_PROGRAMS children must sit at positions strictly greater than every PROGRAM_COURSES position so pass 2's display_mode="course" children don't collide with them.""" + transformed = list( + transform_programs(mitxonline_program_children_fixture["programs"]) ) - settings.MITX_ONLINE_BASE_URL = "https://mitxonline.mit.edu" - - transformed = list(transform_programs(fixture_data["programs"])) load_programs( ETLSource.mitxonline.name, transformed, @@ -1638,42 +1603,20 @@ def _mock_fetch_courses_by_ids(course_ids): assert min(program_programs_positions) > program_courses_max -def test_load_programs_idempotent_child_relationships(mocker, settings): +def test_load_programs_idempotent_child_relationships( + mitxonline_program_children_fixture, +): """Running load_programs twice should not duplicate child program relationships.""" - set_up_topics(is_mitx=True) - LearningResourcePlatformFactory.create(code=PlatformType.mitxonline.name) - - with open("./test_json/mitxonline_program_children_loader.json") as f: # noqa: PTH123 - fixture_data = json.load(f) - - def _mock_fetch_courses_by_ids(course_ids): - return [ - course - for course in fixture_data["courses"] - if course["id"] in set(course_ids) - ] - - mocker.patch( - "learning_resources.etl.mitxonline._fetch_courses_by_ids", - side_effect=_mock_fetch_courses_by_ids, - ) - settings.MITX_ONLINE_BASE_URL = "https://mitxonline.mit.edu" - - transformed = list(transform_programs(fixture_data["programs"])) - - # Run twice - load_programs( - ETLSource.mitxonline.name, - transformed, - config=ProgramLoaderConfig(prune=False), - ) - # Re-transform since transform_programs pops keys - transformed = list(transform_programs(fixture_data["programs"])) - load_programs( - ETLSource.mitxonline.name, - transformed, - config=ProgramLoaderConfig(prune=False), - ) + for _ in range(2): + # Re-transform each iteration since transform_programs pops keys. + transformed = list( + transform_programs(mitxonline_program_children_fixture["programs"]) + ) + load_programs( + ETLSource.mitxonline.name, + transformed, + config=ProgramLoaderConfig(prune=False), + ) parent_resource = LearningResource.objects.get(readable_id="mitx-parent-program") # Should have exactly 2 child-program relationships, not 4 diff --git a/learning_resources/etl/mitxonline_test.py b/learning_resources/etl/mitxonline_test.py index 947009425c..4672602bab 100644 --- a/learning_resources/etl/mitxonline_test.py +++ b/learning_resources/etl/mitxonline_test.py @@ -371,10 +371,7 @@ def test_get_course_ids_from_req_tree_display_mode_course(): def test_get_child_positions_interleaves_courses_and_display_mode_course_programs(): - """ - Courses and display_mode="course" sub-programs share the PROGRAM_COURSES - children of a parent and must be positioned in req_tree traversal order. - """ + """Courses and display_mode="course" sub-programs share PROGRAM_COURSES slots in req_tree traversal order.""" programs_by_id = { 36: {"display_mode": "course", "req_tree": []}, 37: {"display_mode": "course", "req_tree": []}, @@ -406,10 +403,7 @@ def test_get_child_positions_interleaves_courses_and_display_mode_course_program def test_get_child_positions_recurses_through_display_mode_program(): - """ - A display_mode="program" child has its req_tree expanded inline; the - program itself does not occupy a PROGRAM_COURSES position. - """ + """A display_mode="program" child's req_tree is expanded inline; the program itself takes no slot.""" programs_by_id = { 50: { "display_mode": "program", @@ -1501,10 +1495,7 @@ def test_fetch_courses_by_ids_empty_list(mocker, settings): def test_fetch_courses_by_ids_preserves_requested_order(mocker, settings): - """ - The MITx Online API returns courses in its own order; _fetch_courses_by_ids - must restore the requested ID order. Missing courses are silently dropped. - """ + """Requested ID order must be restored from the API response, with missing IDs silently dropped.""" settings.MITX_ONLINE_COURSES_API_URL = "http://localhost/test/courses/api" # API returns reversed order, and omits id=30 (e.g. no longer live). mocker.patch( From 7c1cc3db345e30090aab27a30892b60d8c9322e0 Mon Sep 17 00:00:00 2001 From: Matt Bertrand Date: Wed, 20 May 2026 12:33:43 -0400 Subject: [PATCH 4/5] Clean up tests --- learning_resources/etl/loaders_test.py | 36 +++++++++++++++++++++-- learning_resources/etl/mitxonline_test.py | 34 +++++++++++---------- 2 files changed, 53 insertions(+), 17 deletions(-) diff --git a/learning_resources/etl/loaders_test.py b/learning_resources/etl/loaders_test.py index 5b8ec4cde8..ac9eec1714 100644 --- a/learning_resources/etl/loaders_test.py +++ b/learning_resources/etl/loaders_test.py @@ -1559,8 +1559,6 @@ def test_load_programs_orders_courses_by_req_tree_with_display_mode_course_child ) parent_resource = LearningResource.objects.get(readable_id="mitx-parent-program") - # Parent req_tree: course 10, program 1001 (display_mode=program → expands to - # course 70), program 1002 (display_mode=course → PROGRAM_COURSES child). program_courses = ( parent_resource.children.filter( relation_type=LearningResourceRelationTypes.PROGRAM_COURSES.value @@ -1603,6 +1601,40 @@ def test_load_programs_appends_program_program_children_after_courses( assert min(program_programs_positions) > program_courses_max +def test_create_child_program_relationships_uses_existing_max_position(): + """New child without explicit position should land at existing_max + 1.""" + platform = LearningResourcePlatformFactory.create(code=PlatformType.mitxonline.name) + parent = ProgramFactory.create( + platform=platform.code, + learning_resource__readable_id="parent-with-existing-children", + ).learning_resource + existing_child = CourseFactory.create(platform=platform.code).learning_resource + LearningResourceRelationship.objects.create( + parent=parent, + child=existing_child, + relation_type=LearningResourceRelationTypes.PROGRAM_COURSES.value, + position=5, + ) + new_child_program = ProgramFactory.create( + platform=platform.code, + learning_resource__readable_id="new-child-program", + ).learning_resource + + loaders._create_child_program_relationships( # noqa: SLF001 + [ + ( + parent, + [{"readable_id": new_child_program.readable_id}], + ) + ] + ) + + new_rel = LearningResourceRelationship.objects.get( + parent=parent, child=new_child_program + ) + assert new_rel.position == 6 + + def test_load_programs_idempotent_child_relationships( mitxonline_program_children_fixture, ): diff --git a/learning_resources/etl/mitxonline_test.py b/learning_resources/etl/mitxonline_test.py index 4672602bab..99eeca7225 100644 --- a/learning_resources/etl/mitxonline_test.py +++ b/learning_resources/etl/mitxonline_test.py @@ -298,10 +298,7 @@ def test_get_course_ids_from_req_tree_circular_reference(): ], }, } - # Starting from program 1's req_tree result = get_course_ids_from_req_tree(programs_by_id[1]["req_tree"], programs_by_id) - # Should get course 200 (from program 2) and course 100 (from program 1) - # but NOT recurse infinitely back into program 1 from program 2 assert set(result) == {100, 200} @@ -365,8 +362,6 @@ def test_get_course_ids_from_req_tree_display_mode_course(): } ] result = get_course_ids_from_req_tree(req_tree, programs_by_id) - # Course 10 from direct child, course 500 from program 60 (normal program), - # but NOT courses 300/400 from program 50 (display_mode="course") assert result == [10, 500] @@ -421,8 +416,6 @@ def test_get_child_positions_recurses_through_display_mode_program(): }, {"data": {"node_type": "course", "course": 20}, "id": 3}, ] - # Program 50's courses (600, 601) take the slot between courses 10 and 20. - # Program 50 itself gets no PROGRAM_COURSES position. assert get_child_positions(req_tree, programs_by_id) == { ("course", 10): 0, ("course", 600): 1, @@ -449,7 +442,6 @@ def test_get_child_positions_handles_circular_reference(): ], }, } - # Start from program 1's req_tree (as transform_programs would). positions = get_child_positions(programs_by_id[1]["req_tree"], programs_by_id) assert set(positions.keys()) == {("course", 100), ("course", 200)} @@ -464,6 +456,24 @@ def test_get_child_positions_skips_program_nodes_without_programs_by_id(): assert get_child_positions(req_tree, {}) == {("course", 10): 0} +def test_transform_programs_stamps_position_on_child_programs(mocker, settings): + """Only display_mode="course" sub-programs get a stamped position.""" + settings.MITX_ONLINE_BASE_URL = "https://mitxonline.mit.edu" + settings.APP_BASE_URL = "https://learn.example.test/" + mocker.patch( + "learning_resources.etl.mitxonline._fetch_courses_by_ids", + return_value=[], + ) + with open("./test_json/mitxonline_child_program_positions.json") as f: # noqa: PTH123 + programs = json.load(f) + + result = list(transform_programs(programs)) + parent = next(p for p in result if p["readable_id"] == "parent") + by_readable = {cp["readable_id"]: cp for cp in parent["child_programs"]} + assert by_readable["child-as-course"]["position"] == 1 + assert by_readable["child-as-program"]["position"] is None + + @pytest.mark.parametrize( ("req_tree", "expected_ids"), [ @@ -634,9 +644,6 @@ def test_mitxonline_transform_programs( program_course_ids = get_course_ids_from_req_tree( program_data.get("req_tree", []), programs_by_id ) - child_positions = get_child_positions( - program_data.get("req_tree", []), programs_by_id - ) expected_courses = [] for cid in program_course_ids: course_data = courses_by_id.get(cid) @@ -712,7 +719,7 @@ def test_mitxonline_transform_programs( } ] }, - "position": child_positions.get(("course", cid)), + "position": len(expected_courses), } ) has_program_product_page = bool(parse_page_attribute(program_data, "page_url")) @@ -728,9 +735,6 @@ def test_mitxonline_transform_programs( else None ) ) - # Pace is derived from the program's courses, so it depends on which - # courses survived the req_tree filter — programs whose req_tree - # references courses absent from the courses fixture get an empty pace. expected_pace = sorted({p for c in expected_courses for p in c["pace"]}) expected.append( { From 90cfe8b3114a2f7050a8b4f6cefcbcbcf73a4e7e Mon Sep 17 00:00:00 2001 From: Matt Bertrand Date: Wed, 20 May 2026 12:49:28 -0400 Subject: [PATCH 5/5] new test json --- .../mitxonline_child_program_positions.json | 72 +++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 test_json/mitxonline_child_program_positions.json diff --git a/test_json/mitxonline_child_program_positions.json b/test_json/mitxonline_child_program_positions.json new file mode 100644 index 0000000000..ac6a8b6ac5 --- /dev/null +++ b/test_json/mitxonline_child_program_positions.json @@ -0,0 +1,72 @@ +[ + { + "id": 1, + "readable_id": "parent", + "title": "Parent", + "departments": [], + "topics": [], + "availability": "anytime", + "display_mode": "program", + "page": { + "page_url": null, + "live": true, + "include_in_learn_catalog": true + }, + "req_tree": [ + { + "data": { "node_type": "operator", "operator": "all_of" }, + "id": 100, + "children": [ + { "data": { "node_type": "course", "course": 50 }, "id": 101 }, + { + "data": { "node_type": "program", "required_program": 2 }, + "id": 102 + }, + { + "data": { "node_type": "program", "required_program": 3 }, + "id": 103 + } + ] + } + ], + "start_date": null, + "end_date": null, + "enrollment_start": null + }, + { + "id": 2, + "readable_id": "child-as-course", + "title": "Child shown as course", + "departments": [], + "topics": [], + "availability": "anytime", + "display_mode": "course", + "page": { + "page_url": null, + "live": true, + "include_in_learn_catalog": true + }, + "req_tree": [], + "start_date": null, + "end_date": null, + "enrollment_start": null + }, + { + "id": 3, + "readable_id": "child-as-program", + "title": "Child shown as program", + "departments": [], + "topics": [], + "availability": "anytime", + "display_mode": "program", + "page": { + "page_url": null, + "live": true, + "include_in_learn_catalog": true + }, + "req_tree": [], + "start_date": null, + "end_date": null, + "enrollment_start": null + } +]