diff --git a/src/extrai/core/batch/batch_processor.py b/src/extrai/core/batch/batch_processor.py index 18f3cff..7a8f1ad 100644 --- a/src/extrai/core/batch/batch_processor.py +++ b/src/extrai/core/batch/batch_processor.py @@ -130,7 +130,9 @@ async def _process_counting_completion( lines = [] if isinstance(results_content, str): lines = [ - li.strip() for li in results_content.strip().split("\n") if li.strip() + li.strip() + for li in results_content.strip().split("\n") + if li.strip() ] elif isinstance(results_content, list): lines = results_content diff --git a/src/extrai/core/counting_consensus.py b/src/extrai/core/counting_consensus.py index 2696dd6..54a87c9 100644 --- a/src/extrai/core/counting_consensus.py +++ b/src/extrai/core/counting_consensus.py @@ -110,7 +110,6 @@ async def achieve_consensus( # Step 2c: Discrepancy & Fallback (LLM Resolution) self.logger.warning("Counting consensus failed. Triggering Merger LLM Call.") - # We need to recreate the system prompt but with conflicting_revisions injected. # However, we only have the raw `system_prompt` string. # Actually, if we're inside the LLM call, we can append the revisions manually diff --git a/tests/core/test_schema_inspector.py b/tests/core/test_schema_inspector.py index ca7345c..4682089 100644 --- a/tests/core/test_schema_inspector.py +++ b/tests/core/test_schema_inspector.py @@ -63,24 +63,36 @@ def inspector(engine): ( Employee, [ - lambda s: s["comment"] - == "Stores detailed information about company employees.", + lambda s: ( + s["comment"] + == "Stores detailed information about company employees." + ), lambda s: s["info_dict"] == {"confidentiality": "high"}, lambda s: s["columns"]["id"]["comment"] == "Unique Employee ID (PK)", - lambda s: s["columns"]["email"]["info_dict"]["validation_rule"] - == "standard_email_format", - lambda s: s["relationships"]["department"]["info_dict"]["description"] - == "The department this employee is assigned to.", - lambda s: s["relationships"]["department"]["nested_schema"][ - "table_name" - ] - == "departments", - lambda s: s["relationships"]["department"]["nested_schema"][ - "relationships" - ]["employees"]["nested_schema"]["recursion_detected_for_type"] - == "Employee", - lambda s: "employees.department_id" - in s["relationships"]["department"]["foreign_key_constraints_involved"], + lambda s: ( + s["columns"]["email"]["info_dict"]["validation_rule"] + == "standard_email_format" + ), + lambda s: ( + s["relationships"]["department"]["info_dict"]["description"] + == "The department this employee is assigned to." + ), + lambda s: ( + s["relationships"]["department"]["nested_schema"]["table_name"] + == "departments" + ), + lambda s: ( + s["relationships"]["department"]["nested_schema"]["relationships"][ + "employees" + ]["nested_schema"]["recursion_detected_for_type"] + == "Employee" + ), + lambda s: ( + "employees.department_id" + in s["relationships"]["department"][ + "foreign_key_constraints_involved" + ] + ), ], ), ( @@ -88,72 +100,93 @@ def inspector(engine): [ lambda s: s["comment"] == "Stores all company departments.", lambda s: s["columns"]["id"]["comment"] == "Unique Department ID (PK)", - lambda s: s["relationships"]["employees"]["info_dict"][ - "relationship_detail" - ] - == "All employees belonging to this department.", - lambda s: s["relationships"]["employees"]["nested_schema"]["table_name"] - == "employees", - lambda s: s["relationships"]["employees"]["nested_schema"][ - "relationships" - ]["department"]["nested_schema"]["recursion_detected_for_type"] - == "Department", + lambda s: ( + s["relationships"]["employees"]["info_dict"]["relationship_detail"] + == "All employees belonging to this department." + ), + lambda s: ( + s["relationships"]["employees"]["nested_schema"]["table_name"] + == "employees" + ), + lambda s: ( + s["relationships"]["employees"]["nested_schema"]["relationships"][ + "department" + ]["nested_schema"]["recursion_detected_for_type"] + == "Department" + ), ], ), ( Project, [ - lambda s: s["relationships"]["members"]["related_model_name"] - == "Member", - lambda s: s["relationships"]["members"]["secondary_table_name"] - == "project_member", - lambda s: "project_member.project_id" - in s["relationships"]["members"]["foreign_key_constraints_involved"], + lambda s: ( + s["relationships"]["members"]["related_model_name"] == "Member" + ), + lambda s: ( + s["relationships"]["members"]["secondary_table_name"] + == "project_member" + ), + lambda s: ( + "project_member.project_id" + in s["relationships"]["members"]["foreign_key_constraints_involved"] + ), ], ), ( TableModel, [ lambda s: s["table_name"] == "tables", - lambda s: s["relationships"]["supports"]["nested_schema"][ - "relationships" - ]["screws_list"]["nested_schema"]["relationships"]["support"][ - "nested_schema" - ]["recursion_detected_for_type"] - == "Support", + lambda s: ( + s["relationships"]["supports"]["nested_schema"]["relationships"][ + "screws_list" + ]["nested_schema"]["relationships"]["support"]["nested_schema"][ + "recursion_detected_for_type" + ] + == "Support" + ), ], ), (ModelWithColumnProperty, [lambda s: "data_length" not in s["columns"]]), ( ModelWithCustomColType, [ - lambda s: s["columns"]["custom_field"]["python_type"] - == "unknown_error_accessing_type" + lambda s: ( + s["columns"]["custom_field"]["python_type"] + == "unknown_error_accessing_type" + ) ], ), ( FKParent, [ - lambda s: "fk_child_sync.parent_id_col" - in s["relationships"]["children_sync"][ - "foreign_key_constraints_involved" - ] + lambda s: ( + "fk_child_sync.parent_id_col" + in s["relationships"]["children_sync"][ + "foreign_key_constraints_involved" + ] + ) ], ), ( FKParentDirect, [ - lambda s: "fk_child_direct.parent_fk_col_name" - in s["relationships"]["children_direct"][ - "foreign_key_constraints_involved" - ] + lambda s: ( + "fk_child_direct.parent_fk_col_name" + in s["relationships"]["children_direct"][ + "foreign_key_constraints_involved" + ] + ) ], ), ( ViewOnlyParent, [ - lambda s: "viewonly_child.parent_id" - in s["relationships"]["children"]["foreign_key_constraints_involved"] + lambda s: ( + "viewonly_child.parent_id" + in s["relationships"]["children"][ + "foreign_key_constraints_involved" + ] + ) ], ), ], @@ -343,8 +376,10 @@ def test_process_relationship_for_llm_schema( ), ( [Project, Member], - lambda s: "members_ref_ids" in s["Project"]["fields"] - and "projects_ref_ids" in s["Member"]["fields"], + lambda s: ( + "members_ref_ids" in s["Project"]["fields"] + and "projects_ref_ids" in s["Member"]["fields"] + ), ), ([], lambda s: s == {}), ( @@ -355,8 +390,10 @@ def test_process_relationship_for_llm_schema( ), ( [ArticleScenarioModel], - lambda s: "array[string]" - in s["ArticleScenarioModel"]["fields"]["key_topics"].lower(), + lambda s: ( + "array[string]" + in s["ArticleScenarioModel"]["fields"]["key_topics"].lower() + ), ), ( [PlainSQLAlchemyModelWithPydanticHints], diff --git a/tests/utils/test_alignment_utils.py b/tests/utils/test_alignment_utils.py index 20dbff3..8cf72fe 100644 --- a/tests/utils/test_alignment_utils.py +++ b/tests/utils/test_alignment_utils.py @@ -77,7 +77,7 @@ def test_align_entity_arrays(self): [{"id": 1, "name": "A"}, {"id": 2, "name": "B"}], [{"id": 1, "name": "A"}, {"id": 2, "name": "B"}], ], - "check": lambda res: (res[0][0]["id"] == 1 and res[1][0]["id"] == 1), + "check": lambda res: res[0][0]["id"] == 1 and res[1][0]["id"] == 1, }, { "name": "reorder needed", @@ -98,7 +98,7 @@ def test_align_entity_arrays(self): [{"id": 1, "val": "X"}], [{"id": 99, "val": "completely different"}], ], - "check": lambda res: (res[0][0]["id"] == 1 and res[1][0] is not None), + "check": lambda res: res[0][0]["id"] == 1 and res[1][0] is not None, }, { "name": "deeply nested objects", @@ -112,7 +112,7 @@ def test_align_entity_arrays(self): {"id": 1, "d": {"n": {"v": "deep"}}}, ], ], - "check": lambda res: (res[0][0]["id"] == 1 and res[1][0]["id"] == 1), + "check": lambda res: res[0][0]["id"] == 1 and res[1][0]["id"] == 1, }, { "name": "lists in objects", @@ -120,7 +120,7 @@ def test_align_entity_arrays(self): [{"id": 1, "tags": ["a", "b"]}, {"id": 2, "tags": ["x", "y"]}], [{"id": 2, "tags": ["x", "y"]}, {"id": 1, "tags": ["a", "b"]}], ], - "check": lambda res: (res[0][0]["id"] == 1 and res[1][0]["id"] == 1), + "check": lambda res: res[0][0]["id"] == 1 and res[1][0]["id"] == 1, }, { "name": "three arrays alignment",