Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion src/extrai/core/batch/batch_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,9 @@ async def _process_counting_completion(
lines = []
if isinstance(results_content, str):
lines = [
li.strip() for li in results_content.strip().split("\n") if li.strip()
li.strip()
for li in results_content.strip().split("\n")
if li.strip()
]
elif isinstance(results_content, list):
lines = results_content
Expand Down
1 change: 0 additions & 1 deletion src/extrai/core/counting_consensus.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,6 @@ async def achieve_consensus(
# Step 2c: Discrepancy & Fallback (LLM Resolution)
self.logger.warning("Counting consensus failed. Triggering Merger LLM Call.")


# We need to recreate the system prompt but with conflicting_revisions injected.
# However, we only have the raw `system_prompt` string.
# Actually, if we're inside the LLM call, we can append the revisions manually
Expand Down
145 changes: 91 additions & 54 deletions tests/core/test_schema_inspector.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,97 +63,130 @@ def inspector(engine):
(
Employee,
[
lambda s: s["comment"]
== "Stores detailed information about company employees.",
lambda s: (
s["comment"]
== "Stores detailed information about company employees."
),
lambda s: s["info_dict"] == {"confidentiality": "high"},
lambda s: s["columns"]["id"]["comment"] == "Unique Employee ID (PK)",
lambda s: s["columns"]["email"]["info_dict"]["validation_rule"]
== "standard_email_format",
lambda s: s["relationships"]["department"]["info_dict"]["description"]
== "The department this employee is assigned to.",
lambda s: s["relationships"]["department"]["nested_schema"][
"table_name"
]
== "departments",
lambda s: s["relationships"]["department"]["nested_schema"][
"relationships"
]["employees"]["nested_schema"]["recursion_detected_for_type"]
== "Employee",
lambda s: "employees.department_id"
in s["relationships"]["department"]["foreign_key_constraints_involved"],
lambda s: (
s["columns"]["email"]["info_dict"]["validation_rule"]
== "standard_email_format"
),
lambda s: (
s["relationships"]["department"]["info_dict"]["description"]
== "The department this employee is assigned to."
),
lambda s: (
s["relationships"]["department"]["nested_schema"]["table_name"]
== "departments"
),
lambda s: (
s["relationships"]["department"]["nested_schema"]["relationships"][
"employees"
]["nested_schema"]["recursion_detected_for_type"]
== "Employee"
),
lambda s: (
"employees.department_id"
in s["relationships"]["department"][
"foreign_key_constraints_involved"
]
),
],
),
(
Department,
[
lambda s: s["comment"] == "Stores all company departments.",
lambda s: s["columns"]["id"]["comment"] == "Unique Department ID (PK)",
lambda s: s["relationships"]["employees"]["info_dict"][
"relationship_detail"
]
== "All employees belonging to this department.",
lambda s: s["relationships"]["employees"]["nested_schema"]["table_name"]
== "employees",
lambda s: s["relationships"]["employees"]["nested_schema"][
"relationships"
]["department"]["nested_schema"]["recursion_detected_for_type"]
== "Department",
lambda s: (
s["relationships"]["employees"]["info_dict"]["relationship_detail"]
== "All employees belonging to this department."
),
lambda s: (
s["relationships"]["employees"]["nested_schema"]["table_name"]
== "employees"
),
lambda s: (
s["relationships"]["employees"]["nested_schema"]["relationships"][
"department"
]["nested_schema"]["recursion_detected_for_type"]
== "Department"
),
],
),
(
Project,
[
lambda s: s["relationships"]["members"]["related_model_name"]
== "Member",
lambda s: s["relationships"]["members"]["secondary_table_name"]
== "project_member",
lambda s: "project_member.project_id"
in s["relationships"]["members"]["foreign_key_constraints_involved"],
lambda s: (
s["relationships"]["members"]["related_model_name"] == "Member"
),
lambda s: (
s["relationships"]["members"]["secondary_table_name"]
== "project_member"
),
lambda s: (
"project_member.project_id"
in s["relationships"]["members"]["foreign_key_constraints_involved"]
),
],
),
(
TableModel,
[
lambda s: s["table_name"] == "tables",
lambda s: s["relationships"]["supports"]["nested_schema"][
"relationships"
]["screws_list"]["nested_schema"]["relationships"]["support"][
"nested_schema"
]["recursion_detected_for_type"]
== "Support",
lambda s: (
s["relationships"]["supports"]["nested_schema"]["relationships"][
"screws_list"
]["nested_schema"]["relationships"]["support"]["nested_schema"][
"recursion_detected_for_type"
]
== "Support"
),
],
),
(ModelWithColumnProperty, [lambda s: "data_length" not in s["columns"]]),
(
ModelWithCustomColType,
[
lambda s: s["columns"]["custom_field"]["python_type"]
== "unknown_error_accessing_type"
lambda s: (
s["columns"]["custom_field"]["python_type"]
== "unknown_error_accessing_type"
)
],
),
(
FKParent,
[
lambda s: "fk_child_sync.parent_id_col"
in s["relationships"]["children_sync"][
"foreign_key_constraints_involved"
]
lambda s: (
"fk_child_sync.parent_id_col"
in s["relationships"]["children_sync"][
"foreign_key_constraints_involved"
]
)
],
),
(
FKParentDirect,
[
lambda s: "fk_child_direct.parent_fk_col_name"
in s["relationships"]["children_direct"][
"foreign_key_constraints_involved"
]
lambda s: (
"fk_child_direct.parent_fk_col_name"
in s["relationships"]["children_direct"][
"foreign_key_constraints_involved"
]
)
],
),
(
ViewOnlyParent,
[
lambda s: "viewonly_child.parent_id"
in s["relationships"]["children"]["foreign_key_constraints_involved"]
lambda s: (
"viewonly_child.parent_id"
in s["relationships"]["children"][
"foreign_key_constraints_involved"
]
)
],
),
],
Expand Down Expand Up @@ -343,8 +376,10 @@ def test_process_relationship_for_llm_schema(
),
(
[Project, Member],
lambda s: "members_ref_ids" in s["Project"]["fields"]
and "projects_ref_ids" in s["Member"]["fields"],
lambda s: (
"members_ref_ids" in s["Project"]["fields"]
and "projects_ref_ids" in s["Member"]["fields"]
),
),
([], lambda s: s == {}),
(
Expand All @@ -355,8 +390,10 @@ def test_process_relationship_for_llm_schema(
),
(
[ArticleScenarioModel],
lambda s: "array[string]"
in s["ArticleScenarioModel"]["fields"]["key_topics"].lower(),
lambda s: (
"array[string]"
in s["ArticleScenarioModel"]["fields"]["key_topics"].lower()
),
),
(
[PlainSQLAlchemyModelWithPydanticHints],
Expand Down
8 changes: 4 additions & 4 deletions tests/utils/test_alignment_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def test_align_entity_arrays(self):
[{"id": 1, "name": "A"}, {"id": 2, "name": "B"}],
[{"id": 1, "name": "A"}, {"id": 2, "name": "B"}],
],
"check": lambda res: (res[0][0]["id"] == 1 and res[1][0]["id"] == 1),
"check": lambda res: res[0][0]["id"] == 1 and res[1][0]["id"] == 1,
},
{
"name": "reorder needed",
Expand All @@ -98,7 +98,7 @@ def test_align_entity_arrays(self):
[{"id": 1, "val": "X"}],
[{"id": 99, "val": "completely different"}],
],
"check": lambda res: (res[0][0]["id"] == 1 and res[1][0] is not None),
"check": lambda res: res[0][0]["id"] == 1 and res[1][0] is not None,
},
{
"name": "deeply nested objects",
Expand All @@ -112,15 +112,15 @@ def test_align_entity_arrays(self):
{"id": 1, "d": {"n": {"v": "deep"}}},
],
],
"check": lambda res: (res[0][0]["id"] == 1 and res[1][0]["id"] == 1),
"check": lambda res: res[0][0]["id"] == 1 and res[1][0]["id"] == 1,
},
{
"name": "lists in objects",
"input": [
[{"id": 1, "tags": ["a", "b"]}, {"id": 2, "tags": ["x", "y"]}],
[{"id": 2, "tags": ["x", "y"]}, {"id": 1, "tags": ["a", "b"]}],
],
"check": lambda res: (res[0][0]["id"] == 1 and res[1][0]["id"] == 1),
"check": lambda res: res[0][0]["id"] == 1 and res[1][0]["id"] == 1,
},
{
"name": "three arrays alignment",
Expand Down
Loading