From 581589965f197bef3ce83cea9fc2305f577de965 Mon Sep 17 00:00:00 2001 From: cramet Date: Thu, 23 Apr 2026 15:00:08 +0200 Subject: [PATCH] =?UTF-8?q?feat(import):=20Added=20Prioritization=20for=20?= =?UTF-8?q?GEOM=20Now,=20WKT=20>=20X/Y=20>=20code=20Maille=20>=20Code=20Mu?= =?UTF-8?q?ncipalit=C3=A9=20>=20Code=20departement=20.=20If=20two=20of=20t?= =?UTF-8?q?hem=20are=20present=20we=20return=20a=20new=20warning=20:MULTIP?= =?UTF-8?q?LE=5FGEO=5FINFO=5FWARNING?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../geonature/core/gn_synthese/imports/geo.py | 2 +- .../core/imports/checks/dataframe/geometry.py | 65 ++++++++----------- .../core/imports/checks/dataframe/utils.py | 18 +++-- .../geonature/core/imports/checks/errors.py | 3 +- ...1feb3f297_add_multiple_geo_info_warning.py | 42 ++++++++++++ .../imports/files/synthese/geom_file.csv | 39 +++++------ .../tests/imports/test_dataframe_checks.py | 11 +--- .../tests/imports/test_imports_synthese.py | 62 ++++++++++++++++-- backend/geonature/tests/test_utils.py | 1 - 9 files changed, 164 insertions(+), 79 deletions(-) create mode 100644 backend/geonature/migrations/versions/f6a1feb3f297_add_multiple_geo_info_warning.py diff --git a/backend/geonature/core/gn_synthese/imports/geo.py b/backend/geonature/core/gn_synthese/imports/geo.py index 043971b3fd..8991613090 100644 --- a/backend/geonature/core/gn_synthese/imports/geo.py +++ b/backend/geonature/core/gn_synthese/imports/geo.py @@ -57,9 +57,9 @@ def set_geom_columns_from_area_codes( transient_table = imprt.destination.get_transient_table() for field, area_type_filter in [ + (codemaille_field, BibAreasTypes.type_code.in_(["M1", "M5", "M10"])), (codecommune_field, BibAreasTypes.type_code == "COM"), (codedepartement_field, BibAreasTypes.type_code == "DEP"), - (codemaille_field, BibAreasTypes.type_code.in_(["M1", "M5", "M10"])), ]: if field is None: continue diff --git a/backend/geonature/core/imports/checks/dataframe/geometry.py 
b/backend/geonature/core/imports/checks/dataframe/geometry.py index 952087b5b3..34322ae282 100644 --- a/backend/geonature/core/imports/checks/dataframe/geometry.py +++ b/backend/geonature/core/imports/checks/dataframe/geometry.py @@ -95,8 +95,9 @@ def check_geometry( """ What this check do: - - check there is at least a wkt, a x/y or a code defined for each row - (report NO-GEOM if there are not, or MULTIPLE_ATTACHMENT_TYPE_CODE if several are defined) + - check there is at least a wkt, a x/y or a code defined for each row. If multiple are defined, we use this priority: + `wkt > x/y> code` + (report NO-GEOM if there are not, or MULTIPLE_GEO_INFO_WARNING if several are defined) - set geom_local or geom_4326 or both (depending of file_srid) from wkt or x/y - check wkt validity - check x/y validity @@ -148,6 +149,12 @@ def check_geometry( geom = pd.Series(name="geom", index=df.index, dtype="object") + wkt_mask = pd.Series(False, index=df.index) + xy_mask = pd.Series(False, index=df.index) + codemaille_mask = pd.Series(False, index=df.index) + codecommune_mask = pd.Series(False, index=df.index) + codedepartement_mask = pd.Series(False, index=df.index) + if wkt_col and wkt_col in df: wkt_mask = df[wkt_col].notnull() if wkt_mask.any(): @@ -159,34 +166,23 @@ def check_geometry( "column": "WKT", "invalid_rows": invalid_wkt, } - else: - wkt_mask = pd.Series(False, index=df.index) + if latitude_col and latitude_col in df and longitude_col and longitude_col in df: - # take xy when no wkt and xy are not null xy_mask = df[latitude_col].notnull() & df[longitude_col].notnull() + xy_mask_effective = ( + xy_mask & ~wkt_mask + ) # This mask is necessary so we don't override wkt if it already exists. 
- if xy_mask.any(): + if xy_mask_effective.any(): - geom.loc[xy_mask] = df[xy_mask].apply( + geom.loc[xy_mask_effective] = df[xy_mask_effective].apply( lambda row: xy_to_geometry(row[longitude_col], row[latitude_col]), axis=1 ) - invalid_xy = df[xy_mask & geom.isnull()] + invalid_xy = df[xy_mask_effective & geom.isnull()] if not invalid_xy.empty: yield { "error_code": ImportCodeError.INVALID_GEOMETRY, "column": "longitude", "invalid_rows": invalid_xy, } - else: - xy_mask = pd.Series(False, index=df.index) - - # Check multiple geo-referencement - multiple_georef = df[wkt_mask & xy_mask] - if len(multiple_georef): - geom[wkt_mask & xy_mask] = None - yield { - "error_code": ImportCodeError.MULTIPLE_ATTACHMENT_TYPE_CODE, - "column": "Champs géométriques", - "invalid_rows": multiple_georef, - } # Check out-of-bound geo-referencement for mask, column in [(wkt_mask, "WKT"), (xy_mask, "longitude")]: @@ -204,32 +200,27 @@ def check_geometry( if codecommune_col and codecommune_col in df: codecommune_mask = df[codecommune_col].notnull() - else: - codecommune_mask = pd.Series(False, index=df.index) + if codemaille_col and codemaille_col in df: codemaille_mask = df[codemaille_col].notnull() - else: - codemaille_mask = pd.Series(False, index=df.index) + if codedepartement_col and codedepartement_col in df: codedepartement_mask = df[codedepartement_col].notnull() - else: - codedepartement_mask = pd.Series(False, index=df.index) - # Check for multiple code when no wkt or xy + # Warn when several kinds of geo information (wkt, x/y, codes) are set on a row - multiple_code = df[ - ~wkt_mask - & ~xy_mask - & ( - (codecommune_mask & codemaille_mask) - | (codecommune_mask & codedepartement_mask) - | (codemaille_mask & codedepartement_mask) - ) - ] - if len(multiple_code): + num_geom_types = ( + wkt_mask.astype(int) + + xy_mask.astype(int) + + codecommune_mask.astype(int) + + codemaille_mask.astype(int) + + codedepartement_mask.astype(int) + ) + multiple_geom_types = df[num_geom_types >= 2] + if len(multiple_geom_types): yield { "error_code": 
ImportCodeError.MULTIPLE_GEO_INFO_WARNING, "column": "Champs géométriques", - "invalid_rows": multiple_code, + "invalid_rows": multiple_geom_types, } if file_srid == 4326: diff --git a/backend/geonature/core/imports/checks/dataframe/utils.py b/backend/geonature/core/imports/checks/dataframe/utils.py index 6dff9ff6d1..2423c83631 100644 --- a/backend/geonature/core/imports/checks/dataframe/utils.py +++ b/backend/geonature/core/imports/checks/dataframe/utils.py @@ -16,6 +16,10 @@ def dataframe_check(check_function): Decorator for check functions. Check functions must yield errors, and return updated_cols (or None if no column have been modified). + + The error level (ERROR or WARNING) is determined by the ImportUserErrorType.level column. + - If level == "ERROR": the row is marked as invalid + - If level == "WARNING": the error is reported but row remains valid """ parameters = signature(check_function).parameters @@ -128,7 +132,7 @@ def report_error(imprt: TImports, entity, df, error): Returns ------- set - set containing the name of the entity validity column. + set containing the name of the entity validity column (only if error level is "ERROR"). 
Raises ------ @@ -142,9 +146,11 @@ def report_error(imprt: TImports, entity, df, error): except NoResultFound: raise Exception(f"Error code '{error['error_code']}' not found.") invalid_rows = error["invalid_rows"] - df.loc[invalid_rows.index, entity.validity_column] = False - # df['gn_invalid_reason'][invalid_rows.index.intersection(df['gn_invalid_reason'].isnull())] = \ - # f'{error_type.name}' # FIXME comment + + # Only mark row as invalid if error level is "ERROR" + if error_type.level == "ERROR": + df.loc[invalid_rows.index, entity.validity_column] = False + ordered_invalid_rows = sorted(invalid_rows["line_no"]) column = generated_fields.get(error["column"], error["column"]) column = imprt.fieldmapping.get(column, {}).get("column_src", column) @@ -168,4 +174,6 @@ def report_error(imprt: TImports, entity, df, error): }, ) db.session.execute(stmt) - return {entity.validity_column} + + if error_type.level == "ERROR": + return {entity.validity_column} diff --git a/backend/geonature/core/imports/checks/errors.py b/backend/geonature/core/imports/checks/errors.py index 0e2d05004d..2917fdb475 100644 --- a/backend/geonature/core/imports/checks/errors.py +++ b/backend/geonature/core/imports/checks/errors.py @@ -128,8 +128,7 @@ class ImportCodeError: DATASET_NOT_FOUND = "DATASET_NOT_FOUND" DATASET_NOT_AUTHORIZED = "DATASET_NOT_AUTHORIZED" DATASET_NOT_ACTIVE = "DATASET_NOT_ACTIVE" - MULTIPLE_ATTACHMENT_TYPE_CODE = "MULTIPLE_ATTACHMENT_TYPE_CODE" - MULTIPLE_CODE_ATTACHMENT = "MULTIPLE_CODE_ATTACHMENT" + MULTIPLE_GEO_INFO_WARNING = "MULTIPLE_GEO_INFO_WARNING" # Invalid type error INVALID_DATE = "INVALID_DATE" diff --git a/backend/geonature/migrations/versions/f6a1feb3f297_add_multiple_geo_info_warning.py b/backend/geonature/migrations/versions/f6a1feb3f297_add_multiple_geo_info_warning.py new file mode 100644 index 0000000000..6e826fdc6a --- /dev/null +++ b/backend/geonature/migrations/versions/f6a1feb3f297_add_multiple_geo_info_warning.py @@ -0,0 +1,42 @@ +"""[import] add 
MULTIPLE_GEO_INFO_WARNING error type + +Revision ID: f6a1feb3f297 +Revises: cb663f039774 +Create Date: 2026-04-23 10:00:00.000000 + +""" + +from alembic import op +import sqlalchemy as sa +from sqlalchemy.schema import Table, MetaData + +# revision identifiers, used by Alembic. +revision = "f6a1feb3f297" +down_revision = "cb663f039774" +branch_labels = None +depends_on = None + + +def upgrade(): + metadata = MetaData(bind=op.get_bind()) + bib_errors_types = Table("bib_errors_types", metadata, schema="gn_imports", autoload=True) + op.execute( + sa.insert(bib_errors_types).values( + error_type="Géometrie", + name="MULTIPLE_GEO_INFO_WARNING", + description=( + "Plusieurs informations de géoréférencement ont été remplies pour une même ligne. " + "La plus précise a été sélectionnée automatiquement selon cet ordre de priorité : " + "WKT > longitude/latitude > Maille > Commune > Département" + ), + error_level="WARNING", + ) + ) + + +def downgrade(): + metadata = MetaData(bind=op.get_bind()) + bib_errors_types = Table("bib_errors_types", metadata, schema="gn_imports", autoload=True) + op.execute( + sa.delete(bib_errors_types).where(bib_errors_types.c.name == "MULTIPLE_GEO_INFO_WARNING") + ) diff --git a/backend/geonature/tests/imports/files/synthese/geom_file.csv b/backend/geonature/tests/imports/files/synthese/geom_file.csv index 2fc4c23d41..59f54021a1 100644 --- a/backend/geonature/tests/imports/files/synthese/geom_file.csv +++ b/backend/geonature/tests/imports/files/synthese/geom_file.csv @@ -1,21 +1,22 @@ date_min;cd_nom;nom_cite;observers;WKT;latitude;longitude;codecommune;codedepartement;codemaille;erreur attendue 2017-01-01;67111;Ablette;Toto;;;;13088;;;Valide (codecommune) -2017-01-01;67111;Ablette;Toto;;;;code com invalide;;;INVALID_ATTACHMENT_CODE (codecommune) -2017-01-01;67111;Ablette;Toto;;;;;13;;Valide (codedépartement) # FIXME invalide altitude_min (bord de mer) -2017-01-01;67111;Ablette;Toto;;;;;code dep invalide;;INVALID_ATTACHMENT_CODE (codedepartement) 
-2017-01-01;67111;Ablette;Toto;;;;;;5kmL93E0905N6250;Valide (codemaille) -2017-01-01;67111;Ablette;Toto;;;;;;code maille invalide;INVALID_ATTACHMENT_CODE (codemaille) -2017-01-01;67111;Ablette;Toto;;;;5101;5;5kmL93E0905N6250;MULTIPLE_CODE_ATTACHMENT -2017-01-01;67111;Ablette;Toto;POINT(5.4877 43.3056);;;5101;5;5kmL93E0905N6250;Valide (WKT) -2017-01-01;67111;Ablette;Toto;;43.3056;5.4877;5101;5;5kmL93E0905N6250;Valide (X/Y) -2017-01-01;67111;Ablette;Toto;POINT(5.4877 43.3056);44.85;6.5;5101;5;5kmL93E0905N6250;MULTIPLE_ATTACHMENT_TYPE_CODE -2017-01-01;67111;Ablette;Toto;POINT(5.4877 43.3056);;;;;;Valide (WKT) -2017-01-01;67111;Ablette;Toto;;43.3056;5.4877;;;;Valide (X/Y) -2017-01-01;67111;Ablette;Toto;;43,3056;5,4877;;;;Valide (X/Y) -2017-01-01;67111;Ablette;Toto;POINT(6.5 44.85);44.85;6.5;;;;MULTIPLE_ATTACHMENT_TYPE_CODE -2017-01-01;67111;Ablette;Toto;;;;;;;NO-GEOM -2017-01-01;67111;Ablette;Toto;POLYGON((0 0, 1 1, 1 2, 1 1, 0 0));;;;;;INVALID_GEOMETRY -2017-01-01;67111;Ablette;Toto;POINT(6.5 44.85);;;;;;GEOMETRY_OUTSIDE -2017-01-01;67111;Ablette;Toto;;44.85;6.5;;;;GEOMETRY_OUTSIDE -2017-01-01;67111;Ablette;Toto;;44.85;;;;;MISSING_VALUE(longitude) -2017-01-01;67111;Ablette;Toto;;;6.5;;;;MISSING_VALUE(latitude) +2017-01-02;67111;Ablette;Toto;;;;code com invalide;;;INVALID_ATTACHMENT_CODE (codecommune) +2017-01-03;67111;Ablette;Toto;;;;;13;;Valide (codedépartement) # FIXME invalide altitude_min (bord de mer) +2017-01-04;67111;Ablette;Toto;;;;;code dep invalide;;INVALID_ATTACHMENT_CODE (codedepartement) +2017-01-05;67111;Ablette;Toto;;;;;;5kmL93E0905N6250;Valide (codemaille) +2017-01-06;67111;Ablette;Toto;;;;;;code maille invalide;INVALID_ATTACHMENT_CODE (codemaille) +2017-01-07;67111;Ablette;Toto;;;;5101;5;5kmL93E0905N6250;MULTIPLE_GEO_INFO_WARNING (Use code maille) +2017-01-08;67111;Ablette;Toto;POINT(5.4877 43.3056);;;5101;5;5kmL93E0905N6250;MULTIPLE_GEO_INFO_WARNING (use WKT) +2017-01-09;67111;Ablette;Toto;;43.5;5.6;5101;5;5kmL93E0905N6250;MULTIPLE_GEO_INFO_WARNING 
(use xy) +2017-01-10;67111;Ablette;Toto;POINT(5.4877 43.3056);43.5;5.6;5101;5;5kmL93E0905N6250;MULTIPLE_GEO_INFO_WARNING (use WKT) +2017-01-11;67111;Ablette;Toto;POINT(5.4877 43.3056);;;;;;Valide (WKT) +2017-01-12;67111;Ablette;Toto;;43.5;5.6;;;;Valide (X/Y) +2017-01-13;67111;Ablette;Toto;;43.5;5.6;;;;Valide (X/Y) +2017-01-14;67111;Ablette;Toto;POINT(5.4877 43.3056);43.5;5.6;;;;MULTIPLE_GEO_INFO_WARNING (use WKT) +2017-01-15;67111;Ablette;Toto;;;;;;;NO-GEOM +2017-01-16;67111;Ablette;Toto;POLYGON((0 0, 1 1, 1 2, 1 1, 0 0));;;;;;INVALID_GEOMETRY +2017-01-17;67111;Ablette;Toto;POINT(6.5 44.85);;;;;;GEOMETRY_OUTSIDE +2017-01-18;67111;Ablette;Toto;;44.85;6.5;;;;GEOMETRY_OUTSIDE +2017-01-19;67111;Ablette;Toto;;43.5;;;;;MISSING_VALUE(longitude) +2017-01-20;67111;Ablette;Toto;;;5.6;;;;MISSING_VALUE(latitude) +2017-01-21;67111;Ablette;Toto;;;;13088;5;"";MULTIPLE_GEO_INFO_WARNING (Use code commune) diff --git a/backend/geonature/tests/imports/test_dataframe_checks.py b/backend/geonature/tests/imports/test_dataframe_checks.py index 4936342f8a..8ba930e3b3 100644 --- a/backend/geonature/tests/imports/test_dataframe_checks.py +++ b/backend/geonature/tests/imports/test_dataframe_checks.py @@ -299,7 +299,7 @@ def test_check_geography(self, imprt): Error( error_code=ImportCodeError.GEOMETRY_OUT_OF_BOX, column="WKT", - invalid_rows=frozenset([5]), + invalid_rows=frozenset([5, 7]), ), Error( error_code=ImportCodeError.GEOMETRY_OUT_OF_BOX, @@ -307,14 +307,9 @@ def test_check_geography(self, imprt): invalid_rows=frozenset([6]), ), Error( - error_code=ImportCodeError.MULTIPLE_ATTACHMENT_TYPE_CODE, + error_code=ImportCodeError.MULTIPLE_GEO_INFO_WARNING, column="Champs géométriques", - invalid_rows=frozenset([7]), - ), - Error( - error_code=ImportCodeError.MULTIPLE_CODE_ATTACHMENT, - column="Champs géométriques", - invalid_rows=frozenset([8, 9, 10, 11]), + invalid_rows=frozenset([7, 8, 9, 10, 11, 17, 18]), ), Error( error_code=ImportCodeError.INVALID_WKT, diff --git 
a/backend/geonature/tests/imports/test_imports_synthese.py b/backend/geonature/tests/imports/test_imports_synthese.py index 16482bbad6..488fc4ec44 100644 --- a/backend/geonature/tests/imports/test_imports_synthese.py +++ b/backend/geonature/tests/imports/test_imports_synthese.py @@ -4,7 +4,6 @@ from operator import or_ from functools import reduce import csv -import json from geonature.core.imports.checks.errors import ImportCodeError from geonature.core.imports.checks.sql.user import user_matching @@ -26,14 +25,14 @@ ) from geonature.core.gn_permissions.models import PermAction, Permission, PermObject from geonature.core.gn_commons.models import TModules -from geonature.core.gn_meta.models import TDatasets, TAcquisitionFramework from geonature.core.gn_synthese.models import CorObserverSynthese, Synthese from geonature.tests.fixtures import synthese_data, celery_eager from pypnusershub.db.models import User, Organisme from pypnnomenclature.models import TNomenclatures, BibNomenclaturesTypes from ref_geo.tests.test_ref_geo import has_french_dem -from ref_geo.models import LAreas +from ref_geo.models import LAreas, BibAreasTypes +from geoalchemy2.elements import WKTElement from geonature.core.imports.models import ( TImports, @@ -841,11 +840,10 @@ def test_import_geometry_file(self, area_restriction, prepared_import): (ImportCodeError.INVALID_ATTACHMENT_CODE, "codecommune", frozenset([3])), (ImportCodeError.INVALID_ATTACHMENT_CODE, "codedepartement", frozenset([5])), (ImportCodeError.INVALID_ATTACHMENT_CODE, "codemaille", frozenset([7])), - (ImportCodeError.MULTIPLE_CODE_ATTACHMENT, "Champs géométriques", frozenset([8])), ( - ImportCodeError.MULTIPLE_ATTACHMENT_TYPE_CODE, + ImportCodeError.MULTIPLE_GEO_INFO_WARNING, "Champs géométriques", - frozenset([11, 15]), + frozenset([8, 9, 10, 11, 15, 22]), ), (ImportCodeError.NO_GEOM, "Champs géométriques", frozenset([16])), (ImportCodeError.INVALID_GEOMETRY, "WKT", frozenset([17])), @@ -854,6 +852,58 @@ def 
test_import_geometry_file(self, area_restriction, prepared_import): (ImportCodeError.MISSING_VALUE, "longitude", frozenset([20])), }, ) + transient_table = prepared_import.destination.get_transient_table() + + geoms = db.session.execute( + select( + transient_table.c.line_no, + transient_table.c.the_geom_local, + ) + .where(transient_table.c.id_import == prepared_import.id_import) + .order_by(transient_table.c.line_no) + ).fetchall() + + def get_geom_from_code(code: str) -> WKTElement: + return db.session.execute(select(LAreas.geom).where(LAreas.area_code == code)).scalar() + + def geom_are_equal(geom1, geom2): + return db.session.execute(select(func.ST_Equals(geom1, geom2))).scalar() + + line_expected_geom = { + "maille": [8], + "commune": [2, 22], + "département": [4], + "wkt": [9, 11, 12, 15], + "x/y": [10, 13, 14], + } + for line_no, geom in geoms: + if line_no in line_expected_geom["commune"]: + assert geom is not None, f"Ligne {line_no}: géométrie manquante" + assert geom_are_equal( + geom, get_geom_from_code("13088") + ), f"Ligne {line_no}: géométrie ne correspond pas, type attendu commune" + elif line_no in line_expected_geom["département"]: + assert geom is not None, f"Ligne {line_no}: géométrie manquante" + assert geom_are_equal( + geom, get_geom_from_code("13") + ), f"Ligne {line_no}: géométrie ne correspond pas, type attendu département" + elif line_no in line_expected_geom["maille"]: + assert geom is not None, f"Ligne {line_no}: géométrie manquante" + assert geom_are_equal( + geom, get_geom_from_code("5kmL93E0905N6250") + ), f"Ligne {line_no}: géométrie ne correspond pas, type attendu maille" + elif line_no in line_expected_geom["wkt"]: + assert geom is not None, f"Ligne {line_no}: géométrie WKT manquante" + expected = func.ST_Transform(WKTElement("POINT(5.4877 43.3056)", 4326), geom.srid) + assert geom_are_equal( + geom, expected + ), f"Ligne {line_no}: géométrie WKT ne correspond pas" + elif line_no in line_expected_geom["x/y"]: + assert geom is not None, f"Ligne
{line_no}: géométrie X/Y manquante" + expected = func.ST_Transform(WKTElement("POINT(5.6 43.5)", 4326), geom.srid) + assert geom_are_equal( + geom, expected + ), f"Ligne {line_no}: géométrie X/Y ne correspond pas" @pytest.mark.parametrize("import_file_name,fieldmapping_preset_name", [("cd_file.csv", None)]) def test_import_cd_file(self, change_id_list_conf, prepared_import): diff --git a/backend/geonature/tests/test_utils.py b/backend/geonature/tests/test_utils.py index 8b001b4bab..02f01b143d 100644 --- a/backend/geonature/tests/test_utils.py +++ b/backend/geonature/tests/test_utils.py @@ -18,7 +18,6 @@ from .fixtures import * - ############################################################################# # BASIC TEMPLATE CONFIG FILE #############################################################################