From 72ec9f686df6802c17781d12dea539e10ea16d5f Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 26 Oct 2025 12:52:33 +0000 Subject: [PATCH 1/3] Initial plan From 6c3283eacb8efa58b6dea709032bffed4cc257f2 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 26 Oct 2025 13:00:20 +0000 Subject: [PATCH 2/3] Add improved error message for characterize with wrong column names Co-authored-by: konstantinstadler <6782923+konstantinstadler@users.noreply.github.com> --- pymrio/core/mriosystem.py | 51 ++++++++++++++++++++++++++++++++++----- tests/test_core.py | 50 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+), 6 deletions(-) diff --git a/pymrio/core/mriosystem.py b/pymrio/core/mriosystem.py index 4929dc20..d5e43308 100644 --- a/pymrio/core/mriosystem.py +++ b/pymrio/core/mriosystem.py @@ -1788,12 +1788,51 @@ def characterize( ) return ret_value(validation=validation, extension=None) - fac_calc = ( - factors.set_index(index_col + [characterized_name_column]) - .loc[:, characterization_factors_column] - .unstack(characterized_name_column) - .fillna(0) - ) + # Check for duplicate indices before unstacking + # This can happen if region/sector columns are not named correctly + factors_indexed = factors.set_index(index_col + [characterized_name_column]) + + if factors_indexed.index.duplicated().any(): + # Provide a helpful error message + error_msg = ( + "Duplicate indices found in characterization factors. " + "This typically occurs when region or sector specific characterization factors " + "are provided but the column names don't match pymrio's expectations.\n\n" + "Expected column names:\n" + " - 'region' (lowercase) for region-specific factors\n" + " - 'sector' (lowercase) for sector-specific factors\n\n" + f"Current columns in factors dataframe: {list(factors.columns)}\n\n" + ) + + # Check for common misnaming patterns + possible_region_cols = [col for col in factors.columns if col.lower() == "region" and col != "region"] + possible_sector_cols = [col for col in factors.columns if col.lower() == "sector" and col != "sector"] + + if possible_region_cols: + error_msg += f"Found possible region column with different case: {possible_region_cols}\n" + error_msg += "Please rename it to 'region' (lowercase).\n" + if possible_sector_cols: + error_msg += f"Found possible sector column with different case: {possible_sector_cols}\n" + error_msg += "Please rename it to 'sector' (lowercase).\n" + + if not possible_region_cols and not possible_sector_cols: + # Check for other common region/sector column names + common_region_names = ["country", "countries", "regions", "reg", "location"] + common_sector_names = ["sectors", "industry", "industries", "sec", "activity"] + + found_region_alternatives = [col for col in factors.columns if col.lower() in common_region_names] + found_sector_alternatives = [col for col in factors.columns if col.lower() in common_sector_names] + + if found_region_alternatives: + error_msg += f"Found possible alternative region column names: {found_region_alternatives}\n" + error_msg += "Please rename to 'region' (lowercase) if these are region identifiers.\n" + if found_sector_alternatives: + error_msg += f"Found possible alternative sector column names: {found_sector_alternatives}\n" + error_msg += "Please rename to 'sector' (lowercase) if these are sector identifiers.\n" + + raise ValueError(error_msg) + + fac_calc = factors_indexed.loc[:, characterization_factors_column].unstack(characterized_name_column).fillna(0) new_ext = Extension(name=name) diff --git a/tests/test_core.py b/tests/test_core.py index 9a616b01..6b280a36 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -747,6 +747,56 @@ def test_characterize_extension_over_extensions(fix_testmrio): pdt.assert_frame_equal(ex_reg_one.unit, ex_reg_method.unit) +def test_characterize_error_wrong_column_names(fix_testmrio): + """Test improved error message when region/sector column names are wrong.""" + tmrio = fix_testmrio.testmrio + + # Load region-specific factors + factors_reg_spec = pd.read_csv( + Path(PYMRIO_PATH["test_mrio"] / Path("concordance") / "emissions_charact_reg_spec.tsv"), + sep="\t", + ) + + # Test case 1: Wrong case for region column (Region instead of region) + factors_wrong_case = factors_reg_spec.copy() + factors_wrong_case = factors_wrong_case.rename(columns={"region": "Region"}) + + with pytest.raises(ValueError) as exc_info: + tmrio.emissions.characterize(factors_wrong_case) + + error_msg = str(exc_info.value) + assert "Duplicate indices found" in error_msg + assert "column names don't match pymrio's expectations" in error_msg + assert "'region' (lowercase)" in error_msg + assert "Found possible region column with different case: ['Region']" in error_msg + + # Test case 2: Alternative column name (country instead of region) + factors_country = factors_reg_spec.copy() + factors_country = factors_country.rename(columns={"region": "country"}) + + with pytest.raises(ValueError) as exc_info: + tmrio.emissions.characterize(factors_country) + + error_msg = str(exc_info.value) + assert "Duplicate indices found" in error_msg + assert "Found possible alternative region column names: ['country']" in error_msg + + # Test case 3: Abbreviated column name (reg instead of region) + factors_reg = factors_reg_spec.copy() + factors_reg = factors_reg.rename(columns={"region": "reg"}) + + with pytest.raises(ValueError) as exc_info: + tmrio.emissions.characterize(factors_reg) + + error_msg = str(exc_info.value) + assert "Duplicate indices found" in error_msg + assert "Found possible alternative region column names: ['reg']" in error_msg + + # Test case 4: Verify correct column names still work + result = tmrio.emissions.characterize(factors_reg_spec) + assert result.extension is not None + + def test_extension_convert_simple(fix_testmrio): """Testing the convert function within extensions object.""" tt_pre = fix_testmrio.testmrio.copy() From 171a5c7586f9ada781b65890271cb5dc3252381e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 26 Oct 2025 13:04:02 +0000 Subject: [PATCH 3/3] Refactor characterize error handling based on code review Co-authored-by: konstantinstadler <6782923+konstantinstadler@users.noreply.github.com> --- pymrio/core/mriosystem.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/pymrio/core/mriosystem.py b/pymrio/core/mriosystem.py index d5e43308..9781071e 100644 --- a/pymrio/core/mriosystem.py +++ b/pymrio/core/mriosystem.py @@ -55,6 +55,11 @@ # warnings.warn(message, DeprecationWarning, stacklevel=2) +# Constants for column name validation +ALTERNATIVE_REGION_NAMES = ["country", "countries", "regions", "reg", "location"] +ALTERNATIVE_SECTOR_NAMES = ["sectors", "industry", "industries", "sec", "activity"] + + # Exceptions class ResetError(Exception): """Base class for errors while reseting the system.""" @@ -1793,7 +1798,7 @@ def characterize( factors_indexed = factors.set_index(index_col + [characterized_name_column]) if factors_indexed.index.duplicated().any(): - # Provide a helpful error message + # Build helpful error message error_msg = ( "Duplicate indices found in characterization factors. " "This typically occurs when region or sector specific characterization factors " @@ -1804,7 +1809,7 @@ def characterize( f"Current columns in factors dataframe: {list(factors.columns)}\n\n" ) - # Check for common misnaming patterns + # Check for case mismatches possible_region_cols = [col for col in factors.columns if col.lower() == "region" and col != "region"] possible_sector_cols = [col for col in factors.columns if col.lower() == "sector" and col != "sector"] @@ -1815,13 +1820,10 @@ def characterize( error_msg += f"Found possible sector column with different case: {possible_sector_cols}\n" error_msg += "Please rename it to 'sector' (lowercase).\n" + # Check for alternative column names if no case mismatch found if not possible_region_cols and not possible_sector_cols: - # Check for other common region/sector column names - common_region_names = ["country", "countries", "regions", "reg", "location"] - common_sector_names = ["sectors", "industry", "industries", "sec", "activity"] - - found_region_alternatives = [col for col in factors.columns if col.lower() in common_region_names] - found_sector_alternatives = [col for col in factors.columns if col.lower() in common_sector_names] + found_region_alternatives = [col for col in factors.columns if col.lower() in ALTERNATIVE_REGION_NAMES] + found_sector_alternatives = [col for col in factors.columns if col.lower() in ALTERNATIVE_SECTOR_NAMES] if found_region_alternatives: error_msg += f"Found possible alternative region column names: {found_region_alternatives}\n"