diff --git a/AUTHORS.rst b/AUTHORS.rst index 8461e5089..0a4c4b29a 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -23,7 +23,8 @@ The following persons contributed to the development of the |pyam| package: - Pietro Monticone `@pitmonticone <https://github.com/pitmonticone>`_ - Edward Byers `@byersiiasa <https://github.com/byersiiasa>`_ - Fridolin Glatter `@glatterf42 <https://github.com/glatterf42>`_ -- Linh Ho `@linhho <https://github.com/linhho>` +- Linh Ho `@linhho <https://github.com/linhho>`_ +- Zachary Schmidt `@zacharyschmidt <https://github.com/zacharyschmidt>`_ | The core maintenance of the |pyam| package is done by the *Scenario Services & Scientific Software* research theme diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index 047418811..0a4b65798 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -8,6 +8,7 @@ ## Individual updates +- [#875](https://github.com/IAMconsortium/pyam/pull/875) Add methods to the `compute` module implementing Kaya decomposition analysis. - [#901](https://github.com/IAMconsortium/pyam/pull/901) Add support for Python 3.13 - [#899](https://github.com/IAMconsortium/pyam/pull/899) Add `to_netcdf()` method - [#896](https://github.com/IAMconsortium/pyam/pull/896) Add `sort_data()` method diff --git a/pyam/compute.py b/pyam/compute.py index f2c247eb0..5e0729e1b 100644 --- a/pyam/compute.py +++ b/pyam/compute.py @@ -4,8 +4,10 @@ import pandas as pd import wquantiles +import pyam from pyam._debiasing import _compute_bias from pyam.index import replace_index_values +from pyam.kaya import kaya_factors, kaya_variables from pyam.timeseries import growth_rate from pyam.utils import remove_from_list @@ -249,6 +251,127 @@ def bias(self, name, method, axis): """ _compute_bias(self._df, name, method, axis) + def kaya_variables(self, append=False): + """Create the set of variables needed to compute Kaya factors. + + Parameters + ---------- + append : bool, optional + Whether to append computed timeseries data to this instance. + + Returns + ------- + :class:`IamDataFrame` or **None** + Computed timeseries data or None if `append=True`. + + Notes + ----- + + Example of calling the method: + + .. 
code-block:: python + + df.compute.kaya_variables(append=True) + + The IamDataFrame must contain the following variables, otherwise the method + will return None: + .. list-table:: + - Required Variables + - Population + - GDP (MER or PPP) + - Final Energy + - Primary Energy + - Primary Energy|Coal + - Primary Energy|Oil + - Primary Energy|Gas + - Emissions|CO2|Industrial Processes + - Emissions|CO2|Carbon Capture and Storage + - Emissions|CO2|Carbon Capture and Storage|Biomass + - Emissions|CO2|Fossil Fuels and Industry + - Emissions|CO2|AFOLU + - Carbon Sequestration|CCS|Fossil|Energy + - Carbon Sequestration|CCS|Fossil|Industrial Processes + - Carbon Sequestration|CCS|Biomass|Energy + - Carbon Sequestration|CCS|Biomass|Industrial Processes + + """ + + kaya_variables_frame = kaya_variables.compute_kaya_variables(self._df) + if kaya_variables_frame is None: + return None + if append: + self._df.append( + _find_non_duplicate_rows(self._df, kaya_variables_frame), inplace=True + ) + return None + + return kaya_variables_frame + + def kaya_factors(self, append=False): + """Compute the factors for the Kaya Decomposition Analysis + + Parameters + ---------- + append : bool, optional + Whether to append computed timeseries data to this instance. + + Returns + ------- + :class:`IamDataFrame` or **None** + Computed timeseries data or None if `append=True`. + + Notes + ----- + + Example of calling the method: + + .. code-block:: python + + df.compute.kaya_factors(append=True) + + The IamDataFrame must contain the following variables, otherwise the method + will return None: + .. 
list-table:: + - Required Variables + - Population + - GDP (MER or PPP) + - Final Energy + - Primary Energy + - Primary Energy|Coal + - Primary Energy|Oil + - Primary Energy|Gas + - Emissions|CO2|Industrial Processes + - Emissions|CO2|Carbon Capture and Storage + - Emissions|CO2|Carbon Capture and Storage|Biomass + - Emissions|CO2|Fossil Fuels and Industry + - Emissions|CO2|AFOLU + - Carbon Sequestration|CCS|Fossil|Energy + - Carbon Sequestration|CCS|Fossil|Industrial Processes + - Carbon Sequestration|CCS|Biomass|Energy + - Carbon Sequestration|CCS|Biomass|Industrial Processes + """ + kaya_variables = self.kaya_variables(append=False) + if kaya_variables is None: + return None + kaya_factors_frame = kaya_factors.compute_kaya_factors(kaya_variables) + if kaya_factors_frame is None: + return None + if append: + self._df.append( + _find_non_duplicate_rows(self._df, kaya_factors_frame), inplace=True + ) + return kaya_factors_frame + + +def _find_non_duplicate_rows(original_df, variables_to_add): + variables_for_append = pyam.IamDataFrame( + variables_to_add.as_pandas(meta_cols=False) + .merge(original_df.as_pandas(meta_cols=False), how="left", indicator=True) + .query('_merge=="left_only"') + .drop(columns="_merge") + ) + return variables_for_append + def _compute_learning_rate(x, performance, experience): """Internal implementation for computing implicit learning rate from timeseries data diff --git a/pyam/kaya/input_variable_names.py b/pyam/kaya/input_variable_names.py new file mode 100644 index 000000000..30e138e25 --- /dev/null +++ b/pyam/kaya/input_variable_names.py @@ -0,0 +1,17 @@ +POPULATION = "Population" +GDP_MER = "GDP|MER" +GDP_PPP = "GDP|PPP" +FINAL_ENERGY = "Final Energy" +PRIMARY_ENERGY = "Primary Energy" +PRIMARY_ENERGY_COAL = "Primary Energy|Coal" +PRIMARY_ENERGY_OIL = "Primary Energy|Oil" +PRIMARY_ENERGY_GAS = "Primary Energy|Gas" +EMISSIONS_CO2_INDUSTRIAL_PROCESSES = "Emissions|CO2|Industrial Processes" +EMISSIONS_CO2_CCS = "Emissions|CO2|Carbon 
Capture and Storage" +EMISSIONS_CO2_CCS_BIOMASS = "Emissions|CO2|Carbon Capture and Storage|Biomass" +EMISSIONS_CO2_FOSSIL_FUELS_AND_INDUSTRY = "Emissions|CO2|Fossil Fuels and Industry" +EMISSIONS_CO2_AFOLU = "Emissions|CO2|AFOLU" +CCS_FOSSIL_ENERGY = "Carbon Sequestration|CCS|Fossil|Energy" +CCS_FOSSIL_INDUSTRY = "Carbon Sequestration|CCS|Fossil|Industrial Processes" +CCS_BIOMASS_ENERGY = "Carbon Sequestration|CCS|Biomass|Energy" +CCS_BIOMASS_INDUSTRY = "Carbon Sequestration|CCS|Biomass|Industrial Processes" diff --git a/pyam/kaya/kaya_factor_names.py b/pyam/kaya/kaya_factor_names.py new file mode 100644 index 000000000..664700ca8 --- /dev/null +++ b/pyam/kaya/kaya_factor_names.py @@ -0,0 +1,6 @@ +GNP_per_P = "GNP/P" +FE_per_GNP = "FE/GNP" +PEdeq_per_FE = "PEDEq/FE" +PEFF_per_PEDEq = "PEFF/PEDEq" +TFC_per_PEFF = "TFC/PEFF" +NFC_per_TFC = "NFC/TFC" diff --git a/pyam/kaya/kaya_factors.py b/pyam/kaya/kaya_factors.py new file mode 100644 index 000000000..fa7c0a87e --- /dev/null +++ b/pyam/kaya/kaya_factors.py @@ -0,0 +1,81 @@ +import pyam +from pyam.kaya import input_variable_names, kaya_factor_names, kaya_variable_names + + +def compute_kaya_factors(kaya_variables_frame): + kaya_factors = pyam.concat( + [ + _calc_gnp_per_p(kaya_variables_frame), + _calc_fe_per_gnp(kaya_variables_frame), + _calc_pedeq_per_fe(kaya_variables_frame), + _calc_peff_per_pedeq(kaya_variables_frame), + _calc_tfc_per_peff(kaya_variables_frame), + _calc_nfc_per_tfc(kaya_variables_frame), + kaya_variables_frame.filter( + variable=[kaya_variable_names.TFC, input_variable_names.POPULATION] + ), + ] + ) + return kaya_factors + + +def _calc_gnp_per_p(input_data): + variable = input_variable_names.GDP_PPP + if input_data.filter(variable=variable).empty: + variable = input_variable_names.GDP_MER + return input_data.divide( + variable, + input_variable_names.POPULATION, + kaya_factor_names.GNP_per_P, + append=False, + ) + + +def _calc_fe_per_gnp(input_data): + variable = input_variable_names.GDP_PPP + 
if input_data.filter(variable=variable).empty: + variable = input_variable_names.GDP_MER + return input_data.divide( + input_variable_names.FINAL_ENERGY, + variable, + kaya_factor_names.FE_per_GNP, + append=False, + ) + + +def _calc_pedeq_per_fe(input_data): + return input_data.divide( + input_variable_names.PRIMARY_ENERGY, + input_variable_names.FINAL_ENERGY, + kaya_factor_names.PEdeq_per_FE, + append=False, + ) + + +def _calc_peff_per_pedeq(input_data): + return input_data.divide( + kaya_variable_names.PRIMARY_ENERGY_FF, + input_variable_names.PRIMARY_ENERGY, + kaya_factor_names.PEFF_per_PEDEq, + append=False, + ) + + +def _calc_tfc_per_peff(input_data): + return input_data.divide( + kaya_variable_names.TFC, + kaya_variable_names.PRIMARY_ENERGY_FF, + kaya_factor_names.TFC_per_PEFF, + ignore_units="Mt CO2/EJ", + append=False, + ) + + +def _calc_nfc_per_tfc(input_data): + return input_data.divide( + kaya_variable_names.NFC, + kaya_variable_names.TFC, + kaya_factor_names.NFC_per_TFC, + ignore_units="", + append=False, + ) # .rename(unit={"unknown": ""}) diff --git a/pyam/kaya/kaya_variable_names.py b/pyam/kaya/kaya_variable_names.py new file mode 100644 index 000000000..31efa86d5 --- /dev/null +++ b/pyam/kaya/kaya_variable_names.py @@ -0,0 +1,3 @@ +PRIMARY_ENERGY_FF = "Primary Energy|Fossil" +TFC = "Total Fossil Carbon" +NFC = "Net Fossil Carbon" diff --git a/pyam/kaya/kaya_variables.py b/pyam/kaya/kaya_variables.py new file mode 100644 index 000000000..4b4869719 --- /dev/null +++ b/pyam/kaya/kaya_variables.py @@ -0,0 +1,179 @@ +import logging +import warnings + +import pyam +from pyam.kaya import input_variable_names, kaya_variable_names + +logger = logging.getLogger(__name__) + +required_input_variables = [ + vars(input_variable_names)[variable_name] + for variable_name in dir(input_variable_names) + if not variable_name.startswith("__") +] + + +def compute_kaya_variables(input_data): + if _is_input_data_incomplete(input_data): + return None + + kaya_variables = 
pyam.concat( + [ + _calc_pop(input_data), + _calc_gdp(input_data), + _calc_fe(input_data), + _calc_pe(input_data), + _calc_pe_ff(input_data), + _calc_tfc(input_data), + _calc_nfc(input_data), + ] + ) + return kaya_variables + + +def _is_input_data_incomplete(input_data): + # copy data so we don't create side effects + # in particular, require_data will change the "exclude" series + input_data = input_data.copy() + # Get all unique model/scenario/region combinations + scenario_model_region = input_data.data[ + ["model", "scenario", "region"] + ].drop_duplicates() + + # Check each combination + for _, row in scenario_model_region.iterrows(): + single_combination = input_data.filter( + model=row["model"], scenario=row["scenario"], region=row["region"] + ) + + # Get variables present for this combination + single_combination_variables = set(single_combination.data["variable"].unique()) + # special case for GDP: either form is acceptable, so don't check for either + # as long as one is present + required_variables_set = make_required_variables_set( + single_combination_variables + ) + # Check if any required variables are missing + missing_variables = set(required_variables_set) - single_combination_variables + if missing_variables: + logger.info( + f"""Variables missing for + model: {row['model']}, + scenario: {row['scenario']}, + region: {row['region']}\nMissing variables: {missing_variables}""" + ) + + # special case for GDP: either form is acceptable, so don't check for either + # as long as one is present + required_variables_set = make_required_variables_set( + set(input_data.data["variable"].unique()) + ) + # exclude model/scenario combinations that have missing variables, + # disregarding region. 
even if not all variables are present for a region, + # arithmetic operations will return an empty dataframe, + # not throw an error, so it is safe to proceed + input_data.require_data(variable=list(required_variables_set), exclude_on_fail=True) + # suppress warning about empty dataframe if filtering excludes all scenarios + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + return input_data.filter(exclude=False).empty + + +def make_required_variables_set(input_variables): + required_variables_set = set(required_input_variables) + if _has_at_least_one_gdp(input_variables): + # either form of GDP is acceptable, so don't check for both + # as long as one is present + return required_variables_set - set( + [input_variable_names.GDP_PPP, input_variable_names.GDP_MER] + ) + return required_variables_set + + +def _has_at_least_one_gdp(input_variables): + return ( + input_variable_names.GDP_PPP in input_variables + or input_variable_names.GDP_MER in input_variables + ) + + +def _calc_pop(input_data): + return input_data.filter(variable=input_variable_names.POPULATION) + + +def _calc_gdp(input_data): + variable = input_variable_names.GDP_PPP + if input_data.filter(variable=variable).empty: + variable = input_variable_names.GDP_MER + return input_data.filter(variable=variable) + + +def _calc_fe(input_data): + return input_data.filter(variable=input_variable_names.FINAL_ENERGY) + + +def _calc_pe(input_data): + return input_data.filter(variable=input_variable_names.PRIMARY_ENERGY) + + +def _calc_pe_ff(input_data): + input_data = input_data.copy() + input_data.add( + input_variable_names.PRIMARY_ENERGY_COAL, + input_variable_names.PRIMARY_ENERGY_OIL, + "pe_coal_oil", + append=True, + ) + return input_data.add( + input_variable_names.PRIMARY_ENERGY_GAS, + "pe_coal_oil", + kaya_variable_names.PRIMARY_ENERGY_FF, + ) + + +def _calc_nfc(input_data): + input_data = input_data.copy() + input_data.subtract( + 
input_variable_names.EMISSIONS_CO2_FOSSIL_FUELS_AND_INDUSTRY, + input_variable_names.EMISSIONS_CO2_INDUSTRIAL_PROCESSES, + "net_energy_emissions_with_biomass_ccs", + ignore_units="Mt CO2/yr", + append=True, + ) + return input_data.add( + input_variable_names.EMISSIONS_CO2_CCS_BIOMASS, + "net_energy_emissions_with_biomass_ccs", + kaya_variable_names.NFC, + ignore_units="Mt CO2/yr", + append=False, + ) + + +def _calc_tfc(input_data): + input_data = input_data.copy() + ccs_fossil_energy = _calc_ccs_fossil_energy(input_data) + nfc = _calc_nfc(input_data) + nfc_with_ccs_fossil_energy = nfc.append(ccs_fossil_energy) + return nfc_with_ccs_fossil_energy.add( + "ccs_fossil_energy", + kaya_variable_names.NFC, + kaya_variable_names.TFC, + ignore_units="Mt CO2/yr", + ) + + +def _calc_ccs_fossil_energy(input_data): + input_data = input_data.copy() + input_data.subtract( + input_variable_names.EMISSIONS_CO2_CCS, + input_variable_names.EMISSIONS_CO2_CCS_BIOMASS, + "ccs_fossil", + ignore_units="Mt CO2/yr", + append=True, + ) + return input_data.subtract( + "ccs_fossil", + input_variable_names.CCS_FOSSIL_INDUSTRY, + "ccs_fossil_energy", + ignore_units="Mt CO2/yr", + ) diff --git a/tests/test_feature_kaya_factors.py b/tests/test_feature_kaya_factors.py new file mode 100644 index 000000000..5baebd882 --- /dev/null +++ b/tests/test_feature_kaya_factors.py @@ -0,0 +1,147 @@ +import pandas as pd +import pytest + +from pyam import IamDataFrame +from pyam.testing import assert_iamframe_equal + +TEST_DF = IamDataFrame( + pd.DataFrame( + [ + ["Population", "million", 1000], + ["GDP|PPP", "billion USD_2005/yr", 6], + ["GDP|MER", "billion USD_2005/yr", 5], + ["Final Energy", "EJ/yr", 8], + ["Primary Energy", "EJ/yr", 10], + ["Primary Energy|Coal", "EJ/yr", 5], + ["Primary Energy|Gas", "EJ/yr", 2], + ["Primary Energy|Oil", "EJ/yr", 2], + [ + "Emissions|CO2|Fossil Fuels and Industry", + "Mt CO2/yr", + 10, + ], + ["Emissions|CO2|Industrial Processes", "Mt CO2/yr", 1], + ["Emissions|CO2|AFOLU", 
"Mt CO2/yr", 1], + ["Emissions|CO2|Carbon Capture and Storage", "Mt CO2/yr", 4], + ["Emissions|CO2|Carbon Capture and Storage|Biomass", "Mt CO2/yr", 1], + ["Carbon Sequestration|CCS|Fossil|Energy", "Mt CO2/yr", 2], + ["Carbon Sequestration|CCS|Fossil|Industrial Processes", "Mt CO2/yr", 1], + ["Carbon Sequestration|CCS|Biomass|Energy", "Mt CO2/yr", 0.5], + ["Carbon Sequestration|CCS|Biomass|Industrial Processes", "Mt CO2/yr", 0.5], + ], + columns=["variable", "unit", 2010], + ), + model="model_a", + scenario="scen_a", + region="World", +) + +EXP_DF = IamDataFrame( + pd.DataFrame( + [ + ["FE/GNP", "EJ / USD / billion", 1.33333], + ["GNP/P", "USD * billion / million / a", 0.006000], + ["NFC/TFC", "", 0.833333], + ["PEDEq/FE", "", 1.250000], + ["PEFF/PEDEq", "", 0.900000], + ["TFC/PEFF", "Mt CO2/EJ", 1.333333], + ["Population", "million", 1000], + ["Total Fossil Carbon", "Mt CO2/yr", 12.0], + ], + columns=["variable", "unit", 2010], + ), + model="model_a", + scenario="scen_a", + region="World", +) + +# can't append EXP_DF to TEST_DF because of overlapping values +# append this dataframe to create full results for comparison +EXP_DF_FOR_APPEND = IamDataFrame( + pd.DataFrame( + [ + ["FE/GNP", "EJ / USD / billion", 1.33333], + ["GNP/P", "USD * billion / million / a", 0.006000], + ["NFC/TFC", "", 0.833333], + ["PEDEq/FE", "", 1.250000], + ["PEFF/PEDEq", "", 0.900000], + ["TFC/PEFF", "Mt CO2/EJ", 1.333333], + ["Total Fossil Carbon", "Mt CO2/yr", 12.0], + ], + columns=["variable", "unit", 2010], + ), + model="model_a", + scenario="scen_a", + region="World", +) + + +@pytest.mark.parametrize("append", (False, True)) +def test_kaya_factors(append): + """Test computing kaya factors""" + + if append: + obs = TEST_DF.copy() + obs.compute.kaya_factors(append=True) + assert_iamframe_equal(TEST_DF.append(EXP_DF_FOR_APPEND), obs) + else: + obs = TEST_DF.compute.kaya_factors() + assert_iamframe_equal(EXP_DF, obs) + + +@pytest.mark.parametrize("append", (False, True)) +def 
test_kaya_variables_none_when_input_variables_missing(append): + """Assert that computing kaya factors with + missing input variables returns None + """ + + if append: + obs = TEST_DF.copy() + # select subset of required input variables + (obs.filter(variable="Population").compute.kaya_factors(append=True)) + # assert that no data was added + assert_iamframe_equal(TEST_DF, obs) + else: + obs = TEST_DF.filter(variable="Population").compute.kaya_factors() + assert obs is None + + +def test_calling_kaya_factors_multiple_times(): + """Test calling the method a second time has no effect""" + + obs = TEST_DF.copy() + obs.compute.kaya_factors(append=True) + obs.compute.kaya_factors(append=True) + assert_iamframe_equal(TEST_DF.append(EXP_DF_FOR_APPEND), obs) + +def test_kaya_factors_uses_gdp_mer_fallback(): + """Test that kaya_factors uses GDP_MER when GDP_PPP is not available""" + # Create test data without GDP_PPP + df_no_gdp_ppp = TEST_DF.filter(variable="GDP|PPP", keep=False) + + # Create expected result using GDP|MER instead of GDP|PPP for calculations + exp_no_gdp_ppp = IamDataFrame( + pd.DataFrame( + [ # 8 EJ / 5 billion USD = 1.6 + ["FE/GNP", "EJ / USD / billion", 1.6], + # 5 billion USD / 1000 million = 0.005 + ["GNP/P", "USD * billion / million / a", 0.005], + ["NFC/TFC", "", 0.833333], + ["PEDEq/FE", "", 1.250000], + ["PEFF/PEDEq", "", 0.900000], + ["TFC/PEFF", "Mt CO2/EJ", 1.333333], + ["Population", "million", 1000], + ["Total Fossil Carbon", "Mt CO2/yr", 12.0], + ], + columns=["variable", "unit", 2010], + ), + model="model_a", + scenario="scen_a", + region="World", + ) + + # Compute kaya factors + obs = df_no_gdp_ppp.compute.kaya_factors() + + # Verify results match expected + assert_iamframe_equal(exp_no_gdp_ppp, obs) diff --git a/tests/test_feature_kaya_variables.py b/tests/test_feature_kaya_variables.py new file mode 100644 index 000000000..d520cccea --- /dev/null +++ b/tests/test_feature_kaya_variables.py @@ -0,0 +1,161 @@ +import logging + +import 
pandas as pd +import pytest + +from pyam import IamDataFrame +from pyam.testing import assert_iamframe_equal + +TEST_DF = IamDataFrame( + pd.DataFrame( + [ + ["Population", "million", 1000], + ["GDP|PPP", "billion USD_2005/yr", 6], + ["GDP|MER", "billion USD_2005/yr", 5], + ["Final Energy", "EJ/yr", 8], + ["Primary Energy", "EJ/yr", 10], + ["Primary Energy|Coal", "EJ/yr", 5], + ["Primary Energy|Gas", "EJ/yr", 2], + ["Primary Energy|Oil", "EJ/yr", 2], + [ + "Emissions|CO2|Fossil Fuels and Industry", + "Mt CO2/yr", + 10, + ], + ["Emissions|CO2|Industrial Processes", "Mt CO2/yr", 1], + ["Emissions|CO2|AFOLU", "Mt CO2/yr", 1], + ["Emissions|CO2|Carbon Capture and Storage", "Mt CO2/yr", 4], + ["Emissions|CO2|Carbon Capture and Storage|Biomass", "Mt CO2/yr", 1], + ["Carbon Sequestration|CCS|Fossil|Energy", "Mt CO2/yr", 2], + ["Carbon Sequestration|CCS|Fossil|Industrial Processes", "Mt CO2/yr", 1], + ["Carbon Sequestration|CCS|Biomass|Energy", "Mt CO2/yr", 0.5], + ["Carbon Sequestration|CCS|Biomass|Industrial Processes", "Mt CO2/yr", 0.5], + ], + columns=["variable", "unit", 2010], + ), + model="model_a", + scenario="scen_a", + region="World", +) + +EXP_DF = IamDataFrame( + pd.DataFrame( + [ + ["Population", "million", 1000], + ["GDP|PPP", "billion USD_2005/yr", 6], + ["Final Energy", "EJ/yr", 8.0], + ["Primary Energy", "EJ/yr", 10.0], + ["Primary Energy|Fossil", "EJ/yr", 9.0], + ["Total Fossil Carbon", "Mt CO2/yr", 12.0], + ["Net Fossil Carbon", "Mt CO2/yr", 10.0], + ], + columns=["variable", "unit", 2010], + ), + model="model_a", + scenario="scen_a", + region="World", +) + +# can't append EXP_DF to TEST_DF because of overlapping values +# append this dataframe to create full results for comparison +EXP_DF_FOR_APPEND = IamDataFrame( + pd.DataFrame( + [ + ["Primary Energy|Fossil", "EJ/yr", 9.0], + ["Total Fossil Carbon", "Mt CO2/yr", 12.0], + ["Net Fossil Carbon", "Mt CO2/yr", 10.0], + ], + columns=["variable", "unit", 2010], + ), + model="model_a", + scenario="scen_a", + 
region="World", +) + + +@pytest.mark.parametrize("append", (False, True)) +def test_kaya_variables(append): + """Test computing kaya variables""" + + if append: + obs = TEST_DF.copy() + obs.compute.kaya_variables(append=True) + assert_iamframe_equal(TEST_DF.append(EXP_DF_FOR_APPEND), obs) + else: + obs = TEST_DF.compute.kaya_variables() + assert_iamframe_equal(EXP_DF, obs) + + +@pytest.mark.parametrize("append", (False, True)) +def test_kaya_variables_none_when_input_variables_missing(append): + """Assert that computing kaya variables with + missing input variables returns None + """ + + if append: + obs = TEST_DF.copy() + # select subset of required input variables + (obs.filter(variable="Population").compute.kaya_variables(append=True)) + # assert that no data was added + assert_iamframe_equal(TEST_DF, obs) + else: + obs = TEST_DF.filter(variable="Population").compute.kaya_variables() + assert obs is None + + +def test_calling_kaya_variables_multiple_times(): + """Test calling the method a second time has no effect""" + + obs = TEST_DF.copy() + obs.compute.kaya_variables(append=True) + obs.compute.kaya_variables(append=True) + assert_iamframe_equal(TEST_DF.append(EXP_DF_FOR_APPEND), obs) + + +def test_kaya_variables_logs_missing_variables(caplog): + """Test that missing variables are correctly logged""" + # Create test data with every variable except population + df_no_pop = TEST_DF.filter(variable="Population", keep=False) + + with caplog.at_level(logging.INFO): + df_no_pop.compute.kaya_variables() + + # Check that the log message contains expected information + assert "model: model_a" in caplog.text + assert "scenario: scen_a" in caplog.text + assert "region: World" in caplog.text + + assert "Population" in caplog.text + + +def test_kaya_variables_uses_gdp_mer_fallback(): + """Test that kaya_variables uses GDP_MER when GDP_PPP is not available""" + # Create test data without GDP_PPP + df_no_gdp_ppp = TEST_DF.filter(variable="GDP|PPP", keep=False) + + # Create expected result 
with GDP_MER instead of GDP_PPP + exp_no_gdp_ppp = EXP_DF.filter(variable="GDP|PPP", keep=False).append( + TEST_DF.filter(variable="GDP|MER") + ) + + # Compute kaya variables + obs = df_no_gdp_ppp.compute.kaya_variables() + + # Verify results match expected + assert_iamframe_equal(exp_no_gdp_ppp, obs) + + +def test_kaya_variables_returns_none_when_no_gdp_available(): + """Test that kaya_variables returns None when both + GDP_MER and GDP_PPP are unavailable""" + # Create test data without GDP_PPP and GDP_MER + df_no_gdp = TEST_DF.filter( + variable=["GDP|PPP", "GDP|MER"], + keep=False, + ) + + # Compute kaya variables + obs = df_no_gdp.compute.kaya_variables() + + # Verify results match expected + assert obs is None