From 318815e834edd660585a1a7b7c0c055164232a81 Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Fri, 6 Mar 2026 13:11:34 +0000 Subject: [PATCH 1/4] update costing function to comply with new output format 1. Add suspended_results_folder to extract population scaling factor 2. update get_staff_count_by_facid_and_officer_type to get available_staff_count_by_facid_and_officertype --- src/scripts/costing/cost_estimation.py | 133 +++++++++++++++++++++---- 1 file changed, 115 insertions(+), 18 deletions(-) diff --git a/src/scripts/costing/cost_estimation.py b/src/scripts/costing/cost_estimation.py index 02d0971955..119ece66fd 100644 --- a/src/scripts/costing/cost_estimation.py +++ b/src/scripts/costing/cost_estimation.py @@ -282,6 +282,7 @@ def clean_equipment_name(name: str, equipment_drop_list = None) -> str: def estimate_input_cost_of_scenarios(results_folder: Path, resourcefilepath: Path, + suspended_results_folder: Path = None, _draws: Optional[list[int]] = None, _runs: Optional[list[int]] = None, summarize: bool = False, @@ -298,6 +299,10 @@ def estimate_input_cost_of_scenarios(results_folder: Path, Path to the directory containing simulation output files. resourcefilepath : Path, optional Path to the resource files + suspended_results_folder: Path, optional + Path to the directory containing suspended simulation output files (using the suspend and resume functionality), + This is used to extract the scaling_factor to scale result to actual population size. If None, then the + 'scaling_factor' is obtained from the results_folder. _draws : list, optional Specific draws to include in the cost estimation. Defaults to all available draws. _runs : list, optional @@ -477,25 +482,68 @@ def merge_cost_and_model_data(cost_df, model_df, varnames): return merged_df # Get available staff count for each year and draw - def get_staff_count_by_facid_and_officer_type(_df: pd.Series) -> pd.Series: - """Summarise the parsed logged-key results for one draw (as dataframe) into a pd.Series.""" - _df = _df.set_axis(_df['date'].dt.year).drop(columns=['date']) - _df.index.name = 'year' + def get_staff_count_by_facid_and_officer_type(_df: pd.DataFrame) -> pd.Series: + """ + Convert logged staff dictionary output into tidy format, + summing staff counts across all clinic columns. + + Returns pd.Series indexed by: + (year, FacilityID, Officer) + """ + + df = _df.copy() + df["year"] = df["date"].dt.year + df = df.drop(columns=["date"]) + + clinic_cols = df.columns.difference(["year"]) + + long_frames = [] + + for clinic in clinic_cols: + expanded = df[[clinic, "year"]].copy() + expanded = expanded[expanded[clinic].notna()] - def change_to_standard_flattened_index_format(col): - parts = col.split("_", 3) # Split by "_" only up to 3 parts - if len(parts) > 2: - return parts[0] + "=" + parts[1] + "|" + parts[2] + "=" + parts[ - 3] # Rejoin with "I" at the second occurrence - return col # If there's no second underscore, return the string as it is + expanded_dict = expanded[clinic].apply(pd.Series) + expanded_dict["year"] = expanded["year"].values - _df.columns = [change_to_standard_flattened_index_format(col) for col in _df.columns] + long_frames.append(expanded_dict) - return unflatten_flattened_multi_index_in_logging(_df).stack(level=[0, 1]) # expanded flattened axis + # Combine all clinics + combined = pd.concat(long_frames, ignore_index=True) + + # Melt to long format + long_df = ( + combined + .melt(id_vars=["year"], + var_name="facility_officer", + value_name="count") + .dropna(subset=["count"]) + ) + + # Split FacilityID and Officer + parts = long_df["facility_officer"].str.split("_Officer_", expand=True) + + long_df["FacilityID"] = ( + parts[0] + .str.replace("FacilityID_", "", regex=False) + .astype(int) + ) + long_df["Officer"] = parts[1] + + # SUM ACROSS CLINICS HERE + result = ( + long_df + .groupby(["year", "FacilityID", "Officer"])["count"] + .sum() + .sort_index() + ) + + return result # Staff count by Facility ID available_staff_count_by_facid_and_officertype = extract_results( Path(results_folder), + suspended_results_folder=suspended_results_folder, module='tlo.methods.healthsystem.summary', key='number_of_hcw_staff', custom_generate_series=get_staff_count_by_facid_and_officer_type, @@ -525,16 +573,64 @@ def change_to_standard_flattened_index_format(col): # Get list of cadres which were utilised in each run to get the count of staff used in the simulation # Note that we still cost the full staff count for any cadre-Facility_Level combination that was ever used in a run, and # not the amount of time which was used - def get_capacity_used_by_officer_type_and_facility_level(_df: pd.Series) -> pd.Series: - """Summarise the parsed logged-key results for one draw (as dataframe) into a pd.Series.""" - _df = _df.set_axis(_df['date'].dt.year).drop(columns=['date']) - _df.index.name = 'year' - return unflatten_flattened_multi_index_in_logging(_df).stack(level=[0, 1]) # expanded flattened axis + def get_capacity_used_by_officer_type_and_facility_level( + _df: pd.DataFrame + ) -> pd.Series: + """ + Parse logging output and return a Series indexed by: + (year, OfficerType, FacilityLevel) + + Collapses (sums) across clinics. + Uses facility_id_levels_dict to map FacilityID → FacilityLevel. + """ + + # ---- 1. Set year index ---- + _df = _df.set_axis(_df["date"].dt.year).drop(columns=["date"]) + _df.index.name = "year" + + # ---- 2. Unflatten logging columns ---- + _df = unflatten_flattened_multi_index_in_logging(_df) + + # Expect columns like: + # ('Clinic', 'facID_and_officer') + + col_df = _df.columns.to_frame(index=False) + + # ---- 3. Extract OfficerType ---- + col_df["OfficerType"] = ( + col_df["facID_and_officer"] + .str.split("_Officer_") + .str[-1] + ) + + # ---- 4. Extract FacilityID ---- + col_df["FacilityID"] = ( + col_df["facID_and_officer"] + .str.split("_Officer_") + .str[0] + .str.replace("FacilityID_", "", regex=False) + .astype(int) + ) + + # ---- 5. Map to FacilityLevel ---- + col_df["FacilityLevel"] = col_df["FacilityID"].map(facility_id_levels_dict) + + # ---- 6. Rebuild MultiIndex (drop clinic level) ---- + _df.columns = pd.MultiIndex.from_frame( + col_df[["OfficerType", "FacilityLevel"]] + ) + + # ---- 7. Collapse across clinics ---- + _df = _df.groupby(level=["OfficerType", "FacilityLevel"], axis=1).sum() + + # ---- 8. Return stacked format ---- + return _df.stack(["OfficerType", "FacilityLevel"]) annual_capacity_used_by_cadre_and_level = extract_results( Path(results_folder), + suspended_results_folder=suspended_results_folder, module='tlo.methods.healthsystem.summary', - key='Capacity_By_OfficerType_And_FacilityLevel', + key='Capacity_By_FacID_and_Officer', custom_generate_series=get_capacity_used_by_officer_type_and_facility_level, do_scaling=False, ) @@ -741,6 +837,7 @@ def get_counts_of_items_requested(_df): cons_req = extract_results( results_folder, + suspended_results_folder=suspended_results_folder, module='tlo.methods.healthsystem.summary', key='Consumables', custom_generate_series=get_counts_of_items_requested, From 0dcbb2c3226b2d2872d86f5fbacea73d20eebc0a Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Fri, 6 Mar 2026 13:25:49 +0000 Subject: [PATCH 2/4] update extract_results function to use suspended_results_folder --- src/tlo/analysis/utils.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/src/tlo/analysis/utils.py b/src/tlo/analysis/utils.py index 6b4d2cbf9b..073a6d1ad9 100644 --- a/src/tlo/analysis/utils.py +++ b/src/tlo/analysis/utils.py @@ -295,6 +295,7 @@ def extract_results(results_folder: Path, index: str = None, custom_generate_series=None, do_scaling: bool = False, + suspended_results_folder: Path = None, ) -> pd.DataFrame: """Utility function to unpack results. @@ -307,16 +308,18 @@ def extract_results(results_folder: Path, `custom_generate_series`. Optionally, with `do_scaling=True`, each element is multiplied by the scaling_factor recorded in the simulation. + If the suspend-and-resume functionality is used, scaling factor may be avaialble in the folder where the log of the suspended run are stored. Note that if runs in the batch have failed (such that logs have not been generated), these are dropped silently. """ - def get_multiplier(_draw, _run): + + def get_multiplier(results_folder, _draw, _run): """Helper function to get the multiplier from the simulation. Note that if the scaling factor cannot be found a `KeyError` is thrown.""" return load_pickled_dataframes( - results_folder, _draw, _run, 'tlo.methods.population' - )['tlo.methods.population']['scaling_factor']['scaling_factor'].values[0] + results_folder, _draw, _run, 'tlo.methods.demography' + )['tlo.methods.demography']['scaling_factor']['scaling_factor'].values[0] if custom_generate_series is None: # If there is no `custom_generate_series` provided, it implies that function required selects the specified @@ -352,7 +355,10 @@ def generate_series(dataframe: pd.DataFrame) -> pd.Series: 'Custom command does not generate a pd.Series' ) if do_scaling: - res[draw_run] = output_from_eval * get_multiplier(draw, run) + if suspended_results_folder is not None: + res[draw_run] = output_from_eval * get_multiplier(suspended_results_folder, 0, 0) + else: + res[draw_run] = output_from_eval * get_multiplier(results_folder, draw, run) else: res[draw_run] = output_from_eval @@ -386,7 +392,7 @@ def check_info_value_changes(df): prev_info = row["Info"] return problems - + def remove_events_for_individual_after_death(df): rows_to_drop = [] @@ -430,8 +436,8 @@ def reconstruct_individual_histories(df): if len(problems)>0: print("Values didn't change but were still detected") print(problems) - - + + return df_final From 84172201c77b422824d836911b2a497ede8136fb Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Fri, 6 Mar 2026 13:46:06 +0000 Subject: [PATCH 3/4] linting --- src/scripts/costing/cost_estimation.py | 142 ++++++++++++++++--------- src/tlo/analysis/utils.py | 3 +- 2 files changed, 91 insertions(+), 54 deletions(-) diff --git a/src/scripts/costing/cost_estimation.py b/src/scripts/costing/cost_estimation.py index 119ece66fd..5cc3f5e5cd 100644 --- a/src/scripts/costing/cost_estimation.py +++ b/src/scripts/costing/cost_estimation.py @@ -156,7 +156,8 @@ def get_discount_factor(year): # Compute the cumulative discount factor as the product of (1 + discount_rate) for all previous years discount_factor = 1 for y in range(_initial_year + 1, - year + 1): # only starting from initial year + 1 as the discount factor for initial year should be 1 + year + 1): # only starting from initial year + 1 as the discount factor for initial year + # should be 1 discount_factor *= (1 + _discount_rate.get(y, 0)) # Default to 0 if year not in dictionary return discount_factor else: @@ -321,8 +322,10 @@ def estimate_input_cost_of_scenarios(results_folder: Path, Returns: ------- pd.DataFrame - A dataframe containing discounted costs disaggregated by category, sub-category, category-specific subgroup, year, draw, and run. - Note that if a discount rate is used, the dataframe will provide cost as the NPV during the first year of the dataframe + A dataframe containing discounted costs disaggregated by category, sub-category, category-specific subgroup, + year, draw, and run. + Note that if a discount rate is used, the dataframe will provide cost as the NPV during the first year of the + dataframe """ # Useful common functions @@ -359,7 +362,8 @@ def melt_model_output_draws_and_runs(_df, id_vars): facility_id_levels_dict = dict(zip(mfl['Facility_ID'], mfl['Facility_Level'])) fac_levels = set(mfl.Facility_Level) - # If variable discount rate is provided, use the average across the relevant years for the purpose of annuitization of HR and equipment costs + # If variable discount rate is provided, use the average across the relevant years for the purpose of annuitization + # of HR and equipment costs def calculate_annuitization_rate(_discount_rate, _years): if isinstance(_discount_rate, (int, float)): # Single discount rate, return as is @@ -567,12 +571,13 @@ def get_staff_count_by_facid_and_officer_type(_df: pd.DataFrame) -> pd.Series: 'Facility_Level'].astype(str) # make sure facility level is stored as string available_staff_count_by_level_and_officer_type = available_staff_count_by_level_and_officer_type.drop( available_staff_count_by_level_and_officer_type[available_staff_count_by_level_and_officer_type[ - 'Facility_Level'] == '5'].index) # drop headquarters because we're only concerned with staff engaged in service delivery + 'Facility_Level'] == '5'].index) # drop headquarters + # because we're only concerned with staff engaged in service delivery available_staff_count_by_level_and_officer_type.rename(columns={'value': 'staff_count'}, inplace=True) # Get list of cadres which were utilised in each run to get the count of staff used in the simulation - # Note that we still cost the full staff count for any cadre-Facility_Level combination that was ever used in a run, and - # not the amount of time which was used + # Note that we still cost the full staff count for any cadre-Facility_Level combination that was ever used in a run, + # and not the amount of time which was used def get_capacity_used_by_officer_type_and_facility_level( _df: pd.DataFrame ) -> pd.Series: @@ -648,7 +653,10 @@ def get_capacity_used_by_officer_type_and_facility_level( average_capacity_used_by_cadre_and_level[average_capacity_used_by_cadre_and_level['capacity_used'] != 0][ ['OfficerType', 'FacilityLevel', 'draw', 'run']] print( - f"Out of {average_capacity_used_by_cadre_and_level.groupby(['OfficerType', 'FacilityLevel']).size().count()} cadre and level combinations available, {list_of_cadre_and_level_combinations_used.groupby(['OfficerType', 'FacilityLevel']).size().count()} are used across the simulations") + f"Out of {average_capacity_used_by_cadre_and_level.groupby(['OfficerType', 'FacilityLevel']).size().count()} " + f"cadre and level combinations available, " + f"{list_of_cadre_and_level_combinations_used.groupby(['OfficerType', 'FacilityLevel']).size().count()} " + f"are used across the simulations") list_of_cadre_and_level_combinations_used = list_of_cadre_and_level_combinations_used.rename( columns={'FacilityLevel': 'Facility_Level'}) @@ -660,11 +668,13 @@ def get_capacity_used_by_officer_type_and_facility_level( if (cost_only_used_staff): print( - "The input for 'cost_only_used_staff' implies that only cadre-level combinations which have been used in the run are costed") + "The input for 'cost_only_used_staff' implies that only cadre-level combinations which have been used in " + "the run are costed") staff_size_chosen_for_costing = used_staff_count_by_level_and_officer_type else: print( - "The input for 'cost_only_used_staff' implies that all staff are costed regardless of the cadre-level combinations which have been used in the run are costed") + "The input for 'cost_only_used_staff' implies that all staff are costed regardless of the cadre-level " + "combinations which have been used in the run are costed") staff_size_chosen_for_costing = available_staff_count_by_level_and_officer_type # Calculate various components of HR cost @@ -703,7 +713,8 @@ def calculate_npv_past_training_expenses_by_row(row, r=_discount_rate): if partial_year > 0: npv += annual_cost * partial_year * (1 + r) ** (1 + r) - # Add recruitment cost assuming this happens during the partial year or the year after graduation if partial year == 0 + # Add recruitment cost assuming this happens during the partial year or the year after graduation if + # partial year == 0 npv += row['recruitment_cost_per_person_recruited_usd'] * (1 + r) return npv @@ -715,34 +726,33 @@ def calculate_npv_past_training_expenses_by_row(row, r=_discount_rate): npv_values.append(npv) preservice_training_cost['npv_of_training_and_recruitment_cost'] = npv_values - preservice_training_cost['npv_of_training_and_recruitment_cost_per_recruit'] = preservice_training_cost[ - 'npv_of_training_and_recruitment_cost'] * \ - (1 / (preservice_training_cost[ - 'absorption_rate_of_students_into_public_workforce'] + - preservice_training_cost[ - 'proportion_of_workforce_recruited_from_abroad'])) * \ - (1 / preservice_training_cost[ - 'graduation_rate']) * (1 / - preservice_training_cost[ - 'licensure_exam_passing_rate']) - if _discount_rate == 0: # if the discount rate is 0, then the pre-service + recruitment cost simply needs to be divided by the number of years in tenure + preservice_training_cost['npv_of_training_and_recruitment_cost_per_recruit'] \ + = (preservice_training_cost['npv_of_training_and_recruitment_cost'] * + (1 / (preservice_training_cost['absorption_rate_of_students_into_public_workforce'] + + preservice_training_cost['proportion_of_workforce_recruited_from_abroad'])) * + (1 / preservice_training_cost['graduation_rate']) * + (1 /preservice_training_cost['licensure_exam_passing_rate'])) + if _discount_rate == 0: # if the discount rate is 0, then the pre-service + recruitment cost simply + # needs to be divided by the number of years in tenure preservice_training_cost['annuitisation_rate'] = preservice_training_cost[ 'average_length_of_tenure_in_the_public_sector'] else: preservice_training_cost['annuitisation_rate'] = 1 + (1 - (1 + annuitization_rate) ** ( -preservice_training_cost[ 'average_length_of_tenure_in_the_public_sector'] + 1)) / annuitization_rate - preservice_training_cost['annuitised_training_and_recruitment_cost_per_recruit'] = preservice_training_cost[ - 'npv_of_training_and_recruitment_cost_per_recruit'] / \ - preservice_training_cost[ - 'annuitisation_rate'] - - # Cost per student trained * 1/Rate of absorption from the local and foreign graduates * 1/Graduation rate * attrition rate - # the inverse of attrition rate is the average expected tenure; and the preservice training cost needs to be divided by the average tenure + preservice_training_cost['annuitised_training_and_recruitment_cost_per_recruit'] = \ + (preservice_training_cost['npv_of_training_and_recruitment_cost_per_recruit'] / + preservice_training_cost['annuitisation_rate']) + + # Cost per student trained * 1/Rate of absorption from the local and foreign graduates + # * 1/Graduation rate * attrition rate + # the inverse of attrition rate is the average expected tenure; and the preservice training cost needs to + # be divided by the average tenure preservice_training_cost['cost'] = preservice_training_cost[ 'annuitised_training_and_recruitment_cost_per_recruit'] * \ preservice_training_cost['staff_count'] * preservice_training_cost[ - 'annual_attrition_rate'] # not multiplied with attrition rate again because this is already factored into 'Annual_cost_per_staff_recruited' + 'annual_attrition_rate'] # not multiplied with attrition rate again + # because this is already factored into 'Annual_cost_per_staff_recruited' preservice_training_cost = preservice_training_cost[ ['draw', 'run', 'year', 'OfficerType', 'Facility_Level', 'cost']] @@ -776,7 +786,8 @@ def label_rows_of_cost_dataframe(_df, label_var, label): # Initialize HR with the salary data if (cost_only_used_staff): human_resource_costs = retain_relevant_column_subset( - label_rows_of_cost_dataframe(salary_for_staff, 'cost_subcategory', 'salary_for_cadres_used'), 'OfficerType') + label_rows_of_cost_dataframe(salary_for_staff, 'cost_subcategory', 'salary_for_cadres_used'), + 'OfficerType') # Concatenate additional cost categories additional_costs = [ (preservice_training_cost, 'preservice_training_and_recruitment_cost_for_attrited_workers'), @@ -785,7 +796,8 @@ def label_rows_of_cost_dataframe(_df, label_var, label): ] else: human_resource_costs = retain_relevant_column_subset( - label_rows_of_cost_dataframe(salary_for_staff, 'cost_subcategory', 'salary_for_all_staff'), 'OfficerType') + label_rows_of_cost_dataframe(salary_for_staff, 'cost_subcategory', 'salary_for_all_staff'), + 'OfficerType') # Concatenate additional cost categories additional_costs = [ (preservice_training_cost, 'preservice_training_and_recruitment_cost_for_attrited_workers'), @@ -859,7 +871,8 @@ def get_counts_of_items_requested(_df): # 2.1 Cost of consumables dispensed # --------------------------------------------------------------------------------------------------------------- # Multiply number of items needed by cost of consumable - # consumables_dispensed.columns = consumables_dispensed.columns.get_level_values(0).str() + "_" + consumables_dispensed.columns.get_level_values(1) # Flatten multi-level columns for pandas merge + # consumables_dispensed.columns = consumables_dispensed.columns.get_level_values(0).str() + "_" + + # consumables_dispensed.columns.get_level_values(1) # Flatten multi-level columns for pandas merge unit_costs['consumables'].columns = pd.MultiIndex.from_arrays( [unit_costs['consumables'].columns, [''] * len(unit_costs['consumables'].columns)]) cost_of_consumables_dispensed = consumables_dispensed.merge(unit_costs['consumables'], on=idx['Item_Code'], @@ -893,8 +906,20 @@ def get_counts_of_items_requested(_df): left_on='Item_Code', right_on='item_code', validate='m:1', how='left') + # Identify rows where excess_stock_proportion_of_dispensed is NaN + missing_excess_stock = ( + cost_of_excess_consumables_stocked + ['excess_stock_proportion_of_dispensed'] + .isna() + ) + + # Fill missing values with the average inflow-to-outflow ratio minus 1 + fill_value = average_inflow_to_outflow_ratio_ratio - 1 + cost_of_excess_consumables_stocked.loc[ - cost_of_excess_consumables_stocked.excess_stock_proportion_of_dispensed.isna(), 'excess_stock_proportion_of_dispensed'] = average_inflow_to_outflow_ratio_ratio - 1 # TODO disaggregate the average by program + missing_excess_stock, + 'excess_stock_proportion_of_dispensed' + ] = fill_value # TODO: disaggregate the average by program cost_of_excess_consumables_stocked[quantity_columns] = cost_of_excess_consumables_stocked[ quantity_columns].multiply(cost_of_excess_consumables_stocked[idx[price_column]], axis=0) cost_of_excess_consumables_stocked[quantity_columns] = cost_of_excess_consumables_stocked[ @@ -912,7 +937,8 @@ def get_counts_of_items_requested(_df): def melt_and_label_consumables_cost(_df, label): multi_index = pd.MultiIndex.from_tuples(_df.columns) _df.columns = multi_index - # Select 'Item_Code', 'year', and all columns where both levels of the MultiIndex are numeric (these are the (draw,run) columns with cost values) + # Select 'Item_Code', 'year', and all columns where both levels of the MultiIndex are numeric + # (these are the (draw,run) columns with cost values) selected_columns = [col for col in _df.columns if (col[0] in ['Item_Code', 'year']) or (isinstance(col[0], int) and isinstance(col[1], int))] _df = _df[selected_columns] # Subset the dataframe with the selected columns @@ -924,13 +950,14 @@ def melt_and_label_consumables_cost(_df, label): melted_df['consumable'] = melted_df['Item_Code'].map(consumables_dict) melted_df['cost_subcategory'] = label melted_df[ - 'Facility_Level'] = 'all' # TODO this is temporary until 'tlo.methods.healthsystem.summary' only logs consumable at the aggregate level + 'Facility_Level'] = 'all' + # TODO this is temporary until 'tlo.methods.healthsystem.summary' only logs consumable at the aggregate level melted_df = melted_df.rename(columns={'value': 'cost'}) return melted_df def disaggregate_separately_managed_medical_supplies_from_consumable_costs(_df, _consumables_dict, - # This is a dictionary mapping codes to names + # This is a dictionary mapping codes to names list_of_unique_medical_products): reversed_consumables_dict = {value: key for key, value in _consumables_dict.items()} # reverse dictionary to map names to codes @@ -946,24 +973,26 @@ def disaggregate_separately_managed_medical_supplies_from_consumable_costs(_df, columns='item_code') separately_managed_medical_supplies = [127, 141, 161] # Oxygen, Blood, IRS - cost_of_consumables_dispensed, cost_of_separately_managed_medical_supplies_dispensed = disaggregate_separately_managed_medical_supplies_from_consumable_costs( + cost_of_consumables_dispensed, cost_of_separately_managed_medical_supplies_dispensed = ( + disaggregate_separately_managed_medical_supplies_from_consumable_costs( _df=retain_relevant_column_subset( melt_and_label_consumables_cost(cost_of_consumables_dispensed, 'cost_of_consumables_dispensed'), 'consumable'), _consumables_dict=consumables_dict, - list_of_unique_medical_products=separately_managed_medical_supplies) - cost_of_excess_consumables_stocked, cost_of_separately_managed_medical_supplies_excess_stock = disaggregate_separately_managed_medical_supplies_from_consumable_costs( + list_of_unique_medical_products=separately_managed_medical_supplies)) + cost_of_excess_consumables_stocked, cost_of_separately_managed_medical_supplies_excess_stock = ( + disaggregate_separately_managed_medical_supplies_from_consumable_costs( _df=retain_relevant_column_subset( melt_and_label_consumables_cost(cost_of_excess_consumables_stocked, 'cost_of_excess_consumables_stocked'), 'consumable'), _consumables_dict=consumables_dict, - list_of_unique_medical_products=separately_managed_medical_supplies) + list_of_unique_medical_products=separately_managed_medical_supplies)) consumable_costs = pd.concat([cost_of_consumables_dispensed, cost_of_excess_consumables_stocked]) # 2.4 Supply chain costs # --------------------------------------------------------------------------------------------------------------- - # Assume that the cost of procurement, warehousing and distribution is a fixed proportion of consumable purchase costs + # Assume that the cost of procurement,warehousing and distribution is a fixed proportion of consumable purchase # The fixed proportion is based on Resource Mapping Expenditure data from 2018 resource_mapping_data = unit_costs['actual_expenditure_data'] # Make sure values are numeric @@ -1019,7 +1048,8 @@ def disaggregate_separately_managed_medical_supplies_from_consumable_costs(_df, # -------------------------------------------- print("Now estimating Medical equipment costs...") - # Total cost of equipment required as per SEL (HSSP-III) only at facility IDs where it has been used in the simulation + # Total cost of equipment required as per SEL (HSSP-III) only at facility IDs where it has been used in the + # simulation # Get list of equipment used in the simulation by district and level def get_equipment_used_by_district_and_facility(_df: pd.Series) -> pd.Series: """Summarise the parsed logged-key results for one draw (as dataframe) into a pd.Series.""" @@ -1084,7 +1114,8 @@ def get_equipment_used_by_district_and_facility(_df: pd.Series) -> pd.Series: on=['District', 'Facility_Level'], how='left') equipment_df.loc[equipment_df.Facility_Count.isna(), 'Facility_Count'] = 0 - # Because levels 1b and 2 are collapsed together, we assume that the same equipment is used by level 1b as that recorded for level 2 + # Because levels 1b and 2 are collapsed together, we assume that the same equipment is used by level 1b as + # that recorded for level 2 def update_itemuse_for_level1b_using_level2_data(_df): # Create a list of District and Item_code combinations for which use == True list_of_equipment_used_at_level2 = \ @@ -1140,7 +1171,8 @@ def update_itemuse_for_level1b_using_level2_data(_df): # Assume that the annual costs are constant each year of the simulation equipment_costs = pd.concat([equipment_costs.assign(year=year) for year in years]) - # TODO If the logger is updated to include year, we may wish to calculate equipment costs by year - currently we assume the same annuitised equipment cost each year + # TODO If the logger is updated to include year, we may wish to calculate equipment costs by year + # (currently we assume the same annuitised equipment cost each year) equipment_costs = equipment_costs.reset_index(drop=True) equipment_costs = equipment_costs.rename(columns={'Equipment_tlo': 'Equipment'}) equipment_costs = prepare_cost_dataframe(equipment_costs, _category_specific_group='Equipment', @@ -1234,8 +1266,8 @@ def update_itemuse_for_level1b_using_level2_data(_df): # Define a function to summarize cost data from -# Note that the dataframe needs to have draw as index and run as columns. if the dataframe is long with draw and run as index, then -# first unstack the dataframe and subsequently apply the summarize function +# Note that the dataframe needs to have draw as index and run as columns. if the dataframe is long with draw and run as +# index, then first unstack the dataframe and subsequently apply the summarize function def summarize_cost_data(_df, _metric: Literal['mean', 'median'] = 'mean') -> pd.DataFrame: """ @@ -1291,8 +1323,8 @@ def estimate_projected_health_spending(resourcefilepath: Path, """ Estimate total projected health spending for a simulation period. - Combines health spending per capita projections (Dieleman et al, 2019) with simulated population estimates to calculate - total health expenditure, optionally applying a discount rate and summarizing across runs. + Combines health spending per capita projections (Dieleman et al, 2019) with simulated population estimates to + calculate total health expenditure, optionally applying a discount rate and summarizing across runs. Parameters: ---------- @@ -1521,7 +1553,8 @@ def do_stacked_bar_plot_of_cost_by_category(_df: pd.DataFrame, if (_disaggregate_by_subgroup is True): for name, df in dfs.items(): dfs[name] = df.copy() # Choose the dataframe to modify - # If sub-groups are more than 10 in number, then disaggregate the top 10 and group the rest into an 'other' category + # If sub-groups are more than 10 in number, then disaggregate the top 10 and group the rest into an + # 'other' category if (len(dfs[name]['cost_subgroup'].unique()) > 10): # Calculate total cost per subgroup subgroup_totals = dfs[name].groupby('cost_subgroup')['cost'].sum() @@ -1967,7 +2000,8 @@ def wrap_text(text, width=15): if (len(_df['cost_subgroup'].unique()) > 10): # Step 2: Group all other consumables into "Other" other_cost = _df.iloc[10:]["cost"].sum() - top_10 = pd.concat([top_10, pd.DataFrame([{"cost_subgroup": "Other", "cost": other_cost}])], ignore_index=True) + top_10 = pd.concat([top_10, pd.DataFrame([{"cost_subgroup": "Other", "cost": other_cost}])], + ignore_index=True) # Prepare data for the treemap total_cost = top_10["cost"].sum() @@ -2112,7 +2146,8 @@ def generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health: # Initialize an empty DataFrame to store values for each 'run' all_run_values = pd.DataFrame() - # Create an array of implementation costs ranging from 0 to the max value of max ability to pay for the current draw + # Create an array of implementation costs ranging from 0 to the max value of max ability to pay for the current + # draw implementation_costs = np.linspace(0, max_ability_to_pay_for_implementation.loc[draw_index].max(), 50) # Add fixed values for ROI ratio calculation additional_costs = np.array([1_000_000_000, 3_000_000_000]) @@ -2218,7 +2253,8 @@ def generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health: # Replace specific x-ticks with % of health spending values if _projected_health_spending: xtick_labels[ - 1] = f'{xticks[1]:,.0f}\n({xticks[1] / (_projected_health_spending / 1e6) :.2%} of \n projected total \n health spend)' + 1] = (f'{xticks[1]:,.0f}\n({xticks[1] / (_projected_health_spending / 1e6) :.2%} of \n projected total ' + f'\n health spend)') for i, tick in enumerate(xticks): if (i != 0) & (i != 1): # Replace for 4000 xtick_labels[i] = f'{tick:,.0f}\n({tick / (_projected_health_spending / 1e6) :.2%})' diff --git a/src/tlo/analysis/utils.py b/src/tlo/analysis/utils.py index 073a6d1ad9..487f965ea9 100644 --- a/src/tlo/analysis/utils.py +++ b/src/tlo/analysis/utils.py @@ -308,7 +308,8 @@ def extract_results(results_folder: Path, `custom_generate_series`. Optionally, with `do_scaling=True`, each element is multiplied by the scaling_factor recorded in the simulation. - If the suspend-and-resume functionality is used, scaling factor may be avaialble in the folder where the log of the suspended run are stored. + If the suspend-and-resume functionality is used, scaling factor may be avaialble in the folder where the log + of the suspended run are stored. Note that if runs in the batch have failed (such that logs have not been generated), these are dropped silently. """ From 3b908a9755a9616a659cad057bc545f22a6cd68a Mon Sep 17 00:00:00 2001 From: sakshimohan Date: Sat, 7 Mar 2026 18:42:04 +0000 Subject: [PATCH 4/4] allow cost estimation function to work without specifying suspended_results_folder - if suspend and resume is not used --- src/scripts/costing/cost_estimation.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/scripts/costing/cost_estimation.py b/src/scripts/costing/cost_estimation.py index 5cc3f5e5cd..594c2d1d01 100644 --- a/src/scripts/costing/cost_estimation.py +++ b/src/scripts/costing/cost_estimation.py @@ -351,6 +351,8 @@ def melt_model_output_draws_and_runs(_df, id_vars): _draws = range(0, info['number_of_draws']) if _runs is None: _runs = range(0, info['runs_per_draw']) + if suspended_results_folder is None: + suspended_results_folder = results_folder # Load cost input files # ------------------------