diff --git a/resources/ResourceFile_LabourSkilledBirthAttendance/parameter_values.csv b/resources/ResourceFile_LabourSkilledBirthAttendance/parameter_values.csv index 5b52cf7bf6..aaa1c377bd 100644 --- a/resources/ResourceFile_LabourSkilledBirthAttendance/parameter_values.csv +++ b/resources/ResourceFile_LabourSkilledBirthAttendance/parameter_values.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:572e3e4f91cdbf0aa74360233e0a8478dfb70aa545aa477c112ee1175ca1597d +oid sha256:8aa212d272c7ba2e8e9126b49628353c5b139ce8281c5f5b14f39261b2edf59a size 10058 diff --git a/resources/ResourceFile_PregnancySupervisor/parameter_values.csv b/resources/ResourceFile_PregnancySupervisor/parameter_values.csv index ae262158ad..d93823531f 100644 --- a/resources/ResourceFile_PregnancySupervisor/parameter_values.csv +++ b/resources/ResourceFile_PregnancySupervisor/parameter_values.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b4d6e0147ff710cc5eedd4882ec5db84ecdca4f76938617cace8e44a12d4f9d5 +oid sha256:b7932d13ac16abf7a60182e9d220e26280fb766b02f10f7ffc3ee9ff7d567309 size 13738 diff --git a/resources/costing/ResourceFile_Consumables_Inflow_Outflow_Ratio.csv b/resources/costing/ResourceFile_Consumables_Inflow_Outflow_Ratio.csv new file mode 100644 index 0000000000..fd978bc7be --- /dev/null +++ b/resources/costing/ResourceFile_Consumables_Inflow_Outflow_Ratio.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c207f3cf23b7e622829b8e58273baf94e768ab8d8abea8c7fd3d74fa9c854f9 +size 486428 diff --git a/resources/costing/ResourceFile_Costing.xlsx b/resources/costing/ResourceFile_Costing.xlsx index 9f06132aaa..873865ee90 100644 --- a/resources/costing/ResourceFile_Costing.xlsx +++ b/resources/costing/ResourceFile_Costing.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2afa3649672e10c9741b26dc70fa0f4496af4fccfafbf8b7b70f3b90b291a4fb -size 1007463 +oid 
sha256:0799f6e699265ccda880b9bb193ce7414368256ec2dfc4e7837cd43d3e5e3aba +size 4307463 diff --git a/resources/costing/ResourceFile_Costing_Consumables.csv b/resources/costing/ResourceFile_Costing_Consumables.csv new file mode 100644 index 0000000000..dfa61f63e2 --- /dev/null +++ b/resources/costing/ResourceFile_Costing_Consumables.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fca1834c38c657dbbd53616e3972b8909882faa1767a22f11349ed2bbcbca183 +size 26791 diff --git a/resources/costing/ResourceFile_Costing_Equipment.csv b/resources/costing/ResourceFile_Costing_Equipment.csv new file mode 100644 index 0000000000..d0570dc3bb --- /dev/null +++ b/resources/costing/ResourceFile_Costing_Equipment.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e3d3624de86fffd3110bbeb906a21396f17f5a166be5f1c811e8f83f40f9c6e +size 41099 diff --git a/resources/costing/ResourceFile_Costing_Facility_Operations.csv b/resources/costing/ResourceFile_Costing_Facility_Operations.csv new file mode 100644 index 0000000000..2ab6811816 --- /dev/null +++ b/resources/costing/ResourceFile_Costing_Facility_Operations.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:297dedcc993e20ea6d9f2c1b52f673c87a8a53d3a9561ecc0c70851163d70f69 +size 746 diff --git a/resources/costing/ResourceFile_Costing_HR.csv b/resources/costing/ResourceFile_Costing_HR.csv new file mode 100644 index 0000000000..c081fa29d3 --- /dev/null +++ b/resources/costing/ResourceFile_Costing_HR.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc7a4df6caf64a3fe1f6ecf1918657a6fdc19d13acc2bf14a87692602c0607c9 +size 64534 diff --git a/resources/costing/ResourceFile_Exchange_Rates_and_Inflation.csv b/resources/costing/ResourceFile_Exchange_Rates_and_Inflation.csv new file mode 100644 index 0000000000..4e03769c5b --- /dev/null +++ b/resources/costing/ResourceFile_Exchange_Rates_and_Inflation.csv @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:bd430087a6ec5e50ae26e6c1e0a286bee063cf3762d0588a0cd3c28a855218a9 +size 990 diff --git a/resources/costing/ResourceFile_Health_Spending_Projections.csv b/resources/costing/ResourceFile_Health_Spending_Projections.csv new file mode 100644 index 0000000000..d27053b435 --- /dev/null +++ b/resources/costing/ResourceFile_Health_Spending_Projections.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecf6dc7abecd47af8d06c8754f8b9fe780c2f5de493b8c3d9442c447bcc6fe4c +size 6308 diff --git a/resources/costing/ResourceFile_Resource_Mapping.csv b/resources/costing/ResourceFile_Resource_Mapping.csv new file mode 100644 index 0000000000..af4d932186 --- /dev/null +++ b/resources/costing/ResourceFile_Resource_Mapping.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26461a60cc15796c2bd3be47ced6255e1bac58d8c52cfb4d12eeb681545f367e +size 12281 diff --git a/resources/healthsystem/ResourceFile_HealthSystem_parameters.csv b/resources/healthsystem/ResourceFile_HealthSystem_parameters.csv index 8c670a914d..1d84953282 100644 --- a/resources/healthsystem/ResourceFile_HealthSystem_parameters.csv +++ b/resources/healthsystem/ResourceFile_HealthSystem_parameters.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f1cb38ba76c5673855e2e17e28ad1d36b5cec07d5d7872a7d6bc8aafde0f7009 -size 828 +oid sha256:ff1308ae425e4ac96aadac05b753c0f0f9fe5103a34848b93feb856d7c4e123b +size 896 diff --git a/resources/healthsystem/organisation/ResourceFile_Master_Facilities_List.csv b/resources/healthsystem/organisation/ResourceFile_Master_Facilities_List.csv index 9468bcf080..5ebedf3aab 100644 --- a/resources/healthsystem/organisation/ResourceFile_Master_Facilities_List.csv +++ b/resources/healthsystem/organisation/ResourceFile_Master_Facilities_List.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:73bfb5a34b54939cbaf842feceef1013c6309d3b1e0a5ef27ffc84577ac3519e -size 6602 
+oid sha256:c6df4a42409b22d0b10d56ec077f6f4b5ccbed0f16f570fedbfd397e100063a9 +size 8471 diff --git a/resources/service integration/parameter_values.csv b/resources/service integration/parameter_values.csv new file mode 100644 index 0000000000..37399c995f --- /dev/null +++ b/resources/service integration/parameter_values.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d34f163280db13af098f2a69cf79a821d62f13b6891c4597535a9aa1616128ce +size 75 diff --git a/src/scripts/costing/cost_analysis_hss_elements.py b/src/scripts/costing/cost_analysis_hss_elements.py new file mode 100644 index 0000000000..2ea426bd5c --- /dev/null +++ b/src/scripts/costing/cost_analysis_hss_elements.py @@ -0,0 +1,512 @@ +import argparse +from pathlib import Path +from tlo import Date +from collections import Counter, defaultdict + +import calendar +import datetime +import os +import textwrap + +import matplotlib.pyplot as plt +from matplotlib.ticker import FuncFormatter +import numpy as np +import pandas as pd +import ast +import math + +from tlo.analysis.utils import ( + extract_params, + extract_results, + get_scenario_info, + get_scenario_outputs, + load_pickled_dataframes, + make_age_grp_lookup, + make_age_grp_types, + summarize, + create_pickles_locally, + parse_log_file, + unflatten_flattened_multi_index_in_logging +) + +from scripts.costing.cost_estimation import (estimate_input_cost_of_scenarios, + summarize_cost_data, + apply_discounting_to_cost_data, + do_stacked_bar_plot_of_cost_by_category, + do_line_plot_of_cost, + # generate_roi_plots, + generate_multiple_scenarios_roi_plot) + +# Define a timestamp for script outputs +timestamp = datetime.datetime.now().strftime("_%Y_%m_%d_%H_%M") + +# Print the start time of the script +print('Script Start', datetime.datetime.now().strftime('%H:%M')) + +# Create folders to store results +resourcefilepath = Path("./resources") +outputfilepath = Path('./outputs/t.mangal@imperial.ac.uk') +figurespath = 
Path('./outputs/global_fund_roi_analysis/hss_elements/') +if not os.path.exists(figurespath): + os.makedirs(figurespath) +roi_outputs_folder_gf = Path(figurespath / 'gf/roi') +if not os.path.exists(roi_outputs_folder_gf): + os.makedirs(roi_outputs_folder_gf) +roi_outputs_folder_fcdo = Path(figurespath / 'fcdo/roi') +if not os.path.exists(roi_outputs_folder_fcdo): + os.makedirs(roi_outputs_folder_fcdo) + +# Load result files +# ------------------------------------------------------------------------------------------------------------------ +results_folder = get_scenario_outputs('hss_elements-2024-10-22T163857Z.py', outputfilepath)[0] + +# Check can read results from draw=0, run=0 +log = load_pickled_dataframes(results_folder, 0, 0) # look at one log (so can decide what to extract) +params = extract_params(results_folder) + +# Declare default parameters for cost analysis +# ------------------------------------------------------------------------------------------------------------------ +# Period relevant for costing +TARGET_PERIOD_INTERVENTION = (Date(2025, 1, 1), Date(2035, 12, 31)) # This is the period that is costed +relevant_period_for_costing = [i.year for i in TARGET_PERIOD_INTERVENTION] +list_of_relevant_years_for_costing = list(range(relevant_period_for_costing[0], relevant_period_for_costing[1] + 1)) + +# Scenarios +hss_scenarios = {0: "Baseline", 1: "HRH Moderate Scale-up (1%)", 2: "HRH Scale-up Following Historical Growth", 3: "HRH Accelerated Scale-up (6%)", + 4: "Increase Capacity at Primary Care Levels", 5: "Increase Capacity of CHW", 6: "Consumables Increased to 75th Percentile", + 7: "Consumables Available at HIV levels", 8: "Consumables Available at EPI levels", 9: "Perfect Consumables Availability", + 10: "HSS PACKAGE: Perfect", 11: "HSS PACKAGE: Realistic expansion, no change in HSB", 12: "HSS PACKAGE: Realistic expansion"} +hs_scenarios_substitutedict_fcdo = {0:"0", 1: "A", 2: "B", 3: "C", +4: "D", 5: "5", 6: "E", +7: "F", 8: "G", 9: "H", +10: 
"I", 11: "11", 12: "J"} +hs_scenarios_substitutedict_gf = {0:"0", 1: "A", 2: "B", 3: "C", +4: "D", 5: "E", 6: "F", +7: "G", 8: "H", 9: "9", +10: "10", 11: "11", 12: "I"} +hss_scenarios_for_fcdo_report = [0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 12] +hss_scenarios_for_gf_report = [0, 1, 2, 3, 4, 5, 6, 7, 8, 12] +color_map = { + 'Baseline': '#a50026', + 'HRH Moderate Scale-up (1%)': '#d73027', + 'HRH Scale-up Following Historical Growth': '#f46d43', + 'HRH Accelerated Scale-up (6%)': '#fdae61', + 'Increase Capacity at Primary Care Levels': '#fee08b', + 'Increase Capacity of CHW': '#ffffbf', + 'Consumables Increased to 75th Percentile': '#d9ef8b', + 'Consumables Available at HIV levels': '#a6d96a', + 'Consumables Available at EPI levels': '#66bd63', + 'Perfect Consumables Availability': '#1a9850', + 'HSS PACKAGE: Perfect': '#3288bd', + 'HSS PACKAGE: Realistic expansion': '#5e4fa2' +} + +# Cost-effectiveness threshold +chosen_cet = 199.620811947318 # This is based on the estimate from Lomas et al (2023)- $160.595987085533 in 2019 USD coverted to 2023 USD +# based on Ochalek et al (2018) - the paper provided the value $61 in 2016 USD terms, this value is $77.4 in 2023 USD terms +chosen_value_of_statistical_life = 834 + +# Discount rate +discount_rate = 0.03 + +# Define a function to create bar plots +def do_bar_plot_with_ci(_df, annotations=None, xticklabels_horizontal_and_wrapped=False): + """Make a vertical bar plot for each row of _df, using the columns to identify the height of the bar and the + extent of the error bar.""" + + # Calculate y-error bars + yerr = np.array([ + (_df['mean'] - _df['lower']).values, + (_df['upper'] - _df['mean']).values, + ]) + + # Map xticks based on the hss_scenarios dictionary + xticks = {index: hss_scenarios.get(index, f"Scenario {index}") for index in _df.index} + + # Retrieve colors from color_map based on the xticks labels + colors = [color_map.get(label, '#333333') for label in xticks.values()] # default to grey if not found + + # Generate 
consecutive x positions for the bars, ensuring no gaps + x_positions = np.arange(len(xticks)) # Consecutive integers for each bar position + + fig, ax = plt.subplots() + ax.bar( + x_positions, + _df['mean'].values, + yerr=yerr, + color=colors, # Set bar colors + alpha=1, + ecolor='black', + capsize=10, + ) + + # Add optional annotations above each bar + if annotations: + for xpos, ypos, text in zip(x_positions, _df['upper'].values, annotations): + ax.text(xpos, ypos * 1.05, text, horizontalalignment='center', fontsize=8) + + # Set x-tick labels with wrapped text if required + wrapped_labs = ["\n".join(textwrap.wrap(label, 25)) for label in xticks.values()] + ax.set_xticks(x_positions) # Set x-ticks to consecutive positions + ax.set_xticklabels(wrapped_labs, rotation=45 if not xticklabels_horizontal_and_wrapped else 0, ha='right', + fontsize=8) + + # Set y-axis limit to upper max + 500 + ax.set_ylim(_df['lower'].min()*1.25, _df['upper'].max()*1.25) + + # Set font size for y-tick labels and grid + ax.tick_params(axis='y', labelsize=9) + ax.tick_params(axis='x', labelsize=9) + + ax.grid(axis="y") + ax.spines['top'].set_visible(False) + ax.spines['right'].set_visible(False) + fig.tight_layout() + + return fig, ax + +def do_standard_bar_plot_with_ci(_df, set_colors=None, annotations=None, + xticklabels_horizontal_and_wrapped=False, + put_labels_in_legend=True, + offset=1e6): + """Make a vertical bar plot for each row of _df, using the columns to identify the height of the bar and the + extent of the error bar.""" + + substitute_labels = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + + yerr = np.array([ + (_df['mean'] - _df['lower']).values, + (_df['upper'] - _df['mean']).values, + ]) +# TODO should be above be 'median' + xticks = {(i + 0.5): k for i, k in enumerate(_df.index)} + + if set_colors: + colors = [color_map.get(series, 'grey') for series in _df.index] + else: + cmap = sns.color_palette('Spectral', as_cmap=True) + rescale = lambda y: (y - np.min(y)) / (np.max(y) - np.min(y)) # 
noqa: E731 + colors = list(map(cmap, rescale(np.array(list(xticks.keys()))))) if put_labels_in_legend else None + + fig, ax = plt.subplots(figsize=(10, 5)) + ax.bar( + xticks.keys(), + _df['mean'].values, + yerr=yerr, + ecolor='black', + color=colors, + capsize=10, + label=xticks.values() + ) + + if annotations: + for xpos, (ypos, text) in zip(xticks.keys(), zip(_df['upper'].values.flatten(), annotations)): + annotation_y = ypos + offset + + ax.text( + xpos, + annotation_y, + '\n'.join(text.split(' ', 1)), + horizontalalignment='center', + verticalalignment='bottom', # Aligns text at the bottom of the annotation position + fontsize='x-small', + rotation='horizontal' + ) + + ax.set_xticks(list(xticks.keys())) + + if put_labels_in_legend: + # Update xticks label with substitute labels + # Insert legend with updated labels that shows correspondence between substitute label and original label + xtick_values = [letter for letter, label in zip(substitute_labels, xticks.values())] + xtick_legend = [f'{letter}: {label}' for letter, label in zip(substitute_labels, xticks.values())] + h, legs = ax.get_legend_handles_labels() + ax.legend(h, xtick_legend, loc='center left', fontsize='small', bbox_to_anchor=(1, 0.5)) + ax.set_xticklabels(list(xtick_values)) + else: + if not xticklabels_horizontal_and_wrapped: + # xticklabels will be vertical and not wrapped + ax.set_xticklabels(list(xticks.values()), rotation=90) + else: + wrapped_labs = ["\n".join(textwrap.wrap(_lab, 20)) for _lab in xticks.values()] + ax.set_xticklabels(wrapped_labs) + + ax.grid(axis="y") + ax.spines['top'].set_visible(False) + ax.spines['right'].set_visible(False) + fig.tight_layout(pad=2.0) + plt.subplots_adjust(left=0.15, right=0.85) # Adjust left and right margins + + return fig, ax + +# Estimate standard input costs of scenario +# ----------------------------------------------------------------------------------------------------------------------- +input_costs = 
estimate_input_cost_of_scenarios(results_folder, resourcefilepath, + _years=list_of_relevant_years_for_costing, cost_only_used_staff=True, + _discount_rate = discount_rate) +# _draws = htm_scenarios_for_gf_report --> this subset is created after calculating malaria scale up costs +# TODO Remove the manual fix below once the logging for these is corrected +input_costs.loc[input_costs.cost_subgroup == 'Oxygen, 1000 liters, primarily with oxygen cylinders', 'cost'] = \ + input_costs.loc[input_costs.cost_subgroup == 'Oxygen, 1000 liters, primarily with oxygen cylinders', 'cost']/10 +input_costs.loc[input_costs.cost_subgroup == 'Depot-Medroxyprogesterone Acetate 150 mg - 3 monthly', 'cost'] =\ + input_costs.loc[input_costs.cost_subgroup == 'Depot-Medroxyprogesterone Acetate 150 mg - 3 monthly', 'cost']/7 +#input_costs = apply_discounting_to_cost_data(input_costs, _discount_rate = discount_rate) + +# %% +# Return on Invesment analysis +# Calculate incremental cost +# ----------------------------------------------------------------------------------------------------------------------- +# Aggregate input costs for further analysis +input_costs_subset = input_costs[ + (input_costs['year'] >= relevant_period_for_costing[0]) & (input_costs['year'] <= relevant_period_for_costing[1])] +# TODO the above step may not longer be needed +total_input_cost = input_costs_subset.groupby(['draw', 'run'])['cost'].sum() +total_input_cost_summarized = summarize_cost_data(total_input_cost.unstack(level='run')) +def find_difference_relative_to_comparison(_ser: pd.Series, + comparison: str, + scaled: bool = False, + drop_comparison: bool = True, + ): + """Find the difference in the values in a pd.Series with a multi-index, between the draws (level 0) + within the runs (level 1), relative to where draw = `comparison`. 
+ The comparison is `X - COMPARISON`.""" + return _ser \ + .unstack(level=0) \ + .apply(lambda x: (x - x[comparison]) / (x[comparison] if scaled else 1.0), axis=1) \ + .drop(columns=([comparison] if drop_comparison else [])) \ + .stack() + + +incremental_scenario_cost = (pd.DataFrame( + find_difference_relative_to_comparison( + total_input_cost, + comparison=0) # sets the comparator to 0 which is the Actual scenario +).T.iloc[0].unstack()).T + +# Monetary value of health impact +# ----------------------------------------------------------------------------------------------------------------------- +def get_num_dalys(_df): + """Return total number of DALYS (Stacked) by label (total within the TARGET_PERIOD). + Throw error if not a record for every year in the TARGET PERIOD (to guard against inadvertently using + results from runs that crashed mid-way through the simulation. + """ + years_needed = relevant_period_for_costing # [i.year for i in TARGET_PERIOD_INTERVENTION] + assert set(_df.year.unique()).issuperset(years_needed), "Some years are not recorded." 
+ _df = _df.loc[_df.year.between(*years_needed)].drop(columns=['date', 'sex', 'age_range']).groupby('year').sum().sum(axis = 1) + + # Initial year and discount rate + initial_year = min(_df.index.unique()) + + # Calculate the discounted values + discounted_values = _df / (1 + discount_rate) ** (_df.index - initial_year) + + return pd.Series(discounted_values.sum()) + +num_dalys = extract_results( + results_folder, + module='tlo.methods.healthburden', + key='dalys_stacked', + custom_generate_series=get_num_dalys, + do_scaling=True +) + +# Get absolute DALYs averted +num_dalys_averted = (-1.0 * + pd.DataFrame( + find_difference_relative_to_comparison( + num_dalys.loc[0], + comparison=0) # sets the comparator to 0 which is the Actual scenario + ).T.iloc[0].unstack(level='run')) +num_dalys_averted_fcdo_scenarios = num_dalys_averted[ + num_dalys_averted.index.get_level_values(0).isin(hss_scenarios_for_fcdo_report)] +num_dalys_averted_gf_scenarios = num_dalys_averted[ + num_dalys_averted.index.get_level_values(0).isin(hss_scenarios_for_gf_report)] + +# The monetary value of the health benefit is delta health times CET (negative values are set to 0) +def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_of_life_year): + monetary_value_of_incremental_health = (_num_dalys_averted * _chosen_value_of_life_year).clip(lower=0.0) + return monetary_value_of_incremental_health + +# TODO check that the above calculation is correct + +# 3. 
Return on Investment Plot +# ---------------------------------------------------- +# FCDO +# Combined ROI plot of relevant scenarios +generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), + _incremental_input_cost=incremental_scenario_cost, + _draws = [1,2,3,4], + _scenario_dict = hss_scenarios, + _outputfilepath=roi_outputs_folder_fcdo, + _value_of_life_suffix = 'HR_VSL') + +# Combined ROI plot of relevant scenarios +generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), + _incremental_input_cost=incremental_scenario_cost, + _draws = [6,7,8,9], + _scenario_dict = hss_scenarios, + _outputfilepath=roi_outputs_folder_fcdo, + _value_of_life_suffix = 'Consumables_VSL') + +# Combined ROI plot of relevant scenarios +generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), + _incremental_input_cost=incremental_scenario_cost, + _draws = [10,12], + _scenario_dict = hss_scenarios, + _outputfilepath=roi_outputs_folder_fcdo, + _value_of_life_suffix = 'HSS_VSL') + +# Global Fund +# Combined ROI plot of relevant scenarios +generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), + _incremental_input_cost=incremental_scenario_cost, + _draws = [1,2,3,4,5], + _scenario_dict = hss_scenarios, + _outputfilepath=roi_outputs_folder_gf, + _value_of_life_suffix = 'HR_VSL') + +# Combined ROI plot of relevant scenarios 
+generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), + _incremental_input_cost=incremental_scenario_cost, + _draws = [6,7,8], + _scenario_dict = hss_scenarios, + _outputfilepath=roi_outputs_folder_gf, + _value_of_life_suffix = 'Consumables_VSL') + +# Combined ROI plot of relevant scenarios +generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), + _incremental_input_cost=incremental_scenario_cost, + _draws = [12], + _scenario_dict = hss_scenarios, + _outputfilepath=roi_outputs_folder_gf, + _value_of_life_suffix = 'HSS_VSL') + +# 4. Plot Maximum ability-to-pay at CET +# ---------------------------------------------------- +max_ability_to_pay_for_implementation = (get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_cet) - incremental_scenario_cost).clip( + lower=0.0) # monetary value - change in costs +max_ability_to_pay_for_implementation_summarized = summarize_cost_data(max_ability_to_pay_for_implementation) +max_ability_to_pay_for_implementation_summarized_fcdo = max_ability_to_pay_for_implementation_summarized[ + max_ability_to_pay_for_implementation_summarized.index.get_level_values(0).isin(hss_scenarios_for_fcdo_report)] +max_ability_to_pay_for_implementation_summarized_gf = max_ability_to_pay_for_implementation_summarized[ + max_ability_to_pay_for_implementation_summarized.index.get_level_values(0).isin(hss_scenarios_for_gf_report)] + +# FCDO +# Plot Maximum ability to pay +name_of_plot = f'Maximum ability to pay at CET, {relevant_period_for_costing[0]}-{relevant_period_for_costing[1]}' +fig, ax = do_standard_bar_plot_with_ci( + (max_ability_to_pay_for_implementation_summarized_fcdo / 1e6), + annotations=[ + f"{round(row['mean'] / 
1e6, 1)} \n ({round(row['lower'] / 1e6, 1)}-\n {round(row['upper'] / 1e6, 1)})" + for _, row in max_ability_to_pay_for_implementation_summarized_fcdo.iterrows() + ], + xticklabels_horizontal_and_wrapped=False, +) +ax.set_title(name_of_plot) +ax.set_ylabel('Maximum ability to pay \n(Millions)') +fig.tight_layout() +fig.savefig(roi_outputs_folder_fcdo / name_of_plot.replace(' ', '_').replace(',', '')) +plt.close(fig) + +# Global Fund +# Plot Maximum ability to pay +name_of_plot = f'Maximum ability to pay at CET, {relevant_period_for_costing[0]}-{relevant_period_for_costing[1]}' +fig, ax = do_standard_bar_plot_with_ci( + (max_ability_to_pay_for_implementation_summarized_gf / 1e6), + annotations=[ + f"{round(row['mean'] / 1e6, 1)} \n ({round(row['lower'] / 1e6, 1)}-\n {round(row['upper'] / 1e6, 1)})" + for _, row in max_ability_to_pay_for_implementation_summarized_gf.iterrows() + ], + xticklabels_horizontal_and_wrapped=False, +) +ax.set_title(name_of_plot) +ax.set_ylabel('Maximum ability to pay \n(Millions)') +fig.tight_layout() +fig.savefig(roi_outputs_folder_gf / name_of_plot.replace(' ', '_').replace(',', '')) +plt.close(fig) + +# Plot incremental costs +incremental_scenario_cost_summarized = summarize_cost_data(incremental_scenario_cost) +# Keep only scenarios of interest +incremental_scenario_cost_summarized_fcdo = incremental_scenario_cost_summarized[ + incremental_scenario_cost_summarized.index.get_level_values(0).isin(hss_scenarios_for_fcdo_report)] +incremental_scenario_cost_summarized_gf = incremental_scenario_cost_summarized[ + incremental_scenario_cost_summarized.index.get_level_values(0).isin(hss_scenarios_for_gf_report)] + +# FCDO +name_of_plot = f'Incremental scenario cost relative to baseline {relevant_period_for_costing[0]}-{relevant_period_for_costing[1]}' +fig, ax = do_standard_bar_plot_with_ci( + (incremental_scenario_cost_summarized_fcdo / 1e6), + annotations=[ + f"{round(row['mean'] / 1e6, 1)} \n ({round(row['lower'] / 1e6, 1)}- \n 
{round(row['upper'] / 1e6, 1)})" + for _, row in incremental_scenario_cost_summarized_fcdo.iterrows() + ], + xticklabels_horizontal_and_wrapped=False, +) +ax.set_title(name_of_plot) +ax.set_ylabel('Cost \n(USD Millions)') +fig.tight_layout() +fig.savefig(roi_outputs_folder_fcdo / name_of_plot.replace(' ', '_').replace(',', '')) +plt.close(fig) + +# Global Fund +name_of_plot = f'Incremental scenario cost relative to baseline {relevant_period_for_costing[0]}-{relevant_period_for_costing[1]}' +fig, ax = do_standard_bar_plot_with_ci( + (incremental_scenario_cost_summarized_gf / 1e6), + annotations=[ + f"{round(row['mean'] / 1e6, 1)} \n ({round(row['lower'] / 1e6, 1)}- \n {round(row['upper'] / 1e6, 1)})" + for _, row in incremental_scenario_cost_summarized_gf.iterrows() + ], + xticklabels_horizontal_and_wrapped=False, +) +ax.set_title(name_of_plot) +ax.set_ylabel('Cost \n(USD Millions)') +fig.tight_layout() +fig.savefig(roi_outputs_folder_gf / name_of_plot.replace(' ', '_').replace(',', '')) +plt.close(fig) + +# 4. 
Plot costs +# ---------------------------------------------------- +# FCDO +input_costs_for_plot = input_costs[input_costs.draw.isin(hss_scenarios_for_fcdo_report)] +# First summarize all input costs +input_costs_for_plot_summarized = input_costs_for_plot.groupby(['draw', 'year', 'cost_subcategory', 'Facility_Level', 'cost_subgroup', 'cost_category']).agg( + mean=('cost', 'mean'), + lower=('cost', lambda x: x.quantile(0.025)), + upper=('cost', lambda x: x.quantile(0.975)) +).reset_index() +input_costs_for_plot_summarized = input_costs_for_plot_summarized.melt( + id_vars=['draw', 'year', 'cost_subcategory', 'Facility_Level', 'cost_subgroup', 'cost_category'], + value_vars=['mean', 'lower', 'upper'], + var_name='stat', + value_name='cost' +) + +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'all', _disaggregate_by_subgroup = False, _outputfilepath = Path(figurespath / 'fcdo'), _scenario_dict = hs_scenarios_substitutedict_fcdo) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'all', _year = [2025], _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = hs_scenarios_substitutedict_fcdo) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'human resources for health', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = hs_scenarios_substitutedict_fcdo) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'medical consumables', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = hs_scenarios_substitutedict_fcdo) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'medical equipment', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = hs_scenarios_substitutedict_fcdo) +do_stacked_bar_plot_of_cost_by_category(_df = 
input_costs_for_plot_summarized, _cost_category = 'other', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = hs_scenarios_substitutedict_fcdo) + +# Global Fund +input_costs_for_plot = input_costs[input_costs.draw.isin(hss_scenarios_for_gf_report)] +# First summarize all input costs +input_costs_for_plot_summarized = input_costs_for_plot.groupby(['draw', 'year', 'cost_subcategory', 'Facility_Level', 'cost_subgroup', 'cost_category']).agg( + mean=('cost', 'mean'), + lower=('cost', lambda x: x.quantile(0.025)), + upper=('cost', lambda x: x.quantile(0.975)) +).reset_index() +input_costs_for_plot_summarized = input_costs_for_plot_summarized.melt( + id_vars=['draw', 'year', 'cost_subcategory', 'Facility_Level', 'cost_subgroup', 'cost_category'], + value_vars=['mean', 'lower', 'upper'], + var_name='stat', + value_name='cost' +) + +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'all', _disaggregate_by_subgroup = False, _outputfilepath = Path(figurespath / 'gf'), _scenario_dict = hs_scenarios_substitutedict_gf) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'all', _year = [2025], _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = hs_scenarios_substitutedict_gf) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'human resources for health', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = hs_scenarios_substitutedict_gf) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'medical consumables', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = hs_scenarios_substitutedict_gf) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'medical equipment', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, 
_scenario_dict = hs_scenarios_substitutedict_gf) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'other', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = hs_scenarios_substitutedict_gf) diff --git a/src/scripts/costing/cost_analysis_htm_with_and_without_hss.py b/src/scripts/costing/cost_analysis_htm_with_and_without_hss.py new file mode 100644 index 0000000000..2f164748e1 --- /dev/null +++ b/src/scripts/costing/cost_analysis_htm_with_and_without_hss.py @@ -0,0 +1,604 @@ +import argparse +from pathlib import Path +from tlo import Date +from collections import Counter, defaultdict + +import calendar +import datetime +import os +import textwrap + +import matplotlib.pyplot as plt +# import seaborn as sns +from matplotlib.ticker import FuncFormatter +import numpy as np +import pandas as pd +import ast +import math + +from tlo.analysis.utils import ( + extract_params, + extract_results, + get_scenario_info, + get_scenario_outputs, + load_pickled_dataframes, + make_age_grp_lookup, + make_age_grp_types, + summarize, + create_pickles_locally, + parse_log_file, + unflatten_flattened_multi_index_in_logging +) + +from src.scripts.costing.cost_estimation import (estimate_input_cost_of_scenarios, + summarize_cost_data, + apply_discounting_to_cost_data, + do_stacked_bar_plot_of_cost_by_category, + do_line_plot_of_cost, + # generate_roi_plots, + generate_multiple_scenarios_roi_plot) + +# Define a timestamp for script outputs +timestamp = datetime.datetime.now().strftime("_%Y_%m_%d_%H_%M") + +# Print the start time of the script +print('Script Start', datetime.datetime.now().strftime('%H:%M')) + +# Create folders to store results +resourcefilepath = Path("./resources") +outputfilepath = Path('./outputs/t.mangal@imperial.ac.uk') +figurespath = Path('./outputs/global_fund_roi_analysis/htm_with_and_without_hss') +if not os.path.exists(figurespath): + os.makedirs(figurespath) +roi_outputs_folder = 
Path(figurespath / 'roi') +if not os.path.exists(roi_outputs_folder): + os.makedirs(roi_outputs_folder) + +# Load result files +#------------------------------------------------------------------------------------------------------------------ +results_folder = get_scenario_outputs('htm_with_and_without_hss-2024-10-22T163743Z.py', outputfilepath)[0] + +# Check can read results from draw=0, run=0 +log = load_pickled_dataframes(results_folder, 0, 0) # look at one log (so can decide what to extract) +params = extract_params(results_folder) + +# Declare default parameters for cost analysis +#------------------------------------------------------------------------------------------------------------------ +# Population scaling factor for malaria scale-up projections +population_scaling_factor = log['tlo.methods.demography']['scaling_factor']['scaling_factor'].iloc[0] +# Load the list of districts and their IDs +district_dict = pd.read_csv(resourcefilepath / 'demography' / 'ResourceFile_Population_2010.csv')[ + ['District_Num', 'District']].drop_duplicates() +district_dict = dict(zip(district_dict['District_Num'], district_dict['District'])) + +# Period relevant for costing +TARGET_PERIOD_INTERVENTION = (Date(2025, 1, 1), Date(2035, 12, 31)) # This is the period that is costed +relevant_period_for_costing = [i.year for i in TARGET_PERIOD_INTERVENTION] +list_of_relevant_years_for_costing = list(range(relevant_period_for_costing[0], relevant_period_for_costing[1] + 1)) + +# Scenarios +htm_scenarios = {0:"Baseline", 1: "HSS PACKAGE: Perfect", 2: "HSS PACKAGE: Realistic", 3: "HIV Programs Scale-up WITHOUT HSS PACKAGE", +4: "HIV Programs Scale-up WITH FULL HSS PACKAGE", 5: "HIV Programs Scale-up WITH REALISTIC HSS PACKAGE", 6: "TB Programs Scale-up WITHOUT HSS PACKAGE", +7: "TB Programs Scale-up WITH FULL HSS PACKAGE", 8: "TB Programs Scale-up WITH REALISTIC HSS PACKAGE", 9: "Malaria Programs Scale-up WITHOUT HSS PACKAGE", +10: "Malaria Programs Scale-up WITH FULL HSS 
PACKAGE", 11: "Malaria Programs Scale-up WITH REALISTIC HSS PACKAGE", 12: "HTM Programs Scale-up WITHOUT HSS PACKAGE", +13: "HTM Programs Scale-up WITH FULL HSS PACKAGE", 14: "HTM Programs Scale-up WITH REALISTIC HSS PACKAGE", 15: "HTM Programs Scale-up WITH SUPPLY CHAINS", 16: "HTM Programs Scale-up WITH HRH"} + +htm_scenarios_substitutedict_fcdo = {0:"0", 1: "1", 2: "A", 3: "B", +4: "4", 5: "C", 6: "D", +7: "7", 8: "E", 9: "F", +10: "10", 11: "G", 12: "H", +13: "13", 14: "I", 15: "J", 16: "K"} + +# Subset of scenarios included in analysis +htm_scenarios_for_gf_report = [0, 2, 3, 5, 6, 8, 9, 11, 12, 14, 15, 16] + +color_map = { + 'Baseline': '#9e0142', + 'HSS PACKAGE: Realistic': '#d8434e', + 'HIV Programs Scale-up WITHOUT HSS PACKAGE': '#f36b48', + 'HIV Programs Scale-up WITH REALISTIC HSS PACKAGE': '#fca45c', + 'TB Programs Scale-up WITHOUT HSS PACKAGE': '#fddc89', + 'TB Programs Scale-up WITH REALISTIC HSS PACKAGE': '#e7f7a0', + 'Malaria Programs Scale-up WITHOUT HSS PACKAGE': '#a5dc97', + 'Malaria Programs Scale-up WITH REALISTIC HSS PACKAGE': '#6dc0a6', + 'HTM Programs Scale-up WITHOUT HSS PACKAGE': '#438fba', + 'HTM Programs Scale-up WITH REALISTIC HSS PACKAGE': '#5e4fa2', + 'HTM Programs Scale-up WITH SUPPLY CHAINS': '#3c71aa', + 'HTM Programs Scale-up WITH HRH': '#2f6094', +} + +# Cost-effectiveness threshold +chosen_cet = 199.620811947318 # This is based on the estimate from Lomas et al (2023)- $160.595987085533 in 2019 USD converted to 2023 USD +# based on Ochalek et al (2018) - the paper provided the value $61 in 2016 USD terms, this value is $77.4 in 2023 USD terms +chosen_value_of_statistical_life = 834 + +# Discount rate +discount_rate = 0.03 + +# Define a function to create bar plots +def do_bar_plot_with_ci(_df, annotations=None, xticklabels_horizontal_and_wrapped=False): + """Make a vertical bar plot for each row of _df, using the columns to identify the height of the bar and the + extent of the error bar.""" + + # Calculate y-error bars + yerr =
np.array([ + (_df['mean'] - _df['lower']).values, + (_df['upper'] - _df['mean']).values, + ]) + + # Map xticks based on the hss_scenarios dictionary + xticks = {index: htm_scenarios.get(index, f"Scenario {index}") for index in _df.index} + + # Retrieve colors from color_map based on the xticks labels + colors = [color_map.get(label, '#333333') for label in xticks.values()] # default to grey if not found + + # Generate consecutive x positions for the bars, ensuring no gaps + x_positions = np.arange(len(xticks)) # Consecutive integers for each bar position + + fig, ax = plt.subplots() + ax.bar( + x_positions, + _df['mean'].values, + yerr=yerr, + color=colors, # Set bar colors + alpha=1, + ecolor='black', + capsize=10, + ) + + # Add optional annotations above each bar + if annotations: + for xpos, ypos, text in zip(x_positions, _df['upper'].values, annotations): + ax.text(xpos, ypos * 1.05, text, horizontalalignment='center', fontsize=8) + + # Set x-tick labels with wrapped text if required + wrapped_labs = ["\n".join(textwrap.wrap(label,30)) for label in xticks.values()] + ax.set_xticks(x_positions) # Set x-ticks to consecutive positions + ax.set_xticklabels(wrapped_labs, rotation=45 if not xticklabels_horizontal_and_wrapped else 0, ha='right', + fontsize=7) + + # Set y-axis limit to upper max + 500 + ax.set_ylim(_df['lower'].min()*1.25, _df['upper'].max()*1.25) + + # Set font size for y-tick labels and grid + ax.tick_params(axis='y', labelsize=9) + ax.tick_params(axis='x', labelsize=9) + + ax.grid(axis="y") + ax.spines['top'].set_visible(False) + ax.spines['right'].set_visible(False) + fig.tight_layout() + + return fig, ax + +def do_standard_bar_plot_with_ci(_df, set_colors=None, annotations=None, + xticklabels_horizontal_and_wrapped=False, + put_labels_in_legend=True, + offset=1e6): + """Make a vertical bar plot for each row of _df, using the columns to identify the height of the bar and the + extent of the error bar.""" + + substitute_labels = 
'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + + yerr = np.array([ + (_df['mean'] - _df['lower']).values, + (_df['upper'] - _df['mean']).values, + ]) +# TODO should be above be 'median' + xticks = {(i + 0.5): k for i, k in enumerate(_df.index)} + + if set_colors: + colors = [color_map.get(series, 'grey') for series in _df.index] + else: + cmap = sns.color_palette('Spectral', as_cmap=True) + rescale = lambda y: (y - np.min(y)) / (np.max(y) - np.min(y)) # noqa: E731 + colors = list(map(cmap, rescale(np.array(list(xticks.keys()))))) if put_labels_in_legend else None + + fig, ax = plt.subplots(figsize=(10, 5)) + ax.bar( + xticks.keys(), + _df['mean'].values, + yerr=yerr, + ecolor='black', + color=colors, + capsize=10, + label=xticks.values() + ) + + if annotations: + for xpos, (ypos, text) in zip(xticks.keys(), zip(_df['upper'].values.flatten(), annotations)): + annotation_y = ypos + offset + + ax.text( + xpos, + annotation_y, + '\n'.join(text.split(' ', 1)), + horizontalalignment='center', + verticalalignment='bottom', # Aligns text at the bottom of the annotation position + fontsize='x-small', + rotation='horizontal' + ) + + ax.set_xticks(list(xticks.keys())) + + if put_labels_in_legend: + # Update xticks label with substitute labels + # Insert legend with updated labels that shows correspondence between substitute label and original label + xtick_values = [letter for letter, label in zip(substitute_labels, xticks.values())] + xtick_legend = [f'{letter}: {label}' for letter, label in zip(substitute_labels, xticks.values())] + h, legs = ax.get_legend_handles_labels() + ax.legend(h, xtick_legend, loc='center left', fontsize='small', bbox_to_anchor=(1, 0.5)) + ax.set_xticklabels(list(xtick_values)) + else: + if not xticklabels_horizontal_and_wrapped: + # xticklabels will be vertical and not wrapped + ax.set_xticklabels(list(xticks.values()), rotation=90) + else: + wrapped_labs = ["\n".join(textwrap.wrap(_lab, 20)) for _lab in xticks.values()] + ax.set_xticklabels(wrapped_labs) + + 
ax.grid(axis="y") + ax.spines['top'].set_visible(False) + ax.spines['right'].set_visible(False) + fig.tight_layout(pad=2.0) + plt.subplots_adjust(left=0.15, right=0.85) # Adjust left and right margins + + return fig, ax + +# Estimate standard input costs of scenario +#----------------------------------------------------------------------------------------------------------------------- +input_costs = estimate_input_cost_of_scenarios(results_folder, resourcefilepath, + _years=list_of_relevant_years_for_costing, cost_only_used_staff=True, + _discount_rate = discount_rate) +# _draws = htm_scenarios_for_gf_report --> this subset is created after calculating malaria scale up costs +# TODO Remove the manual fix below once the logging for these is corrected +input_costs.loc[input_costs.cost_subgroup == 'Oxygen, 1000 liters, primarily with oxygen cylinders', 'cost'] = \ + input_costs.loc[input_costs.cost_subgroup == 'Oxygen, 1000 liters, primarily with oxygen cylinders', 'cost']/10 +input_costs.loc[input_costs.cost_subgroup == 'Depot-Medroxyprogesterone Acetate 150 mg - 3 monthly', 'cost'] =\ + input_costs.loc[input_costs.cost_subgroup == 'Depot-Medroxyprogesterone Acetate 150 mg - 3 monthly', 'cost']/7 +#input_costs = apply_discounting_to_cost_data(input_costs, _discount_rate = discount_rate) + +# Add additional costs pertaining to simulation (Only for scenarios with Malaria scale-up) +#----------------------------------------------------------------------------------------------------------------------- +# Extract supply chain cost as a proportion of consumable costs to apply to malaria scale-up commodities +# Load primary costing resourcefile +workbook_cost = pd.read_excel((resourcefilepath / "costing/ResourceFile_Costing.xlsx"), + sheet_name=None) +# Read parameters for consumables costs +# Load consumables cost data +unit_price_consumable = workbook_cost["consumables"] +unit_price_consumable = unit_price_consumable.rename(columns=unit_price_consumable.iloc[0]) 
+unit_price_consumable = unit_price_consumable[['Item_Code', 'Final_price_per_chosen_unit (USD, 2023)']].reset_index( + drop=True).iloc[1:] +unit_price_consumable = unit_price_consumable[unit_price_consumable['Item_Code'].notna()] + +# Assume that the cost of procurement, warehousing and distribution is a fixed proportion of consumable purchase costs +# The fixed proportion is based on Resource Mapping Expenditure data from 2018 +resource_mapping_data = workbook_cost["resource_mapping_r7_summary"] +# Make sure values are numeric +expenditure_column = ['EXPENDITURE (USD) (Jul 2018 - Jun 2019)'] +resource_mapping_data[expenditure_column] = resource_mapping_data[expenditure_column].apply( + lambda x: pd.to_numeric(x, errors='coerce')) +supply_chain_expenditure = \ +resource_mapping_data[resource_mapping_data['Cost Type'] == 'Supply Chain'][expenditure_column].sum()[0] +consumables_purchase_expenditure = \ +resource_mapping_data[resource_mapping_data['Cost Type'] == 'Drugs and Commodities'][expenditure_column].sum()[0] + \ +resource_mapping_data[resource_mapping_data['Cost Type'] == 'HIV Drugs and Commodities'][expenditure_column].sum()[0] +supply_chain_cost_proportion = supply_chain_expenditure / consumables_purchase_expenditure + +# In this case malaria intervention scale-up costs were not included in the standard estimate_input_cost_of_scenarios function +list_of_draws_with_malaria_scaleup_parameters = params[(params.module_param == 'Malaria:scaleup_start_year')] +list_of_draws_with_malaria_scaleup_parameters.loc[:,'value'] = pd.to_numeric(list_of_draws_with_malaria_scaleup_parameters['value']) +list_of_draws_with_malaria_scaleup_implemented_in_costing_period = list_of_draws_with_malaria_scaleup_parameters[(list_of_draws_with_malaria_scaleup_parameters['value'] < max(relevant_period_for_costing))].index.to_list() + +# 1. 
IRS costs +irs_coverage_rate = 0.8 +districts_with_irs_scaleup = ['Kasungu', 'Mchinji', 'Lilongwe', 'Lilongwe City', 'Dowa', 'Ntchisi', 'Salima', 'Mangochi', + 'Mwanza', 'Likoma', 'Nkhotakota'] +# Convert above list of district names to numeric district identifiers +district_keys_with_irs_scaleup = [key for key, name in district_dict.items() if name in districts_with_irs_scaleup] +TARGET_PERIOD_MALARIA_SCALEUP = (Date(2024, 1, 1), Date(2035, 12, 31)) + +# Get population by district +def get_total_population_by_district(_df): + years_needed = [i.year for i in TARGET_PERIOD_MALARIA_SCALEUP] # we only consider the population for the malaria scale-up period + # because those are the years relevant for malaria scale-up costing + _df['year'] = pd.to_datetime(_df['date']).dt.year + assert set(_df.year.unique()).issuperset(years_needed), "Some years are not recorded." + _df = pd.melt(_df.drop(columns = 'date'), id_vars = ['year']).rename(columns = {'variable': 'district'}) + return pd.Series( + data=_df + .loc[_df.year.between(*years_needed)] + .set_index(['year', 'district'])['value'] + ) + +district_population_by_year = extract_results( + results_folder, + module='tlo.methods.malaria', + key='pop_district', + custom_generate_series=get_total_population_by_district, + do_scaling=True +) + +def get_number_of_people_covered_by_malaria_scaleup(_df, list_of_districts_covered = None, draws_included = None): + _df = pd.DataFrame(_df) + # Reset the index to make 'district' a column + _df = _df.reset_index() + # Convert the 'district' column to numeric values + _df['district'] = pd.to_numeric(_df['district'], errors='coerce') + _df = _df.set_index(['year', 'district']) + if list_of_districts_covered is not None: + _df.loc[~_df.index.get_level_values('district').isin(list_of_districts_covered), :] = 0 + if draws_included is not None: + _df.loc[:, ~_df.columns.get_level_values('draw').isin(draws_included)] = 0 + return _df + +district_population_covered_by_irs_scaleup_by_year = 
get_number_of_people_covered_by_malaria_scaleup(district_population_by_year, + list_of_districts_covered=district_keys_with_irs_scaleup, + draws_included = list_of_draws_with_malaria_scaleup_implemented_in_costing_period) + +irs_cost_per_person = unit_price_consumable[unit_price_consumable.Item_Code == 161]['Final_price_per_chosen_unit (USD, 2023)'] +# The above unit cost already includes implementation - project management (17%), personnel (6%), vehicles (10%), equipment (6%), monitoring and evaluation (3%), training (3%), +# other commodities (3%) and buildings (2%) from Alonso et al (2021) +irs_multiplication_factor = irs_cost_per_person * irs_coverage_rate +total_irs_cost = irs_multiplication_factor.iloc[0] * district_population_covered_by_irs_scaleup_by_year # for districts and scenarios included +total_irs_cost = total_irs_cost.groupby(level='year').sum() + +# 2. Bednet costs +bednet_coverage_rate = 0.7 +# We can assume 3-year lifespan of a bednet, each bednet covering 1.8 people. 
+unit_cost_of_bednet = unit_price_consumable[unit_price_consumable.Item_Code == 160]['Final_price_per_chosen_unit (USD, 2023)'] * (1 + supply_chain_cost_proportion) +# We add supply chain costs (procurement + distribution + warehousing) because the unit_cost does not include this +annual_bednet_cost_per_person = unit_cost_of_bednet / 1.8 / 3 +bednet_multiplication_factor = bednet_coverage_rate * annual_bednet_cost_per_person + +district_population_covered_by_bednet_scaleup_by_year = get_number_of_people_covered_by_malaria_scaleup(district_population_by_year, + draws_included = list_of_draws_with_malaria_scaleup_implemented_in_costing_period) # All districts covered + +total_bednet_cost = bednet_multiplication_factor.iloc[0] * district_population_covered_by_bednet_scaleup_by_year # for scenarios included +total_bednet_cost = total_bednet_cost.groupby(level='year').sum() + +# Malaria scale-up costs - TOTAL +malaria_scaleup_costs = [ + (total_irs_cost.reset_index(), 'cost_of_IRS_scaleup'), + (total_bednet_cost.reset_index(), 'cost_of_bednet_scaleup'), +] +def melt_and_label_malaria_scaleup_cost(_df, label): + multi_index = pd.MultiIndex.from_tuples(_df.columns) + _df.columns = multi_index + + # reshape dataframe and assign 'draw' and 'run' as the correct column headers + melted_df = pd.melt(_df, id_vars=['year']).rename(columns={'variable_0': 'draw', 'variable_1': 'run'}) + # Replace item_code with consumable_name_tlo + melted_df['cost_subcategory'] = label + melted_df['cost_category'] = 'other' + melted_df['cost_subgroup'] = 'NA' + melted_df['Facility_Level'] = 'all' + melted_df = melted_df.rename(columns={'value': 'cost'}) + return melted_df + +# Iterate through additional costs, melt and concatenate +for df, label in malaria_scaleup_costs: + new_df = melt_and_label_malaria_scaleup_cost(df, label) + input_costs = pd.concat([input_costs, new_df], ignore_index=True) + +# TODO Reduce the cost of Oxygen and Depo-medroxy temporarily which we figure out the issue with 
this +# Extract input_costs for browsing +input_costs.groupby(['draw', 'run', 'cost_category', 'cost_subcategory', 'cost_subgroup','year'])['cost'].sum().to_csv(figurespath / 'cost_detailed.csv') + +# %% +# Return on Investment analysis +# Calculate incremental cost +# ----------------------------------------------------------------------------------------------------------------------- +# Aggregate input costs for further analysis +input_costs_subset = input_costs[ + (input_costs['year'] >= relevant_period_for_costing[0]) & (input_costs['year'] <= relevant_period_for_costing[1])] +# TODO the above step may no longer be needed +total_input_cost = input_costs_subset.groupby(['draw', 'run'])['cost'].sum() +total_input_cost_summarized = summarize_cost_data(total_input_cost.unstack(level='run')) +def find_difference_relative_to_comparison(_ser: pd.Series, + comparison: str, + scaled: bool = False, + drop_comparison: bool = True, + ): + """Find the difference in the values in a pd.Series with a multi-index, between the draws (level 0) + within the runs (level 1), relative to where draw = `comparison`.
+ The comparison is `X - COMPARISON`.""" + return _ser \ + .unstack(level=0) \ + .apply(lambda x: (x - x[comparison]) / (x[comparison] if scaled else 1.0), axis=1) \ + .drop(columns=([comparison] if drop_comparison else [])) \ + .stack() + + +incremental_scenario_cost = (pd.DataFrame( + find_difference_relative_to_comparison( + total_input_cost, + comparison=0) # sets the comparator to 0 which is the Actual scenario +).T.iloc[0].unstack()).T + +# Keep only scenarios of interest +incremental_scenario_cost = incremental_scenario_cost[ + incremental_scenario_cost.index.get_level_values(0).isin(htm_scenarios_for_gf_report)] + +# Monetary value of health impact +# ----------------------------------------------------------------------------------------------------------------------- +def get_num_dalys(_df): + """Return total number of DALYS (Stacked) by label (total within the TARGET_PERIOD). + Throw error if not a record for every year in the TARGET PERIOD (to guard against inadvertently using + results from runs that crashed mid-way through the simulation. + """ + years_needed = relevant_period_for_costing # [i.year for i in TARGET_PERIOD_INTERVENTION] + assert set(_df.year.unique()).issuperset(years_needed), "Some years are not recorded." 
+ _df = _df.loc[_df.year.between(*years_needed)].drop(columns=['date', 'sex', 'age_range']).groupby('year').sum().sum(axis = 1) + + # Initial year and discount rate + initial_year = min(_df.index.unique()) + + # Calculate the discounted values + discounted_values = _df / (1 + discount_rate) ** (_df.index - initial_year) + + return pd.Series(discounted_values.sum()) + +num_dalys = extract_results( + results_folder, + module='tlo.methods.healthburden', + key='dalys_stacked', + custom_generate_series=get_num_dalys, + do_scaling=True +) + +# Get absolute DALYs averted +num_dalys_averted = (-1.0 * + pd.DataFrame( + find_difference_relative_to_comparison( + num_dalys.loc[0], + comparison=0) # sets the comparator to 0 which is the Actual scenario + ).T.iloc[0].unstack(level='run')) +num_dalys_averted = num_dalys_averted[num_dalys_averted.index.get_level_values(0).isin(htm_scenarios_for_gf_report)] + +# The monetary value of the health benefit is delta health times CET (negative values are set to 0) +def get_monetary_value_of_incremental_health(_num_dalys_averted, _chosen_value_of_life_year): + monetary_value_of_incremental_health = (_num_dalys_averted * _chosen_value_of_life_year).clip(lower=0.0) + return monetary_value_of_incremental_health + +# TODO check that the above calculation is correct + +# 3. 
Return on Investment Plot +# ---------------------------------------------------- +# Plot ROI at various levels of cost +generate_roi_plots(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_cet), + _incremental_input_cost=incremental_scenario_cost, + _scenario_dict = htm_scenarios, + _outputfilepath=roi_outputs_folder, + _value_of_life_suffix = 'CET') + +generate_roi_plots(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), + _incremental_input_cost=incremental_scenario_cost, + _scenario_dict = htm_scenarios, + _outputfilepath=roi_outputs_folder, + _value_of_life_suffix = 'VSL') + +# Combined ROI plot of relevant scenarios +generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), + _incremental_input_cost=incremental_scenario_cost, + _draws = [2,12,14], + _scenario_dict = htm_scenarios, + _outputfilepath=roi_outputs_folder, + _value_of_life_suffix = 'VSL') + +generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), + _incremental_input_cost=incremental_scenario_cost, + _draws = [2,12,14,15,16], + _scenario_dict = htm_scenarios, + _outputfilepath=roi_outputs_folder, + _value_of_life_suffix = 'all_HTM_VSL') + +generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), + _incremental_input_cost=incremental_scenario_cost, + _draws = [3,5], + _scenario_dict = htm_scenarios, + _outputfilepath=roi_outputs_folder, + _value_of_life_suffix = 'HIV_VSL') + 
+generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), + _incremental_input_cost=incremental_scenario_cost, + _draws = [6,8], + _scenario_dict = htm_scenarios, + _outputfilepath=roi_outputs_folder, + _value_of_life_suffix = 'TB_VSL') + +generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health=get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life), + _incremental_input_cost=incremental_scenario_cost, + _draws = [9,11], + _scenario_dict = htm_scenarios, + _outputfilepath=roi_outputs_folder, + _value_of_life_suffix = 'Malaria_VSL') + +# 4. Plot Maximum ability-to-pay at CET +# ---------------------------------------------------- +max_ability_to_pay_for_implementation = (get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_cet) - incremental_scenario_cost).clip( + lower=0.0) # monetary value - change in costs +max_ability_to_pay_for_implementation_summarized = summarize_cost_data(max_ability_to_pay_for_implementation) +max_ability_to_pay_for_implementation_summarized = max_ability_to_pay_for_implementation_summarized[ + max_ability_to_pay_for_implementation_summarized.index.get_level_values(0).isin(htm_scenarios_for_gf_report)] + +# Plot Maximum ability to pay +name_of_plot = f'Maximum ability to pay at CET, {relevant_period_for_costing[0]}-{relevant_period_for_costing[1]}' +fig, ax = do_standard_bar_plot_with_ci( + (max_ability_to_pay_for_implementation_summarized / 1e6), + annotations=[ + f"{round(row['mean'] / 1e6, 1)} \n ({round(row['lower'] / 1e6, 1)}-\n {round(row['upper'] / 1e6, 1)})" + for _, row in max_ability_to_pay_for_implementation_summarized.iterrows() + ], + xticklabels_horizontal_and_wrapped=False, +) +ax.set_title(name_of_plot) +ax.set_ylabel('Maximum ability to pay \n(Millions)') 
+fig.tight_layout() +fig.savefig(roi_outputs_folder / name_of_plot.replace(' ', '_').replace(',', '')) +plt.close(fig) + +# 4. Plot Maximum ability-to-pay at VSL +# ---------------------------------------------------- +max_ability_to_pay_for_implementation = (get_monetary_value_of_incremental_health(num_dalys_averted, _chosen_value_of_life_year = chosen_value_of_statistical_life) - incremental_scenario_cost).clip( + lower=0.0) # monetary value - change in costs +max_ability_to_pay_for_implementation_summarized = summarize_cost_data(max_ability_to_pay_for_implementation) +max_ability_to_pay_for_implementation_summarized = max_ability_to_pay_for_implementation_summarized[ + max_ability_to_pay_for_implementation_summarized.index.get_level_values(0).isin(htm_scenarios_for_gf_report)] + +# Plot Maximum ability to pay +name_of_plot = f'Maximum ability to pay at VSL, {relevant_period_for_costing[0]}-{relevant_period_for_costing[1]}' +fig, ax = do_bar_plot_with_ci( + (max_ability_to_pay_for_implementation_summarized / 1e6), + annotations=[ + f"{round(row['mean'] / 1e6, 1)} \n ({round(row['lower'] / 1e6, 1)}-\n {round(row['upper'] / 1e6, 1)})" + for _, row in max_ability_to_pay_for_implementation_summarized.iterrows() + ], + xticklabels_horizontal_and_wrapped=False, +) +ax.set_title(name_of_plot) +ax.set_ylabel('Maximum ability to pay (at VSL) \n(Millions)') +fig.tight_layout() +fig.savefig(roi_outputs_folder / name_of_plot.replace(' ', '_').replace(',', '')) +plt.close(fig) + +# Plot incremental costs +incremental_scenario_cost_summarized = summarize_cost_data(incremental_scenario_cost) +name_of_plot = f'Incremental scenario cost relative to baseline {relevant_period_for_costing[0]}-{relevant_period_for_costing[1]}' +fig, ax = do_standard_bar_plot_with_ci( + (incremental_scenario_cost_summarized / 1e6), + annotations=[ + f"{round(row['mean'] / 1e6, 1)} \n ({round(row['lower'] / 1e6, 1)}- \n {round(row['upper'] / 1e6, 1)})" + for _, row in 
incremental_scenario_cost_summarized.iterrows() + ], + xticklabels_horizontal_and_wrapped=False, +) +ax.set_title(name_of_plot) +ax.set_ylabel('Cost \n(USD Millions)') +fig.tight_layout() +fig.savefig(roi_outputs_folder / name_of_plot.replace(' ', '_').replace(',', '')) +plt.close(fig) + +# 4. Plot costs +# ---------------------------------------------------- +input_costs_for_plot = input_costs[input_costs.draw.isin(htm_scenarios_for_gf_report)] +# First summarize all input costs +input_costs_for_plot_summarized = input_costs_for_plot.groupby(['draw', 'year', 'cost_subcategory', 'Facility_Level', 'cost_subgroup', 'cost_category']).agg( + mean=('cost', 'mean'), + lower=('cost', lambda x: x.quantile(0.025)), + upper=('cost', lambda x: x.quantile(0.975)) +).reset_index() +input_costs_for_plot_summarized = input_costs_for_plot_summarized.melt( + id_vars=['draw', 'year', 'cost_subcategory', 'Facility_Level', 'cost_subgroup', 'cost_category'], + value_vars=['mean', 'lower', 'upper'], + var_name='stat', + value_name='cost' +) + +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'all', _year = list(range(2025, 2036)), _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = htm_scenarios_substitutedict) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'all', _year = [2025], _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = htm_scenarios) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'human resources for health', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = htm_scenarios) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'medical consumables', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = htm_scenarios) +do_stacked_bar_plot_of_cost_by_category(_df = 
input_costs_for_plot_summarized, _cost_category = 'medical equipment', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = htm_scenarios) +do_stacked_bar_plot_of_cost_by_category(_df = input_costs_for_plot_summarized, _cost_category = 'other', _disaggregate_by_subgroup = False, _outputfilepath = figurespath, _scenario_dict = htm_scenarios) diff --git a/src/scripts/costing/cost_estimation.py b/src/scripts/costing/cost_estimation.py new file mode 100644 index 0000000000..26b22df161 --- /dev/null +++ b/src/scripts/costing/cost_estimation.py @@ -0,0 +1,2086 @@ +from pathlib import Path + +from tlo import Date +from collections import defaultdict +from typing import Optional, Union, Literal + +import datetime +import textwrap + +import matplotlib.pyplot as plt +import squarify +import numpy as np +import pandas as pd +import ast +import math +import itertools +from itertools import cycle +import matplotlib.container as mpc + +from tlo.analysis.utils import ( + extract_results, + get_scenario_info, + load_pickled_dataframes, + unflatten_flattened_multi_index_in_logging +) + +#%% +# Define a helper function to load necessary unit cost data +def load_unit_cost_assumptions(resourcefilepath: Path) -> dict[str, dict]: + """ + Load and parse all cost-related input files from CSV format. + + Parameters + ---------- + resourcefilepath : Path + Base path to the root of the resource files directory. + + Returns + ------- + cost_inputs : dict of pd.DataFrame + Dictionary of cost input DataFrames: HR, consumables, equipment, facility operations. 
+ """ + # Load cost input CSVs + cost_inputs = { + 'hr': pd.read_csv(resourcefilepath / "costing" / "ResourceFile_Costing_HR.csv"), + 'consumables': pd.read_csv(resourcefilepath / "costing" / "ResourceFile_Costing_Consumables.csv"), + 'equipment': pd.read_csv(resourcefilepath / "costing" / "ResourceFile_Costing_Equipment.csv"), + 'facility_operations': pd.read_csv(resourcefilepath / "costing" / "ResourceFile_Costing_Facility_Operations.csv"), + 'actual_expenditure_data': pd.read_csv(resourcefilepath / "costing" / "ResourceFile_Resource_Mapping.csv"), + 'health_spending_projections': pd.read_csv(resourcefilepath / "costing" / "ResourceFile_Health_Spending_Projections.csv") + } + + # Clean unit cost data + # Convert facility level to string for consistency across values (HR) + cost_inputs['hr']['Facility_Level'] = cost_inputs['hr']['Facility_Level'].astype(str) + # Keep only necessary columns and rows + cost_inputs['consumables'] = cost_inputs['consumables'][['Item_Code', 'Price_per_unit']].reset_index(drop=True) + cost_inputs['consumables'] = cost_inputs['consumables'][cost_inputs['consumables']['Item_Code'].notna()] + cost_inputs['equipment'] = cost_inputs['equipment'][cost_inputs['equipment']['Item_code'].notna()] + cost_inputs['actual_expenditure_data'] = cost_inputs['actual_expenditure_data'][(cost_inputs['actual_expenditure_data']['Cost Type'].notna()) & + (cost_inputs['actual_expenditure_data']['Cost Type'] != "Grand Total")] + + first_nan_index_facility_operations = cost_inputs['facility_operations'][cost_inputs['facility_operations']['Facility_Level'].isna()].index.min() + if pd.notna(first_nan_index_facility_operations): + cost_inputs['facility_operations'] = cost_inputs['facility_operations'].loc[:first_nan_index_facility_operations - 1] + + + cost_inputs['health_spending_projections'].columns = cost_inputs['health_spending_projections'].iloc[1] + cost_inputs['health_spending_projections'] = 
# Define a helper function to load necessary simulation metadata
def load_simulation_metadata(results_folder: Path) -> tuple[dict, list[int], tuple[Date, Date]]:
    """
    Load simulation scenario metadata and derive key parameters.

    Parameters
    ----------
    results_folder : Path
        Path to the folder containing TLO model simulation results.

    Returns
    -------
    info : dict
        Scenario metadata as returned by `get_scenario_info` (used elsewhere in this
        module for the number of draws and the runs per draw).
    years : list of int
        Every calendar year from the first to the last logged year, inclusive.
    target_period : tuple of Date
        (1 January of the first year, 31 December of the last year).
    """
    # Load a sample log (draw 0, run 0) to derive time bounds
    log = load_pickled_dataframes(results_folder, 0, 0)
    dates = log['tlo.methods.healthsystem.summary']['hsi_event_counts']['date']
    first_year = min(dates).year
    last_year = max(dates).year
    years = list(range(first_year, last_year + 1))
    target_period = (Date(first_year, 1, 1), Date(last_year, 12, 31))

    # Get simulation info (number of draws, runs, etc.)
    info = get_scenario_info(results_folder)

    return info, years, target_period


# Define a function to discount costs to the value of a reference year
def apply_discounting_to_cost_data(_df: pd.DataFrame,
                                   _discount_rate: Union[float, dict[int, float]] = 0,
                                   _initial_year: Optional[int] = None,
                                   _column_for_discounting: str = 'cost') -> pd.DataFrame:
    """
    Apply discounting to the specified column over time, using a fixed or year-specific rate.

    Parameters:
    ----------
    _df : pd.DataFrame
        DataFrame containing a 'year' column and a cost column to be discounted.
        It is not modified by this function.

    _discount_rate : float or dict of {year: float}, default 0
        Discount rate to apply. Can be:
        - A single fixed rate (e.g., 0.03 for 3%)
        - A dictionary of year-specific rates {2025: 0.03, 2026: 0.035, ...};
          years missing from the dictionary are treated as having a rate of 0.

    _initial_year : int, optional
        Reference year for discounting. If None, uses the earliest year in the DataFrame.

    _column_for_discounting : str, default 'cost'
        Name of the column to apply discounting to.

    Returns:
    -------
    pd.DataFrame
        A copy of the input DataFrame in which the specified column has been divided by the
        cumulative discount factor of each row's year. An empty input yields an empty copy.
    """
    # Work on a copy so that the caller's DataFrame is never mutated (as the contract promises)
    discounted_df = _df.copy()

    # Nothing to discount; this also avoids taking the minimum of an empty 'year' column
    if discounted_df.empty:
        return discounted_df

    if _initial_year is None:
        # Determine the initial year from the dataframe
        _initial_year = discounted_df['year'].min()

    def get_discount_factor(year):
        """Compute the cumulative discount factor for a given year."""
        if isinstance(_discount_rate, dict):
            # Product of (1 + rate) over the years after the initial year, so that the
            # discount factor of the initial year itself is 1
            discount_factor = 1
            for y in range(_initial_year + 1, year + 1):
                discount_factor *= (1 + _discount_rate.get(y, 0))  # Default to 0 if year not in dictionary
            return discount_factor
        # If a single value is provided, use standard discounting
        return (1 + _discount_rate) ** (year - _initial_year)

    discount_factors = discounted_df['year'].apply(get_discount_factor)

    # Replace the whole column (rather than writing through .loc) so that an integer-typed
    # cost column can take the floating-point discounted values
    discounted_df[_column_for_discounting] = discounted_df[_column_for_discounting] / discount_factors

    return discounted_df
+ _metric : {'mean', 'median'}, default 'mean' + Summary statistic to use if `summarize=True`. + _years : list of int, optional + Years to include in the cost output. If None, all years are included. + cost_only_used_staff : bool, default True + If True, only costs for level-cadre combinations ever used in simulation are included. + _discount_rate : float or dict of {int: float}, default 0 + Discount rate to apply to future costs. Can be a constant or year-specific dictionary. + + Returns: + ------- + pd.DataFrame + A dataframe containing discounted costs disaggregated by category, sub-category, category-specific subgroup, year, draw, and run. + Note that if a discount rate is used, the dataframe will provide cost as the NPV during the first year of the dataframe + """ + + # Useful common functions + def drop_outside_period(_df): + """Return a dataframe which only includes for which the date is within the limits defined by TARGET_PERIOD""" + return _df.drop(index=_df.index[~_df['date'].between(*TARGET_PERIOD)]) + + def melt_model_output_draws_and_runs(_df, id_vars): + multi_index = pd.MultiIndex.from_tuples(_df.columns) + _df.columns = multi_index + melted_df = pd.melt(_df, id_vars=id_vars).rename(columns={'variable_0': 'draw', 'variable_1': 'run'}) + return melted_df + + # Define a relative pathway for relavant folders + path_for_consumable_resourcefiles = resourcefilepath / "healthsystem/consumables" + + # %% Gathering basic information + # Load basic simulation parameters + #------------------------------------- + info, years, TARGET_PERIOD = load_simulation_metadata(results_folder) + + if _draws is None: + _draws = range(0, info['number_of_draws']) + if _runs is None: + _runs = range(0, info['runs_per_draw']) + + # Load cost input files + #------------------------ + # Extract districts and facility levels from the Master Facility List + mfl = pd.read_csv(resourcefilepath / "healthsystem" / "organisation" / "ResourceFile_Master_Facilities_List.csv") + 
district_dict = pd.read_csv(resourcefilepath / 'demography' / 'ResourceFile_Population_2010.csv')[['District_Num', 'District']].drop_duplicates() + district_dict = dict(zip(district_dict['District_Num'], district_dict['District'])) + facility_id_levels_dict = dict(zip(mfl['Facility_ID'], mfl['Facility_Level'])) + fac_levels = set(mfl.Facility_Level) + + # If variable discount rate is provided, use the average across the relevant years for the purpose of annuitization of HR and equipment costs + def calculate_annuitization_rate(_discount_rate, _years): + if isinstance(_discount_rate, (int, float)): + # Single discount rate, return as is + return _discount_rate + elif isinstance(_discount_rate, dict): + # Extract rates for the specified years (default to 0 if year is missing) + rates = [_discount_rate.get(year, 0) for year in _years] + return sum(rates) / len(rates) # Average discount rate + else: + raise ValueError("`_discount_rate` must be either a number (single rate) or a dictionary {year: rate}.") + + annuitization_rate = calculate_annuitization_rate(_discount_rate, _years) + + # Read all cost parameters + #--------------------------------------- + unit_costs = load_unit_cost_assumptions(resourcefilepath) + + # Calculate necessary equipment cost components based on HSSP-III assumptions + if _discount_rate == 0: + unit_costs['equipment']['replacement_cost_annual'] = unit_costs['equipment'].apply(lambda row: row['Unit_Purchase_Cost'] / row['Life_Span'], axis=1) # straight line depreciation is discount rate is 0 + else: + unit_costs['equipment']['replacement_cost_annual'] = unit_costs['equipment'].apply(lambda row: row['Unit_Purchase_Cost']/(1+(1-(1+annuitization_rate)**(-row['Life_Span']+1))/annuitization_rate), axis=1) # Annuitised over the life span of the equipment assuming outlay at the beginning of the year + unit_costs['equipment']['service_fee_annual'] = unit_costs['equipment'].apply(lambda row: row['Unit_Purchase_Cost'] * 0.8 / 8 if 
row['Unit_Purchase_Cost'] > 1000 else 0, axis=1) # 80% of the value of the item over 8 years + unit_costs['equipment']['spare_parts_annual'] = unit_costs['equipment'].apply(lambda row: row['Unit_Purchase_Cost'] * 0.2 / 8 if row['Unit_Purchase_Cost'] > 1000 else 0, axis=1) # 20% of the value of the item over 8 years + unit_costs['equipment']['major_corrective_maintenance_cost_annual'] = unit_costs['equipment'].apply(lambda row: row['Unit_Purchase_Cost'] * 0.2 * 0.2 / 8 if row['Unit_Purchase_Cost'] < 250000 else 0, axis=1) # 20% of the value of 20% of the items over 8 years + # TODO consider discounting the other components + # Quantity needed for each equipment by facility + unit_costs['equipment'] = unit_costs['equipment'][['Item_code','Equipment_tlo', + 'replacement_cost_annual', 'service_fee_annual', 'spare_parts_annual', 'major_corrective_maintenance_cost_annual', + 'Health Post_Prioritised_Quantity', 'Community_Prioritised_Quantity', 'Health Center_Prioritised_Quantity', 'District_Prioritised_Quantity', 'Central_Prioritised_Quantity']] + unit_costs['equipment'] = unit_costs['equipment'].rename(columns={col: 'Quantity_' + col.replace('_Prioritised_Quantity', '') for col in unit_costs['equipment'].columns if col.endswith('_Prioritised_Quantity')}) + unit_costs['equipment'] = unit_costs['equipment'].rename(columns={col: col.replace(' ', '_') for col in unit_costs['equipment'].columns}) + + unit_costs['equipment'] = pd.wide_to_long(unit_costs['equipment'], stubnames=['Quantity_'], + i=['Item_code', 'Equipment_tlo', 'replacement_cost_annual', 'service_fee_annual', 'spare_parts_annual', 'major_corrective_maintenance_cost_annual'], + j='Facility_Level', suffix='(\d+|\w+)').reset_index() + facility_level_mapping = {'Health_Post': '0', 'Health_Center': '1a', 'Community': '1b', 'District': '2', 'Central': '3'} + unit_costs['equipment']['Facility_Level'] = unit_costs['equipment']['Facility_Level'].replace(facility_level_mapping) + unit_costs['equipment'] = 
unit_costs['equipment'].rename(columns = {'Quantity_': 'Quantity'}) + + # Function to prepare cost dataframe ready to be merged across cross categories + def retain_relevant_column_subset(_df, _category_specific_group): + columns_to_retain = ['draw', 'run', 'year', 'cost_subcategory', 'Facility_Level', _category_specific_group, 'cost'] + if 'cost_category' in _df.columns: + columns_to_retain.append('cost_category') + _df = _df[columns_to_retain] + return _df + def prepare_cost_dataframe(_df, _category_specific_group, _cost_category): + _df = _df.rename(columns = {_category_specific_group: 'cost_subgroup'}) + _df['cost_category'] = _cost_category + return retain_relevant_column_subset(_df, 'cost_subgroup') + + + # CALCULATE COSTS + #%% + # 1. HR cost + #------------------------ + print("Now estimating HR costs...") + # Define a function to merge unit cost data with model outputs + def merge_cost_and_model_data(cost_df, model_df, varnames): + merged_df = model_df.copy() + for varname in varnames: + new_cost_df = cost_df[cost_df['Parameter_name'] == varname][['OfficerType', 'Facility_Level', 'Value']] + new_cost_df = new_cost_df.rename(columns={"Value": varname}) + # Some parameters are specific to the facility level/cadre, others are general + if ((new_cost_df['OfficerType'] == 'All').all()) and ((new_cost_df['Facility_Level'] == 'All').all()): + merged_df[varname] = new_cost_df[varname].mean() + elif ((new_cost_df['OfficerType'] == 'All').all()) and ((new_cost_df['Facility_Level'] == 'All').all() == False): + merged_df = pd.merge(merged_df, new_cost_df[['Facility_Level',varname]], on=['Facility_Level'], how="left") + elif ((new_cost_df['OfficerType'] == 'All').all() == False) and ((new_cost_df['Facility_Level'] == 'All').all()): + merged_df = pd.merge(merged_df, new_cost_df[['OfficerType',varname]], on=['OfficerType'], how="left") + else: + merged_df = pd.merge(merged_df, new_cost_df, on=['OfficerType', 'Facility_Level'], how="left") + return merged_df + + # Get 
available staff count for each year and draw + def get_staff_count_by_facid_and_officer_type(_df: pd.Series) -> pd.Series: + """Summarise the parsed logged-key results for one draw (as dataframe) into a pd.Series.""" + _df = _df.set_axis(_df['date'].dt.year).drop(columns=['date']) + _df.index.name = 'year' + + def change_to_standard_flattened_index_format(col): + parts = col.split("_", 3) # Split by "_" only up to 3 parts + if len(parts) > 2: + return parts[0] + "=" + parts[1] + "|" + parts[2] + "=" + parts[3] # Rejoin with "I" at the second occurrence + return col # If there's no second underscore, return the string as it is + _df.columns = [change_to_standard_flattened_index_format(col) for col in _df.columns] + + return unflatten_flattened_multi_index_in_logging(_df).stack(level=[0, 1]) # expanded flattened axis + + # Staff count by Facility ID + available_staff_count_by_facid_and_officertype = extract_results( + Path(results_folder), + module='tlo.methods.healthsystem.summary', + key='number_of_hcw_staff', + custom_generate_series=get_staff_count_by_facid_and_officer_type, + do_scaling=True, + ) + + # Update above series to get staff count by Facility_Level + available_staff_count_by_facid_and_officertype = available_staff_count_by_facid_and_officertype.reset_index().rename(columns= {'FacilityID': 'Facility_ID', 'Officer': 'OfficerType'}) + available_staff_count_by_facid_and_officertype['Facility_ID'] = pd.to_numeric(available_staff_count_by_facid_and_officertype['Facility_ID']) + available_staff_count_by_facid_and_officertype['Facility_Level'] = available_staff_count_by_facid_and_officertype['Facility_ID'].map(facility_id_levels_dict) + idx = pd.IndexSlice + available_staff_count_by_level_and_officer_type = available_staff_count_by_facid_and_officertype.drop(columns = [idx['Facility_ID']]).groupby([idx['year'], idx['Facility_Level'], idx['OfficerType']]).sum() + available_staff_count_by_level_and_officer_type = 
melt_model_output_draws_and_runs(available_staff_count_by_level_and_officer_type.reset_index(), id_vars= ['year', 'Facility_Level', 'OfficerType']) + available_staff_count_by_level_and_officer_type['Facility_Level'] = available_staff_count_by_level_and_officer_type['Facility_Level'].astype(str) # make sure facility level is stored as string + available_staff_count_by_level_and_officer_type = available_staff_count_by_level_and_officer_type.drop(available_staff_count_by_level_and_officer_type[available_staff_count_by_level_and_officer_type['Facility_Level'] == '5'].index) # drop headquarters because we're only concerned with staff engaged in service delivery + available_staff_count_by_level_and_officer_type.rename(columns ={'value': 'staff_count'}, inplace=True) + + # Get list of cadres which were utilised in each run to get the count of staff used in the simulation + # Note that we still cost the full staff count for any cadre-Facility_Level combination that was ever used in a run, and + # not the amount of time which was used + def get_capacity_used_by_officer_type_and_facility_level(_df: pd.Series) -> pd.Series: + """Summarise the parsed logged-key results for one draw (as dataframe) into a pd.Series.""" + _df = _df.set_axis(_df['date'].dt.year).drop(columns=['date']) + _df.index.name = 'year' + return unflatten_flattened_multi_index_in_logging(_df).stack(level=[0, 1]) # expanded flattened axis + + annual_capacity_used_by_cadre_and_level = extract_results( + Path(results_folder), + module='tlo.methods.healthsystem.summary', + key='Capacity_By_OfficerType_And_FacilityLevel', + custom_generate_series=get_capacity_used_by_officer_type_and_facility_level, + do_scaling=False, + ) + + # Prepare capacity used dataframe to be multiplied by staff count + average_capacity_used_by_cadre_and_level = annual_capacity_used_by_cadre_and_level.groupby(['OfficerType', 'FacilityLevel']).mean().reset_index(drop=False) + # TODO see if cadre-level combinations should be chosen by year 
+ average_capacity_used_by_cadre_and_level.reset_index(drop=True) # Flatten multi=index column + average_capacity_used_by_cadre_and_level = average_capacity_used_by_cadre_and_level.melt(id_vars=['OfficerType', 'FacilityLevel'], + var_name=['draw', 'run'], + value_name='capacity_used') + list_of_cadre_and_level_combinations_used = average_capacity_used_by_cadre_and_level[average_capacity_used_by_cadre_and_level['capacity_used'] != 0][['OfficerType', 'FacilityLevel', 'draw', 'run']] + print(f"Out of {average_capacity_used_by_cadre_and_level.groupby(['OfficerType', 'FacilityLevel']).size().count()} cadre and level combinations available, {list_of_cadre_and_level_combinations_used.groupby(['OfficerType', 'FacilityLevel']).size().count()} are used across the simulations") + list_of_cadre_and_level_combinations_used = list_of_cadre_and_level_combinations_used.rename(columns = {'FacilityLevel':'Facility_Level'}) + + # Subset scenario staffing level to only include cadre-level combinations used in the simulation + used_staff_count_by_level_and_officer_type = available_staff_count_by_level_and_officer_type.merge(list_of_cadre_and_level_combinations_used, on = ['draw','run','OfficerType', 'Facility_Level'], how = 'right', validate = 'm:m') + used_staff_count_by_level_and_officer_type.rename(columns ={'value': 'staff_count'}, inplace=True) + + if (cost_only_used_staff): + print("The input for 'cost_only_used_staff' implies that only cadre-level combinations which have been used in the run are costed") + staff_size_chosen_for_costing = used_staff_count_by_level_and_officer_type + else: + print("The input for 'cost_only_used_staff' implies that all staff are costed regardless of the cadre-level combinations which have been used in the run are costed") + staff_size_chosen_for_costing = available_staff_count_by_level_and_officer_type + + # Calculate various components of HR cost + # 1.1 Salary cost for health workforce cadres used in the simulation (Staff count X Annual salary) + 
#--------------------------------------------------------------------------------------------------------------- + salary_for_staff = merge_cost_and_model_data(cost_df = unit_costs['hr'], model_df = staff_size_chosen_for_costing, + varnames = ['salary_usd']) + salary_for_staff['cost'] = salary_for_staff['salary_usd'] * salary_for_staff['staff_count'] + + # 1.2 Pre-service training & recruitment cost to fill gap created by attrition + #--------------------------------------------------------------------------------------------------------------- + preservice_training_cost = merge_cost_and_model_data(cost_df = unit_costs['hr'], model_df = staff_size_chosen_for_costing, + varnames = ['annual_attrition_rate', + 'licensure_exam_passing_rate', 'graduation_rate', + 'absorption_rate_of_students_into_public_workforce', 'proportion_of_workforce_recruited_from_abroad', + 'average_annual_preservice_training_cost_for_cadre', 'preservice_training_duration', 'recruitment_cost_per_person_recruited_usd', + 'average_length_of_tenure_in_the_public_sector']) + + def calculate_npv_past_training_expenses_by_row(row, r = _discount_rate): + # Initialize the NPV for the row + npv = 0 + annual_cost = row['average_annual_preservice_training_cost_for_cadre'] + full_years = int(row['preservice_training_duration']) # Extract integer part of the year + partial_year = row['preservice_training_duration'] - full_years # Fractional part of the year + + # Iterate over each year of the training duration to calculate compounded cost to the present + # Calculate NPV for each full year of training + for t in range(full_years): + npv += annual_cost * (1 + r) ** (t+1+1) # 1 added twice because range(4) is [0,1,2,3] + + # Account for the fractional year at the end if it exists + if partial_year > 0: + npv += annual_cost * partial_year * (1 + r) ** (1+r) + + # Add recruitment cost assuming this happens during the partial year or the year after graduation if partial year == 0 + npv += 
row['recruitment_cost_per_person_recruited_usd'] * (1+r) + + return npv + + # Calculate NPV for each row using iterrows and store in a new column + npv_values = [] + for index, row in preservice_training_cost.iterrows(): + npv = calculate_npv_past_training_expenses_by_row(row, r=annuitization_rate) + npv_values.append(npv) + + preservice_training_cost['npv_of_training_and_recruitment_cost'] = npv_values + preservice_training_cost['npv_of_training_and_recruitment_cost_per_recruit'] = preservice_training_cost['npv_of_training_and_recruitment_cost'] *\ + (1/(preservice_training_cost['absorption_rate_of_students_into_public_workforce'] + preservice_training_cost['proportion_of_workforce_recruited_from_abroad'])) *\ + (1/preservice_training_cost['graduation_rate']) * (1/preservice_training_cost['licensure_exam_passing_rate']) + if _discount_rate == 0: # if the discount rate is 0, then the pre-service + recruitment cost simply needs to be divided by the number of years in tenure + preservice_training_cost['annuitisation_rate'] = preservice_training_cost['average_length_of_tenure_in_the_public_sector'] + else: + preservice_training_cost['annuitisation_rate'] = 1 + (1 - (1 + annuitization_rate) ** (-preservice_training_cost['average_length_of_tenure_in_the_public_sector'] + 1)) / annuitization_rate + preservice_training_cost['annuitised_training_and_recruitment_cost_per_recruit'] = preservice_training_cost['npv_of_training_and_recruitment_cost_per_recruit']/preservice_training_cost['annuitisation_rate'] + + # Cost per student trained * 1/Rate of absorption from the local and foreign graduates * 1/Graduation rate * attrition rate + # the inverse of attrition rate is the average expected tenure; and the preservice training cost needs to be divided by the average tenure + preservice_training_cost['cost'] = preservice_training_cost['annuitised_training_and_recruitment_cost_per_recruit'] * preservice_training_cost['staff_count'] * 
preservice_training_cost['annual_attrition_rate'] # not multiplied with attrition rate again because this is already factored into 'Annual_cost_per_staff_recruited' + preservice_training_cost = preservice_training_cost[['draw', 'run', 'year', 'OfficerType', 'Facility_Level', 'cost']] + + # 1.3 In-service training cost to train all staff + #--------------------------------------------------------------------------------------------------------------- + inservice_training_cost = merge_cost_and_model_data(cost_df = unit_costs['hr'], model_df = staff_size_chosen_for_costing, + varnames = ['annual_inservice_training_cost_usd']) + inservice_training_cost['cost'] = inservice_training_cost['staff_count'] * inservice_training_cost['annual_inservice_training_cost_usd'] + inservice_training_cost = inservice_training_cost[['draw', 'run', 'year', 'OfficerType', 'Facility_Level', 'cost']] + # TODO Consider calculating economic cost of HR by multiplying salary times staff count with cadres_utilisation_rate + + # 1.4 Regular mentorship and supportive supervision costs + #--------------------------------------------------------------------------------------------------------------- + mentorship_and_supportive_cost = merge_cost_and_model_data(cost_df = unit_costs['hr'], model_df = staff_size_chosen_for_costing, + varnames = ['annual_mentorship_and_supervision_cost']) + mentorship_and_supportive_cost['cost'] = mentorship_and_supportive_cost['staff_count'] * mentorship_and_supportive_cost['annual_mentorship_and_supervision_cost'] + mentorship_and_supportive_cost = mentorship_and_supportive_cost[['draw', 'run', 'year', 'OfficerType', 'Facility_Level', 'cost']] + # TODO Consider calculating economic cost of HR by multiplying salary times staff count with cadres_utilisation_rate + + # 1.5 Store all HR costs in one standard format dataframe + #--------------------------------------------------------------------------------------------------------------- + # Function to melt and label the 
cost category + def label_rows_of_cost_dataframe(_df, label_var, label): + _df = _df.reset_index() + _df[label_var] = label + return _df + + # Initialize HR with the salary data + if (cost_only_used_staff): + human_resource_costs = retain_relevant_column_subset(label_rows_of_cost_dataframe(salary_for_staff, 'cost_subcategory', 'salary_for_cadres_used'), 'OfficerType') + # Concatenate additional cost categories + additional_costs = [ + (preservice_training_cost, 'preservice_training_and_recruitment_cost_for_attrited_workers'), + (inservice_training_cost, 'inservice_training_cost_for_cadres_used'), + (mentorship_and_supportive_cost, 'mentorship_and_supportive_cost_for_cadres_used') + ] + else: + human_resource_costs = retain_relevant_column_subset(label_rows_of_cost_dataframe(salary_for_staff, 'cost_subcategory', 'salary_for_all_staff'), 'OfficerType') + # Concatenate additional cost categories + additional_costs = [ + (preservice_training_cost, 'preservice_training_and_recruitment_cost_for_attrited_workers'), + (inservice_training_cost, 'inservice_training_cost_for_all_staff'), + (mentorship_and_supportive_cost, 'mentorship_and_supportive_cost_for_all_staff') + ] + + # Iterate through additional costs, melt and concatenate + for df, label in additional_costs: + labelled_df = retain_relevant_column_subset(label_rows_of_cost_dataframe(df, 'cost_subcategory', label), 'OfficerType') + human_resource_costs = pd.concat([human_resource_costs, labelled_df]) + + human_resource_costs = prepare_cost_dataframe(human_resource_costs, _category_specific_group = 'OfficerType', _cost_category = 'human resources for health') + + # Only preserve the draws and runs requested + if _draws is not None: + human_resource_costs = human_resource_costs[human_resource_costs.draw.isin(_draws)] + if _runs is not None: + human_resource_costs = human_resource_costs[human_resource_costs.run.isin(_runs)] + + # %% + # 2. 
Consumables cost + #------------------------ + print("Now estimating Consumables costs...") + def get_quantity_of_consumables_dispensed(results_folder): + def get_counts_of_items_requested(_df): + _df = drop_outside_period(_df) + counts_of_used = defaultdict(lambda: defaultdict(int)) + counts_of_not_available = defaultdict(lambda: defaultdict(int)) + + for _, row in _df.iterrows(): + date = row['date'] + for item, num in row['Item_Used'].items(): + counts_of_used[date][item] += num + for item, num in row['Item_NotAvailable'].items(): + counts_of_not_available[date][item] += num + used_df = pd.DataFrame(counts_of_used).fillna(0).astype(int).stack().rename('Used') + not_available_df = pd.DataFrame(counts_of_not_available).fillna(0).astype(int).stack().rename('Not_Available') + + # Combine the two dataframes into one series with MultiIndex (date, item, availability_status) + combined_df = pd.concat([used_df, not_available_df], axis=1).fillna(0).astype(int) + + # Convert to a pd.Series, as expected by the custom_generate_series function + return combined_df.stack() + + cons_req = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='Consumables', + custom_generate_series=get_counts_of_items_requested, + do_scaling=True) + + cons_dispensed = cons_req.xs("Used", level=2) # only keep actual dispensed amount, i.e. 
when available + return cons_dispensed + # TODO Extract year of dispensing drugs + + consumables_dispensed = get_quantity_of_consumables_dispensed(results_folder) + consumables_dispensed = consumables_dispensed.reset_index().rename(columns = {'level_0': 'Item_Code', 'level_1': 'year'}) + consumables_dispensed[idx['year']] = pd.to_datetime(consumables_dispensed[idx['year']]).dt.year # Extract only year from date + consumables_dispensed[idx['Item_Code']] = pd.to_numeric(consumables_dispensed[idx['Item_Code']]) + # Make a list of columns in the DataFrame pertaining to quantity dispensed + quantity_columns = consumables_dispensed.columns.to_list() + quantity_columns = [tup for tup in quantity_columns if tup not in [('Item_Code', ''), ('year', '')]] + + # 2.1 Cost of consumables dispensed + #--------------------------------------------------------------------------------------------------------------- + # Multiply number of items needed by cost of consumable + #consumables_dispensed.columns = consumables_dispensed.columns.get_level_values(0).str() + "_" + consumables_dispensed.columns.get_level_values(1) # Flatten multi-level columns for pandas merge + unit_costs['consumables'].columns = pd.MultiIndex.from_arrays([unit_costs['consumables'].columns, [''] * len(unit_costs['consumables'].columns)]) + cost_of_consumables_dispensed = consumables_dispensed.merge(unit_costs['consumables'], on = idx['Item_Code'], validate = 'm:1', how = 'left') + price_column = 'Price_per_unit' + cost_of_consumables_dispensed[quantity_columns] = cost_of_consumables_dispensed[quantity_columns].multiply( + cost_of_consumables_dispensed[price_column], axis=0) + + # 2.2 Cost of consumables stocked (quantity needed for what is dispensed) + # --------------------------------------------------------------------------------------------------------------- + # Stocked amount should be higher than dispensed because of i. excess capacity, ii. theft, iii. 
expiry + # While there are estimates in the literature of what % these might be, we agreed that it is better to rely upon + # an empirical estimate based on OpenLMIS data + inflow_to_outflow_ratio = pd.read_csv( + resourcefilepath / "costing/ResourceFile_Consumables_Inflow_Outflow_Ratio.csv") + inflow_to_outflow_ratio = inflow_to_outflow_ratio.set_index(['item_category', 'item_code', 'district', 'fac_type_tlo']) + average_inflow_to_outflow_ratio_ratio = inflow_to_outflow_ratio['inflow_to_outflow_ratio'].mean() # Use average where item-specific ratio is not available + + # Multiply number of items needed by cost of consumable + inflow_to_outflow_ratio_by_consumable = inflow_to_outflow_ratio.groupby(level='item_code').mean() + excess_stock_ratio = inflow_to_outflow_ratio_by_consumable - 1 + excess_stock_ratio = excess_stock_ratio.reset_index().rename(columns = {'inflow_to_outflow_ratio': 'excess_stock_proportion_of_dispensed'}) + # TODO Consider whether a more disaggregated version of the ratio dictionary should be applied + cost_of_excess_consumables_stocked = consumables_dispensed.merge(unit_costs['consumables'], left_on = 'Item_Code', right_on = 'Item_Code', validate = 'm:1', how = 'left') + excess_stock_ratio.columns = pd.MultiIndex.from_arrays([excess_stock_ratio.columns, [''] * len(excess_stock_ratio.columns)]) # TODO convert this into a funciton + cost_of_excess_consumables_stocked = cost_of_excess_consumables_stocked.merge(excess_stock_ratio, left_on = 'Item_Code', right_on = 'item_code', validate = 'm:1', how = 'left') + cost_of_excess_consumables_stocked.loc[cost_of_excess_consumables_stocked.excess_stock_proportion_of_dispensed.isna(), 'excess_stock_proportion_of_dispensed'] = average_inflow_to_outflow_ratio_ratio - 1# TODO disaggregate the average by program + cost_of_excess_consumables_stocked[quantity_columns] = cost_of_excess_consumables_stocked[quantity_columns].multiply(cost_of_excess_consumables_stocked[idx[price_column]], axis=0) + 
cost_of_excess_consumables_stocked[quantity_columns] = cost_of_excess_consumables_stocked[quantity_columns].multiply(cost_of_excess_consumables_stocked[idx['excess_stock_proportion_of_dispensed']], axis=0) + + # 2.3 Store all consumable costs in one standard format dataframe + #--------------------------------------------------------------------------------------------------------------- + # Function to melt and label the cost category + consumables_dict = pd.read_csv(path_for_consumable_resourcefiles / 'ResourceFile_Consumables_Items_and_Packages.csv', low_memory=False, + encoding="ISO-8859-1")[['Items','Item_Code']] + consumables_dict = dict(zip(consumables_dict['Item_Code'], consumables_dict['Items'])) + def melt_and_label_consumables_cost(_df, label): + multi_index = pd.MultiIndex.from_tuples(_df.columns) + _df.columns = multi_index + # Select 'Item_Code', 'year', and all columns where both levels of the MultiIndex are numeric (these are the (draw,run) columns with cost values) + selected_columns = [col for col in _df.columns if + (col[0] in ['Item_Code', 'year']) or (isinstance(col[0], int) and isinstance(col[1], int))] + _df = _df[selected_columns] # Subset the dataframe with the selected columns + + # reshape dataframe and assign 'draw' and 'run' as the correct column headers + melted_df = pd.melt(_df, id_vars=['year', 'Item_Code']).rename(columns = {'variable_0': 'draw', 'variable_1': 'run'}) + # Replace item_code with consumable_name_tlo + melted_df['consumable'] = melted_df['Item_Code'].map(consumables_dict) + melted_df['cost_subcategory'] = label + melted_df['Facility_Level'] = 'all' #TODO this is temporary until 'tlo.methods.healthsystem.summary' only logs consumable at the aggregate level + melted_df = melted_df.rename(columns = {'value': 'cost'}) + return melted_df + + def disaggregate_separately_managed_medical_supplies_from_consumable_costs(_df, + _consumables_dict, # This is a dictionary mapping codes to names + list_of_unique_medical_products): + 
reversed_consumables_dict = {value: key for key, value in _consumables_dict.items()} # reverse dictionary to map names to codes + new_df = _df.copy() + new_df['item_code'] = new_df['consumable'].map(reversed_consumables_dict) + cost_of_consumables = new_df[~new_df['item_code'].isin(list_of_unique_medical_products)] + cost_of_separately_managed_medical_supplies = new_df[new_df['item_code'].isin(list_of_unique_medical_products)] + cost_of_separately_managed_medical_supplies['cost_subcategory'] = cost_of_separately_managed_medical_supplies['cost_subcategory'].replace( + {'consumables_dispensed': 'separately_managed_medical_supplies_dispensed', 'consumables_stocked': 'separately_managed_medical_supplies_stocked'}, regex=True) + return cost_of_consumables.drop(columns = 'item_code'), cost_of_separately_managed_medical_supplies.drop(columns = 'item_code') + + separately_managed_medical_supplies = [127, 141, 161] # Oxygen, Blood, IRS + cost_of_consumables_dispensed, cost_of_separately_managed_medical_supplies_dispensed = disaggregate_separately_managed_medical_supplies_from_consumable_costs(_df = retain_relevant_column_subset(melt_and_label_consumables_cost(cost_of_consumables_dispensed, 'cost_of_consumables_dispensed'), 'consumable'), + _consumables_dict = consumables_dict, + list_of_unique_medical_products = separately_managed_medical_supplies) + cost_of_excess_consumables_stocked, cost_of_separately_managed_medical_supplies_excess_stock = disaggregate_separately_managed_medical_supplies_from_consumable_costs(_df = retain_relevant_column_subset(melt_and_label_consumables_cost(cost_of_excess_consumables_stocked, 'cost_of_excess_consumables_stocked'), 'consumable'), + _consumables_dict=consumables_dict, + list_of_unique_medical_products=separately_managed_medical_supplies) + + consumable_costs = pd.concat([cost_of_consumables_dispensed, cost_of_excess_consumables_stocked]) + + # 2.4 Supply chain costs + 
#--------------------------------------------------------------------------------------------------------------- + # Assume that the cost of procurement, warehousing and distribution is a fixed proportion of consumable purchase costs + # The fixed proportion is based on Resource Mapping Expenditure data from 2018 + resource_mapping_data = unit_costs['actual_expenditure_data'] + # Make sure values are numeric + expenditure_column = ['EXPENDITURE (USD) (Jul 2018 - Jun 2019)'] + resource_mapping_data[expenditure_column] = resource_mapping_data[expenditure_column].apply(lambda x: pd.to_numeric(x, errors='coerce')) + # The numerator includes Supply chain expenditure for EHP consumables + supply_chain_expenditure = \ + resource_mapping_data[resource_mapping_data['Cost Type'] == 'Supply Chain'][expenditure_column].sum()[0] + # The denominator include all drugs and commodities expenditure, excluding what is recategorised as non-EHP or admin + drug_expenditure_condition = resource_mapping_data['Cost Type'].str.contains('Drugs and Commodities') + excluded_drug_expenditure_condition = (resource_mapping_data[ + 'Calibration_category'] == 'Program Management & Administration') | ( + resource_mapping_data[ + 'Calibration_category'] == 'Non-EHP consumables') + consumables_purchase_expenditure = \ + resource_mapping_data[drug_expenditure_condition][expenditure_column].sum()[0] - \ + resource_mapping_data[drug_expenditure_condition & excluded_drug_expenditure_condition][ + expenditure_column].sum()[0] + supply_chain_cost_proportion = supply_chain_expenditure / consumables_purchase_expenditure + + # Estimate supply chain costs based on the total consumable purchase cost calculated above + # Note that Oxygen, IRS, and Blood costs are already excluded because the unit_cost of these commodities already + # includes the procurement/production, storage and distribution costs + supply_chain_costs = (consumable_costs.groupby(['draw', 'run', 'year'])[ + 'cost'].sum() * 
supply_chain_cost_proportion).reset_index() + # Assign relevant additional columns to match the format of the rest of consumables costs + supply_chain_costs['Facility_Level'] = 'all' + supply_chain_costs['consumable'] = 'supply chain (all consumables)' + supply_chain_costs['cost_subcategory'] = 'supply_chain' + assert set(supply_chain_costs.columns) == set(consumable_costs.columns) + + # Append supply chain costs to the full consumable cost dataframe + consumable_costs = pd.concat([consumable_costs, supply_chain_costs]) + other_costs = pd.concat([cost_of_separately_managed_medical_supplies_dispensed, cost_of_separately_managed_medical_supplies_excess_stock]) + + consumable_costs = prepare_cost_dataframe(consumable_costs, _category_specific_group = 'consumable', _cost_category = 'medical consumables') + other_costs = prepare_cost_dataframe(other_costs, _category_specific_group = 'consumable', _cost_category = 'medical consumables') + + # Only preserve the draws and runs requested + if _draws is not None: + consumable_costs = consumable_costs[consumable_costs.draw.isin(_draws)] + other_costs = other_costs[other_costs.draw.isin(_draws)] + if _runs is not None: + consumable_costs = consumable_costs[consumable_costs.run.isin(_runs)] + other_costs = other_costs[other_costs.run.isin(_runs)] + + + # %% + # 3. 
Equipment cost + #-------------------------------------------- + print("Now estimating Medical equipment costs...") + # Total cost of equipment required as per SEL (HSSP-III) only at facility IDs where it has been used in the simulation + # Get list of equipment used in the simulation by district and level + def get_equipment_used_by_district_and_facility(_df: pd.Series) -> pd.Series: + """Summarise the parsed logged-key results for one draw (as dataframe) into a pd.Series.""" + _df = _df.pivot_table(index=['District', 'Facility_Level'], + values='EquipmentEverUsed', + aggfunc='first') + _df.index.name = 'year' + return _df['EquipmentEverUsed'] + + list_of_equipment_used_by_draw_and_run = extract_results( + Path(results_folder), + module='tlo.methods.healthsystem.summary', + key='EquipmentEverUsed_ByFacilityID', + custom_generate_series=get_equipment_used_by_district_and_facility, + do_scaling=False, + ) + for col in list_of_equipment_used_by_draw_and_run.columns: + list_of_equipment_used_by_draw_and_run[col] = list_of_equipment_used_by_draw_and_run[col].apply(ast.literal_eval) + + # Initialize an empty DataFrame + equipment_cost_across_sim = pd.DataFrame() + + # Extract equipment cost for each draw and run + for d in _draws: + for r in _runs: + print(f"Processing draw {d} and run {r} of equipment costs") + # Extract a list of equipment which was used at each facility level within each district + equipment_used = {district: {level: [] for level in fac_levels} for district in list(district_dict.values())} # create a dictionary with a key for each district and facility level + list_of_equipment_used_by_current_draw_and_run = list_of_equipment_used_by_draw_and_run[(d, r)].reset_index() + for dist in list(district_dict.values()): + for level in fac_levels: + equipment_used_subset = list_of_equipment_used_by_current_draw_and_run[(list_of_equipment_used_by_current_draw_and_run['District'] == dist) & (list_of_equipment_used_by_current_draw_and_run['Facility_Level'] == 
level)] + equipment_used_subset.columns = ['District', 'Facility_Level', 'EquipmentEverUsed'] + equipment_used[dist][level] = set().union(*equipment_used_subset['EquipmentEverUsed']) + equipment_used = pd.concat({ + k: pd.DataFrame.from_dict(v, 'index') for k, v in equipment_used.items()}, + axis=0) + full_list_of_equipment_used = set(equipment_used.values.flatten()) + full_list_of_equipment_used = set(filter(pd.notnull, full_list_of_equipment_used)) + + equipment_df = pd.DataFrame() + equipment_df.index = equipment_used.index + for item in full_list_of_equipment_used: + equipment_df[str(item)] = 0 + for dist_fac_index in equipment_df.index: + equipment_df.loc[equipment_df.index == dist_fac_index, str(item)] = equipment_used[equipment_used.index == dist_fac_index].isin([item]).any(axis=1) + #equipment_df.to_csv('./outputs/equipment_use.csv') + + equipment_df = equipment_df.reset_index().rename(columns = {'level_0' : 'District', 'level_1': 'Facility_Level'}) + equipment_df = pd.melt(equipment_df, id_vars = ['District', 'Facility_Level']).rename(columns = {'variable': 'Item_code', 'value': 'whether_item_was_used'}) + equipment_df['Item_code'] = pd.to_numeric(equipment_df['Item_code']) + # Merge the count of facilities by district and level + equipment_df = equipment_df.merge(mfl[['District', 'Facility_Level','Facility_Count']], on = ['District', 'Facility_Level'], how = 'left') + equipment_df.loc[equipment_df.Facility_Count.isna(), 'Facility_Count'] = 0 + + # Because levels 1b and 2 are collapsed together, we assume that the same equipment is used by level 1b as that recorded for level 2 + def update_itemuse_for_level1b_using_level2_data(_df): + # Create a list of District and Item_code combinations for which use == True + list_of_equipment_used_at_level2 = _df[(_df.Facility_Level == '2') & (_df['whether_item_was_used'] == True)][['District', 'Item_code']] + # Now update the 'whether_item_was_used' for 'Facility_Level' == '1b' to match that of level '2' + _df.loc[ + 
(_df['Facility_Level'] == '1b') & + (_df[['District', 'Item_code']].apply(tuple, axis=1).isin( + list_of_equipment_used_at_level2.apply(tuple, axis=1))), + 'whether_item_was_used' + ] = True + + return _df + + equipment_df = update_itemuse_for_level1b_using_level2_data(equipment_df) + + # Merge the two datasets to calculate cost + equipment_cost = pd.merge(equipment_df, unit_costs['equipment'][['Item_code', 'Equipment_tlo', 'Facility_Level', 'Quantity', 'replacement_cost_annual', 'service_fee_annual', 'spare_parts_annual', 'major_corrective_maintenance_cost_annual']], + on = ['Item_code', 'Facility_Level'], how = 'left', validate = "m:1") + categories_of_equipment_cost = ['replacement_cost', 'service_fee', 'spare_parts', 'major_corrective_maintenance_cost'] + for cost_category in categories_of_equipment_cost: + # Rename unit cost columns + unit_cost_column = cost_category + '_annual_unit' + equipment_cost = equipment_cost.rename(columns = {cost_category + '_annual':unit_cost_column }) + equipment_cost[cost_category + '_annual_total'] = equipment_cost[cost_category + '_annual_unit'] * equipment_cost['whether_item_was_used'] * equipment_cost['Quantity'] * equipment_cost['Facility_Count'] + equipment_cost['year'] = max(years) - 1 + if equipment_cost_across_sim.empty: + equipment_cost_across_sim = equipment_cost.groupby(['year', 'Facility_Level', 'Equipment_tlo'])[[item + '_annual_total' for item in categories_of_equipment_cost]].sum() + equipment_cost_across_sim['draw'] = d + equipment_cost_across_sim['run'] = r + else: + equipment_cost_for_current_sim = equipment_cost.groupby(['year', 'Facility_Level', 'Equipment_tlo'])[[item + '_annual_total' for item in categories_of_equipment_cost]].sum() + equipment_cost_for_current_sim['draw'] = d + equipment_cost_for_current_sim['run'] = r + # Concatenate the results + equipment_cost_across_sim = pd.concat([equipment_cost_across_sim, equipment_cost_for_current_sim], axis=0) + + equipment_costs = 
pd.melt(equipment_cost_across_sim.reset_index(), + id_vars=['draw', 'run', 'Facility_Level', 'Equipment_tlo'], # Columns to keep + value_vars=[col for col in equipment_cost_across_sim.columns if col.endswith('_annual_total')], # Columns to unpivot + var_name='cost_subcategory', # New column name for the 'sub-category' of cost + value_name='cost') # New column name for the values + + # Assume that the annual costs are constant each year of the simulation + equipment_costs = pd.concat([equipment_costs.assign(year=year) for year in years]) + # TODO If the logger is updated to include year, we may wish to calculate equipment costs by year - currently we assume the same annuitised equipment cost each year + equipment_costs = equipment_costs.reset_index(drop=True) + equipment_costs = equipment_costs.rename(columns = {'Equipment_tlo': 'Equipment'}) + equipment_costs = prepare_cost_dataframe(equipment_costs, _category_specific_group = 'Equipment', _cost_category = 'medical equipment') + + # 4. Facility running costs + # Average running costs by facility level and district times the number of facilities in the simulation + # Convert unit_costs to long format + unit_costs['facility_operations'] = pd.melt( + unit_costs['facility_operations'], + id_vars=["Facility_Level"], # Columns to keep as identifiers + var_name="operating_cost_type", # Name for the new 'cost_category' column + value_name="unit_cost" # Name for the new 'cost' column + ) + unit_costs['facility_operations']['Facility_Level'] = unit_costs['facility_operations']['Facility_Level'].astype(str) + fac_count_by_level = mfl[['Facility_Level', 'Facility_Count']].groupby(['Facility_Level']).sum().reset_index() + + facility_operation_cost = pd.merge(unit_costs['facility_operations'], fac_count_by_level, on = 'Facility_Level', how = 'left', validate = 'm:m') + facility_operation_cost['Facility_Count'] = facility_operation_cost['Facility_Count'].fillna(0).astype(int) + facility_operation_cost['cost'] = 
facility_operation_cost['unit_cost'] * facility_operation_cost['Facility_Count'] + + # Duplicate the same set of facility operation costs for all draws and runs + # Create the Cartesian product of `_draws` and `_runs` + combinations = list(itertools.product(_draws, _runs)) + comb_df = pd.DataFrame(combinations, columns=["draw", "run"]) + facility_operation_cost = facility_operation_cost.merge(comb_df, how="cross") + facility_operation_cost['cost_category'] = 'Facility operating cost' + operating_cost_mapping = {'Electricity': 'utilities_and_maintenance', 'Water': 'utilities_and_maintenance', 'Cleaning':'utilities_and_maintenance', + 'Security':'utilities_and_maintenance', 'Building maintenance': 'building_maintenance', + 'Facility management': 'utilities_and_maintenance', 'Vehicle maintenance': 'vehicle_maintenance', + 'Ambulance fuel': 'fuel_for_ambulance', 'Food for inpatient cases': 'food_for_inpatient_care'} + facility_operation_cost['cost_subcategory'] = facility_operation_cost['operating_cost_type'] + facility_operation_cost['cost_subcategory'] = facility_operation_cost['cost_subcategory'].map(operating_cost_mapping) + # Assume that the annual costs are constant each year of the simulation + facility_operation_cost = pd.concat([facility_operation_cost.assign(year=year) for year in years]) + + # Assume that the annual costs are constant each year of the simulation + facility_operation_cost = prepare_cost_dataframe(facility_operation_cost, _category_specific_group = 'operating_cost_type', _cost_category = 'facility operating cost') + + + # %% + # Store all costs in single dataframe + #-------------------------------------------- + scenario_cost = pd.concat([human_resource_costs, consumable_costs, equipment_costs, other_costs, facility_operation_cost], ignore_index=True) + scenario_cost['cost'] = pd.to_numeric(scenario_cost['cost'], errors='coerce') + + # Summarize costs + if summarize: + groupby_cols = [col for col in scenario_cost.columns if col not in ['run', 
'cost']] + # Use the summary metric specific in the inputs + if _metric not in ['mean', 'median']: + raise ValueError(f"Invalid input for _metric: '{_metric}'. " + f"Values need to be one of 'mean' or 'median'") + else: + # Define aggregation function based on _metric input (mean or median) + agg_func = np.mean if _metric == 'mean' else np.median + + scenario_cost = pd.concat( + { + _metric: scenario_cost.groupby(by=groupby_cols, sort=False)['cost'].agg(agg_func), + 'lower': scenario_cost.groupby(by=groupby_cols, sort=False)['cost'].quantile(0.025), + 'upper': scenario_cost.groupby(by=groupby_cols, sort=False)['cost'].quantile(0.975), + }, + axis=1 + ) + + scenario_cost = pd.melt( + scenario_cost.reset_index(), + id_vars=groupby_cols, # Columns to keep + value_vars=[_metric, 'lower', 'upper'], # Columns to unpivot + var_name='stat', # New column name for the 'sub-category' of cost + value_name='cost' + ) + + if _years is None: + return apply_discounting_to_cost_data(_df = scenario_cost, + _discount_rate = _discount_rate, _column_for_discounting = 'cost') + else: + return apply_discounting_to_cost_data(_df = scenario_cost[scenario_cost.year.isin(_years)], + _discount_rate = _discount_rate, + _column_for_discounting = 'cost') + +# Define a function to summarize cost data from +# Note that the dataframe needs to have draw as index and run as columns. if the dataframe is long with draw and run as index, then +# first unstack the dataframe and subsequently apply the summarize function +def summarize_cost_data(_df, + _metric: Literal['mean', 'median'] = 'mean') -> pd.DataFrame: + """ + Summarize cost data across runs by computing central tendency and 95% confidence intervals. + + Parameters: + ---------- + _df : pd.DataFrame + A DataFrame with draw as index and run as columns, where each cell contains a cost value. 
+ - Rows = draw IDs (e.g., 0, 1, 2) + - Columns = run IDs (e.g., 0, 1, 2) + - Values = cost estimates + + _metric : {'mean', 'median'}, default 'mean' + The central summary statistic to compute across runs. + + Returns: + ------- + pd.DataFrame + A pivoted DataFrame with draws as index and a MultiIndex of columns: + (run ID, ['mean' or 'median', 'lower', 'upper']), where: + - 'lower' = 2.5th percentile + - 'upper' = 97.5th percentile + """ + + if _metric not in ['mean', 'median']: + raise ValueError(f"Invalid input for _metric: '{_metric}'. " + f"Values need to be one of 'mean' or 'median'") + + _df = _df.stack() + collapsed_df = _df.groupby(level='draw').agg([ + _metric, + ('lower', lambda x: x.quantile(0.025)), + ('upper', lambda x: x.quantile(0.975)) + ]) + + collapsed_df = collapsed_df.unstack() + collapsed_df.index = collapsed_df.index.set_names('stat', level=0) + collapsed_df = collapsed_df.unstack(level='stat') + return collapsed_df + +# Estimate projected health spending +#################################################### +def estimate_projected_health_spending(resourcefilepath: Path, + results_folder: Path, + _draws: Optional[list[int]] = None, + _runs: Optional[list[int]] = None, + _years: Optional[list[int]] = None, + _discount_rate: float = 0, + _summarize: bool = False, + _metric: Literal['mean', 'median'] = 'mean') -> pd.DataFrame: + """ + Estimate total projected health spending for a simulation period. + + Combines health spending per capita projections (Dieleman et al, 2019) with simulated population estimates to calculate + total health expenditure, optionally applying a discount rate and summarizing across runs. + + Parameters: + ---------- + resourcefilepath : Path + Path to the folder containing the costing resource Excel files. + results_folder : Path + Path to the simulation results folder. + _draws : list or range, optional + Draws to include. If None, all available draws are used. + _runs : list or range, optional + Runs to include. 
If None, all available runs are used. + _years : list of int, optional + Years to include. If None, includes the full simulation period. + _discount_rate : float, default 0 + Discount rate applied to future costs. + _summarize : bool, default False + Whether to summarize output across runs using mean/median and 95% confidence intervals. + _metric : {'mean', 'median'}, default 'mean' + Central tendency metric used if summarizing. + + Returns: + ------- + pd.DataFrame + If `_summarize=True`, returns a DataFrame with: + - Index = draw + - Columns = 'mean'/'median', 'lower', 'upper' ROI values + + If `_summarize=False`, returns a DataFrame with: + - Index = draw + - Columns = run + - Values = discounted total health spending for the selected years + """ + + # %% Gathering basic information + # Load basic simulation parameters + #------------------------------------- + info, years, TARGET_PERIOD = load_simulation_metadata(results_folder) + + if _draws is None: + _draws = range(0, info['number_of_draws']) + if _runs is None: + _runs = range(0, info['runs_per_draw']) + + # Load health spending per capita projections + #---------------------------------------- + # Load health spending projections + unit_costs = load_unit_cost_assumptions(resourcefilepath) + health_spending_per_capita = unit_costs["health_spending_projections"] + total_health_spending_per_capita_mean = health_spending_per_capita[['year', 'total_mean']].set_index('year') + total_health_spending_per_capita_mean.columns = pd.MultiIndex.from_tuples([('total_mean', '')]) + + # Load population projections + # ---------------------------------------- + def get_total_population(_df): + years_needed = [min(_years), max(_years)] # we only consider the population for the malaria scale-up period + # because those are the years relevant for malaria scale-up costing + _df['year'] = pd.to_datetime(_df['date']).dt.year + _df = _df[['year', 'total']] + assert set(_df.year.unique()).issuperset(years_needed), "Some years are 
not recorded." + return pd.Series(_df.loc[_df.year.between(*years_needed)].set_index('year')['total']) + + total_population_by_year = extract_results( + results_folder, + module='tlo.methods.demography', + key='population', + custom_generate_series=get_total_population, + do_scaling=True + ) + population_columns = total_population_by_year.columns + + # Estimate total health spending + projected_health_spending = pd.merge(total_health_spending_per_capita_mean, + total_population_by_year, + left_index=True, right_index=True,how='inner') + projected_health_spending = projected_health_spending.apply(pd.to_numeric, errors='coerce') + projected_health_spending[population_columns] = projected_health_spending[population_columns].multiply( + projected_health_spending['total_mean'], axis=0) + projected_health_spending = projected_health_spending[population_columns] + + # Apply discount rate + # Reformat dataframe to apply discounting function + projected_health_spending.columns.names = ['draw', 'run'] + projected_health_spending = projected_health_spending.stack(level=['draw', 'run']).reset_index() + projected_health_spending.columns = ['year', 'draw', 'run', 'total_spending'] + + # Initial year and discount rate + initial_year = min(projected_health_spending['year'].unique()) + projected_health_spending_discounted = apply_discounting_to_cost_data( + projected_health_spending, _discount_rate= _discount_rate, + _column_for_discounting='total_spending', _initial_year = initial_year) + projected_health_spending_discounted = projected_health_spending_discounted.groupby(['draw', 'run'])['total_spending'].sum() + + if _summarize == True: + if _metric == 'mean': + # Calculate the mean and 95% confidence intervals for each group + projected_health_spending_discounted = projected_health_spending_discounted.groupby(level="draw").agg( + mean=np.mean, + lower=lambda x: np.percentile(x, 2.5), + upper=lambda x: np.percentile(x, 97.5) + ) + + elif _metric == 'median': + # Calculate the 
mean and 95% confidence intervals for each group + projected_health_spending_discounted = projected_health_spending_discounted.groupby(level="draw").agg( + median=np.median, + lower=lambda x: np.percentile(x, 2.5), + upper=lambda x: np.percentile(x, 97.5) + ) + + else: + raise ValueError(f"Invalid input for _metric: '{_metric}'. " + f"Values need to be one of 'mean' or 'median'") + # Flatten the resulting DataFrame into a single-level MultiIndex Series + projected_health_spending_discounted = projected_health_spending_discounted.stack().rename_axis(["draw", "stat"]).rename("value") + + return projected_health_spending_discounted.unstack() + +# Plot costs +#################################################### +# 1. Stacked bar plot (Total cost + Cost categories) +#---------------------------------------------------- +def do_stacked_bar_plot_of_cost_by_category(_df: pd.DataFrame, + _cost_category: Literal['all', 'human resources for health', 'medical consumables', + 'medical equipment', 'facility operating cost'] = 'all', + _disaggregate_by_subgroup: bool = False, + _year: list[int] = 'all', + _draws: Optional[list[int]] = None, + _scenario_dict: Optional[dict[int,str]] = None, + show_title: bool = True, + _outputfilepath: Optional[Path] = None, + _add_figname_suffix: str = ''): + """ + Create and save a stacked bar chart of costs by category, subcategory or subgroup. + + Parameters: + ---------- + _df : pd.DataFrame + DataFrame with cost results, including columns: + ['draw', 'year', 'cost_category', 'cost_subcategory', 'cost_subgroup', + 'cost', 'stat'] — typically produced by `estimate_input_cost_of_scenarios`. + + _cost_category : str, default 'all' + If 'all', compares high-level categories (e.g., HR, consumables, equipment, facilty operations). + Otherwise, filters to a specific category and optionally disaggregates. + + _disaggregate_by_subgroup : bool, default False + If True and a single `_cost_category` is selected, breaks down costs by `cost_subgroup`. 
+ + _year : str or list of int, default 'all' + Year or years to include. Can be: + - 'all' to include all available years + - a single year or multiple years as a list: [2025] + + _draws : list of int, optional + If specified, only includes the specified draws. + + _scenario_dict : dict, required + Dictionary mapping draw numbers to scenario names, used for x-axis labels. + + show_title : bool, default True + Whether to display the chart title. + + _outputfilepath : Path, optional + Folder to save the plot. File will be saved as a PNG using `_cost_category` + and `_add_figname_suffix` in the filename. + + _add_figname_suffix : str, default '' + Optional string to append to the saved figure's filename + + Returns: + ------- + None + The chart is saved to disk as a PNG. + """ + # Subset and Pivot the data to have 'Cost Sub-category' as columns + # Check what's the correct central metric to use (either 'mean' or 'median') + central_metric = [stat for stat in _df.stat.unique() if stat not in ['lower', 'upper']][0] + + # Make a copy of the dataframe to avoid modifying the original + _df_central = _df[_df.stat == central_metric].copy() + _df_lower = _df[_df.stat == 'lower'].copy() + _df_upper = _df[_df.stat == 'upper'].copy() + + # Subset the dataframes to keep the s=relevant categories for the plot + dfs = {"_df_central": _df_central, "_df_lower": _df_lower, "_df_upper": _df_upper} # create a dict of dataframes + for name, df in dfs.items(): + dfs[name] = df.copy() # Choose the dataframe to modify + # Convert 'cost' to millions + dfs[name]['cost'] = dfs[name]['cost'] / 1e6 + # Subset data + if _draws is not None: + dfs[name] = dfs[name][dfs[name].draw.isin(_draws)] + if _year != 'all': + dfs[name] = dfs[name][dfs[name]['year'].isin(_year)] + if _cost_category != 'all': + dfs[name] = dfs[name][dfs[name]['cost_category'] == _cost_category] + + # Extract the updated DataFrames back from the dictionary + _df_central, _df_lower, _df_upper = dfs["_df_central"], 
dfs["_df_lower"], dfs["_df_upper"] + + if _cost_category == 'all': + if (_disaggregate_by_subgroup == True): + raise ValueError(f"Invalid input for _disaggregate_by_subgroup: '{_disaggregate_by_subgroup}'. " + f"Value can be True only when plotting a specific _cost_category") + else: + pivot_central = _df_central.pivot_table(index='draw', columns='cost_category', values='cost', aggfunc='sum') + pivot_lower = _df_lower.pivot_table(index='draw', columns='cost_category', values='cost', aggfunc='sum') + pivot_upper = _df_upper.pivot_table(index='draw', columns='cost_category', values='cost', aggfunc='sum') + else: + if (_disaggregate_by_subgroup == True): + for name, df in dfs.items(): + dfs[name] = df.copy() # Choose the dataframe to modify + # If sub-groups are more than 10 in number, then disaggregate the top 10 and group the rest into an 'other' category + if (len(dfs[name]['cost_subgroup'].unique()) > 10): + # Calculate total cost per subgroup + subgroup_totals = dfs[name].groupby('cost_subgroup')['cost'].sum() + # Identify the top 10 subgroups by cost + top_10_subgroups = subgroup_totals.nlargest(10).index.tolist() + # Label the remaining subgroups as 'other' + dfs[name]['cost_subgroup'] = dfs[name]['cost_subgroup'].apply( + lambda x: x if x in top_10_subgroups else 'All other items' + ) + + # Extract the updated DataFrames back from the dictionary + _df_central, _df_lower, _df_upper = dfs["_df_central"], dfs["_df_lower"], dfs["_df_upper"] + + pivot_central = _df_central.pivot_table(index='draw', columns='cost_subgroup', + values='cost', aggfunc='sum') + pivot_lower = _df_lower.pivot_table(index='draw', columns='cost_subgroup', + values='cost', aggfunc='sum') + pivot_upper = _df_upper.pivot_table(index='draw', columns='cost_subgroup', + values='cost', aggfunc='sum') + + plt_name_suffix = '_by_subgroup' + else: + pivot_central = _df_central.pivot_table(index='draw', columns='cost_subcategory', values='cost', aggfunc='sum') + pivot_lower = 
_df_lower.pivot_table(index='draw', columns='cost_subcategory', values='cost', aggfunc='sum') + pivot_upper = _df_upper.pivot_table(index='draw', columns='cost_subcategory', values='cost', aggfunc='sum') + plt_name_suffix = '' + + # Sort pivot_df columns in ascending order by total cost + sorted_columns = pivot_central.sum(axis=0).sort_values().index + pivot_central = pivot_central[sorted_columns] + pivot_lower = pivot_lower[sorted_columns] + pivot_upper = pivot_upper[sorted_columns] + + # Error bars + lower_bounds = pivot_central.sum(axis=1) - pivot_lower.sum(axis=1) + lower_bounds[lower_bounds<0] = 0 + upper_bounds = pivot_upper.sum(axis=1) - pivot_central.sum(axis=1) + + if _cost_category == 'all': + # Predefined color mapping for cost categories + color_mapping = { + 'human resources for health': '#1f77b4', # Muted blue + 'medical consumables': '#ff7f0e', # Muted orange + 'medical equipment': '#2ca02c', # Muted green + 'other': '#d62728', # Muted red + 'facility operating cost': '#9467bd', # Muted purple + } + # Default color for unexpected categories + default_color = 'gray' + plt_name_suffix = '' + + # Define custom colors for the bars + if _cost_category == 'all': + column_colors = [color_mapping.get(col, default_color) for col in sorted_columns] + # Plot the stacked bar chart with set colours + ax = pivot_central.plot(kind='bar', stacked=True, figsize=(10, 6), color=column_colors) + + # Add error bars + x_pos = np.arange(len(pivot_central.index)) + total_central = pivot_central.sum(axis=1) + error_bars = [lower_bounds, upper_bounds] + ax.errorbar(x_pos, total_central, yerr=error_bars, fmt='o', color='black', capsize=5) + + else: + # Plot the stacked bar chart without set colours + ax = pivot_central.plot(kind='bar', stacked=True, figsize=(10, 6)) + + # Add error bars + x_pos = np.arange(len(pivot_central.index)) + total_central = pivot_central.sum(axis=1) + error_bars = [lower_bounds, upper_bounds] + ax.errorbar(x_pos, total_central, yerr=error_bars, 
fmt='o', color='black', capsize=5) + + # Add data labels such that the stacked block has a superimposed white label is the value is >=2% of the Y-axis limit + # and a black label adjusted to the right of the bar (for visibility) if the value is <2% + # Get max y-limit for threshold + max_y = ax.get_ylim()[1] + threshold = max_y * 0.02 # 2% of ylim + + for container in ax.containers: + if isinstance(container, mpc.BarContainer): # Ensure we're working with bars, not error bars + for rect in container: + height = rect.get_height() + if height > 0: # Avoid labeling zero-height bars + x = rect.get_x() + rect.get_width() / 2 # Center of bar + y = rect.get_y() + height / 2 # Middle of segment + + if height < threshold: # Small segment -> place label outside + ax.annotate( + f'{round(height, 1)}', + xy=(x, rect.get_y() + height), # Arrow start + xytext=(x + 0.3, rect.get_y() + height + threshold), # Offset text + arrowprops=dict(arrowstyle="->", color='black', lw=0.8), + fontsize='small', ha='left', va='center', color='black' + ) + else: # Large segment -> label inside + ax.text(x, y, f'{round(height, 1)}', ha='center', va='center', fontsize='small', color='white') + + # Set custom x-tick labels if _scenario_dict is provided + if _scenario_dict: + labels = [_scenario_dict.get(label, label) for label in pivot_central.index] + else: + labels = pivot_central.index.astype(str) + + # Wrap x-tick labels for readability + wrapped_labels = [textwrap.fill(str(label), 20) for label in labels] + ax.set_xticklabels(wrapped_labels, rotation=45, ha='right', fontsize='small') + + # Period included for plot title and name + if _year == 'all': + period = (f"{min(_df_central['year'].unique())} - {max(_df_central['year'].unique())}") + elif (len(_year) == 1): + period = (f"{_year[0]}") + else: + period = (f"{min(_year)} - {max(_year)}") + + # Save plot + plt.xlabel('Scenario') + plt.ylabel('Cost (2023 USD), millions') + + # Arrange the legend in the same ascending order + handles, labels = 
# 2. Line plots of total costs
#----------------------------------------------------
# TODO: Check why line plot get save without a file name
def do_line_plot_of_cost(_df: pd.DataFrame,
                         _cost_category: Literal['all', 'human resources for health', 'medical consumables',
                                                 'medical equipment', 'facility operating cost'] = 'all',
                         _year: list[int] = 'all',
                         _draws: Optional[list[int]] = None,
                         disaggregate_by: Optional[Literal['cost_category', 'cost_subcategory', 'cost_subgroup']] = None,
                         _y_lim: Optional[float] = None,
                         show_title: bool = True,
                         _outputfilepath: Optional[Path] = None) -> None:
    """
    Plot and save a line chart of cost trends over time, optionally split by category or sub-level.

    Parameters:
    ----------
    _df : pd.DataFrame
        Cost summary with (at least) the columns 'year', 'draw', 'cost', 'stat' and 'cost_category';
        'cost_subcategory' / 'cost_subgroup' are needed when disaggregating by them. The 'stat' column
        must contain 'lower', 'upper' and one central statistic (e.g. 'mean' or 'median').

    _cost_category : str, default 'all'
        'all' keeps every category; any other value filters `_df` to that cost category.

    _year : 'all' or list of int, default 'all'
        Years to include. 'all' keeps every year present in the data.

    _draws : list of int, optional
        Draws to include. None keeps all draws. Must hold exactly one draw when `disaggregate_by` is set.

    disaggregate_by : {'cost_category', 'cost_subcategory', 'cost_subgroup'}, optional
        Draw one line per value of this column. With 'cost_subgroup', only the ten largest subgroups
        (by the central statistic) are kept and the remainder are pooled as 'Other'.

    _y_lim : float, optional
        Upper limit of the y-axis. If None, matplotlib's automatic scaling is used.

    show_title : bool, default True
        Whether to add a title to the plot.

    _outputfilepath : Path, optional
        Directory in which the PNG is written. The file name is derived from the arguments.
        NOTE: although the default is None, the save step needs a real path.

    Returns:
    -------
    None
        The chart is written to `_outputfilepath` and the current figure is closed.

    Raises:
    ------
    ValueError
        For an unknown `disaggregate_by`, for disaggregation without exactly one draw, or for
        `disaggregate_by='cost_subgroup'` combined with `_cost_category='all'`.
    """
    # The central statistic is whichever 'stat' value is neither of the two bounds
    central_metric = [stat for stat in _df.stat.unique() if stat not in ['lower', 'upper']][0]

    # Validate disaggregation options
    valid_disaggregations = ['cost_category', 'cost_subcategory', 'cost_subgroup']
    if disaggregate_by is not None and disaggregate_by not in valid_disaggregations:
        raise ValueError(f"Invalid disaggregation option: {disaggregate_by}. Choose from {valid_disaggregations}.")

    # Disaggregated lines from several draws would be summed together, so insist on a single draw
    if disaggregate_by is not None and (_draws is None or len(_draws) > 1):
        raise ValueError("The disaggregate_by option only works if only one draw is plotted, for exmaple _draws = [0]")

    all_years = isinstance(_year, str) and _year == 'all'

    # Filter by draw, year and cost category
    subset_df = _df if _draws is None else _df[_df.draw.isin(_draws)]
    if not all_years:
        subset_df = subset_df[subset_df['year'].isin(_year)]

    if _cost_category == 'all':
        if disaggregate_by == 'cost_subgroup':
            raise ValueError("Cannot disaggregate by 'cost_subgroup' when `_cost_category='all'` due to data size.")
    else:
        subset_df = subset_df[subset_df['cost_category'] == _cost_category]

    # Set grouping columns based on the disaggregation level
    if disaggregate_by == 'cost_subgroup':
        # With more than 10 subgroups, keep the 10 largest and pool the rest into "Other"
        if len(subset_df['cost_subgroup'].unique()) > 10:
            subgroup_totals = subset_df[subset_df.stat == central_metric].groupby('cost_subgroup')['cost'].sum()
            top_10_subgroups = set(subgroup_totals.nlargest(10).index.tolist())
            # Work on a copy so that the caller's DataFrame (or a view of it) is never modified
            subset_df = subset_df.copy()
            subset_df['cost_subgroup'] = subset_df['cost_subgroup'].apply(
                lambda x: x if x in top_10_subgroups else 'Other'
            )
        groupby_columns = ['year', 'cost_subgroup']
    elif disaggregate_by is not None:
        groupby_columns = ['year', disaggregate_by]
    else:
        groupby_columns = ['year']

    # Central, lower and upper values in millions
    def _total_in_millions(stat_name):
        return subset_df[subset_df.stat == stat_name].groupby(groupby_columns)['cost'].sum() / 1e6

    central_values = _total_in_millions(central_metric)
    lower_values = _total_in_millions('lower')
    upper_values = _total_in_millions('upper')

    # Lines and labels collected for the legend
    lines = []
    labels = []

    if disaggregate_by:
        if disaggregate_by == 'cost_category':
            # Fixed colours so that a category looks the same across figures
            color_mapping = {
                'human resources for health': '#1f77b4',  # Muted blue
                'medical consumables': '#ff7f0e',  # Muted orange
                'medical equipment': '#2ca02c',  # Muted green
                'other': '#d62728',  # Muted red
                'facility operating cost': '#9467bd',  # Muted purple
            }
            default_color = 'gray'  # for unexpected categories
            color_cycle = None
        else:
            # Cycle (rather than iterate once) so that more series than colours cannot exhaust the palette
            color_cycle = cycle(['b', 'g', 'r', 'c', 'm', 'y', 'k', 'orange', 'purple', 'brown', 'gray'])

        for disaggregate_value in central_values.index.get_level_values(disaggregate_by).unique():
            value_central = central_values.xs(disaggregate_value, level=disaggregate_by)
            value_lower = lower_values.xs(disaggregate_value, level=disaggregate_by)
            value_upper = upper_values.xs(disaggregate_value, level=disaggregate_by)

            if color_cycle is None:
                color = color_mapping.get(disaggregate_value, default_color)
            else:
                color = next(color_cycle)

            # Central line with the lower-upper band shaded around it
            line, = plt.plot(value_central.index, value_central, marker='o', linestyle='-', color=color,
                             label=f'{disaggregate_value} - {central_metric}')
            plt.fill_between(value_central.index, value_lower, value_upper, color=color, alpha=0.2)

            lines.append(line)
            labels.append(disaggregate_value)

        # Order the legend by total cost (reversed below, so the largest series is listed first)
        total_costs = {label: central_values.xs(label, level=disaggregate_by).sum() for label in labels}
        sorted_labels = sorted(total_costs.keys(), key=lambda x: total_costs[x])
        handles = [lines[labels.index(label)] for label in sorted_labels]
    else:
        line, = plt.plot(central_values.index, central_values, marker='o', linestyle='-', color='b',
                         label=central_metric)
        plt.fill_between(central_values.index, lower_values, upper_values, color='b', alpha=0.2)

        lines.append(line)
        labels.append(central_metric)

        # A single line needs no sorting; `central_values` has no level to cross-section here
        handles = lines
        sorted_labels = labels

    # Define period for plot title and file name
    if all_years:
        period = f"{min(subset_df['year'].unique())} - {max(subset_df['year'].unique())}"
    elif len(_year) == 1:
        period = str(_year[0])
    else:
        period = f"{min(_year)} - {max(_year)}"

    # Set y-axis limit if provided
    if _y_lim is not None:
        plt.ylim(0, _y_lim)

    # Gridlines, axis labels, legend and title
    plt.grid(visible=True, which='major', linestyle='--', linewidth=0.5, color='gray')
    plt.xlabel('Year')
    plt.ylabel('Cost (2023 USD), millions')
    plt.legend(handles[::-1], sorted_labels[::-1], loc='upper right', bbox_to_anchor=(0.98, 0.98), framealpha=0.6)
    if show_title:
        plot_title = f'Total input cost \n (Category = {_cost_category}, Period = {period})'
        plt.title(plot_title)

    # Save plot with a descriptive filename
    filename_suffix = "" if disaggregate_by is None else f"_by_{disaggregate_by}"
    draw_suffix = 'all' if _draws is None else str(_draws)
    filename = f'trend_{_cost_category}_{period}{filename_suffix}_draw-{draw_suffix}.png'
    plt.savefig(_outputfilepath / filename, dpi=100, bbox_inches='tight')
    plt.close()
# Treemap by category subgroup
#-----------------------------------------------------------------------------------------------
def create_summary_treemap_by_cost_subgroup(_df: pd.DataFrame,
                                            _cost_category: Literal['human resources for health', 'medical consumables',
                                                                    'medical equipment', 'facility operating cost'],
                                            _draw: Optional[list[int]] = None,
                                            _year: list[int] = 'all',
                                            _color_map: Optional[dict[str, str]] = None,
                                            _label_fontsize: int = 10,
                                            show_title: bool = True,
                                            _outputfilepath: Optional[Path] = None) -> None:
    """
    Generate and save a treemap of cost composition by subgroup within one cost category.

    Parameters:
    ----------
    _df : pd.DataFrame
        Costs with the columns 'cost_category', 'cost_subgroup', 'draw', 'year' and 'cost'.
        All rows that survive the filters below are summed per subgroup.

    _cost_category : str, required
        One of 'human resources for health', 'medical consumables', 'medical equipment',
        'facility operating cost'.

    _draw : int or list of int, optional
        Draw(s) to include. If None, all draws are used.

    _year : 'all' or list of int, default 'all'
        Years to include. 'all' keeps every year present in the data.

    _color_map : dict, optional
        Mapping from cost subgroup to colour; subgroups missing from it are drawn in light grey.
        If None, colours are taken in turn from matplotlib's 'Paired' palette.
        eg. _color_map = {'First-line ART regimen: adult':'#1f77b4',
                          'Test, HIV EIA Elisa': '#ff7f0e',
                          'VL Test': '#2ca02c'}

    _label_fontsize : int, default 10
        Font size of the labels inside the tiles.

    show_title : bool, default True
        Whether to display a plot title.

    _outputfilepath : Path, optional
        Directory in which the PNG is written.
        NOTE: although the default is None, the save step needs a real path.

    Returns:
    -------
    None
        Saves the treemap as `treemap_{_cost_category}_[{_draw}]_{period}.png`.

    Raises:
    ------
    ValueError
        If `_cost_category` is missing or invalid, or if no rows remain after filtering.
    """
    # Wrap long subgroup names so that they fit inside the treemap rectangles
    def wrap_text(text, width=15):
        return "\n".join(textwrap.wrap(text, width))

    valid_cost_categories = ['human resources for health', 'medical consumables',
                             'medical equipment', 'facility operating cost']
    if _cost_category is None:
        raise ValueError(f"Specify one of the following as _cost_category - {valid_cost_categories})")
    if _cost_category not in valid_cost_categories:
        raise ValueError(f"Invalid input for _cost_category: '{_cost_category}'. "
                         f"Specify one of the following - {valid_cost_categories})")
    _df = _df[_df['cost_category'] == _cost_category]

    # Accept either a single draw or a collection of draws
    if _draw is not None:
        draws = list(_draw) if isinstance(_draw, (list, tuple, set)) else [_draw]
        _df = _df[_df.draw.isin(draws)]

    # Restrict to the requested years (previously `_year` only affected the title, not the data)
    all_years = isinstance(_year, str) and _year == 'all'
    if not all_years:
        _df = _df[_df['year'].isin(_year)]

    # Remove non-specific subgroup for consumables
    if _cost_category == 'medical consumables':
        _df = _df[~(_df.cost_subgroup == 'supply chain (all consumables)')]

    if _df.empty:
        raise ValueError("No cost data left to plot after filtering by cost category, draw and year.")

    # Period for plot title and file name; must be read before 'year' is aggregated away below
    if all_years:
        period = f"{min(_df['year'].unique())} - {max(_df['year'].unique())}"
    elif len(_year) == 1:
        period = f"{_year[0]}"
    else:
        period = f"{min(_year)} - {max(_year)}"

    # Total cost per subgroup, largest first
    subgroup_costs = (_df.groupby('cost_subgroup')['cost'].sum()
                      .reset_index()
                      .sort_values(by="cost", ascending=False))
    top_10 = subgroup_costs.iloc[:10].copy()  # copy: a 'proportion' column is added below

    if len(subgroup_costs) > 10:
        # Pool everything outside the ten largest subgroups into "Other"
        other_cost = subgroup_costs.iloc[10:]["cost"].sum()
        top_10 = pd.concat([top_10, pd.DataFrame([{"cost_subgroup": "Other", "cost": other_cost}])],
                           ignore_index=True)

    # Prepare data for the treemap
    total_cost = top_10["cost"].sum()
    top_10["proportion"] = top_10["cost"] / total_cost
    sizes = top_10["cost"]

    # Resolve the colour of each tile
    if _color_map is None:
        color_cycle = cycle(plt.cm.Paired.colors)
        colors = [next(color_cycle) for _ in top_10["cost_subgroup"]]
    else:
        fallback_color = '#cccccc'
        colors = [_color_map.get(subgroup, fallback_color) for subgroup in top_10["cost_subgroup"]]

    # Tiles below 1% of the total are left unlabelled
    labels = [
        f"{wrap_text(name)}\n${round(cost, 1)}m\n({round(prop * 100, 1)}%)"
        if prop >= 0.01 else ""
        for name, cost, prop in zip(top_10["cost_subgroup"], top_10["cost"] / 1e6, top_10["proportion"])
    ]

    # Plot the treemap
    plt.figure(figsize=(12, 8))
    squarify.plot(sizes=sizes, label=labels, alpha=0.8, color=colors, text_kwargs={'fontsize': _label_fontsize})
    plt.axis("off")
    if show_title:
        plt.title(f'{_cost_category} ; Period = {period}')

    plt.savefig(_outputfilepath / f'treemap_{_cost_category}_[{_draw}]_{period}.png',
                dpi=100,
                bbox_inches='tight')
    plt.close()
# Plot ROI
def generate_multiple_scenarios_roi_plot(_monetary_value_of_incremental_health: pd.DataFrame,
                                         _incremental_input_cost: pd.DataFrame,
                                         _outputfilepath: Path,
                                         _draws: list[int],
                                         _scenario_dict: dict[int, str],
                                         _metric: str = 'mean',
                                         _y_axis_lim: Optional[float] = None,
                                         _plot_vertical_lines_at: Optional[list[int]] = None,
                                         _projected_health_spending: Optional[float] = None,
                                         _additional_horizontal_lines_for_interpretation: Optional[list[dict]] = None,
                                         _draw_colors: Optional[dict[int, str]] = None,
                                         _value_of_life_suffix: str = '',
                                         _year_suffix: str = '',
                                         show_title_and_legend: Optional[bool] = True) -> None:
    """
    Generate and save ROI curves for several scenarios over a range of implementation costs.

    For every draw and run, ROI = (monetary value of health gain - total cost) / total cost, where
    total cost = incremental input cost + implementation cost. ROI is clipped at 0 from below and
    treated as infinite when the total cost is negative.

    Parameters:
    ----------
    _monetary_value_of_incremental_health : pd.DataFrame
        Monetary value of health benefits. The code reads one row per draw (index named 'draw') and
        one column per run.
        This can be estimated as (_num_dalys_averted_by_scenario * _chosen_value_of_life_year).clip(lower=0.0)

    _incremental_input_cost : pd.DataFrame
        Incremental cost of each scenario, in the same layout.

    _outputfilepath : Path
        Folder in which the plot is saved.

    _draws : list of int, required
        Draws to include in the plot.

    _scenario_dict : dict of {int: str}, required
        Mapping from draw to scenario name, used for the legend.

    _metric : {'mean', 'median'}, default 'mean'
        Central statistic used to summarise ROI across runs.

    _y_axis_lim : float, optional
        Upper limit of the y-axis. If None, 1.25 x the largest finite central ROI is used.

    _plot_vertical_lines_at : list of int, optional
        Implementation cost values (USD) at which a vertical dashed line is drawn, annotated with the
        ratio of the highest to the lowest ROI across scenarios at that cost. These values are added to
        the evaluation grid, so any value can be requested.

    _projected_health_spending : float, optional
        If given, x-tick labels also show the tick as a % of this amount (USD).
        Can be estimated using the function estimate_projected_health_spending, but a single value will
        need to be taken, eg. mean value for the baseline scenario.

    _additional_horizontal_lines_for_interpretation : list of dict, optional
        Each dict draws one double-headed horizontal arrow. Keys read: 'y_value', 'x_start', 'x_end'
        (required); 'label', 'color', 'y_label_offset', 'scenario_label' (optional).

    _draw_colors : dict of {int: str}, optional
        Colour per draw. Defaults to the 'tab10' palette; draws without a colour are drawn in black.

    _value_of_life_suffix : str, default ''
        Suffix added to the filename to reflect the assumed value of a life year.

    _year_suffix : str, default ''
        Suffix added to the filename and plot title to indicate the year range.

    show_title_and_legend : bool, default True
        Whether to include the plot title and legend.

    Returns:
    -------
    None
        Saves a PNG file visualising ROI vs. implementation cost for each scenario.

    Raises:
    ------
    ValueError
        If `_metric` is not 'mean' or 'median'.
    """
    if _metric not in ['mean', 'median']:
        raise ValueError(f"Invalid input for _metric: '{_metric}'. "
                         f"Values need to be one of 'mean' or 'median'")

    # Default color mapping if not provided (zip stops at the shorter of draws / palette)
    if _draw_colors is None:
        _draw_colors = dict(zip(_draws, plt.cm.tab10.colors))

    # Restrict both inputs to the requested draws
    _monetary_value_of_incremental_health = _monetary_value_of_incremental_health[
        _monetary_value_of_incremental_health.index.get_level_values('draw').isin(_draws)]
    _incremental_input_cost = _incremental_input_cost[
        _incremental_input_cost.index.get_level_values('draw').isin(_draws)]
    # Maximum ability to pay for implementation = monetary value - change in costs
    max_ability_to_pay_for_implementation = (
        _monetary_value_of_incremental_health - _incremental_input_cost).clip(lower=0.0)

    # All draws share one figure
    fig, ax = plt.subplots(figsize=(10, 6))

    vertical_line_costs = list(_plot_vertical_lines_at or [])
    max_roi = []
    roi_at_costs = {cost: [] for cost in vertical_line_costs}

    # Costs that must be on every curve: two fixed reference points plus every requested vertical
    # line, so that the ROI lookup at those costs always finds an exact match
    fixed_costs = np.array([1_000_000_000, 3_000_000_000] + vertical_line_costs, dtype=float)

    for draw_index, row in _monetary_value_of_incremental_health.iterrows():
        print("Plotting ROI for draw ", draw_index)

        # Implementation costs from 0 to this draw's maximum ability to pay, plus the fixed points
        implementation_costs = np.linspace(0, max_ability_to_pay_for_implementation.loc[draw_index].max(), 50)
        implementation_costs = np.unique(np.concatenate([implementation_costs, fixed_costs]))  # sorted, unique

        incremental_scenario_cost_row = _incremental_input_cost.loc[draw_index]

        # One row of ROI values per run; collected in a list and concatenated once
        run_frames = []
        for run in incremental_scenario_cost_row.index:
            total_costs = implementation_costs + incremental_scenario_cost_row[run]

            run_values = np.full(total_costs.shape, np.nan)
            # Negative total cost (net saving): ROI is treated as infinite
            run_values[total_costs < 0] = np.inf
            # Otherwise compute ROI and clip at 0
            non_negative_mask = total_costs >= 0
            run_values[non_negative_mask] = np.clip(
                (row[run] - total_costs[non_negative_mask]) / total_costs[non_negative_mask],
                0,
                None
            )

            run_frames.append(pd.DataFrame(
                [run_values],
                index=pd.MultiIndex.from_tuples([(draw_index, run)], names=['draw', 'run']),
                columns=implementation_costs))

        all_run_values = pd.concat(run_frames) if run_frames else pd.DataFrame()

        # inf -> NaN so that the summary statistics are computed over finite values only
        temp_data = all_run_values.replace([np.inf, -np.inf], np.nan)

        collapsed_data = temp_data.groupby(level='draw').agg([
            _metric,
            ('lower', lambda x: x.quantile(0.025)),
            ('upper', lambda x: x.quantile(0.975))
        ])

        # Costs at which no run had a finite ROI are reported as inf again
        collapsed_data = collapsed_data.replace([np.nan], np.inf)

        # Long format: one row per (implementation_cost, stat)
        collapsed_data = collapsed_data.unstack()
        collapsed_data.index = collapsed_data.index.set_names('implementation_cost', level=0)
        collapsed_data.index = collapsed_data.index.set_names('stat', level=1)
        collapsed_data = collapsed_data.reset_index().rename(columns={0: 'roi'})

        central_values = collapsed_data[collapsed_data['stat'] == _metric][['implementation_cost', 'roi']]
        lower_values = collapsed_data[collapsed_data['stat'] == 'lower'][['implementation_cost', 'roi']]
        upper_values = collapsed_data[collapsed_data['stat'] == 'upper'][['implementation_cost', 'roi']]

        # Plot central line and interval (x-axis in USD millions)
        ax.plot(
            implementation_costs / 1e6,
            central_values['roi'],
            label=f'{_scenario_dict[draw_index]}',
            color=_draw_colors.get(draw_index, 'black'),
        )
        ax.fill_between(
            implementation_costs / 1e6,
            lower_values['roi'],
            upper_values['roi'],
            alpha=0.2,
            color=_draw_colors.get(draw_index, 'black'),
        )

        max_val = central_values[~np.isinf(central_values['roi'])]['roi'].max()
        max_roi.append(max_val)

        # Capture the central ROI at the requested vertical-line costs
        for cost in vertical_line_costs:
            roi_value = collapsed_data[
                (collapsed_data.implementation_cost == cost) &
                (collapsed_data.stat == _metric)
            ]['roi']
            if not roi_value.empty:
                roi_at_costs[cost].append(roi_value.iloc[0])

    # Vertical lines annotated with the ratio of the highest to the lowest ROI across scenarios
    for cost in vertical_line_costs:
        ax.axvline(x=cost / 1e6, color='black', linestyle='--', linewidth=1)
        rois = roi_at_costs[cost]
        if not rois:
            continue  # nothing was plotted at this cost, so there is no ratio to report
        lowest_roi = min(rois)
        ratio = max(rois) / lowest_roi if lowest_roi != 0 else np.inf
        ax.text(cost / 1e6 + ax.get_xlim()[1] * 0.011, ax.get_ylim()[1] * 0.75,
                f'At ${cost / 1e6:.0f}M, ratio of ROI curves = {round(ratio, 2)}',
                color='black', fontsize=10, rotation=90, verticalalignment='top')

    # Five roughly equal x-tick steps, rounded to the nearest thousand (USD millions)
    step_size = (ax.get_xlim()[1] - 0) / 5
    tick_step = int(round(step_size, -3)) or max(int(round(step_size)), 1)  # never a zero step
    xticks = np.arange(0, ax.get_xlim()[1] + 1, tick_step)
    xtick_labels = [f'{tick:,.0f}' for tick in xticks]

    # Show ticks as % of projected health spending; only the first non-zero tick spells it out in full
    if _projected_health_spending and len(xticks) > 1:
        xtick_labels[1] = f'{xticks[1]:,.0f}\n({xticks[1] / (_projected_health_spending / 1e6) :.2%} of \n projected total \n health spend)'
        for i, tick in enumerate(xticks):
            if i not in (0, 1):
                xtick_labels[i] = f'{tick:,.0f}\n({tick / (_projected_health_spending/1e6) :.2%})'

    ax.set_xticks(xticks)
    ax.set_xticklabels(xtick_labels, fontsize=10)

    # Optional horizontal double-headed arrows with labels
    if _additional_horizontal_lines_for_interpretation:
        for line in _additional_horizontal_lines_for_interpretation:
            y = line['y_value']
            x_start = line['x_start']
            x_end = line['x_end']
            label = line.get('label', '')
            color = line.get('color', 'black')
            ax.annotate(
                '',
                xy=(x_end, y),
                xytext=(x_start, y),
                arrowprops=dict(
                    arrowstyle='<->',
                    color=color,
                    lw=1.5
                )
            )

            if label:
                # Escape '$' so matplotlib does not read it as a math delimiter, and break the line
                # before any ' [' (e.g. an interval)
                label_wrapped = label.replace('$', '\\$').replace(' [', '\n[')

                label_x = x_end + ax.get_xlim()[1] * 0.005
                label_y = y

                y_label_offset = line.get('y_label_offset', '')
                if y_label_offset:
                    label_y = y + y_label_offset

                ax.text(
                    label_x,
                    label_y,
                    label_wrapped,
                    ha='left',
                    va='center',
                    fontsize=8,
                    color='white',
                    fontweight='bold',
                    bbox=dict(
                        boxstyle='round,pad=0.3',
                        facecolor=color,
                        edgecolor=color,
                        alpha=0.9
                    )
                )

                # Scenario label to the right of the box
                scenario_label = line.get('scenario_label', '')
                if scenario_label:
                    ax.text(
                        label_x + ax.get_xlim()[1] * 0.16,
                        label_y,
                        scenario_label,
                        ha='left',
                        va='center',
                        fontsize=8,
                        color=color
                    )

    # Set y-axis limit
    if _y_axis_lim is None:
        finite_max_roi = [value for value in max_roi if np.isfinite(value)]
        if finite_max_roi:
            ax.set_ylim(0, max(finite_max_roi) * 1.25)
    else:
        ax.set_ylim(0, _y_axis_lim)
    ax.set_xlim(left=0)

    plt.xlabel('Incremental above service level costs, USD millions')
    plt.ylabel('Return on Investment')

    # Show legend and title
    if show_title_and_legend:
        plt.title(f'Return on Investment at different levels of above service level cost {_year_suffix}')
        plt.legend()

    plt.grid(False)
    fig.patch.set_facecolor("white")  # White background for the entire figure

    # Save
    plt.savefig(_outputfilepath / f'draws_{_draws}_ROI_at_{_value_of_life_suffix}_{_year_suffix}.png', dpi=100,
                bbox_inches='tight')
    plt.close()
def tabulate_roi_estimates(_monetary_value_of_incremental_health: pd.DataFrame,
                           _incremental_input_cost: pd.DataFrame,
                           _draws: Optional[list[int]] = None,
                           _metric: Literal['mean', 'median'] = 'mean') -> pd.DataFrame:
    """
    Compute ROI estimates in tabular form for multiple scenarios and implementation costs.

    For each draw, calculates ROI at various hypothetical implementation cost levels.
    ROI is defined as: (monetary value of health gain - total cost) / total cost, where
    total cost = incremental scenario cost + implementation cost. ROI is treated as infinite
    when the total cost is negative.

    Parameters:
    ----------
    _monetary_value_of_incremental_health : pd.DataFrame
        Monetary value of health gain. The code reads one row per draw (index named 'draw') and
        one column per run.

    _incremental_input_cost : pd.DataFrame
        Incremental scenario costs, in the same layout.

    _draws : list of int, optional
        Draws to include in the tabulation. If None, every draw in the inputs is used.

    _metric : {'mean', 'median'}, default 'mean'
        Summary statistic to compute across runs. The 2.5th and 97.5th percentiles are always added.

    Returns:
    -------
    pd.DataFrame
        Long-format DataFrame with the columns:
        - 'implementation_cost' (in USD)
        - 'stat' (the chosen `_metric`, 'lower', 'upper')
        - 'draw'
        - 'roi' (return on investment; inf where no run has a finite ROI)

        The implementation cost levels are whole billions between 0 and the largest
        max_ability_to_pay_for_implementation across the included draws (rounded up) - at that level of
        incremental scenario cost + implementation cost, ROI should be 0. The frame is empty if no draw
        is left after filtering.
    """
    output_columns = ['implementation_cost', 'stat', 'draw', 'roi']

    # Restrict both inputs to the requested draws (None means "all draws")
    if _draws is not None:
        _monetary_value_of_incremental_health = _monetary_value_of_incremental_health[
            _monetary_value_of_incremental_health.index.get_level_values('draw').isin(_draws)]
        _incremental_input_cost = _incremental_input_cost[
            _incremental_input_cost.index.get_level_values('draw').isin(_draws)]

    if _monetary_value_of_incremental_health.empty:
        return pd.DataFrame(columns=output_columns)

    # Maximum ability to pay for implementation = monetary value - change in costs
    max_ability_to_pay_for_implementation = (
        _monetary_value_of_incremental_health - _incremental_input_cost).clip(lower=0.0)

    # Cost levels from 0 to the overall maximum ability to pay, each rounded up to a whole billion.
    # Rounding can map neighbouring points to the same billion, so duplicates are dropped.
    max_ability_to_pay_for_implementation_rounded_value = math.ceil(
        max_ability_to_pay_for_implementation.max().max() / 1_000_000_000) * 1_000_000_000
    implementation_costs = np.linspace(0, max_ability_to_pay_for_implementation_rounded_value, 20)
    implementation_costs = np.unique(np.ceil(implementation_costs / 1_000_000_000) * 1_000_000_000)

    stat_names = [_metric, 'lower', 'upper']
    per_draw_tables = []

    for draw_index, row in _monetary_value_of_incremental_health.iterrows():
        print("Tablulating ROI for draw ", draw_index)

        incremental_scenario_cost_row = _incremental_input_cost.loc[draw_index]
        runs = list(incremental_scenario_cost_row.index)

        # One row of ROI values per run, one column per implementation cost level
        roi_by_run = np.full((len(runs), len(implementation_costs)), np.nan)
        for position, run in enumerate(runs):
            total_costs = implementation_costs + incremental_scenario_cost_row[run]

            # Negative total cost (net saving): ROI is treated as infinite
            roi_by_run[position, total_costs < 0] = np.inf

            non_negative_mask = total_costs >= 0
            # A total cost of exactly 0 yields inf/NaN; both are handled like "no finite ROI" below
            with np.errstate(divide='ignore', invalid='ignore'):
                roi_by_run[position, non_negative_mask] = (
                    (row[run] - total_costs[non_negative_mask]) / abs(total_costs[non_negative_mask]))

        all_run_values = pd.DataFrame(roi_by_run, index=runs, columns=implementation_costs)

        # inf -> NaN so that the summary statistics are computed over finite values only
        finite_values = all_run_values.replace([np.inf, -np.inf], np.nan)
        summary = np.column_stack([
            finite_values.agg(_metric).to_numpy(dtype=float),
            finite_values.quantile(0.025).to_numpy(dtype=float),
            finite_values.quantile(0.975).to_numpy(dtype=float),
        ])
        # Cost levels at which no run had a finite ROI are reported as inf
        summary[np.isnan(summary)] = np.inf

        # Long format, ordered by implementation cost and then by statistic
        per_draw_tables.append(pd.DataFrame({
            'implementation_cost': np.repeat(implementation_costs, len(stat_names)),
            'stat': stat_names * len(implementation_costs),
            'draw': draw_index,
            'roi': summary.ravel(),
        }, columns=output_columns))

    return pd.concat(per_draw_tables, ignore_index=True)
def extract_roi_at_specific_implementation_costs(_monetary_value_of_incremental_health: pd.DataFrame,
                                                 _incremental_input_cost: pd.DataFrame,
                                                 _draws: Optional[list[int]] = None,
                                                 _non_zero_implementation_cost_proportion: float = 0.58,
                                                 _metric: Literal['mean', 'median'] = 'mean') -> pd.DataFrame:
    """
    Tabulate ROI under two implementation cost (above service level cost) assumptions.

    ROI is defined as: (monetary value of health gain - total cost) / |total cost|. It is computed
    once with the incremental input cost alone (zero implementation cost) and once with the
    incremental input cost scaled by (1 + `_non_zero_implementation_cost_proportion`).

    Parameters:
    ----------
    _monetary_value_of_incremental_health : pd.DataFrame
        Monetary value of health gain, with an index level named 'draw', in the layout expected by
        `summarize_cost_data`.

    _incremental_input_cost : pd.DataFrame
        Incremental scenario costs, in the same layout.

    _draws : list of int, optional
        Draws to include in the tabulation. If None, every draw in the inputs is used.

    _non_zero_implementation_cost_proportion : float, default 0.58
        Implementation cost assumed in the second ROI estimate, as a proportion of
        `_incremental_input_cost`. The first estimate assumes 0 implementation cost.
        The default value is based on Opuni et al (2023)

    _metric : {'mean', 'median'}, default 'mean'
        Summary statistic to compute across runs; reported together with 'lower' and 'upper'.

    Returns:
    -------
    pd.DataFrame
        One row per draw (the index is reset into a column) with formatted "central [lower - upper]"
        strings for: monetised health benefits ($ billion), service-level costs ($ million), above
        service-level costs ($ million), ROI assuming zero above service level cost, and ROI assuming
        non-zero above service level cost.
    """
    # Restrict both inputs to the requested draws (None means "all draws")
    if _draws is not None:
        _monetary_value_of_incremental_health = _monetary_value_of_incremental_health[
            _monetary_value_of_incremental_health.index.get_level_values('draw').isin(_draws)]
        _incremental_input_cost = _incremental_input_cost[
            _incremental_input_cost.index.get_level_values('draw').isin(_draws)]

    def _money_interval(_summary, _divisor):
        """Format '$central [$lower - $upper]' with two decimals after dividing by `_divisor`."""
        return (
            "$" + _summary[_metric].div(_divisor).apply("{:,.2f}".format) + " [" +
            "$" + _summary['lower'].div(_divisor).apply("{:,.2f}".format) + " - " +
            "$" + _summary['upper'].div(_divisor).apply("{:,.2f}".format) + "]"
        )

    def _roi_interval(_summary):
        """Format 'central [lower - upper]' rounded to two decimals."""
        return (
            (_summary[_metric]).round(2).astype(str) + " [" +
            (_summary['lower']).round(2).astype(str) + " - " +
            (_summary['upper']).round(2).astype(str) + "]"
        )

    _monetary_value_of_incremental_health_summary = summarize_cost_data(_monetary_value_of_incremental_health,
                                                                        _metric=_metric)
    _monetary_value_of_incremental_health_summary.columns.name = None  # Remove columns name if it exists
    _incremental_input_cost_summary = summarize_cost_data(_incremental_input_cost, _metric=_metric)
    _incremental_input_cost_summary.columns.name = None  # Remove columns name if it exists

    # Service-level cost scaled up by the assumed implementation cost share
    cost_multiplier = 1 + _non_zero_implementation_cost_proportion

    roi_df = pd.DataFrame()

    roi_df['Monetised health benefits ($, billion)'] = _money_interval(
        _monetary_value_of_incremental_health_summary, 1e9)
    roi_df['Service-level costs ($, million)'] = _money_interval(_incremental_input_cost_summary, 1e6)

    # NOTE(review): this column is headed "Above Service-level costs" but holds
    # service-level cost x (1 + proportion), i.e. the total used as the ROI denominator below.
    # Confirm whether the above-service-level part alone (cost x proportion) was intended.
    roi_df['Above Service-level costs ($, million)'] = _money_interval(
        _incremental_input_cost_summary[[_metric, 'lower', 'upper']] * cost_multiplier, 1e6)

    # ROI at 0 implementation cost
    roi = (_monetary_value_of_incremental_health - _incremental_input_cost).div(abs(_incremental_input_cost))
    roi_summary = summarize_cost_data(roi, _metric=_metric)
    roi_df['ROI (assuming zero above service level costs)'] = _roi_interval(roi_summary)

    # ROI at non-zero implementation cost
    total_cost_with_implementation = _incremental_input_cost * cost_multiplier
    roi_non_zero_implementation_cost = (
        _monetary_value_of_incremental_health - total_cost_with_implementation
    ).div(abs(total_cost_with_implementation))
    roi_non_zero_implementation_cost_summary = summarize_cost_data(roi_non_zero_implementation_cost,
                                                                   _metric=_metric)
    roi_df['ROI (assuming non-zero above service level costs)'] = _roi_interval(
        roi_non_zero_implementation_cost_summary)

    return roi_df.reset_index()
(roi_summary['upper']).round(2).astype(str) + "]" + ) + + # ROI at non-zero implementation cost + roi_non_zero_implementation_cost = (_monetary_value_of_incremental_health - _incremental_input_cost * (1+_non_zero_implementation_cost_proportion)).div(abs(_incremental_input_cost * (1+_non_zero_implementation_cost_proportion))) + roi_non_zero_implementation_cost_summary = summarize_cost_data(roi_non_zero_implementation_cost, _metric = _metric) + + roi_df['ROI (assuming non-zero above service level costs)'] = ( + (roi_non_zero_implementation_cost_summary[_metric]).round(2).astype(str) + " [" + + (roi_non_zero_implementation_cost_summary['lower']).round(2).astype(str) + " - " + + (roi_non_zero_implementation_cost_summary['upper']).round(2).astype(str) + "]" + ) + + return roi_df.reset_index() diff --git a/src/scripts/costing/costing_overview_analysis.py b/src/scripts/costing/costing_overview_analysis.py new file mode 100644 index 0000000000..9a483980a0 --- /dev/null +++ b/src/scripts/costing/costing_overview_analysis.py @@ -0,0 +1,347 @@ +"""Produce outputs for cost overview paper. +The draft version of the paper uses outputs from scenario_impact_of_healthsystem.py, used to model HSS scenarios for +FCDO and Global Fund. 
# Define a timestamp for script outputs
timestamp = datetime.datetime.now().strftime("_%Y_%m_%d_%H_%M")

# Print the start time of the script
print('Script Start', datetime.datetime.now().strftime('%H:%M'))

# Create folders to store results
resourcefilepath = Path("./resources")
outputfilepath = Path('./outputs/t.mangal@imperial.ac.uk')
figurespath = Path('./outputs/costing/overview/')
# exist_ok avoids the check-then-create race and is a no-op when the folder is already there
figurespath.mkdir(parents=True, exist_ok=True)

# Load result files
# ------------------------------------------------------------------------------------------------------------------
results_folder = get_scenario_outputs('htm_and_hss_runs-2025-01-16T135243Z.py', outputfilepath)[0]  # January 2025 runs

# Check can read results from draw=0, run=0
log = load_pickled_dataframes(results_folder, 0, 0)  # look at one log (so can decide what to extract)
params = extract_params(results_folder)
info = get_scenario_info(results_folder)

# Declare default parameters for cost analysis
# ------------------------------------------------------------------------------------------------------------------
# Period relevant for costing
TARGET_PERIOD = (Date(2023, 1, 1), Date(2030, 12, 31))  # This is the period that is costed
relevant_period_for_costing = [i.year for i in TARGET_PERIOD]
list_of_relevant_years_for_costing = list(range(relevant_period_for_costing[0], relevant_period_for_costing[1] + 1))
# The plots and the annual averages cover the costed period, so both are derived from TARGET_PERIOD rather than
# repeating the years as literals
list_of_years_for_plot = list(list_of_relevant_years_for_costing)
number_of_years_costed = len(list_of_relevant_years_for_costing)

# Scenarios (keyed by draw number)
cost_scenarios = {0: "Actual", 3: "Expanded HRH", 5: "Improved consumable availability",
                  8: "Expanded HRH + Improved consumable availability"}
draws_to_cost = list(cost_scenarios)  # [0, 3, 5, 8]

# Costing parameters
discount_rate = 0.03
# Year-specific discount rates for 2023 to 2030
discount_rate_lomas = {2023: 0.0036, 2024: 0.0040, 2025: 0.0039, 2026: 0.0042, 2027: 0.0042, 2028: 0.0041,
                       2029: 0.0041, 2030: 0.0040}

# Estimate standard input costs of scenario
# -----------------------------------------------------------------------------------------------------------------------
# Standard 3% discount rate
input_costs = estimate_input_cost_of_scenarios(results_folder, resourcefilepath, _draws=draws_to_cost,
                                               _years=list_of_relevant_years_for_costing, cost_only_used_staff=True,
                                               _discount_rate=discount_rate, summarize=True)

# Undiscounted costs
input_costs_undiscounted = estimate_input_cost_of_scenarios(results_folder, resourcefilepath, _draws=draws_to_cost,
                                                            _years=list_of_relevant_years_for_costing,
                                                            cost_only_used_staff=True,
                                                            _discount_rate=0, summarize=True)

# Cost with variable discount rate based on Lomas et al (2021)
input_costs_variable_discounting = estimate_input_cost_of_scenarios(results_folder, resourcefilepath,
                                                                    _draws=draws_to_cost,
                                                                    _years=list_of_relevant_years_for_costing,
                                                                    cost_only_used_staff=True,
                                                                    _discount_rate=discount_rate_lomas,
                                                                    summarize=True)
# Get overall estimates for main text
# -----------------------------------------------------------------------------------------------------------------------
# Total cost per (draw, stat), discounted and undiscounted
cost_by_draw = input_costs.groupby(['draw', 'stat'])['cost'].sum()
undiscounted_cost_by_draw = input_costs_undiscounted.groupby(['draw', 'stat'])['cost'].sum()

# Mean consumables cost per draw
consumable_cost_by_draw = input_costs[(input_costs.cost_category == 'medical consumables')
                                      & (input_costs.stat == 'mean')].groupby(['draw'])['cost'].sum()

# The printed sentences are pasted into LaTeX, so '$' and '%' are preceded by a backslash. The backslash is
# written as '\\' because '\$' and '\%' are invalid escape sequences in a regular (f-)string literal.


def _total_cost_text(_draw, _label_interval=False):
    """Return '\\$X billion [\\$Lb - \\$Ub]' for the discounted total cost of a draw."""
    interval_label = "95\\% confidence interval (CI), " if _label_interval else ""
    return (f"\\${cost_by_draw[_draw, 'mean']/1e9:,.2f} billion [{interval_label}"
            f"\\${cost_by_draw[_draw, 'lower']/1e9:,.2f}b - \\${cost_by_draw[_draw, 'upper']/1e9:,.2f}b]")


def _annual_cost_text(_draw):
    """Return '\\$X million [\\$Lm - \\$Um]' for the undiscounted average annual cost of a draw."""
    def _per_year(_stat):
        return undiscounted_cost_by_draw[_draw, _stat]/1e6/number_of_years_costed
    return (f"\\${_per_year('mean'):,.2f} million "
            f"[\\${_per_year('lower'):,.2f}m - \\${_per_year('upper'):,.2f}m]")


def _pct_increase_text(_cost_by_draw, _draw, _baseline_draw=0):
    """Return the percentage increase of a draw's cost over the baseline draw, e.g. '12.34\\%'."""
    return f"{(_cost_by_draw[_draw]/_cost_by_draw[_baseline_draw] - 1) * 100:.2f}\\%"


# Abstract
print(f"Under current system capacity, total healthcare delivery costs for 2023–2030 were estimated at "
      f"{_total_cost_text(0, _label_interval=True)}, averaging "
      f"{_annual_cost_text(0)} annually."
      f" Scenario analysis revealed the importance of health system interdependencies: improving consumable availability alone led to a modest "
      f"{_pct_increase_text(consumable_cost_by_draw, 5)}"
      f" increase in consumables cost due to constraints in the health workforce. In contrast, expanding human resources for health (HRH) increased consumables costs by "
      f"{_pct_increase_text(consumable_cost_by_draw, 3)}"
      f", while jointly expanding HRH and consumable availability raised consumables costs by "
      f"{_pct_increase_text(consumable_cost_by_draw, 8)}, "
      f"illustrating how bottlenecks in one component limit the effect of changes in another.")
# Results 1
print(f"The total cost of healthcare delivery in Malawi between 2023 and 2030 was estimated to be "
      f"{_total_cost_text(0, _label_interval=True)}, under the actual scenario, and increased to "
      f"{_total_cost_text(5)} under the improved consumable availability scenario, "
      f"followed by {_total_cost_text(3)} under the expanded HRH scenario and finally "
      f"{_total_cost_text(8)} under the expanded HRH + improved consumable availability scenario.")
# Results 2
print(f"This translates to an average annual cost of "
      f"{_annual_cost_text(0)}, under the actual scenario, "
      f"{_annual_cost_text(5)} under the improved consumable availability scenario, followed by "
      f"{_annual_cost_text(3)} under the expanded HRH scenario and finally "
      f"{_annual_cost_text(8)} under the expanded HRH + improved consumable availability scenario.")
# Results 3
print(f"Notably, improving consumable availability alone increases the cost of medical consumables by just "
      f"{_pct_increase_text(consumable_cost_by_draw, 5)} "
      f"because the limited health workforce (HRH) restricts the number of feasible appointments and, consequently, the quantity of consumables dispensed. "
      f"In contrast, expanding HRH alone raises consumable costs by "
      f"{_pct_increase_text(consumable_cost_by_draw, 3)}"
      f". When both HRH and consumable availability are expanded together, consumable costs increase by "
      f"{_pct_increase_text(consumable_cost_by_draw, 8)} "
      f"compared to the actual scenario.")
# Results 4
cost_of_hiv_testing = input_costs[(input_costs.cost_subgroup == 'Test, HIV EIA Elisa')
                                  & (input_costs.stat == 'mean')].groupby(['draw'])['cost'].sum()
print(f"For instance, the cost of HIV testing consumables increases by {_pct_increase_text(cost_of_hiv_testing, 3)} under the expanded HRH scenario and by "
      f"{_pct_increase_text(cost_of_hiv_testing, 8)} under the combined expanded HRH and improved consumable availability scenario, "
      f"while showing almost no change under the scenario with improved consumable availability alone")
# Get figures for overview paper
# -----------------------------------------------------------------------------------------------------------------------
# Figure 2: Estimated costs by cost category
do_stacked_bar_plot_of_cost_by_category(_df=input_costs, _cost_category='all', _disaggregate_by_subgroup=False,
                                        _year=list_of_relevant_years_for_costing, show_title=False,
                                        _outputfilepath=figurespath, _scenario_dict=cost_scenarios)

# Relabel the consumable sub-categories for display; sub-categories without an entry keep their original name
revised_consumable_subcategories = {
    'cost_of_separately_managed_medical_supplies_dispensed': 'cost_of_consumables_dispensed',
    'cost_of_excess_separately_managed_medical_supplies_stocked': 'cost_of_excess_consumables_stocked',
    'supply_chain': 'supply_chain',
}
input_costs_new = input_costs.copy()
relabelled_subcategory = input_costs_new['cost_subcategory'].map(revised_consumable_subcategories)
input_costs_new['cost_subcategory'] = relabelled_subcategory.fillna(input_costs_new['cost_subcategory'])

# Figure 3: Estimated costs by cost sub-category
# (only the consumables panel uses the relabelled sub-categories)
for _costs, _category in [(input_costs_new, 'medical consumables'),
                          (input_costs, 'human resources for health'),
                          (input_costs, 'medical equipment'),
                          (input_costs, 'facility operating cost')]:
    do_stacked_bar_plot_of_cost_by_category(_df=_costs, _cost_category=_category,
                                            _disaggregate_by_subgroup=False,
                                            _year=list_of_years_for_plot, show_title=False,
                                            _outputfilepath=figurespath, _scenario_dict=cost_scenarios)

# Figure 4: Estimated costs by year (one line plot per scenario, undiscounted)
for _draw_index in (0, 3, 5, 8):
    do_line_plot_of_cost(_df=input_costs_undiscounted, _cost_category='all',
                         _year=list_of_years_for_plot, _draws=[_draw_index],
                         disaggregate_by='cost_category',
                         _y_lim=400,
                         show_title=False,
                         _outputfilepath=figurespath)

# Figure D1: Total cost by scenario assuming 0% discount rate
# Figure D2: Total cost by scenario assuming variable discount rates
for _costs, _suffix in [(input_costs_undiscounted, '_UNDISCOUNTED'),
                        (input_costs_variable_discounting, '_VARIABLE_DISCOUNTING')]:
    do_stacked_bar_plot_of_cost_by_category(_df=_costs,
                                            _cost_category='all',
                                            _year=list_of_years_for_plot,
                                            _disaggregate_by_subgroup=False,
                                            _outputfilepath=figurespath,
                                            _scenario_dict=cost_scenarios,
                                            _add_figname_suffix=_suffix)

# Figure F1-F4: Cost by cost sub-group
cost_categories = ['human resources for health', 'medical consumables',
                   'medical equipment', 'facility operating cost']
draws = input_costs.draw.unique().tolist()
# Fixed colours for selected consumables, passed to the consumables treemaps
colourmap_for_consumables = {
    'First-line ART regimen: adult': '#1f77b4',
    'Test, HIV EIA Elisa': '#ff7f0e',
    'VL Test': '#2ca02c',
    'Depot-Medroxyprogesterone Acetate 150 mg - 3 monthly': '#d62728',
    'Oxygen, 1000 liters, primarily with oxygen cylinders': '#9467bd',
    'Phenobarbital, 100 mg': '#8c564b',
    'Rotavirus vaccine': '#e377c2',
    'Carbamazepine 200mg_1000_CMST': '#7f7f7f',
    'Infant resuscitator, clear plastic + mask + bag_each_CMST': '#bcbd22',
    'Dietary supplements (country-specific)': '#17becf',
    'Tenofovir (TDF)/Emtricitabine (FTC), tablet, 300/200 mg': '#2b8cbe',
    'Pneumococcal vaccine': '#fdae61',
    'Pentavalent vaccine (DPT, Hep B, Hib)': '#d73027',
    'male circumcision kit, consumables (10 procedures)_1_IDA': '#756bb1',
    'Jadelle (implant), box of 2_CMST': '#ffdd44',
    'Urine analysis': '#66c2a5',
}

for _cat in cost_categories:
    # Consumables treemaps get the fixed colour map and a slightly smaller label font
    if _cat == 'medical consumables':
        _treemap_options = {'_color_map': colourmap_for_consumables, '_label_fontsize': 8}
    else:
        _treemap_options = {'_label_fontsize': 8.5}
    for _d in draws:
        create_summary_treemap_by_cost_subgroup(_df=input_costs, _year=list_of_years_for_plot,
                                                _cost_category=_cat, _draw=_d, show_title=False,
                                                _outputfilepath=figurespath, **_treemap_options)
# Get tables for overview paper
# -----------------------------------------------------------------------------------------------------------------------
def generate_detail_cost_table(_groupby_var, _groupby_var_name, _longtable=False):
    """
    Write (and print) a LaTeX table of total cost by cost category and `_groupby_var` for draws 0, 3, 5 and 8.

    Reads the module-level `input_costs` and saves the table to `figurespath / cost_by_<_groupby_var>.tex`.
    Each cell reads "mean [lower-upper]" with costs rounded to whole units.

    :param _groupby_var: column of `input_costs` to disaggregate by (e.g. 'cost_subcategory').
    :param _groupby_var_name: header shown for that column in the table.
    :param _longtable: passed to `DataFrame.to_latex`; use the longtable environment for large tables.
    """
    edited_input_costs = input_costs.copy()
    # Make the labels LaTeX-safe: underscores become spaces, '%' and '&' get a leading backslash.
    # Raw strings keep the backslash without relying on an invalid escape sequence.
    edited_input_costs[_groupby_var] = edited_input_costs[_groupby_var].replace('_', ' ', regex=True)
    edited_input_costs[_groupby_var] = edited_input_costs[_groupby_var].replace('%', r'\%', regex=True)
    edited_input_costs[_groupby_var] = edited_input_costs[_groupby_var].replace('&', r'\&', regex=True)

    # Group data and aggregate cost for each draw and stat
    grouped_costs = edited_input_costs.groupby(['cost_category', _groupby_var, 'draw', 'stat'])['cost'].sum()
    # Format the 'cost' values before creating the LaTeX table
    grouped_costs = grouped_costs.apply(lambda x: f"{float(x):,.0f}")

    # Restructure the data so that each draw becomes one text column
    pivot_data = {}
    for draw in [0, 3, 5, 8]:
        draw_data = grouped_costs.xs(draw, level='draw').unstack(fill_value=0)  # Unstack to get 'stat' as columns
        # Concatenate 'mean' with 'lower-upper' in the required format
        pivot_data[draw] = (draw_data['mean'].astype(str) + ' ['
                            + draw_data['lower'].astype(str) + '-'
                            + draw_data['upper'].astype(str) + ']')

    # Combine draw data into a single DataFrame
    table_data = pd.concat([pivot_data[0], pivot_data[3], pivot_data[5], pivot_data[8]], axis=1,
                           keys=['draw=0', 'draw=3', 'draw=5', 'draw=8']).reset_index()

    # Rename columns for clarity
    table_data.columns = ['Cost Category', _groupby_var_name, 'Actual', 'Expanded HRH',
                          'Improved consumable availability',
                          'Expanded HRH +\n Improved consumable availability']

    # Convert to LaTeX format
    latex_table = table_data.to_latex(
        longtable=_longtable,  # Use the longtable environment for large tables
        column_format='|R{3cm}|R{3cm}|R{2.2cm}|R{2.2cm}|R{2.2cm}|R{2.2cm}|',
        caption=f"Summarized Costs by Category and {_groupby_var_name}",
        label=f"tab:cost_by_{_groupby_var}",
        position="h",
        index=False,
        escape=False,  # Labels were escaped by hand above; do not escape them a second time
        header=True
    )

    # Add \hline after the header and after every row for horizontal lines
    latex_table = latex_table.replace("\\\\", "\\\\ \\hline")

    # Specify the file path to save
    latex_file_path = figurespath / f'cost_by_{_groupby_var}.tex'

    # Write to a file; the encoding is explicit so the output does not depend on the platform default
    with open(latex_file_path, 'w', encoding='utf-8') as latex_file:
        latex_file.write(latex_table)

    # Print latex for reference
    print(latex_table)


# Table F1: Cost by cost subcategory
generate_detail_cost_table(_groupby_var='cost_subcategory', _groupby_var_name='Cost Subcategory', _longtable=True)
# Table F2: Cost by cost subgroup
generate_detail_cost_table(_groupby_var='cost_subgroup', _groupby_var_name='Category Subgroup', _longtable=True)
# Figure E1: Consumable inflow to outflow ratio figure
# -----------------------------------------------------------------------------------------------------------------------
inflow_to_outflow_ratio = pd.read_csv(resourcefilepath / "costing/ResourceFile_Consumables_Inflow_Outflow_Ratio.csv")

# Clean category names for plot
clean_category_names = {'cancer': 'Cancer', 'cardiometabolicdisorders': 'Cardiometabolic Disorders',
                        'contraception': 'Contraception', 'general': 'General', 'hiv': 'HIV', 'malaria': 'Malaria',
                        'ncds': 'Non-communicable Diseases', 'neonatal_health': 'Neonatal Health',
                        'other_childhood_illnesses': 'Other Childhood Illnesses',
                        'reproductive_health': 'Reproductive Health',
                        'road_traffic_injuries': 'Road Traffic Injuries', 'tb': 'Tuberculosis',
                        'undernutrition': 'Undernutrition'}
inflow_to_outflow_ratio['category'] = inflow_to_outflow_ratio['item_category'].map(clean_category_names)


def plot_inflow_to_outflow_ratio(_df, groupby_var, _outputfilepath):
    """
    Save a bar plot of the inflow to outflow ratio by `groupby_var`, with individual values overlaid.

    :param _df: DataFrame containing `groupby_var` and an 'inflow_to_outflow_ratio' column.
    :param groupby_var: column of `_df` shown on the x-axis.
    :param _outputfilepath: folder in which 'inflow_to_outflow_ratio_by_<groupby_var>.png' is saved.
    """
    # Fix the category order once and hand it to both seaborn calls, so that the tick labels set below are
    # guaranteed to sit under the bars they describe. Missing values are excluded (they are not drawn), numeric
    # categories are sorted and all others keep their order of appearance.
    categories = _df[groupby_var].dropna().unique()
    if pd.api.types.is_numeric_dtype(_df[groupby_var]):
        order = sorted(categories)
    else:
        order = list(categories)

    fig, ax = plt.subplots(figsize=(10, 6))

    # Plot the bar plot with gray bars
    sns.barplot(data=_df, x=groupby_var, y='inflow_to_outflow_ratio', order=order, errorbar=None, color="gray",
                ax=ax)

    # Add points representing the distribution of individual values
    sns.stripplot(data=_df, x=groupby_var, y='inflow_to_outflow_ratio', order=order, color='black', size=5,
                  alpha=0.2, ax=ax)

    # Wrap x-axis labels ONLY if they are strings and longer than 15 characters
    labels = [textwrap.fill(label, width=15) if isinstance(label, str) and len(label) > 15 else label
              for label in order]
    ax.set_xticks(range(len(order)))
    ax.set_xticklabels(labels, rotation=90, ha='center')

    # Set axis labels
    ax.set_xlabel(groupby_var)
    ax.set_ylabel('Inflow to Outflow Ratio')

    # Save plot and release the figure
    fig.tight_layout()
    fig.savefig(_outputfilepath / f'inflow_to_outflow_ratio_by_{groupby_var}.png')
    plt.close(fig)


plot_inflow_to_outflow_ratio(inflow_to_outflow_ratio, 'fac_type_tlo', _outputfilepath=figurespath)
plot_inflow_to_outflow_ratio(inflow_to_outflow_ratio, 'district', _outputfilepath=figurespath)
plot_inflow_to_outflow_ratio(inflow_to_outflow_ratio, 'item_code', _outputfilepath=figurespath)
plot_inflow_to_outflow_ratio(inflow_to_outflow_ratio, 'category', _outputfilepath=figurespath)
# Define a timestamp for script outputs
timestamp = datetime.datetime.now().strftime("_%Y_%m_%d_%H_%M")

# Print the start time of the script
print('Script Start', datetime.datetime.now().strftime('%H:%M'))

# Establish common paths
resourcefilepath = Path("./resources")

# Steps: 1. Create a mapping of data labels in model_costing and relevant calibration data,
# 2. Create a dataframe with model_costs and calibration costs
# Load costing resourcefile (all sheets)
workbook_cost = pd.read_excel((resourcefilepath / "costing/ResourceFile_Costing.xlsx"),
                              sheet_name=None)
# Prepare data for calibration
calibration_data = workbook_cost["resource_mapping_r7_summary"]
budget_columns = ['BUDGETS (USD) (Jul 2019 - Jun 2020)', 'BUDGETS (USD) (Jul 2020 - Jun 2021)',
                  'BUDGETS (USD) (Jul 2021 - Jun 2022)']
expenditure_columns = ['EXPENDITURE (USD) (Jul 2018 - Jun 2019)']
# Make sure values are numeric (entries that cannot be parsed become NaN)
value_columns = budget_columns + expenditure_columns
calibration_data[value_columns] = calibration_data[value_columns].apply(pd.to_numeric, errors='coerce')
# For calibration to budget figures, the maximum across the three budget years provides the upper limit to
# calibrate to (expenditure providing the lower limit)
calibration_data['max_annual_budget_2020-22'] = calibration_data[budget_columns].max(axis=1, skipna=True)
calibration_value_columns = ['actual_expenditure_2019', 'max_annual_budget_2020-22']
calibration_data = (
    calibration_data
    .rename(columns={'EXPENDITURE (USD) (Jul 2018 - Jun 2019)': 'actual_expenditure_2019',
                     'Calibration_category': 'calibration_category'})
    [['calibration_category'] + calibration_value_columns]
    .groupby('calibration_category')[calibration_value_columns].sum()
    .reset_index()
)
# Repeat this dataframe three times to map to the lower, mean and upper stats in the cost data
calibration_data = pd.concat([calibration_data.assign(stat=_stat) for _stat in ('lower', 'mean', 'upper')],
                             axis=0)
calibration_data = calibration_data.set_index(['calibration_category', 'stat'])

# %%
# Estimate cost for validation
# -----------------------------
# Load result files
outputfilepath = Path('./outputs/t.mangal@imperial.ac.uk')
results_folder = get_scenario_outputs('htm_and_hss_runs-2025-01-16T135243Z.py', outputfilepath)[0]  # January 2025 runs

# Estimate costs for 2018
input_costs = estimate_input_cost_of_scenarios(results_folder, resourcefilepath, _years=[2018], _draws=[0],
                                               summarize=True, cost_only_used_staff=False)
# Manually create a dataframe of model costs and relevant calibration values
def assign_item_codes_to_consumables(_df):
    """
    Return the medical-consumables rows of `_df` with consumable names in 'cost_subgroup' replaced by item codes.

    The name-to-code mapping is read from the 'consumables' sheet of the module-level `workbook_cost`; names
    that are not in the mapping become NaN.

    :param _df: cost DataFrame with 'cost_category' and 'cost_subgroup' columns.
    :return: a copy of the consumables rows, with 'cost_subgroup' holding item codes.
    """
    # Retain only consumable costs
    _df = _df[_df['cost_category'] == 'medical consumables']

    # Create dictionary mapping consumable names to item codes. The sheet's first row holds the column names.
    consumables_df = workbook_cost["consumables"]
    consumables_df = consumables_df.rename(columns=consumables_df.iloc[0])
    consumables_df = consumables_df[['Item_Code', 'Consumable_name_tlo']].reset_index(
        drop=True).iloc[1:]
    consumables_df = consumables_df[consumables_df['Item_Code'].notna()]
    consumables_dict = dict(zip(consumables_df['Consumable_name_tlo'], consumables_df['Item_Code']))

    # Replace consumable_name_tlo with item_code
    _df = _df.copy()
    _df['cost_subgroup'] = _df['cost_subgroup'].map(consumables_dict)

    return _df


def get_calibration_relevant_subset_of_costs(_df, _col, _col_value, _calibration_category):
    """
    Sum the cost of the rows of `_df` whose `_col` value is in `_col_value`, by statistic.

    :param _df: cost DataFrame with `_col`, 'stat' and 'cost' columns; it is not modified.
    :param _col: name of the column to filter on.
    :param _col_value: collection (list, tuple or set) of values of `_col` to retain.
    :param _calibration_category: label assigned to the retained rows.
    :return: Series of summed cost indexed by ('calibration_category', 'stat').
    """
    # isin handles any number of values and any collection type; indexing `_col_value[0]` would fail for a set
    subset = _df[_df[_col].isin(list(_col_value))]
    # assign returns a new frame, so the caller's data (and the filtered slice) are left untouched
    subset = subset.assign(calibration_category=_calibration_category)
    return subset.groupby(['calibration_category', 'stat'])['cost'].sum()
# Consumables
# -----------------------------------------------------------------------------------------------------------------------
calibration_data['model_cost'] = np.nan
consumables_costs_by_item_code = assign_item_codes_to_consumables(input_costs)
consumable_list = pd.read_csv(resourcefilepath / 'healthsystem' / 'consumables'
                              / 'ResourceFile_Consumables_Items_and_Packages.csv')


def get_item_code(item):
    """Look up the item code of a consumable name in `consumable_list`."""
    return get_item_code_from_item_name(consumable_list, item)


def _item_codes(*item_names):
    """Return the item codes of the given consumable names, in the order given."""
    return [get_item_code(_name) for _name in item_names]


# Malaria consumables
irs = _item_codes('Indoor residual spraying drugs/supplies to service a client')
bednets = _item_codes('Insecticide-treated net')
antimalarials = _item_codes(
    'Lumefantrine 120mg/Artemether 20mg, 30x18_540_CMST',
    'Injectable artesunate',
    'Fansidar (sulphadoxine / pyrimethamine tab)',
)
malaria_rdts = _item_codes('Malaria test kit (RDT)')

# HIV consumables
hiv_screening = _item_codes('Test, HIV EIA Elisa', 'VL Test', 'CD4 test')

art = _item_codes(
    "First-line ART regimen: adult", "Cotrimoxizole, 960mg pppy",  # adult
    "First line ART regimen: older child", "Cotrimoxazole 120mg_1000_CMST",  # older children
    "First line ART regimen: young child",  # younger children (also get cotrimoxazole 120mg)
    'Sulfamethoxazole + trimethropin, tablet 400 mg + 80 mg',
    "Tenofovir (TDF)/Emtricitabine (FTC), tablet, 300/200 mg",  # adult prep
    "Nevirapine, oral solution, 10 mg/ml",  # infant prep
)

circumcision = _item_codes('male circumcision kit, consumables (10 procedures)_1_IDA')

# Tuberculosis consumables
# consider removing X-ray
tb_tests = _item_codes(
    "ZN Stain", "Sputum container", "Microscope slides, lime-soda-glass, pack of 50",
    "Xpert", "Lead rubber x-ray protective aprons up to 150kVp 0.50mm_each_CMST",
    "X-ray", "MGIT960 Culture and DST",
    "Solid culture and DST",
)
tb_treatment = _item_codes(
    "Cat. I & III Patient Kit A",  # adult primary
    "Cat. I & III Patient Kit B",  # child primary
    "Cat. II Patient Kit A1",  # adult secondary
    "Cat. II Patient Kit A2",  # child secondary
    "Treatment: second-line drugs",  # MDR
    "Isoniazid/Pyridoxine, tablet 300 mg",  # IPT
    "Isoniazid/Rifapentine",  # 3 HP
)

# Family planning consumables
other_family_planning = _item_codes(
    "Levonorgestrel 0.15 mg + Ethinyl estradiol 30 mcg (Microgynon), cycle",  # pill
    "IUD, Copper T-380A",  # IUD
    "Depot-Medroxyprogesterone Acetate 150 mg - 3 monthly",  # injection
    "Jadelle (implant), box of 2_CMST",  # implant
    'Implanon (Etonogestrel 68 mg)',  # implant - not currently in use in the model
    "Atropine sulphate 600 micrograms/ml, 1ml_each_CMST",  # female sterilization
)
condoms = _item_codes("Condom, male", "Female Condom_Each_CMST")

# Undernutrition
undernutrition = _item_codes(
    'Supplementary spread, sachet 92g/CAR-150',
    'Complementary feeding--education only drugs/supplies to service a client',
    'SAM theraputic foods',
    'SAM medicines',
    'Therapeutic spread, sachet 92g/CAR-150',
    'F-100 therapeutic diet, sach., 114g/CAR-90',
)

# Cervical cancer
cervical_cancer = _item_codes('Specimen container', 'Biopsy needle', 'Cyclophosphamide, 1 g')

# Vaccines
vaccines = _item_codes(
    "Syringe, autodisposable, BCG, 0.1 ml, with needle",
    "Polio vaccine",
    "Pentavalent vaccine (DPT, Hep B, Hib)",
    "Rotavirus vaccine",
    "Measles vaccine",
    "Pneumococcal vaccine",
    "HPV vaccine",
    "Tetanus toxoid, injection",  # not sure if this should be included
)

# Everything that is not in one of the calibrated groups below (cervical cancer and circumcision items are
# deliberately not subtracted, so they stay in this residual group)
other_drugs = set(consumables_costs_by_item_code['cost_subgroup'].unique()).difference(
    irs, bednets, undernutrition, other_family_planning, vaccines,
    art, tb_treatment, antimalarials, malaria_rdts, hiv_screening,
    condoms, tb_tests)

# Note that the main ARV regimen in 2018 was tenofovir/lamivudine/efavirenz as opposed to
# Tenofovir/Lamivudine/Dolutegravir as used in the RF_Costing, so the modelled antiretroviral cost is
# rescaled by 82 / (0.103 * 365).
calibration_data['model_cost'] = calibration_data['model_cost'].fillna(
    get_calibration_relevant_subset_of_costs(_df=consumables_costs_by_item_code, _col='cost_subgroup',
                                             _col_value=art,
                                             _calibration_category='Antiretrovirals') * 82/(0.103*365))

# Other consumables costs do not need to be adjusted ('Cervical Cancer' is currently not calibrated)
for _item_code_group, _category_label in [
    (irs, 'Indoor Residual Spray'),
    (bednets, 'Bednets'),
    (undernutrition, 'Undernutrition commodities'),
    (other_family_planning, 'Other family planning commodities'),
    (vaccines, 'Vaccines'),
    (tb_treatment, 'TB Treatment'),
    (antimalarials, 'Antimalarials'),
    (malaria_rdts, 'Malaria RDTs'),
    (hiv_screening, 'HIV Screening/Diagnostic Tests'),
    (condoms, 'Condoms and Lubricants'),
    (tb_tests, 'TB Tests (including RDTs)'),
    (other_drugs, 'Other Drugs, medical supplies, and commodities'),
]:
    # fillna aligns on the (calibration_category, stat) index, so each pass fills one category's rows
    calibration_data['model_cost'] = calibration_data['model_cost'].fillna(
        get_calibration_relevant_subset_of_costs(_df=consumables_costs_by_item_code, _col='cost_subgroup',
                                                 _col_value=_item_code_group,
                                                 _calibration_category=_category_label))
+calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = consumables_costs_by_item_code, _col = 'cost_subgroup', _col_value = circumcision, _calibration_category = 'Voluntary Male Medical Circumcision')) +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = input_costs, _col = 'cost_subcategory', _col_value = ['supply_chain'], _calibration_category = 'Supply Chain')) + +# HR +#----------------------------------------------------------------------------------------------------------------------- +hr_costs = input_costs[input_costs['cost_category'] == 'human resources for health'] +#ratio_of_all_to_used_staff = total_salary_for_all_staff[(0,2018)]/total_salary_for_staff_used_in_scenario[( 0, 'lower')][2018] +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = hr_costs, _col = 'cost_subcategory', _col_value = ['salary_for_all_staff'], _calibration_category = 'Health Worker Salaries')) +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = hr_costs, _col = 'cost_subcategory', _col_value = ['preservice_training_and_recruitment_cost_for_attrited_workers'], _calibration_category = 'Health Worker Training - Pre-Service')) # TODO remove recruitment costs? 
+calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = hr_costs, _col = 'cost_subcategory', _col_value = ['inservice_training_cost_for_all_staff'], _calibration_category = 'Health Worker Training - In-Service')) +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = hr_costs, _col = 'cost_subcategory', _col_value = ['mentorship_and_supportive_cost_for_all_staff'], _calibration_category = 'Mentorships & Supportive Supervision')) + +# Equipment +equipment_costs = input_costs[input_costs['cost_category'] == 'medical equipment'] +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = equipment_costs, _col = 'cost_subcategory', _col_value = ['replacement_cost_annual_total'], _calibration_category = 'Medical Equipment - Purchase')) +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = equipment_costs, _col = 'cost_subcategory', + _col_value = ['service_fee_annual_total', 'spare_parts_annual_total','major_corrective_maintenance_cost_annual_total'], + _calibration_category = 'Medical Equipment - Maintenance')) +#calibration_data[calibration_data['calibration_category'] == 'Vehicles - Purchase and Maintenance'] = get_calibration_relevant_subset() +#calibration_data[calibration_data['calibration_category'] == 'Vehicles - Purchase and Maintenance'] = get_calibration_relevant_subset() + +# Facility operation costs +#----------------------------------------------------------------------------------------------------------------------- +calibration_data['model_cost'] = calibration_data['model_cost'].fillna(get_calibration_relevant_subset_of_costs(_df = input_costs, _col = 'cost_subgroup', _col_value = ['Electricity', 'Water', 'Cleaning', 'Security', 'Food for inpatient cases', 'Facility management'], _calibration_category = 
def do_cost_calibration_plot(_df, _costs_included, _xtick_fontsize = 10, _plot_name = None):
    """
    Save a dot plot comparing modelled costs with expenditure and budget benchmarks.

    One x-axis position is drawn per calibration category in ``_costs_included``
    (in that order), followed by a 'Total' position holding the column sums.
    All values are divided by 1e6 before plotting, matching the y-axis label
    'Costs (USD), millions'. The figure is written to
    ``calibration_outputs_folder / 'calibration_dot_plot_<name>.png'``.

    :param _df: DataFrame with a two-level index whose second level is named
        'stat' and takes the values 'mean', 'lower' and 'upper', and with the
        columns 'model_cost', 'actual_expenditure_2019' and
        'max_annual_budget_2020-22'.
    :param _costs_included: list of first-level index labels to plot; rows
        without a model cost are dropped.
    :param _xtick_fontsize: font size of the x tick labels.
    :param _plot_name: optional name used in the title and output file name.
        When omitted, the name of the module-level variable bound to
        ``_costs_included`` is used (as before); if there is no such variable
        the name falls back to 'custom' instead of raising ``IndexError``.
    :return: None. The figure is saved and closed.
    """
    # Keep only requested categories that have a model estimate
    rows_to_keep = _df.model_cost.notna() & _df.index.get_level_values(0).isin(_costs_included)
    _df = _df[rows_to_keep]

    # Make the first index level an ordered categorical so that sorting follows _costs_included
    _df.index = pd.MultiIndex.from_arrays([
        pd.CategoricalIndex(_df.index.get_level_values(0), categories=_costs_included, ordered=True),
        _df.index.get_level_values(1)
    ])
    _df = _df.sort_index()

    def _stat_in_millions(_stat):
        """Return the rows for one statistic, in millions, with a 'Total' row appended."""
        subset = _df.loc[_df.index.get_level_values('stat') == _stat].reset_index(level='stat', drop=True)/1e6
        total = pd.DataFrame(subset.sum()).T
        total.index = ['Total']
        return pd.concat([subset, total], axis=0)

    df_mean = _stat_in_millions('mean')
    df_lower = _stat_in_millions('lower')
    df_upper = _stat_in_millions('upper')

    # Create the dot plot
    plt.figure(figsize=(12, 8))

    # Model cost as dots with the lower/upper estimates as (non-negative) error bars
    yerr_lower = (df_mean['model_cost'] - df_lower['model_cost']).clip(lower = 0)
    yerr_upper = (df_upper['model_cost'] - df_mean['model_cost']).clip(lower = 0)
    plt.errorbar(df_mean.index, df_mean['model_cost'],
                 yerr=[yerr_lower, yerr_upper],
                 fmt='o', label='Model Cost', ecolor='gray', capsize=5, color='saddlebrown')

    # Benchmarks: recorded expenditure and maximum annual budget
    plt.plot(df_mean.index, df_mean['actual_expenditure_2019'], 'bo', label='Actual Expenditure 2019', markersize=8)
    plt.plot(df_mean.index, df_mean['max_annual_budget_2020-22'], 'go', label='Max Annual Budget 2020-22', markersize=8)

    # Vertical line joining the two benchmarks for each category
    plt.vlines(df_mean.index, df_mean['actual_expenditure_2019'], df_mean['max_annual_budget_2020-22'], color='blue',
               label='Expenditure-Budget Range')

    # Label each model-cost dot with its value, shifted slightly to the right
    for position, value in enumerate(df_mean['model_cost']):
        plt.text(position + 0.05, value, f'{value:.2f}', ha='left', va='bottom', fontsize=9,
                 color='saddlebrown')

    # Resolve the name used in the title and file name
    if _plot_name is None:
        # Original behaviour: use the module-level variable that *is* the list passed in
        _plot_name = next((name for name, value in globals().items() if value is _costs_included), None)
    if _plot_name is None:
        # The list is not bound to any module-level name (e.g. built inline by the caller)
        _plot_name = 'custom'
    cost_subcategory = _plot_name.replace('list_of_', '').replace('_for_calibration', '')

    plt.xlabel('Cost Sub-Category')
    plt.ylabel('Costs (USD), millions')
    plt.title(f'Model Cost vs Annual Expenditure 2019 and Max(Annual Budget 2020-22)\n {cost_subcategory}')

    # White background with a black border
    plt.grid(False)
    ax = plt.gca()
    ax.set_facecolor('white')
    for spine in ax.spines.values():
        spine.set_edgecolor('black')
        spine.set_linewidth(1.5)

    # Wrap long category names so the rotated tick labels stay readable
    max_label_length = 15
    wrapped_labels = [textwrap.fill(str(label), max_label_length) for label in df_mean.index]
    plt.xticks(ticks=range(len(wrapped_labels)), labels=wrapped_labels, rotation=45, ha='right', fontsize=_xtick_fontsize)

    # Legend outside the axes, top right
    plt.legend(loc='upper left', bbox_to_anchor=(1, 1), fontsize=10)

    # Tight layout and save the figure
    plt.tight_layout()
    plt.savefig(calibration_outputs_folder / f'calibration_dot_plot_{cost_subcategory}.png', dpi=100,
                bbox_inches='tight')
    plt.close()
consumables', +'Antimalarials': 'medical consumables', +'Undernutrition commodities': 'medical consumables', +'Cervical Cancer': 'medical consumables', +'Condoms and Lubricants': 'medical consumables', +'Other family planning commodities': 'medical consumables', +'TB Tests (including RDTs)': 'medical consumables', +'TB Treatment': 'medical consumables', +'Vaccines': 'medical consumables', +'Malaria RDTs': 'medical consumables', +'HIV Screening/Diagnostic Tests': 'medical consumables', +'Antiretrovirals': 'medical consumables', +'Health Worker Salaries': 'human resources for health', +'Health Worker Training - In-Service': 'human resources for health', +'Health Worker Training - Pre-Service': 'human resources for health', +'Mentorships & Supportive Supervision': 'human resources for health', +'Facility utility bills': 'facility operating cost', +'Infrastructure - New Builds': 'Not represented in TLO model', +'Infrastructure - Rehabilitation': 'facility operating cost', +'Infrastructure - Upgrades': 'Not represented in TLO model', +'Medical Equipment - Maintenance': 'medical equipment', +'Medical Equipment - Purchase': 'medical equipment', +'Vehicles - Fuel and Maintenance': 'facility operating cost', +'Vehicles - Purchase': 'Not represented in TLO model', +'Vehicles - Fuel and Maintenance (Beyond Government and CHAM)': 'Not represented in TLO model', +'Supply Chain': 'medical consumables', +'Supply Chain - non-EHP consumables': 'Not represented in TLO model', +'Unclassified': 'Not represented in TLO model'} +calibration_data_extract['cost_category'] = calibration_data_extract['calibration_category'].map(calibration_categories_dict) + +# Obtain the magnitude of deviation of model estimate from expenditure or budget estimate, whichever is closer +# Step 1: Calculate signed deviations +calibration_data_extract['dev_from_expenditure'] = ( + (calibration_data_extract['model_cost'] - calibration_data_extract['actual_expenditure_2019']) + / 
calibration_data_extract['actual_expenditure_2019'] +) + +calibration_data_extract['dev_from_budget'] = ( + (calibration_data_extract['model_cost'] - calibration_data_extract['max_annual_budget_2020-22']) + / calibration_data_extract['max_annual_budget_2020-22'] +) + +# Step 2: For each row, pick the value (signed) with the minimum absolute deviation +calibration_data_extract['Deviation of estimated cost from nearest benchmark (%)'] = calibration_data_extract.apply( + lambda row: min( + [row['dev_from_expenditure'], row['dev_from_budget']], + key=lambda x: abs(x) if pd.notna(x) else float('inf') + ), + axis=1 +) + +# Format the deviation as a percentage with 2 decimal points +calibration_data_extract['Deviation of estimated cost from nearest benchmark (%)'] = ( + calibration_data_extract['Deviation of estimated cost from nearest benchmark (%)'] + .map(lambda x: f"{x * 100:.2f}%") +) +calibration_data_extract.loc[calibration_data_extract['Deviation of estimated cost from nearest benchmark (%)'] == 'nan%', 'Deviation of estimated cost from nearest benchmark (%)'] = 'NA' +# Replace if calibration is fine +calibration_condition_met = ((calibration_data_extract['model_cost'] > calibration_data_extract[['actual_expenditure_2019', 'max_annual_budget_2020-22']].min(axis=1)) & + (calibration_data_extract['model_cost'] < calibration_data_extract[['actual_expenditure_2019', 'max_annual_budget_2020-22']].max(axis=1))) + +calibration_data_extract.loc[calibration_condition_met, + 'Deviation of estimated cost from nearest benchmark (%)' +] = 'Within expenditure-budget range' + +calibration_data_extract.loc[calibration_data_extract['model_cost'].isna(), 'model_cost'] = 'NA' + +calibration_data_extract = calibration_data_extract.sort_values(by=['cost_category', 'calibration_category']) +calibration_data_extract = calibration_data_extract[['cost_category', 'calibration_category', 'actual_expenditure_2019', 'max_annual_budget_2020-22', 'model_cost', 'Deviation of estimated cost from 
def convert_df_to_latex(_df, _longtable = False, numeric_columns = None):
    """
    Write the calibration table as LaTeX to 'calibration_breakdown.tex' and print it.

    NOTE: ``_df`` is modified in place. Ampersands in 'Relevant RM group' are
    LaTeX-escaped and the ``numeric_columns`` are replaced by strings formatted
    with thousands separators and two decimals. Code that runs after this call
    in the script strips those commas again, so the in-place behaviour is kept
    deliberately.

    :param _df: DataFrame with a 'Relevant RM group' column and six columns in
        total (the column format string defines six columns).
    :param _longtable: passed to ``DataFrame.to_latex(longtable=...)``.
    :param numeric_columns: names of the columns to format as numbers;
        ``None`` (the default) means no column is formatted.
    :return: None. The table is written under ``calibration_outputs_folder``
        and echoed to stdout.
    """
    # None replaces the former mutable default `[]`
    numeric_columns = [] if numeric_columns is None else list(numeric_columns)

    _df['Relevant RM group'] = _df['Relevant RM group'].str.replace('&', r'\&', regex=False)

    # Format numbers as XX,XXX.XX; non-numeric cells (e.g. 'NA') are left untouched.
    # Series.map per column replaces DataFrame.applymap, which newer pandas deprecates.
    if numeric_columns:
        _df[numeric_columns] = _df[numeric_columns].apply(
            lambda column: column.map(lambda x: f"{x:,.2f}" if isinstance(x, (int, float)) else x)
        )

    latex_table = _df.to_latex(
        longtable=_longtable,  # Use the longtable environment for large tables
        column_format='|R{3.5cm}|R{3.5cm}|R{2.1cm}|R{2.1cm}|R{2.1cm}|R{2.1cm}|',
        caption="Comparison of Model Estimates with Resource Mapping data",
        label="tab:calibration_breakdown",
        position="h",
        index=False,
        escape=False,  # Cells already carry their own LaTeX escaping
        header=True
    )

    latex_table = latex_table.replace("\\\\", "\\\\ \\hline")  # Horizontal line after every row
    latex_table = latex_table.replace("%", r"\%")  # Escape percent signs (LaTeX comment character)
    # The '&' in group names was escaped in the column above, so no per-label replacement is needed.

    latex_file_path = calibration_outputs_folder / 'calibration_breakdown.tex'

    with open(latex_file_path, 'w', encoding='utf-8') as latex_file:
        latex_file.write(latex_table)

    # Print latex for reference
    print(latex_table)


def drop_outside_period(_df, _years):
    """
    Return the rows of ``_df`` whose 'date' falls in the span covered by ``_years``.

    The span runs from ``min(_years)`` to ``max(_years)`` inclusive, so any year
    between the two is kept even if it is not listed in ``_years``.

    :param _df: DataFrame with a datetime-like 'date' column.
    :param _years: non-empty iterable of years.
    """
    # Define year range
    start_year = min(_years)
    end_year = max(_years)

    # Filter rows by year
    return _df[_df['date'].dt.year.between(start_year, end_year)]


def get_quantity_of_consumables_dispensed(results_folder, _years):
    """
    Return the quantities of consumables recorded as 'Used' in the 'Consumables' summary log.

    :param results_folder: folder passed through to ``extract_results``.
    :param _years: years to keep (see ``drop_outside_period``).
    :return: the ``extract_results`` output restricted to the 'Used' entries of
        the third index level, i.e. consumables that were available when requested.
    """
    def get_counts_of_items_requested(_df):
        """Tally used and not-available item counts per date from one log DataFrame."""
        _df = drop_outside_period(_df, _years)
        counts_of_used = defaultdict(lambda: defaultdict(int))
        counts_of_not_available = defaultdict(lambda: defaultdict(int))

        # Walk the three columns in lockstep rather than materialising a Series per row
        for date, items_used, items_not_available in zip(_df['date'], _df['Item_Used'], _df['Item_NotAvailable']):
            for item, num in items_used.items():
                counts_of_used[date][item] += num
            for item, num in items_not_available.items():
                counts_of_not_available[date][item] += num
        used_df = pd.DataFrame(counts_of_used).fillna(0).astype(int).stack().rename('Used')
        not_available_df = pd.DataFrame(counts_of_not_available).fillna(0).astype(int).stack().rename('Not_Available')

        # Combine into one frame indexed by (item, date) with a column per availability status
        combined_df = pd.concat([used_df, not_available_df], axis=1).fillna(0).astype(int)

        # Convert to a pd.Series, as expected by the custom_generate_series function
        return combined_df.stack()

    cons_req = extract_results(
        results_folder,
        module='tlo.methods.healthsystem.summary',
        key='Consumables',
        custom_generate_series=get_counts_of_items_requested,
        do_scaling=True)

    # Only keep the amount actually dispensed, i.e. when the item was available
    return cons_req.xs("Used", level=2)
") + +# Malaria testing +malaria_rdt = get_item_code('Malaria test kit (RDT)') +print(f"There is good correspondence between quantity of Malaria test kits (RDT) logged by the TLO model and LMIS data - " + f"14,295,107 units dispensed as per OpenLMIS, " + f"{consumables_dispensed_dict[str(malaria_rdt)]:,.0f} units dispensed as per modelled estimates") + +# Bednets +bednets = get_item_code('Insecticide-treated net') +print(f"792,101 units dispensed as per OpenLMIS, " + f"{consumables_dispensed_dict[str(bednets)]:,.0f} units dispensed as per modelled estimates") + +# TB treatment +adult_primary = get_item_code("Cat. I & III Patient Kit A") # adult primary +child_primary = get_item_code("Cat. I & III Patient Kit B") # child primary +adult_second = get_item_code("Cat. II Patient Kit A1") # adult secondary +child_second = get_item_code("Cat. II Patient Kit A2") # child secondary +mdr = get_item_code("Treatment: second-line drugs") # MDR +ipt = get_item_code("Isoniazid/Pyridoxine, tablet 300 mg") # IPT +iso_rifa = get_item_code("Isoniazid/Rifapentine") +print(f"\item {consumables_dispensed_dict[str(adult_primary)]:,.0f} units of primary treatment kits for adults " + f"\item {consumables_dispensed_dict[str(child_primary)]:,.0f} units of primary treatment kids for children " + f"\item {consumables_dispensed_dict[str(adult_second)]:,.0f} units of secondary treatment kits for adults " + f"\item {consumables_dispensed_dict[str(child_second)]:,.0f} units of secondary treatment kits for children " + f"\item {consumables_dispensed_dict[str(mdr)]:,.0f} kits for Multi-drug resistant treatment " + f"\item {consumables_dispensed_dict[str(ipt)]:,.0f} tablets of preventive Isoniazid/Pyridoxine, and " + f"\item {consumables_dispensed_dict[str(iso_rifa)]:,.0f} tablets of preventive Isoniazid/Rifapentine") + +# TB testing +zn_stain = get_item_code("ZN Stain") +sputum_container = get_item_code("Sputum container") +slides = get_item_code("Microscope slides, lime-soda-glass, pack of 50") 
+xpert = get_item_code("Xpert") +xray_aprons = get_item_code("Lead rubber x-ray protective aprons up to 150kVp 0.50mm_each_CMST") +film = get_item_code("X-ray") +culture = get_item_code("MGIT960 Culture and DST") +solid_culture = get_item_code("Solid culture and DST") + +print(f"\item `ZN Stain' - No record in OpenLMIS; {consumables_dispensed_dict[str(zn_stain)]:,.0f} units dispensed as per modelled estimates" + f"\item `Sputum container' - No record in OpenLMIS; {consumables_dispensed_dict[str(sputum_container)]:,.0f} units dispensed as per modelled estimates" + f"\item `Microscope slides, lime-soda-glass, pack of 50' - No record in OpenLMIS; {consumables_dispensed_dict[str(slides)]:,.0f} units dispensed as per modelled estimates" + f"\item `Xpert cartridge' - 25,205 cartridges recorded in OpenLMIS; {consumables_dispensed_dict[str(xpert)]:,.0f} units dispensed as per modelled estimates. " + f"\item `Lead rubber x-ray protective aprons up to 150kVp 0.50mm' - No record in OpenLMIS; {consumables_dispensed_dict[str(xray_aprons)]:,.0f} units dispensed as per modelled estimates. " + f"\item `X-Ray film' - No record in OpenLMIS; {consumables_dispensed_dict[str(film)]:,.0f} units dispensed as per modelled estimates. ") +# Culture not included as this these have been replaced by ZN stain - there was no record in OpenLMIS + + +# HIV testing +hiv_test = get_item_code('Test, HIV EIA Elisa') +vl_test = get_item_code('VL Test') +print(f"{consumables_dispensed_dict[str(hiv_test)]:,.0f} units of 'Test, HIV EIA Elisa', and " + f"{consumables_dispensed_dict[str(vl_test)]:,.0f} units of 'VL Test' were dispensed in 2018 as per the model." + f" OpenLMIS recorded 9,382,640 units of 'Test, HIV EIA Elisa' and there was no record of VL tests. 
We suspect that this discrepancy arises " + f"because some channels of HIV testing might not be recorded in the model.") + +# Family Planning commodities +jadelle = get_item_code("Jadelle (implant), box of 2_CMST") +iud = get_item_code("IUD, Copper T-380A") +levonorgestrel = get_item_code("Levonorgestrel 0.15 mg + Ethinyl estradiol 30 mcg (Microgynon), cycle") +depot = get_item_code("Depot-Medroxyprogesterone Acetate 150 mg - 3 monthly") + +print(f"\item `Jadelle (implant), box of 2\_CMST' - 53,585 units dispensed as per OpenLMIS, {consumables_dispensed_dict[str(jadelle)]:,.0f} units dispensed as per modelled estimates" + f"\item `IUD, Copper T-380A' - 4,079 units dispensed as per OpenLMIS, {consumables_dispensed_dict[str(iud)]:,.0f} units dispensed as per modelled estimates" + f"\item `Depot-Medroxyprogesterone Acetate 150 mg - 3 monthly' - 2,807,681 units dispensed as per OpenLMIS, {consumables_dispensed_dict[str(depot)]:,.0f} dispensed as per modelled estimates" + f"\item `Levonorgestrel 0.15 mg + Ethinyl estradiol 30 mcg (Microgynon), cycle' - 1,795,325 units (37,701,825 tablets) dispensed as per OpenLMIS, {consumables_dispensed_dict[str(levonorgestrel)]:,.0f} tablets dispensed as per modelled estimates") diff --git a/src/scripts/costing/example_costing_scenario.py b/src/scripts/costing/example_costing_scenario.py new file mode 100644 index 0000000000..30ca966f96 --- /dev/null +++ b/src/scripts/costing/example_costing_scenario.py @@ -0,0 +1,122 @@ +''' +Run on the batch system using: +```tlo batch-submit src/scripts/costing/example_costing_scenario.py``` + +or locally using: + ```tlo scenario-run src/scripts/costing/example_costing_scenario.py``` + +''' + +from pathlib import Path +from typing import Dict + +from tlo import Date, logging +from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios +from tlo.methods.fullmodel import fullmodel +from tlo.methods.scenario_switcher import ImprovedHealthSystemAndCareSeekingScenarioSwitcher +from 
class CostingScenarios(BaseScenario):
    """Scenario definition with one draw per entry returned by `_get_scenarios`.

    Two scenarios are defined: "Real world" (the common baseline) and
    "Perfect health system" (the baseline with post-switch overrides for the
    HealthSystem and the ImprovedHealthSystemAndCareSeekingScenarioSwitcher).
    """

    def __init__(self):
        super().__init__()
        self.seed = 0
        self.start_date = Date(2010, 1, 1)
        self.end_date = Date(2030, 1, 1)
        self.pop_size = 1_000 # <- recommended population size for the runs
        # `_get_scenarios` also sets the helper attributes (YEAR_OF_SYSTEM_CHANGE etc.) on `self`.
        self._scenarios = self._get_scenarios()
        self.number_of_draws = len(self._scenarios)  # one draw per named scenario
        self.runs_per_draw = 2 # <- repeated this many times

    def log_configuration(self):
        """Return the logging configuration: WARNING by default, INFO for the listed loggers."""
        return {
            'filename': 'cost_scenarios',
            'directory': './outputs',  # <- (specified only for local running)
            'custom_levels': {
                '*': logging.WARNING,
                'tlo.methods.demography': logging.INFO,
                'tlo.methods.healthburden': logging.INFO,
                'tlo.methods.healthsystem.summary': logging.INFO,
            }
        }

    def modules(self):
        """Return the full model plus the scenario-switcher module."""
        return (fullmodel(resourcefilepath=self.resources) +
                [ImprovedHealthSystemAndCareSeekingScenarioSwitcher(resourcefilepath=self.resources)])

    def draw_parameters(self, draw_number, rng):
        """Return the parameter overrides of the `draw_number`-th scenario (in definition order).

        Falls through (returning None) when `draw_number` is not smaller than the number of scenarios.
        `rng` is not used.
        """
        if draw_number < len(self._scenarios):
            return list(self._scenarios.values())[draw_number]

    def _get_scenarios(self) -> Dict[str, Dict]:
        """Return the Dict with values for the parameters that are changed, keyed by a name for the scenario."""

        # Option lists: index 0 is used by the baseline, index 1 by "Perfect health system".
        self.YEAR_OF_SYSTEM_CHANGE = 2020
        self.mode_appt_constraints_postSwitch = [1,2]
        self.cons_availability = ['default', 'all']
        self.healthsystem_function = [[False, False], [False, True]]
        self.healthcare_seeking = [[False, False], [False, True]]

        return {
            "Real world": self._common_baseline(),

            "Perfect health system":
                mix_scenarios(
                    self._common_baseline(),
                    {
                        'HealthSystem': {
                            # Human Resources
                            'mode_appt_constraints_postSwitch': self.mode_appt_constraints_postSwitch[1],  # <-- Mode 2 post-change to show effects of HRH
                            "scale_to_effective_capabilities": True,  # <-- Transition into Mode2 with the effective capabilities in HRH 'revealed' in Mode 1
                            "year_mode_switch": self.YEAR_OF_SYSTEM_CHANGE,

                            # Consumables
                            'cons_availability_postSwitch': self.cons_availability[1],
                            'year_cons_availability_switch': self.YEAR_OF_SYSTEM_CHANGE,
                        },
                        'ImprovedHealthSystemAndCareSeekingScenarioSwitcher': {
                            'max_healthcare_seeking': self.healthcare_seeking[1],
                            'max_healthsystem_function': self.healthsystem_function[1],
                            'year_of_switch': self.YEAR_OF_SYSTEM_CHANGE,
                        }
                    }
                ),
        }

    def _common_baseline(self) -> Dict:
        """Return the Dict with values for the parameter changes that define the baseline scenario. """
        return mix_scenarios(
            get_parameters_for_status_quo(),  # <-- Parameters that have been the calibration targets
            # Set up the HealthSystem switch parameters, with rescaling when there are HSS changes.
            # NOTE(review): in the baseline the post-switch mode is index 0 of [1, 2], i.e. Mode 1, so the
            # baseline itself does not move to Mode 2; only "Perfect health system" overrides this to Mode 2.
            {
                'HealthSystem': {
                    # Human resources
                    'mode_appt_constraints': 1,  # <-- Mode 1 prior to change to preserve calibration
                    'mode_appt_constraints_postSwitch': self.mode_appt_constraints_postSwitch[0],  # <-- index 0 -> Mode 1 is kept after the switch year in the baseline
                    "scale_to_effective_capabilities": True,  # <-- Transition into Mode2 with the effective capabilities in HRH 'revealed' in Mode 1
                    # This happens in the year before mode change, as the model calibration is done by that year
                    "year_mode_switch": self.YEAR_OF_SYSTEM_CHANGE,
                    'yearly_HR_scaling_mode': 'historical_scaling',  # for 5 years of 2020-2024; source data year 2019

                    # Consumables
                    'cons_availability': 'default',
                    'cons_availability_postSwitch': self.cons_availability[0],  # index 0 -> 'default'
                    'year_cons_availability_switch': self.YEAR_OF_SYSTEM_CHANGE,

                    # Normalize the behaviour of Mode 2
                    "policy_name": 'Naive',
                    "tclose_overwrite": 1,
                    "tclose_days_offset_overwrite": 7,
                },
                'ImprovedHealthSystemAndCareSeekingScenarioSwitcher': {
                    'max_healthcare_seeking': self.healthcare_seeking[0],
                    'max_healthsystem_function': self.healthsystem_function[0],
                    'year_of_switch': self.YEAR_OF_SYSTEM_CHANGE,
                }
            },
        )
scenario_run([__file__]) diff --git a/src/scripts/hiv/projections_jan2023/analysis_logged_deviance.py b/src/scripts/hiv/projections_jan2023/analysis_logged_deviance.py index 6694515618..28d2d5ebed 100644 --- a/src/scripts/hiv/projections_jan2023/analysis_logged_deviance.py +++ b/src/scripts/hiv/projections_jan2023/analysis_logged_deviance.py @@ -21,6 +21,9 @@ simplified_births, symptommanager, tb, + cardio_metabolic_disorders, + depression, + service_integration, ) # Where will outputs go @@ -30,12 +33,12 @@ datestamp = datetime.date.today().strftime("__%Y_%m_%d") # The resource files -resourcefilepath = './resources' +resourcefilepath = Path("./resources") # %% Run the simulation start_date = Date(2010, 1, 1) -end_date = Date(2014, 1, 1) -popsize = 25000 +end_date = Date(2025, 1, 1) +popsize = 10_000 # scenario = 1 @@ -46,9 +49,9 @@ "custom_levels": { "*": logging.WARNING, # "tlo.methods.deviance_measure": logging.INFO, - # "tlo.methods.epi": logging.INFO, - "tlo.methods.hiv": logging.INFO, - "tlo.methods.tb": logging.INFO, + "tlo.methods.epi": logging.INFO, + # "tlo.methods.hiv": logging.INFO, + # "tlo.methods.tb": logging.INFO, "tlo.methods.demography": logging.INFO, # "tlo.methods.demography.detail": logging.WARNING, "tlo.methods.healthsystem.summary": logging.INFO, @@ -61,13 +64,14 @@ # need to call epi before tb to get bcg vax seed = random.randint(0, 50000) # seed = 41728 # set seed for reproducibility -sim = Simulation(start_date=start_date, seed=seed, log_config=log_config, - show_progress_bar=True, resourcefilepath=resourcefilepath) +sim = Simulation(start_date=start_date, seed=seed, log_config=log_config, show_progress_bar=True) sim.register( - demography.Demography(), - simplified_births.SimplifiedBirths(), - enhanced_lifestyle.Lifestyle(), - healthsystem.HealthSystem(service_availability=["*"], # all treatment allowed + demography.Demography(resourcefilepath=resourcefilepath), + simplified_births.SimplifiedBirths(resourcefilepath=resourcefilepath), + 
enhanced_lifestyle.Lifestyle(resourcefilepath=resourcefilepath), + healthsystem.HealthSystem( + resourcefilepath=resourcefilepath, + service_availability=["*"], # all treatment allowed mode_appt_constraints=1, # mode of constraints to do with officer numbers and time cons_availability="default", # mode for consumable constraints (if ignored, all consumables available) ignore_priority=False, # do not use the priority information in HSI event to schedule @@ -76,37 +80,22 @@ disable=False, # disables the healthsystem (no constraints and no logging) and every HSI runs disable_and_reject_all=False, # disable healthsystem and no HSI runs ), - symptommanager.SymptomManager(), - healthseekingbehaviour.HealthSeekingBehaviour(), - healthburden.HealthBurden(), - epi.Epi(), - hiv.Hiv(run_with_checks=False), - tb.Tb(), + symptommanager.SymptomManager(resourcefilepath=resourcefilepath), + healthseekingbehaviour.HealthSeekingBehaviour(resourcefilepath=resourcefilepath), + healthburden.HealthBurden(resourcefilepath=resourcefilepath), + epi.Epi(resourcefilepath=resourcefilepath), + # hiv.Hiv(resourcefilepath=resourcefilepath, run_with_checks=False), + # tb.Tb(resourcefilepath=resourcefilepath), + # cardio_metabolic_disorders.CardioMetabolicDisorders(resourcefilepath=resourcefilepath), + # depression.Depression(resourcefilepath=resourcefilepath), + # service_integration.ServiceIntegration(resourcefilepath=resourcefilepath), # deviance_measure.Deviance(resourcefilepath=resourcefilepath), ) # set the scenario -sim.modules["Hiv"].parameters["do_scaleup"] = True -sim.modules["Hiv"].parameters["scaleup_start_year"] = 2019 -sim.modules["Tb"].parameters["first_line_test"] = 'xpert' -# sim.modules["Tb"].parameters["scenario_start_date"] = Date(2010, 1, 1) -# sim.modules["Tb"].parameters["scenario_SI"] = "z" +# sim.modules["ServiceIntegration"].parameters["serv_int_chronic"] = True +# sim.modules["ServiceIntegration"].parameters["integration_date"] = Date(2010, 1, 1) -# 
sim.modules["Tb"].parameters["rr_tb_hiv"] = 5 # default 13 -# rr relapse if HIV+ 4.7 -# sim.modules["Tb"].parameters["rr_tb_aids"] = 26 # default 26 - -# to cluster tests in positive people -# sim.modules["Hiv"].parameters["rr_test_hiv_positive"] = 1.1 # default 1.5 - -# to account for people starting-> defaulting, or not getting cons -# this not used now if perfect referral testing->treatment -# affects the prob of art start once diagnosed -# sim.modules["Hiv"].parameters["treatment_initiation_adjustment"] = 1 # default 1.5 - -# assume all defaulting is due to cons availability -# sim.modules["Hiv"].parameters["probability_of_being_retained_on_art_every_6_months"] = 1.0 -# sim.modules["Hiv"].parameters["probability_of_seeking_further_art_appointment_if_drug_not_available"] = 1.0 # Run the simulation and flush the logger sim.make_initial_population(n=popsize) diff --git a/src/scripts/service_integration/analysis_script_serv_int_final.py b/src/scripts/service_integration/analysis_script_serv_int_final.py new file mode 100644 index 0000000000..fbe0f2bad6 --- /dev/null +++ b/src/scripts/service_integration/analysis_script_serv_int_final.py @@ -0,0 +1,701 @@ +from pathlib import Path + +from collections import Counter, defaultdict + +import os +import scipy.stats as st +from pandas import read_excel +from scipy.stats import t, norm, shapiro + +import pandas as pd + +import matplotlib.pyplot as plt +import numpy as np + +from typing import Callable, Dict, Iterable, List, Literal, Optional, TextIO, Tuple, Union + +from tlo import Date +from tlo.analysis.utils import (bin_hsi_event_details, extract_results, extract_params, + get_scenario_outputs, compute_summary_statistics, + make_age_grp_types, get_scenario_info, make_calendar_period_lookup, make_calendar_period_type, parse_log_file) + +from src.scripts.costing.cost_estimation import estimate_input_cost_of_scenarios, do_stacked_bar_plot_of_cost_by_category, summarize_cost_data + +plt.style.use('seaborn-darkgrid') + +# 
def get_pop_by_agegrp_label(df):
    """Reshape a logged population table into a Series indexed by (year, age_group).

    `df` must have a datetime `date` column; every other column is treated as an
    age-group whose values are population counts. The year is taken from `date`.

    The input frame is left untouched (the year column is added to a copy), so the
    same logged frame can safely be reused by the caller.

    Returns a Series named 'count' with a sorted MultiIndex ('year', 'age_group').
    """
    with_year = df.assign(year=df['date'].dt.year)
    # Everything that is neither the date nor the derived year is an age-group column.
    age_group_columns = [col for col in with_year.columns if col not in ('date', 'year')]
    long_format = with_year.melt(
        id_vars=['year'],
        value_vars=age_group_columns,
        var_name='age_group',
        value_name='count',
    )
    return long_format.set_index(['year', 'age_group'])['count'].sort_index()
def compute_service_statistics(counters_by_draw_and_run):
    """Summarise per-run service counts by draw, and their difference from draw 0.

    `counters_by_draw_and_run` maps a 2-tuple key to a Counter of {service_name: count};
    only the first element of the key is used for grouping (presumably the draw, the
    second being the run -- TODO confirm against `bin_hsi_event_details`).

    Returns a tuple `(summary, diff_summary)` of DataFrames whose cells are dicts with
    keys "median", "lower" and "upper". NOTE: the value stored under "median" is the
    mean of the per-run values; the bounds are mean -/+ z * standard error (95% range).
    """
    grouped_data = defaultdict(lambda: defaultdict(list))

    # Step 1: Group counts by first key and service name
    # Each (group, service) cell accumulates one value per counter that contains the service.
    for (group_idx, _), counter in counters_by_draw_and_run.items():
        for service_name, count in counter.items():
            grouped_data[group_idx][service_name].append(count)

    # Columns are the group indices; rows are the service names.
    data_df = pd.DataFrame.from_dict(grouped_data)

    def safe_sum_lists(series):
        """Element-wise sum of the list-valued cells in `series`; NaN if there are none."""
        # Filter out non-list values (like float/NaN)
        valid_lists = [x for x in series if isinstance(x, list)]
        if not valid_lists:
            return np.nan  # or return [0]*length if you want default
        # zip truncates to the shortest list, so position i is summed across all lists.
        return [sum(items) for items in zip(*valid_lists)]

    def diff_from_col0(df):
        """For each row, return element-wise `column 0 - column c` for every other column c."""
        def diff_row(row):
            base = row[0]
            result = {}
            for col in df.columns:
                if col == 0:
                    result[col] = np.nan  # or keep base if needed
                else:
                    val = row[col]
                    if not isinstance(base, list) or not isinstance(val, list):
                        result[col] = np.nan
                    else:
                        # Absolute difference (baseline minus scenario), paired by list position;
                        # positions where the baseline value is 0 are set to NaN.
                        # NOTE(review): pairing by position assumes both lists are in the same
                        # run order -- confirm.
                        result[col] = [b - v if b != 0 else np.nan for b, v in zip(base, val)]
            return pd.Series(result)

        return df.apply(diff_row, axis=1)

    # Run the function
    # Apply to entire DataFrame grouped by level
    # NOTE(review): level=1 presumes a two-level row index (service names being 2-tuples) -- confirm.
    appt_type = data_df.groupby(level=1).agg(safe_sum_lists)
    diff_appt_type = diff_from_col0(appt_type)

    width_of_range = 0.95

    def summarize_list(cell):
        """Return mean and normal-approximation bounds of the values in `cell` as a dict."""
        arr = np.array(cell)
        n = arr.size
        std_deviation = arr.std()  # numpy default (population) standard deviation
        std_error = std_deviation / np.sqrt(n)
        z_value = st.norm.ppf(1 - (1. - width_of_range) / 2.)

        mean = float(np.mean(arr))

        # The central value is the mean, although the key is called "median".
        return {
            "median": mean,
            "lower": mean - z_value * std_error,
            "upper": mean + z_value * std_error,
        }
    # Apply to every cell in the DataFrame
    appt_type_summ = appt_type.applymap(summarize_list)
    diff_appt_type_summ = diff_appt_type.applymap(summarize_list)

    return appt_type_summ, diff_appt_type_summ
df[0]['central'] + baseline_err_lower = baseline_central - df[0]['lower'] + baseline_err_upper = df[0]['upper'] - baseline_central + + # Comparison + comp_central = df[draw]['central'] + comp_err_lower = comp_central - df[draw]['lower'] + comp_err_upper = df[draw]['upper'] - comp_central + + # Plot bars with asymmetric error bars + ax.bar(x - width / 2, baseline_central, width, + yerr=[baseline_err_lower, baseline_err_upper], + capsize=5, label='Status Quo', alpha=0.8) + + ax.bar(x + width / 2, comp_central, width, + yerr=[comp_err_lower, comp_err_upper], + capsize=5, label=scen_draws[draw], alpha=0.8) + + ax.axhline(0, color='gray', linestyle='--', linewidth=1) + + ax.set_title(title) + ax.set_ylabel(ylabel) + ax.set_xticks(x) + ax.set_xticklabels(categories, rotation=45, ha='right') + ax.legend() + ax.grid(axis='y', linestyle='--', alpha=0.4) + plt.tight_layout() + if ylabel.startswith('Weighted'): + plt.savefig(f'{folder}/{scen_draws[draw]}_wtd_dalys_cause.png', bbox_inches='tight') + else: + plt.savefig(f'{folder}/{scen_draws[draw]}_dalys_cause.png', bbox_inches='tight') + + plt.show() + + +# ==================================================== DALYS ========================================================== +# TODO: STACK DALYS BY TIME FRAME...10/20/30 years... + + +# taking the numbers of DALYS by age-group, weighting them by the proportion of the pop in that age-group and +# summing to get a weighted (and more representative) total number of DALYS. This can be done by condition or overall +# if you like. 
def age_standardize_dalys(dalys_df):
    """Weight DALYs by the age-group population proportions of draw 0.

    `dalys_df` has (draw, run) columns. If its index has three levels, the 'label'
    level is dropped before aligning with the module-level `proportion_df`.
    For every draw whose (draw, run) columns all exist in `dalys_df`, the values are
    multiplied element-wise by draw 0's proportions for the matching runs; other
    draws are returned unchanged. A new DataFrame is returned.
    """
    # Align the population proportions with the rows of the DALY table.
    if len(dalys_df.index.levels) == 3:
        row_index = dalys_df.index.droplevel('label')
    else:
        row_index = dalys_df.index
    weights = proportion_df.reindex(row_index)

    # Draw 0 supplies the reference age-structure used for every draw.
    reference_columns = weights.columns[weights.columns.get_level_values(0) == 0]
    reference_values = weights.loc[:, reference_columns].values

    weighted = dalys_df.copy()
    for draw in sorted(set(dalys_df.columns.get_level_values(0))):
        # Same runs as the reference columns, but under this draw.
        draw_columns = [(draw, reference[1]) for reference in reference_columns]
        if not all(column in dalys_df.columns for column in draw_columns):
            continue
        weighted.loc[:, draw_columns] = dalys_df.loc[:, draw_columns].values * reference_values

    return weighted
Status Quo for Total DALYs by Scenario', False, + g_path) + +# Weighted DALYs +barcharts(diff_weighted_dalys_sum, 'Population Weighted DALYs Averted', + 'Difference from Status Quo for Population Weighted DALYs by Scenario', False, g_path) + +barcharts(total_weighted_dalys_summ, 'Total Population Weighted DALYs', + 'Total Population Weighted DALYs by Scenario', False, g_path) + +# Weighted and Non-weighted DALYs by cause +for k in scen_draws: + if k == 0: + pass + else: + grouped_bar_chart(total_dalys_by_year_summ, k, + f'DALYs by Cause: Status Quo vs {scen_draws[k]}', 'DALYs', dalys_folder) + grouped_bar_chart(dalys_by_cause_weighted_summ, k, + f'Weighted DALYs by Cause: Status Quo vs {scen_draws[k]}', + 'Weighted DALYs', dalys_folder) + + barcharts(diff_dalys_by_cause_weighted[k], + 'Weighted DALYs Averted', + f'P.diff Weighted DALYs by cause compared to Status Quo: {scen_draws[k]}', + True, dalys_folder) + + barcharts(diff_dalys_by_cause[k], + 'DALYs Averted', + f'P.diff DALYs by cause compared to Status Quo: {scen_draws[k]}', + True, dalys_folder) + +# ============================================= HCW TIME/APPOINTMENTS ================================================= +# I think presenting numbers of appointments by the appointment type may be neater (U5 OPD, O5 OPD etc), could then +# behind the scenes breakdown into XX% were for HIV services, CMD services etc. Again, only if needed. 
+ +counts_by_treatment_id_and_appt_type = bin_hsi_event_details( + results_folder, + lambda event_details, count: sum( + [ + Counter({ + ( + event_details["treatment_id"], + appt_type + ): + count * appt_number + }) + for appt_type, appt_number in event_details["appt_footprint"] + ], + Counter() + ), + *TARGET_PERIOD, + False + ) +apt_data = compute_service_statistics(counts_by_treatment_id_and_appt_type) +apt_type_summ = apt_data[0] +diff_apt_type_sum = apt_data[1] + +labels = apt_type_summ.index +sq_data = [[apt_type_summ.at[appt, 0]['median'] for appt in labels], + [apt_type_summ.at[appt, 0]['lower'] for appt in labels], + [apt_type_summ.at[appt, 0]['upper'] for appt in labels] + ] +sq_yerr_lower = [med - low for med, low in zip(sq_data[0], sq_data[1])] +sq_yerr_upper = [up - med for med, up in zip(sq_data[0], sq_data[2])] + +for k in scen_draws: + median = [apt_type_summ.at[appt, k]['median'] for appt in labels] + lower_errors = [apt_type_summ.at[appt, k]['lower'] for appt in labels] + upper_errors = [apt_type_summ.at[appt, k]['upper'] for appt in labels] + + yerr_lower = [med - low for med, low in zip(median, lower_errors)] + yerr_upper = [up - med for med, up in zip(median, upper_errors)] + + x = np.arange(len(labels)) + width = 0.35 + + fig, ax = plt.subplots(figsize=(10, 6)) + + # Plot bars with asymmetric error bars + ax.bar(x - width / 2, sq_data[0], width, + yerr=[sq_yerr_lower, sq_yerr_upper], + capsize=5, label='Status Quo', alpha=0.8) + + ax.bar(x + width / 2, median, width, + yerr=[yerr_lower, yerr_upper], + capsize=5, label=scen_draws[k], alpha=0.8) + + ax.set_title(f'Total Appointment Types: Status Quo vs {scen_draws[k]}') + ax.set_ylabel('Number of Appointments') + ax.set_xticks(x) + ax.set_xticklabels(labels, rotation=45, ha='right') + ax.legend() + ax.grid(axis='y', linestyle='--', alpha=0.4) + plt.tight_layout() + plt.savefig(f'{hsi_folder}/{scen_draws[k]}_appt_types.png', bbox_inches='tight') + plt.show() + + # do percentaga diff from SQ + 
def get_hcw_time_counts(descriptor):
    """Bin HSI event details into time totals keyed by (officer_category, group).

    For every (appt_type, appt_number) in an event's `appt_footprint` and every officer
    category, the contribution is `count * appt_number * minutes`, where `minutes` is
    looked up in `appt_type_facility_level_officer_category_to_appt_time` by
    (appt_type, facility_level, officer_category) and defaults to 0 when absent.

    `descriptor` selects the second element of the key:
      * 'treatment_id' -> the part of the event's treatment_id before the first "_";
      * anything else  -> the appointment type of the FIRST entry of the footprint.

    Returns whatever `bin_hsi_event_details` returns for `results_folder` over `TARGET_PERIOD`.
    """

    times_by_officer_category_run = bin_hsi_event_details(
        results_folder,
        lambda event_details, count: sum(
            [
                Counter({
                    (
                        officer_category,
                        # NOTE(review): the non-treatment_id branch labels every entry of the footprint
                        # with the first appointment type ([0][0]) rather than the current `appt_type`
                        # -- confirm this is intended.
                        (event_details["treatment_id"].split("_")[0] if descriptor=='treatment_id' else
                         event_details['appt_footprint'][0][0])
                    ):
                    count
                    * appt_number
                    * appt_type_facility_level_officer_category_to_appt_time.get(
                        (
                            appt_type,
                            event_details["facility_level"],
                            officer_category
                        ),
                        0
                    )
                    for officer_category in officer_categories
                })
                for appt_type, appt_number in event_details["appt_footprint"]
            ],
            Counter()  # start value so the per-appointment Counters are added together
        ),
        *TARGET_PERIOD,
        False  # positional flag of bin_hsi_event_details; meaning not visible here -- TODO confirm
    )

    return times_by_officer_category_run
def do_hcw_time_graphs(hcw1_data, hcw2_data, hcws_lab):
    """Plot two cadres side by side as grouped bars with asymmetric error bars.

    Each `hcw*_data` is a (central, lower_error, upper_error) triple and `hcws_lab`
    holds the two legend labels. Bar positions, tick labels and the output folder
    come from the module-level `x`, `width`, `labels` and `hsi_folder`. The figure is
    saved as '<first label>_<second label>_time_use.png' and then shown.
    """
    first_label, second_label = hcws_lab[0], hcws_lab[1]
    half_width = width / 2
    series = (
        (x - half_width, hcw1_data, first_label),
        (x + half_width, hcw2_data, second_label),
    )

    fig, ax = plt.subplots(figsize=(10, 6))

    # One bar group per cadre; yerr is [distance below, distance above].
    for positions, (central, err_below, err_above), legend_label in series:
        ax.bar(positions, central, width,
               yerr=[err_below, err_above],
               capsize=5, label=legend_label, alpha=0.8)

    ax.set_title('Difference in HCW time use across scenarios')
    ax.set_ylabel('Number of 8-hr shifts')
    ax.set_xticks(x)
    ax.set_xticklabels(labels, rotation=45, ha='right')
    ax.legend()
    ax.grid(axis='y', linestyle='--', alpha=0.4)
    plt.tight_layout()
    plt.savefig(f'{hsi_folder}/{first_label}_{second_label}_time_use.png', bbox_inches='tight')
    plt.show()
def find_difference_relative_to_comparison(_ser: pd.Series,
                                           comparison: Union[int, str],
                                           scaled: bool = False,
                                           drop_comparison: bool = True,
                                           ):
    """Find the difference in the values in a pd.Series with a multi-index, between the draws (level 0)
    within the runs (level 1), relative to where draw = `comparison`.

    The comparison is `X - COMPARISON`, computed separately for each run.

    :param _ser: Series indexed by (draw, run).
    :param comparison: label of the draw used as the comparator (draws may be ints or strings).
    :param scaled: if True, divide each difference by the comparator's value for that run.
    :param drop_comparison: if True, omit the comparator draw from the result.
    :return: Series indexed by (run, draw); missing values are dropped by `stack`.
    """
    by_run = _ser.unstack(level=0)  # rows: run, columns: draw
    baseline = by_run[comparison]

    difference = by_run.sub(baseline, axis=0)
    if scaled:
        difference = difference.div(baseline, axis=0)
    else:
        difference = difference / 1.0  # keep the result floating point, as for the scaled case

    if drop_comparison:
        difference = difference.drop(columns=[comparison])

    return difference.stack()
class ServiceIntegrationScenario(BaseScenario):
    """Scenario with a status-quo draw (draw 0) and 27 service-integration draws."""

    def __init__(self):
        super().__init__()
        self.seed = 537184
        self.start_date = Date(2010, 1, 1)
        self.end_date = Date(2055, 1, 1)
        self.pop_size = 150_000
        # Draw 0 plus the 27 configurations enumerated in `draw_parameters`
        self.number_of_draws = 28
        self.runs_per_draw = 10

    def log_configuration(self):
        """Return the logging setup: WARNING by default, INFO for the listed modules."""
        info_level_loggers = (
            "tlo.methods.contraception",
            "tlo.methods.cardio_metabolic_disorders",
            "tlo.methods.demography",
            "tlo.methods.depression",
            "tlo.methods.epilepsy",
            "tlo.methods.hiv",
            "tlo.methods.healthsystem.summary",
            "tlo.methods.healthburden",
            "tlo.methods.tb",
            "tlo.methods.labour",
            "tlo.methods.labour.detail",
            "tlo.methods.newborn_outcomes",
            "tlo.methods.care_of_women_during_pregnancy",
            "tlo.methods.pregnancy_supervisor",
            "tlo.methods.postnatal_supervisor",
            "tlo.methods.stunting",
        )
        custom_levels = {"*": logging.WARNING}
        custom_levels.update(dict.fromkeys(info_level_loggers, logging.INFO))
        return {
            'filename': 'service_integration_scenario',
            'directory': './outputs',
            "custom_levels": custom_levels,
        }

    def modules(self):
        """Return the full model's modules followed by the ServiceIntegration module."""
        registered = list(fullmodel())
        registered.append(service_integration.ServiceIntegration())
        return registered

    def draw_parameters(self, draw_number, rng):
        """Return the parameter overrides for ``draw_number``.

        Every draw sets ``integration_year`` to 2025. Draw 0 sets nothing else;
        draws 1-27 additionally set ``serv_integration`` to the matching name
        below. Any other draw number raises ``KeyError``.
        """
        integration_names = (
            'htn', 'htn_max',
            'dm', 'dm_max',
            'hiv', 'hiv_max',
            'tb', 'tb_max',
            'mal', 'mal_max',
            'fp_scr', 'fp_scr_max',
            'anc', 'anc_max',
            'pnc', 'pnc_max',
            'fp_pn', 'fp_pn_max',
            'epi',
            'chronic_care', 'chronic_care_max',
            'all_screening', 'all_screening_max',
            'all_mch', 'all_mch_max',
            'all_int', 'all_int_max',
        )
        # Draw numbers are 1-based positions in the tuple above
        name_by_draw = dict(enumerate(integration_names, start=1))

        integration_params = {'integration_year': 2025}
        if draw_number != 0:
            integration_params['serv_integration'] = name_by_draw[draw_number]
        return {'ServiceIntegration': integration_params}
def make_folder(path):
    """Create the directory ``path`` (including missing parents) and return ``path``.

    Calling this on a directory that already exists is a no-op. Creation is done
    in a single ``os.makedirs(..., exist_ok=True)`` call rather than an
    exists-check followed by a create, so there is no window in which another
    process can create the folder between the two steps.

    :param path: directory to create (string or path-like).
    :return: ``path``, unchanged, so the call can be used inline in an assignment.
    :raises FileExistsError: if ``path`` exists but is not a directory.
    """
    os.makedirs(path, exist_ok=True)
    return path
def age_standardize_dalys(dalys_df):
    """Age-standardize DALYs using the population of the status quo scenario (draw 0).

    DALYs are divided by the population of the same draw/run to give DALYs per
    person, then multiplied by the draw-0 population of the matching run, so that
    every draw is expressed against the same reference population.

    :param dalys_df: DALYs with (draw, run) columns; the index is used to reindex
        the population frame returned by ``get_full_pop()``.
    :return: frame shaped like ``dalys_df`` (minus the row for the year after the
        target period) with NaN and +/-inf replaced by 0.
    """
    # Align the population rows to the DALY rows.
    # NOTE(review): the population is built with a (year, age_group) index; when
    # dalys_df carries an extra 'label' level, confirm that reindex aligns as intended.
    pop_df = get_full_pop().reindex(dalys_df.index)

    # Reference population columns: draw 0 is the status quo
    base_level = 0
    base_columns = pop_df.columns[pop_df.columns.get_level_values(0) == base_level]

    # Drop the year immediately after the target period, if present
    year_after_period = TARGET_PERIOD[1].year + 1
    pop_df = pop_df.drop(year_after_period, errors='ignore')
    dalys_df = dalys_df.drop(year_after_period, errors='ignore')

    assert set(pop_df.index) == set(dalys_df.index), \
        'population and DALY frames must cover the same index entries'

    # DALYs per person for every draw/run; undefined ratios become 0
    dalys_age_standardized = (dalys_df / pop_df).fillna(0)

    for level in sorted(set(dalys_age_standardized.columns.get_level_values(0))):
        # Columns of this draw, in the same run order as the reference columns
        new_columns = [(level, col[1]) for col in base_columns]

        # Only rescale a draw when every run of the reference draw exists in it
        if all(col in dalys_age_standardized.columns for col in new_columns):
            dalys_age_standardized.loc[:, new_columns] = (
                dalys_age_standardized.loc[:, new_columns].values
                * pop_df.loc[:, base_columns].values
            )

    # Remove NaN first, then +/-inf (e.g. from a zero population)
    dalys_age_standardized = dalys_age_standardized.fillna(0)
    dalys_age_standardized = dalys_age_standardized.replace([np.inf, -np.inf], 0)

    return dalys_age_standardized
pd.DataFrame.from_dict(grouped_data) + + def safe_sum_lists(series): + # Filter out non-list values (like float/NaN) + valid_lists = [x for x in series if isinstance(x, list)] + if not valid_lists: + return np.nan # or return [0]*length if you want default + return [sum(items) for items in zip(*valid_lists)] + + def p_diff_from_col0(df): + # Calculates difference from the status quo + def diff_row(row): + base = row[0] + result = {} + for col in df.columns: + if col == 0: + result[col] = np.nan # or keep base if needed + else: + val = row[col] + if not isinstance(base, list) or not isinstance(val, list): + result[col] = np.nan + else: + result[col] = [((v - b) / b) * 100 if b != 0 else np.nan for v, b in zip(val, base)] + return pd.Series(result) + + return df.apply(diff_row, axis=1) + + # Run the functioion - apply to entire DataFrame grouped by level + appt_type = data_df.groupby(level=1).agg(safe_sum_lists) + diff_appt_type = p_diff_from_col0(appt_type) + + width_of_range = 0.95 + + def summarize_list(cell): + # Calculate mean/CIs (inline with other estimates) + arr = np.array(cell) + n = arr.size + std_deviation = arr.std() + std_error = std_deviation / np.sqrt(n) + z_value = st.norm.ppf(1 - (1. - width_of_range) / 2.) 
def reform_df_to_save(df, dp, labels=None):
    """Reformat the first row of a summary frame into one column per scenario.

    :param df: frame with (draw, stat) columns where stat includes 'central',
        'lower' and 'upper'; only the first row is used.
    :param dp: number of decimal places passed to ``round``.
    :param labels: optional ``{draw: column name}`` mapping for draws other than 0.
        Defaults to the module-level ``scen_draws``. Draw 0 is always named 'sq'.
    :return: one-row DataFrame whose cells are ``(central, lower, upper)`` tuples,
        with columns in ascending draw order.
    """
    # Use the draws actually present: `columns.levels[0]` still lists draws whose
    # columns were dropped, which would raise a KeyError on lookup.
    draws = sorted(df.columns.get_level_values(0).unique())

    row = {}
    for draw in draws:
        summary = tuple(
            round(df[(draw, stat)].values[0], dp)
            for stat in ('central', 'lower', 'upper')
        )
        if draw == 0:
            name = 'sq'
        else:
            # Resolve the default lazily so frames holding only draw 0 need no mapping
            name = (scen_draws if labels is None else labels)[draw]
        row[name] = [summary]

    # Build the frame once instead of inserting columns one at a time
    return pd.DataFrame(row)
spread_palette[i] for i, sc in enumerate(ordered_scenario_ids)} +color_map_pathways = {sc: color_map_full[sc] for sc in ordered_scenario_ids_pathways} + +# -------------------------------------------------- ANALYSIS --------------------------------------------------------- +# Define target period +TARGET_PERIOD = (Date(2025, 1, 1), Date(2054, 12, 31)) + +# ========================================= TOTAL DALYS AVERTED BY SCENARIO =========================================== + +# get DALY df +dalys_by_age_date_and_cause = extract_results( + results_folder, + module="tlo.methods.healthburden", + key="dalys_stacked_by_age_and_time", # <-- for DALYS stacked by age and time + custom_generate_series=get_dalys_by_period_sex_agegrp_label, + do_scaling=True + ) +dalys_by_age_date_and_cause.index = dalys_by_age_date_and_cause.index.set_names('age_group', level=1) + +# Get the total dalys by year and then the total dalys in the target period +dalys_by_age_date = dalys_by_age_date_and_cause.groupby(by=["year", "age_group"]).sum() +dalys_non_standardized_year = dalys_by_age_date.groupby(by='year').sum() +total_dalys_non_standardized = dalys_non_standardized_year.loc[ + TARGET_PERIOD[0].year:TARGET_PERIOD[-1].year].sum().to_frame().T + +# Get summary statistics for total DALYs (not age adjusted) +total_dalys_non_standardized_summ = compute_summary_statistics(total_dalys_non_standardized, use_standard_error=True) + +# calculate the total dalys averted when compared to the status quo +diff_total_dalys_non_standardized = get_diff(total_dalys_non_standardized, False) + +# calculate the total dalys averted when compared to the status quo by year +diff_dalys_by_year = get_diff(dalys_non_standardized_year, False) + +# Save the total DALYs by scenario and the difference from SQ +total_dalys_to_save = reform_df_to_save(total_dalys_non_standardized_summ, 0) +total_dalys_to_save.to_csv(f'{g_path}/total_dalys_int_period_summ.csv') +diff_total_dalys_to_save = 
reform_df_to_save(diff_total_dalys_non_standardized, 0) +diff_total_dalys_to_save.to_csv(f'{g_path}/total_dalys_int_period_summ_diff.csv') + +# Now repeat the above process but we age standardize the dalys +total_dalys_age_standardized = age_standardize_dalys(dalys_by_age_date) +dalys_age_standardized_year = total_dalys_age_standardized.groupby(by='year').sum() +total_dalys_age_standardized_yr_int = dalys_age_standardized_year.loc[ + TARGET_PERIOD[0].year:TARGET_PERIOD[-1].year].sum().to_frame().T +total_age_standardized_dalys_summ = compute_summary_statistics(total_dalys_age_standardized_yr_int, + use_standard_error=True) + +diff_age_standardized_dalys_sum = get_diff(total_dalys_age_standardized_yr_int, False) +diff_dalys_age_standardized_by_year = get_diff(dalys_age_standardized_year, False) + +total_age_standardized_dalys_to_save = reform_df_to_save(total_age_standardized_dalys_summ, 0) +total_age_standardized_dalys_to_save.to_csv(f'{g_path}/total_stnd_dalys_int_period_summ.csv') +diff_age_standardized_dalys_to_save = reform_df_to_save(diff_age_standardized_dalys_sum, 0) +diff_age_standardized_dalys_to_save.to_csv(f'{g_path}/total_stnd_dalys_int_period_summ_diff.csv') + +p_diff_age_standardized_dalys_sum = get_diff(total_dalys_age_standardized_yr_int, True) +p_diff_age_standardized_dalys_sum_to_save = reform_df_to_save(p_diff_age_standardized_dalys_sum, 2) +p_diff_age_standardized_dalys_sum_to_save.to_csv(f'{g_path}/total_stnd_dalys_int_period_summ_percent_diff.csv') + + +def figure_total_dalys_averted_by_scenario_with_uncertainty(data, age_standardized): + """Outputs an annotated bar graph showing the mean total DALYs averted by scenario along with 95% confidence + intervals""" + + """Outputs an annotated bar graph showing the mean total DALYs averted by scenario along with 95% confidence intervals""" + color_map = color_map_full + + # === Extract data in the new order + labels = [full_lab[scen_draws[sc]] for sc in ordered_scenario_ids] + mean = [float(data[sc, 
def figure_total_dalys_averted_by_scenario_by_time_period(data, age_standardized):
    """Plot DALYs averted per scenario over three consecutive time periods.

    One panel is drawn per entry of the module-level ``groupings``, with the
    scenarios of that entry overlaid. Panels are laid out two per row and at most
    eight per figure; each figure is saved under ``g_path`` and then shown.

    :param data: frame indexed by year with (draw, stat) columns, where stat is
        'central', 'lower' or 'upper'.
    :param age_standardized: if True, the output file names are prefixed with
        ``age_standardized_``.
    """
    # (tick label, first year, last year) - the year bounds are inclusive .loc slices
    periods = [
        ('2025–34', 2025, 2034),
        ('2035–44', 2035, 2044),
        ('2045–55', 2045, 2055),
    ]
    period_labels = [label for label, _, _ in periods]

    # Column sums per period, indexed by (draw, stat).
    # NOTE(review): the lower/upper bounds are summed over years in the same way as
    # the central value - confirm this is the intended interval for a period total.
    period_totals = [data.loc[first:last].sum() for _, first, last in periods]

    def values_for(draw, stat):
        """Return the per-period values of ``stat`` for ``draw`` as plain floats."""
        return [float(totals[(draw, stat)]) for totals in period_totals]

    data_mean, data_err_lower, data_err_upper = {}, {}, {}
    for draw in scen_draws:
        central = values_for(draw, 'central')
        lower = values_for(draw, 'lower')
        upper = values_for(draw, 'upper')
        data_mean[draw] = central
        # Error bars are distances from the central value
        data_err_lower[draw] = [c - lo for c, lo in zip(central, lower)]
        data_err_upper[draw] = [up - c for c, up in zip(central, upper)]

    color_map = color_map_full

    # Split the panels into figures of at most `groups_per_figure` panels each
    groups_per_figure = 8
    group_batches = [
        groupings[start:start + groups_per_figure]
        for start in range(0, len(groupings), groups_per_figure)
    ]
    n_cols = 2

    for fig_idx, group_set in enumerate(group_batches):
        n_rows = -(-len(group_set) // n_cols)  # ceiling division

        fig, axes = plt.subplots(n_rows, n_cols, figsize=(n_cols * 5, n_rows * 3),
                                 sharey=True, squeeze=False)
        axes = axes.flatten()

        for i, group in enumerate(group_set):
            ax = axes[i]

            for scenario in group:
                ax.errorbar(
                    period_labels,
                    data_mean[scenario],
                    yerr=[data_err_lower[scenario], data_err_upper[scenario]],
                    fmt='-o',
                    color=color_map[scenario],
                    capsize=3,
                    linewidth=1.5,
                    markersize=5,
                    label=full_lab[scen_draws[scenario]]
                )

            ax.axhline(0, color='gray', linewidth=0.5, linestyle='--')
            title_labels = " vs ".join([full_lab[scen_draws[sc]] for sc in group])
            ax.set_title(title_labels, fontsize=9)

            # Only the bottom row of the grid keeps its period tick labels
            is_bottom_row = (i // n_cols) == n_rows - 1
            if is_bottom_row:
                ax.set_xticklabels(period_labels)
            else:
                ax.set_xticklabels([])

            ax.legend(fontsize=8)

        # Remove unused axes
        for j in range(len(group_set), len(axes)):
            fig.delaxes(axes[j])

        fig.tight_layout(rect=[0.05, 0, 1, 0.95])
        # NOTE(review): the label says "millions" but the plotted values are not
        # rescaled in this function - confirm the units of `data`.
        fig.text(0.04, 0.5, 'DALYs averted (millions)', va='center', rotation='vertical', fontsize=12)

        # Save with figure index
        filename = f"{g_path}/dalys_averted_scenario_groups_fig{fig_idx + 1}.png"
        if age_standardized:
            filename = f"{g_path}/age_standardized_dalys_averted_scenario_groups_fig{fig_idx + 1}.png"

        plt.savefig(filename, bbox_inches='tight')
        plt.show()
# Repeat this process with age-standardized DALYs
dalys_by_cause_age_year_standardize = age_standardize_dalys(dalys_by_age_date_and_cause)
d_by_cause_int_period = dalys_by_cause_age_year_standardize.loc[TARGET_PERIOD[0].year:TARGET_PERIOD[-1].year]
dalys_by_cause_age_standardize = d_by_cause_int_period.groupby(level='label').sum()
total_dalys_by_cause_age_standardize_summ = compute_summary_statistics(dalys_by_cause_age_standardize,
                                                                       use_standard_error=True)
diff_dalys_by_cause_age_standardize = get_diff(dalys_by_cause_age_standardize, False)

# Save the age-standardized results. These two files previously received the
# non-standardized frames (total_dalys_by_year_cause_summ / diff_dalys_by_cause),
# duplicating the non-standardized CSVs under the "stnd" file names.
total_dalys_by_cause_age_standardize_summ.to_csv(f'{g_path}/cause_specific_stnd_dalys_int_period_summ.csv')
diff_dalys_by_cause_age_standardize.to_csv(f'{g_path}/cause_specific_stnd_dalys_int_period_summ_diff.csv')

# Difference from the status quo for each year of the target period
diff_dalys_by_cause_year_age_standardize = get_diff(d_by_cause_int_period, False)
diff_dalys_by_cause_year_age_standardize.to_csv(f'{g_path}/cause_specific_stnd_dalys_by_year_int_period_summ_diff.csv')
data.columns.get_level_values(1) == 'central'] + lower_df = data.loc[:, data.columns.get_level_values(1) == 'lower'] + upper_df = data.loc[:, data.columns.get_level_values(1) == 'upper'] + + # Clean up column names + scenario_labels_in_order = [full_lab[scen_draws[sc]] for sc in ordered_scenario_ids] + + # Rename columns + central_df.columns = [full_lab[scen_draws[col[0]]] for col in central_df.columns] + lower_df.columns = [full_lab[scen_draws[col[0]]] for col in lower_df.columns] + upper_df.columns = [full_lab[scen_draws[col[0]]] for col in upper_df.columns] + + # Reindex to enforce consistent order + central_df = central_df[scenario_labels_in_order] + lower_df = lower_df[scenario_labels_in_order] + upper_df = upper_df[scenario_labels_in_order] + + # STEP 3: Masks + uncertainty_includes_zero = (lower_df <= 0) & (upper_df >= 0) + significant = ~uncertainty_includes_zero + positive = central_df > 0 + significant_positive = significant & positive + + # === Format function + def format_daly(value): + if pd.isna(value): + return "" + abs_val = abs(value) + if abs_val < 1_000: + return f"{int(value)}" + elif abs_val < 1_000_000: + return f"{value / 1_000:.0f}K" + else: + return f"{value / 1_000_000:.1f}M" + + # === Create annotation matrix + annot = central_df.copy().astype(str) + for i in range(central_df.shape[0]): + for j in range(central_df.shape[1]): + if significant_positive.iloc[i, j]: + val = central_df.iloc[i, j] + annot.iloc[i, j] = format_daly(val) + else: + annot.iloc[i, j] = "" + + # === Plotting + vmin = central_df.min().min() + vmax = central_df.max().max() + abs_max = max(abs(vmin), abs(vmax)) + + from matplotlib.colors import TwoSlopeNorm + norm = TwoSlopeNorm(vmin=-abs_max, vcenter=0, vmax=abs_max) + + plt.figure(figsize=(16, 10)) + sns.heatmap( + central_df, + cmap="RdBu", # inverted so blue = higher values + norm=norm, + annot=annot, + fmt="", + linewidths=0.5, + cbar_kws={'label': 'DALY Difference from Status Quo'} + ) + + plt.ylabel("Condition") + 
def figure_life_expectancy_difference(data):
    """Bar chart of the life-expectancy difference per scenario.

    Reads the first row of ``data`` ((draw, stat) columns), orders the scenarios
    by ``ordered_scenario_ids`` and draws one bar per scenario. A bar whose
    interval spans zero is drawn with zero height and no error bar; the others
    are annotated with their central value. The figure is saved under ``g_path``.
    """

    def first_row_of(stat):
        # Values of one statistic for every scenario, in the plotting order
        return data.xs(stat, level=1, axis=1).iloc[0].loc[ordered_scenario_ids]

    central = first_row_of('central')
    lower = first_row_of('lower')
    upper = first_row_of('upper')

    # An interval lying entirely on one side of zero counts as significant
    significant_mask = (lower > 0) | (upper < 0)

    # Blank out bars and error bars whose interval includes zero
    central_plot = central.where(significant_mask, 0)
    yerr = [
        (central - lower).where(significant_mask, 0).values,
        (upper - central).where(significant_mask, 0).values,
    ]

    scenario_ids = central.index.tolist()
    colors = [color_map_full[sid] for sid in scenario_ids]

    x = np.arange(len(scenario_ids))
    fig, ax = plt.subplots(figsize=(12, 5))
    ax.bar(
        x,
        central_plot.values,
        yerr=yerr,
        capsize=5,
        color=colors,
        edgecolor='black'
    )

    # Label significant bars just beyond the end of their error bar
    offset = 0.02 * max(abs(upper.max()), 1e-6)
    rows = zip(x, significant_mask, central.values, upper.values, lower.values)
    for xpos, is_sig, mean_val, hi, lo in rows:
        if not is_sig:
            continue
        if mean_val >= 0:
            y, valign = hi + offset, 'bottom'
        else:
            y, valign = lo - offset, 'top'
        ax.text(xpos, y, f"{mean_val:.2f}", ha='center', va=valign,
                fontsize=8, fontweight='bold')

    # Dotted separator after the last plotted scenario of each group but the final one
    position_of = {sid: pos for pos, sid in enumerate(scenario_ids)}
    for group in groupings[:-1]:
        plotted = [sid for sid in group if sid in position_of]
        if not plotted:
            continue
        ax.axvline(position_of[plotted[-1]] + 0.5, color='black', linestyle=':', linewidth=0.5)

    ax.axhline(0, color='gray', linestyle='--', linewidth=1)
    ax.set_xticks(x)
    tick_labels = [full_lab[scen_draws[sid]] for sid in scenario_ids]
    ax.set_xticklabels(tick_labels, rotation=90, fontsize=7)
    ax.set_ylabel('Difference in Life Expectancy (2054, years)')

    plt.tight_layout()
    plt.savefig(f'{g_path}/life_expectancy_diff_2054.png', bbox_inches='tight')
    plt.show()
appointment_time_table.Time_Taken_Mins.to_dict() +) + +officer_categories = appointment_time_table.index.levels[ + appointment_time_table.index.names.index("Officer_Category") +].to_list() + +hcw_time_by_treatment_id = bin_hsi_event_details( + results_folder, + lambda event_details, count: sum( + [ + Counter({ + ( + officer_category, + event_details["treatment_id"] + ): + count + * appt_number + * appt_type_facility_level_officer_category_to_appt_time.get( + ( + appt_type, + event_details["facility_level"], + officer_category + ), + 0 + ) + for officer_category in officer_categories + }) + for appt_type, appt_number in event_details["appt_footprint"] + ], + Counter() + ), + *TARGET_PERIOD, + True + ) + +# Calculate average change in population size per year in the SQ scenario +pop = get_full_pop() +pop = pop.groupby(by='year').sum() +relative_increase_df = pop.pct_change() +avg_rel_increase = relative_increase_df.loc[2025:2054].mean(axis=0).to_frame().T +avg_rel_increase_summ = compute_summary_statistics(avg_rel_increase, use_standard_error=True) + +# Read in HCW capabilities data and sum across facility levels etc. 
def get_hcw_time_use_ratios(df, n_years=30):
    """Compute HCW time used relative to time available, and its fold change from draw 0.

    :param df: HCW time with (draw, run) columns and an index level named 'first',
        which is the level used to group by cadre.
    :param n_years: number of years the totals in ``df`` cover; totals are divided
        by this to give annual time. Defaults to 30, the value previously hard-coded.
    :return: list of three summary frames: the fold change of the overall ratio
        versus draw 0, the fold change of the per-cadre ratio versus draw 0, and
        the per-cadre ratio itself.
    """
    def get_hcw_time_fold_diff(df_for_diff):
        """Divide every (draw, run) column by the draw-0 column of the same run."""
        diff = df_for_diff.copy()
        for col in df_for_diff.columns:
            if col[0] != 0:
                base_col = (0, col[1])
                diff[col] = df_for_diff[col] / df_for_diff[base_col]
            else:
                diff[col] = 1  # the status quo compared with itself
        return diff

    # Total HCW time over the period, converted to an annual figure
    total_hcw_time = df.sum(axis=0).to_frame().T
    yearly_total_hcw_time = total_hcw_time / n_years

    # Annual time used divided by the total annual capabilities
    total_hcw_time_ratio = yearly_total_hcw_time.div(avg_annual_hcw_capabilities_sum.iloc[:, 0], axis=0)
    total_hcw_time_ratio.columns.names = ['draw', 'run']

    # Fold change from the status quo scenario
    fold_change = get_hcw_time_fold_diff(total_hcw_time_ratio)
    total_hcw_time_ratio_fc = compute_summary_statistics(fold_change, use_standard_error=True)

    # Same calculation per cadre (index level 'first')
    hcw_time_by_cadre = df.groupby(level='first').sum()
    annual_hcw_time_by_cadre = hcw_time_by_cadre / n_years

    hcw_time_ratio_by_cadre = annual_hcw_time_by_cadre.div(avg_annual_hcw_capabilities.iloc[:, 0], axis=0)
    hcw_time_ratio_by_cadre.columns.names = ['draw', 'run']

    fold_change_cadre = get_hcw_time_fold_diff(hcw_time_ratio_by_cadre)
    hcw_time_ratio_by_cadre_fc = compute_summary_statistics(fold_change_cadre, use_standard_error=True)

    # The per-cadre ratios themselves, for all scenarios (not relative to draw 0)
    hcw_time_ratio_by_cadre_summ = compute_summary_statistics(hcw_time_ratio_by_cadre, use_standard_error=True)

    return [total_hcw_time_ratio_fc, hcw_time_ratio_by_cadre_fc, hcw_time_ratio_by_cadre_summ]
have CI crossing 1 + crosses_one = [(l <= 1 + tol) and (u >= 1 - tol) for l, u in zip(ci_low_all, ci_up_all)] + + # --- Fixed x positions for ALL scenarios + x_all = np.arange(len(scenario_ids)) + + # --- Error distances for all bars + yerr_lower = [max(0.0, m - l) for m, l in zip(mean_all, ci_low_all)] + yerr_upper = [max(0.0, u - m) for m, u in zip(mean_all, ci_up_all)] + + # --- Color mapping (consistent across full set) + scenarios_except_0 = [s for s in scenario_ids if s != 0] + color_map_full = {s: spread_palette[i] for i, s in enumerate(scenarios_except_0)} + if 0 in scenario_ids: + color_map_full[0] = palette[-1] + colors_all = [color_map_full[s] for s in scenario_ids] + + # --- Plot + fig, ax = plt.subplots() + + bars = ax.bar(x_all, mean_all, yerr=[yerr_lower, yerr_upper], + capsize=5, alpha=0.7, ecolor='black', color=colors_all) + + # --- Annotations + max_up_err = max(yerr_upper) if any(yerr_upper) else (0.05 * max(abs(m) for m in mean_all)) + offset = max_up_err * 0.5 if max_up_err > 0 else 0.05 + for bar, value, cross in zip(bars, mean_all, crosses_one): + h = bar.get_height() + # Format: 3 dp for <1, 2 dp for >=1 + txt = f"{value:.3f}" if value < 1 else f"{value:.2f}" + if cross: + txt += "*" # add asterisk if CI includes 1 + if h >= 0: + ax.text(bar.get_x() + bar.get_width()/2, h + offset, txt, + ha='center', va='bottom', fontsize=7) + else: + ax.text(bar.get_x() + bar.get_width()/2, h - offset, txt, + ha='center', va='top', fontsize=7) + + # --- X ticks + ax.set_xticks(x_all) + ax.set_xticklabels(labels_all, fontsize=7, rotation=90) + + # --- Dotted lines between defined groups, using full set positions + index_map_full = {s: i for i, s in enumerate(scenario_ids)} + for g in groupings[:-1]: + present = [s for s in g if s in index_map_full] + if present: + ix = index_map_full[present[-1]] + ax.axvline(ix + 0.5, color='black', linestyle=':', linewidth=0.5) + + # --- Reference line and formatting + ax.axhline(y=1, color='grey', linestyle='--', 
def figure_all_cadre_bar_charts_color_coded(df, tol=1e-9):
    """Plot one stacked panel per cadre of the HCW ratio relative to Status Quo.

    ``df`` has one row per cadre and (scenario, statistic) columns with the
    statistics 'central', 'lower' and 'upper'. A bar is drawn for a scenario
    only when its interval lies entirely below ``1 - tol`` or entirely above
    ``1 + tol``; other scenarios keep their tick position but get no bar.
    Scenario order, colours, group dividers, labels and the output folder come
    from module-level names. The figure is saved to
    ``hcw_time_ratios_by_cadre.png`` and shown.
    """
    central_by_cadre = df.xs('central', axis=1, level=1)
    lower_by_cadre = df.xs('lower', axis=1, level=1)
    upper_by_cadre = df.xs('upper', axis=1, level=1)

    scenario_order = ordered_scenario_ids
    colours = color_map_full
    x_position = {scenario: pos for pos, scenario in enumerate(scenario_order)}

    cadre_names = central_by_cadre.index
    n_cadres = len(cadre_names)
    fig, axes = plt.subplots(n_cadres, 1, figsize=(14, 3.5 * n_cadres), sharex=True)
    if n_cadres == 1:
        # plt.subplots returns a bare Axes for a single panel; wrap it so it can be indexed
        axes = [axes]

    status_quo_line = None

    for row, cadre in enumerate(cadre_names):
        point = central_by_cadre.loc[cadre]
        bound_a = lower_by_cadre.loc[cadre]
        bound_b = upper_by_cadre.loc[cadre]

        # Order the bounds, then keep scenarios whose interval does not contain 1
        interval_low = np.minimum(bound_a, bound_b)
        interval_high = np.maximum(bound_a, bound_b)
        excludes_one = (interval_high < 1 - tol) | (interval_low > 1 + tol)

        # NaN marks scenarios that are not drawn
        shown = pd.Series(np.nan, index=scenario_order)
        shown[excludes_one] = point[excludes_one]

        panel = axes[row]
        drawn = [(sc, shown[sc]) for sc in scenario_order if not np.isnan(shown[sc])]

        for sc, height in drawn:
            panel.bar(x_position[sc], height, color=colours[sc])

        # Value labels just beyond the end of each drawn bar
        if excludes_one.any():
            pad = 0.02 * max(abs(np.nanmax(shown)), abs(np.nanmin(shown)), 1e-6)
            for sc, height in drawn:
                if height >= 0:
                    label_y, anchor = height + pad, 'bottom'
                else:
                    label_y, anchor = height - pad, 'top'
                label = f"{height:.3f}" if abs(height) < 1 else f"{height:.2f}"
                panel.text(
                    x_position[sc],
                    label_y,
                    label,
                    ha='center',
                    va=anchor,
                    fontsize=9,
                    fontweight='bold'
                )

        # Dotted divider after the last present scenario of every group but the final one
        for group in groupings[:-1]:
            members = [sc for sc in group if sc in x_position]
            if members:
                panel.axvline(x_position[members[-1]] + 0.5, color='black', linestyle=':', linewidth=0.5)

        # Reference line at 1; only the first panel's line carries the legend label
        if row == 0:
            status_quo_line = panel.axhline(
                y=1, color='grey', linestyle='--', linewidth=1, label='Status Quo = 1'
            )
        else:
            panel.axhline(y=1, color='grey', linestyle='--', linewidth=1)

        panel.set_title(f"{cadre}")
        panel.set_ylabel("")

        # Scenario labels only under the bottom panel
        if row < n_cadres - 1:
            panel.set_xticks([])
        else:
            panel.set_xticks([x_position[sc] for sc in scenario_order])
            panel.set_xticklabels(
                [full_lab[scen_draws[sc]] for sc in scenario_order], rotation=90, fontsize=8
            )

    # Legend sits above the title of the top panel
    if status_quo_line is not None:
        axes[0].legend(
            handles=[status_quo_line],
            loc='lower center',
            bbox_to_anchor=(0.5, 1.05),
            frameon=False,
            fontsize=9
        )

    plt.tight_layout(rect=[0.05, 0.02, 1, 0.97])
    fig.text(0.01, 0.5, 'Relative change in HCW demand – HCW capabilities compared to SQ',
             va='center', rotation='vertical', fontsize=12)
    plt.savefig(f'{g_path}/hcw_time_ratios_by_cadre.png', bbox_inches='tight')
    plt.show()
def multiply_subset(df, col_level_0_vals, care_types_to_update, multiplier):
    """
    Scale a rectangular subset of a multi-index DataFrame in place.

    Parameters:
    df: pandas DataFrame with multi-level columns and multi-level index. Modified in place.
    col_level_0_vals: collection. Columns are selected when their first level is in this collection.
    care_types_to_update: collection. Rows are selected when their second index level is in this collection.
    multiplier: float. Factor applied to every selected cell.

    Returns None; the result is visible through ``df``.
    """
    # Boolean selectors over the column axis (level 0) and the row axis (level 1)
    column_selected = df.columns.get_level_values(0).isin(col_level_0_vals)
    row_selected = df.index.get_level_values(1).isin(care_types_to_update)

    # Write the scaled values back into the same cells
    df.loc[row_selected, column_selected] = df.loc[row_selected, column_selected] * multiplier
'Tb_Test_FollowUp', + 'Depression_TalkingTherapy', + 'Depression_Treatment', + 'Epilepsy_Treatment_Start', + 'Epilepsy_Treatment_Followup' + ]), + ([22, 23], [ + 'CardioMetabolicDisorders_Prevention_CommunityTestingForHypertension', + 'CardioMetabolicDisorders_Investigation_hypertension', + 'CardioMetabolicDisorders_Investigation_hypertension_and_diabetes', + 'CardioMetabolicDisorders_Investigation_diabetes', + 'Contraception_Routine', + 'Hiv_Test', + 'Tb_Test_Screening', + 'Tb_Test_Clinical', + 'Tb_Test_Culture', + 'Tb_Test_Xray' + ]), + ([24, 25], [ + 'AntenatalCare_Outpatient', + 'AntenatalCare_FollowUp', + 'PostnatalCare_Neonatal', + 'PostnatalCare_Maternal', + 'Contraception_Routine_Postnatal', + 'Epi_Childhood_Bcg', + 'Epi_Childhood_Opv', + 'Epi_Childhood_DtpHibHep', + 'Epi_Childhood_Rota', + 'Epi_Childhood_Pneumo', + 'Epi_Childhood_MeaslesRubella', + 'Epi_Pregnancy_Td' + ]), + ([26, 27], [ + 'CardioMetabolicDisorders_Prevention_CommunityTestingForHypertension', + 'CardioMetabolicDisorders_Investigation_hypertension', + 'CardioMetabolicDisorders_Investigation_hypertension_and_diabetes', + 'CardioMetabolicDisorders_Prevention_WeightLoss_hypertension', + 'CardioMetabolicDisorders_Treatment_hypertension', + 'CardioMetabolicDisorders_Investigation_diabetes', + 'CardioMetabolicDisorders_Prevention_WeightLoss_diabetes', + 'CardioMetabolicDisorders_Treatment_diabetes', + 'Contraception_Routine', + 'Undernutrition_Feeding', + 'Hiv_Test', + 'Hiv_Treatment', + 'Tb_Test_Screening', + 'Tb_Test_Clinical', + 'Tb_Test_Culture', + 'Tb_Test_Xray', + 'Tb_Treatment', + 'Tb_Test_FollowUp', + 'AntenatalCare_Outpatient', + 'AntenatalCare_FollowUp', + 'PostnatalCare_Neonatal', + 'PostnatalCare_Maternal', + 'Contraception_Routine_Postnatal', + 'Epi_Childhood_Bcg', + 'Epi_Childhood_Opv', + 'Epi_Childhood_DtpHibHep', + 'Epi_Childhood_Rota', + 'Epi_Childhood_Pneumo', + 'Epi_Childhood_MeaslesRubella', + 'Epi_Pregnancy_Td', + 'Depression_TalkingTherapy', + 'Depression_Treatment', + 
'Epilepsy_Treatment_Start', + 'Epilepsy_Treatment_Followup' + ]) +] + +for cols, care_types in scaling_groups: + multiply_subset( + df=hcw_time_by_treatment_id_adj_25, + col_level_0_vals=cols, + care_types_to_update=care_types, + multiplier=0.75 + ) + +for cols, care_types in scaling_groups: + multiply_subset( + df=hcw_time_by_treatment_id_adj_50, + col_level_0_vals=cols, + care_types_to_update=care_types, + multiplier=0.5 + ) + +for cols, care_types in scaling_groups: + multiply_subset( + df=hcw_time_by_treatment_id_adj_75, + col_level_0_vals=cols, + care_types_to_update=care_types, + multiplier=0.25 + ) + +# Get adjusted estimates +total_hcw_time_ratio_diff_adj_25 = get_hcw_time_use_ratios(hcw_time_by_treatment_id_adj_25) +total_hcw_time_ratio_diff_adj_50 = get_hcw_time_use_ratios(hcw_time_by_treatment_id_adj_50) +total_hcw_time_ratio_diff_adj_75 = get_hcw_time_use_ratios(hcw_time_by_treatment_id_adj_75) + + +def figure_adjusted_hcw_ratios(un_adj, adj_25, adj_50, adj_75): + # === Labels for x-axis + labels = [full_lab[scen_draws[sc]] for sc in ordered_scenario_ids_pathways] + + # === Helper to extract values and error bars + def get_mean_and_errors(data): + mean = [float(data.loc[:, (sc, 'central')].iloc[0]) for sc in ordered_scenario_ids_pathways] + lower = [float(data.loc[:, (sc, 'lower')].iloc[0]) for sc in ordered_scenario_ids_pathways] + upper = [float(data.loc[:, (sc, 'upper')].iloc[0]) for sc in ordered_scenario_ids_pathways] + yerr_lower = [m - l for m, l in zip(mean, lower)] + yerr_upper = [u - m for u, m in zip(upper, mean)] + return np.array(mean), np.array([yerr_lower, yerr_upper]) + + # === Extract data + datasets = { + 'Unadjusted': get_mean_and_errors(un_adj), + '25% adjusted': get_mean_and_errors(adj_25), + '50% adjusted': get_mean_and_errors(adj_50), + '75% adjusted': get_mean_and_errors(adj_75), + } + + # === Plot settings + bar_width = 0.2 + x = np.arange(len(ordered_scenario_ids_pathways)) + fig, ax = plt.subplots(figsize=(12, 6)) + + # === 
Color per scenario + bar_colors = [color_map_pathways[sc] for sc in ordered_scenario_ids_pathways] + + # === Hatching pattern per dataset + hatches = ["", "//", "xx", ".."] + + # Store for hatch legend + hatch_patches = [] + + # === Plot bars + annotate + for i, (dlabel, (means, yerr)) in enumerate(datasets.items()): + offset = (i - 1.5) * bar_width + bars = ax.bar( + x + offset, means, width=bar_width, yerr=yerr, + label=dlabel, capsize=5, color=bar_colors, alpha=0.9, + hatch=hatches[i], edgecolor='black', linewidth=0.7 + ) + # Annotate each bar with 2 decimal places + for bar, val in zip(bars, means): + height = bar.get_height() + ax.annotate(f"{val:.2f}", + xy=(bar.get_x() + bar.get_width() / 2, height), + xytext=(0, 3), + textcoords="offset points", + ha='center', va='bottom', fontsize=8) + + # Create patch for hatch legend + patch = plt.Rectangle((0, 0), 1, 1, facecolor="white", hatch=hatches[i], + edgecolor="black", linewidth=0.7) + hatch_patches.append((patch, dlabel)) + + # === X-axis formatting + ax.set_xticks(x) + ax.set_xticklabels(labels, rotation=90, ha='right', fontsize=8) + + # === Dotted lines between defined groups + groupings = [ + [22, 23], + [24, 25], + [20, 21], + [26, 27] + ] + index_map = {s: i for i, s in enumerate(ordered_scenario_ids_pathways)} + for g in groupings[:-1]: + present = [s for s in g if s in index_map] + if present: + ix = index_map[present[-1]] + ax.axvline(ix + 0.5, color='black', linestyle=':', linewidth=0.5) + + ax.set_ylabel('Relative change in HCW demand – HCW capabilities compared to SQ') + + # === Hatch legend only + hatch_handles, hatch_labels = zip(*hatch_patches) + hatch_legend = fig.legend(hatch_handles, hatch_labels, title="Hypothetical Assumption on Improved HCW Efficiency", + loc="upper center", bbox_to_anchor=(0.5, 1.08), ncol=4) + + # --- Reference line and formatting + ax.axhline(y=1, color='grey', linestyle='--', linewidth=1, label='Status Quo = 1') + + # # Legend (baseline line only) + # 
def return_cost_adjusted_for_hcw_growth(cost_data, hcw_ratios, n_years=30):
    """Scale costs by the central HCW ratio of their subgroup and draw, then total them.

    Parameters:
    cost_data: DataFrame with 'cost_subgroup', 'draw', 'run' and 'cost' columns.
        It is NOT modified; the adjustment is applied to a copy, so calling this
        function repeatedly on the same frame never compounds the multiplier.
    hcw_ratios: DataFrame whose index holds subgroup names and whose columns are
        (draw, statistic); only the 'central' statistic is used.
    n_years: number of years the total is divided by to obtain the annual cost
        (default 30, the value previously hard-coded).

    Returns:
    (total_input_cost, total_input_cost_annual): two Series indexed by
    (draw, run), the summed adjusted cost and that sum divided by ``n_years``.
    """
    central_df = hcw_ratios.xs('central', axis=1, level=1)
    known_subgroups = central_df.index
    known_draws = central_df.columns

    def _multiplier(subgroup, draw):
        # Rows without a matching (subgroup, draw) ratio keep their cost unchanged
        if subgroup in known_subgroups and draw in known_draws:
            return central_df.loc[subgroup, draw]
        return 1.0

    # Work on a copy so the caller's cost table is left untouched
    adjusted = cost_data.copy()
    adjusted['cost'] = [
        cost * _multiplier(subgroup, draw)
        for cost, subgroup, draw in zip(
            adjusted['cost'], adjusted['cost_subgroup'], adjusted['draw']
        )
    ]

    total_input_cost = adjusted.groupby(['draw', 'run'])['cost'].sum()
    total_input_cost_annual = total_input_cost / n_years

    return total_input_cost, total_input_cost_annual
def figure_avg_difference_in_cost_from_status_quo_per_year(cost_data):
    """Bar chart of each scenario's incremental cost with error bars.

    ``cost_data`` is indexed by scenario id and has 'mean', 'lower' and 'upper'
    columns. Rows are reordered to the module-level ``ordered_scenario_ids``;
    colours, group dividers, tick labels and the output folder also come from
    module-level names. Each bar is annotated with its mean formatted as
    currency, and the figure is saved under ``g_path`` and shown.
    """
    plot_title = 'Incremental scenario cost relative to baseline during intervention period'
    bar_gap = 1.55
    label_pad = 0.02 * 1e9  # fixed distance between the error bar and the text label

    # Put scenarios in presentation order
    ordered = cost_data.loc[ordered_scenario_ids]
    scenario_ids = ordered.index
    means = ordered['mean'].values
    lowers = ordered['lower'].values
    uppers = ordered['upper'].values

    # Asymmetric error bars: distance below the mean, distance above the mean
    err = np.array([
        (ordered['mean'] - ordered['lower']).values,
        (ordered['upper'] - ordered['mean']).values,
    ])

    positions = [k * bar_gap for k in range(len(ordered))]
    position_of = dict(zip(scenario_ids, positions))

    fig, ax = plt.subplots(figsize=(10, 5))

    ax.bar(
        positions,
        means,
        yerr=err,
        ecolor='black',
        capsize=10,
        label=[str(s) for s in scenario_ids],
        color=[color_map_full[s] for s in scenario_ids],
    )

    def as_currency(amount):
        # Billions to one decimal place, otherwise whole millions
        if abs(amount) >= 1e9:
            return f"${amount / 1e9:.1f}B"
        return f"${amount / 1e6:.0f}M"

    # Label above the upper bound for non-negative means, below the lower bound otherwise
    for pos, centre, low, high in zip(positions, means, lowers, uppers):
        above = centre >= 0
        ax.text(
            pos,
            high + label_pad if above else low - label_pad,
            as_currency(centre),
            ha='center',
            va='bottom' if above else 'top',
            fontsize='x-small',
            rotation='horizontal'
        )

    # Dotted divider after the last present scenario of every group but the final one
    for group in groupings[:-1]:
        members = [s for s in group if s in position_of]
        if members:
            ax.axvline(position_of[members[-1]] + bar_gap / 2, color='black', linestyle=':', linewidth=0.5)

    # Axes, grid and labels
    ax.set_xticks(positions)
    ax.set_xticklabels([full_lab[scen_draws[s]] for s in scenario_ids], rotation=90, fontsize=7)

    ax.grid(axis='both', linestyle='--', linewidth=0.5, alpha=0.7)
    ax.axhline(0, color='gray', linewidth=0.5, linestyle='--')
    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)

    ax.set_ylabel('Difference in annual cost (USD) from SQ scenario', fontsize=7)
    ax.set_ylim(bottom=-0.25 * 1e9)

    fig.tight_layout(pad=2.0)
    plt.subplots_adjust(left=0.15, right=0.85)

    # File name is the plot title with spaces replaced by underscores and commas removed
    fig.savefig(Path(g_path) / plot_title.replace(' ', '_').replace(',', ''), bbox_inches='tight')
    plt.show()
sub['lower']).values.astype(float) + upper = (sub['upper'] - sub['mean']).values.astype(float) + yerr = np.vstack([lower, upper]) # shape (2, N) + return mean, yerr + + # === Datasets + datasets = { + 'Unadjusted': get_mean_and_errors(un_adj), + '25% adjusted': get_mean_and_errors(adj_25), + '50% adjusted': get_mean_and_errors(adj_50), + '75% adjusted': get_mean_and_errors(adj_75), + } + + # === Plot + fig, ax = plt.subplots(figsize=(12, 6)) + x = np.arange(len(ordered_scenario_ids_pathways)) + n_series = len(datasets) + width = 0.18 # bar width + offsets = (np.arange(n_series) - (n_series - 1) / 2) * (width + 0.02) + + # === Color per scenario (consistent with previous figure) + bar_colors = [color_map_pathways[sc] for sc in ordered_scenario_ids_pathways] + + # === Hatching pattern per dataset (consistent with previous figure) + hatches = ["", "//", "xx", ".."] + hatch_patches = [] # for legend + + # === Format for currency annotation (user spec) + def format_currency(val): + if abs(val) >= 1e9: + return f"${val / 1e9:.1f}B" + else: + return f"${val / 1e6:.0f}M" + + # === Bars + annotations + for i, (name, (means, yerr)) in enumerate(datasets.items()): + xpos = x + offsets[i] + bars = ax.bar( + xpos, + means, + width=width, + yerr=yerr, + capsize=6, + ecolor='black', + color=bar_colors, # scenario-consistent colors + alpha=0.9, + hatch=hatches[i], # dataset hatch + edgecolor='black', # outlines + linewidth=0.7 + ) + + # Currency annotations per bar + # Use the asymmetric errors to place the label just beyond the error bar + for j, (bar, mean_val) in enumerate(zip(bars, means)): + upper = mean_val + yerr[1, j] + lower = mean_val - yerr[0, j] + text = format_currency(mean_val) + + if mean_val >= 0: + annotation_y = upper + 0.02 * 1e9 # small absolute offset (20M) + valign = 'bottom' + else: + annotation_y = lower - 0.02 * 1e9 + valign = 'top' + + ax.text( + xpos[j], + annotation_y, + text, + ha='center', + va=valign, + fontsize='x-small', + rotation='horizontal' + ) 
def get_cost_per_daly_averted(incremental_cost):
    """Divide mean incremental cost by central DALYs averted for draws 20-27.

    ``incremental_cost`` is indexed by draw and has a 'mean' column. The DALY
    figures are read from the module-level ``diff_age_standardized_dalys_sum``
    (columns are (draw, statistic); the 'central' statistic is used).

    Returns a DataFrame with one column per draw in 20..27, carrying the row
    index of the DALY table.
    """
    pathway_draws = [20, 21, 22, 23, 24, 25, 26, 27]

    dalys_central = diff_age_standardized_dalys_sum[pathway_draws].xs('central', axis=1, level=1)

    # Turn the 'mean' column into a single row with one column per draw
    mean_cost_row = incremental_cost['mean'].to_frame().T[pathway_draws]

    # Give the cost row the DALY table's index so the division aligns row-for-row
    mean_cost_row.index = dalys_central.index

    return mean_cost_row / dalys_central
def figure_cost_per_daly_averted_group_bar_chart(cost_per_daly_averted_df):
    """Grouped bar chart of cost per DALY averted, one group per pathway scenario.

    Rows of ``cost_per_daly_averted_df`` are the series (one hatch pattern
    each); columns are scenario ids, reordered to the module-level
    ``ordered_scenario_ids_pathways``. Bars are coloured per scenario, annotated
    with the value in whole dollars, and the figure is saved to
    ``cost_per_daly_averted.png`` under ``g_path`` and shown.
    """
    scenario_order = ordered_scenario_ids_pathways
    tick_labels = [full_lab[scen_draws[sc]] for sc in scenario_order]

    # Columns in presentation order; each row becomes one bar per group
    table = cost_per_daly_averted_df[scenario_order]
    n_series, n_groups = table.shape

    group_centres = np.arange(n_groups, dtype=float)
    bar_width = min(0.8 / max(n_series, 1), 0.2)
    first_offset = - (n_series - 1) * bar_width / 2.0

    fig, ax = plt.subplots(figsize=(12, 6))

    # One colour per scenario, shared by every series
    scenario_colours = [color_map_pathways[sc] for sc in scenario_order]

    # One hatch per series, cycling if there are more series than patterns
    hatch_cycle = ["", "//", "xx", "..", "\\\\", "++", "**", "oo", "--"]
    series_hatches = [hatch_cycle[k % len(hatch_cycle)] for k in range(n_series)]
    legend_entries = []

    def as_dollars(amount):
        return f"${float(amount):.0f}"

    for k, (series_name, series) in enumerate(table.iterrows()):
        heights = series.values.astype(float)
        bar_x = group_centres + first_offset + k * bar_width

        ax.bar(
            bar_x, heights, width=bar_width,
            color=scenario_colours, alpha=0.9,
            hatch=series_hatches[k],
            edgecolor='black', linewidth=0.7
        )

        # Labels start at the bar end; they are nudged outwards once the y-range is known
        for x_here, height in zip(bar_x, heights):
            ax.text(
                x_here, height,
                as_dollars(height),
                ha='center', va='bottom' if height >= 0 else 'top',
                fontsize='x-small'
            )

        # White, hatched rectangle used as this series' legend handle
        swatch = plt.Rectangle((0, 0), 1, 1, facecolor="white",
                               hatch=series_hatches[k], edgecolor="black", linewidth=0.7)
        legend_entries.append((swatch, str(series_name)))

    # Axes cosmetics
    ax.axhline(0, linewidth=1, linestyle='--', color='gray')
    ax.set_xticks(group_centres)
    ax.set_xticklabels(tick_labels, rotation=90, fontsize=7, ha='right')
    ax.set_ylabel("Cost per DALY averted (USD)")
    ax.margins(x=0.02)

    # Light separator after every second scenario
    for boundary in range(2, len(scenario_order), 2):
        ax.axvline(boundary - 0.5, linestyle=':', linewidth=0.8, color='lightgray', zorder=0)

    # Legend shows the hatch patterns only
    handles, names = zip(*legend_entries)
    hatch_legend = fig.legend(handles, names, title="Hypothetical Assumption on Improved HCW Efficiency",
                              loc="upper center", bbox_to_anchor=(0.5, 1.08), ncol=min(n_series, 5))

    # Move every label away from its bar by 2% of the y-range
    y_low, y_high = ax.get_ylim()
    nudge = 0.02 * max(y_high - y_low, 1.0)
    for label in ax.texts:
        label_x, label_y = label.get_position()
        direction = 1 if label.get_va() == 'bottom' else -1
        label.set_position((label_x, label_y + direction * nudge))

    # Save without cropping the legend
    fig.tight_layout(rect=[0, 0, 1, 0.95])
    plt.savefig(f'{g_path}/cost_per_daly_averted.png',
                bbox_inches='tight',
                bbox_extra_artists=[hatch_legend])
    plt.show()
a/src/scripts/service_integration/service_integration_analysis_script.py b/src/scripts/service_integration/service_integration_analysis_script.py new file mode 100644 index 0000000000..a093002d1d --- /dev/null +++ b/src/scripts/service_integration/service_integration_analysis_script.py @@ -0,0 +1,761 @@ +from pathlib import Path + +from collections import Counter, defaultdict + +import os +import scipy.stats as st +from scipy.stats import t, norm, shapiro + +import pandas as pd +import tableone +from tableone import TableOne + +import matplotlib.pyplot as plt +import numpy as np + +from typing import Callable, Dict, Iterable, List, Literal, Optional, TextIO, Tuple, Union + +from tlo import Date +from tlo.analysis.utils import (bin_hsi_event_details, extract_results, get_scenario_outputs, compute_summary_statistics, +make_age_grp_types, get_scenario_info, make_calendar_period_lookup, make_calendar_period_type, parse_log_file) + +outputspath = './outputs/sejjj49@ucl.ac.uk/' + +scenario = 'integration_scenario_max_test_2462999' +results_folder= get_scenario_outputs(scenario, outputspath)[-1] +# create_pickles_locally(results_folder, compressed_file_name_prefix='service_integration_scenario') + + +# int_names = ['status_quo', +# 'chronic_care_clinic', +# 'screening_htn', +# 'screening_dm', +# 'screening_hiv', +# 'screening_tb', +# 'screening_fp', +# 'screening_mal', +# 'screening_all', +# 'mch_clinic_pnc', +# 'mch_clinic_fp', +# 'mch_clinic_all', +# 'all_integration'] + +int_names = ['status_quo', + 'htn', + 'htn_max', + 'dm', + 'dm_max', + 'hiv', + 'hiv_max', + 'tb', + 'tb_max', + 'mal', + 'mal_max', + 'fp_scr', + 'fp_scr_max', + 'pnc', + 'pnc_max', + 'fp_pn', + 'fp_pn_max', + 'chronic_care', + 'chronic_care_max', + 'all_screening', + 'all_screening_max', + 'all_mch', + 'all_mch_max', + 'all_int', + 'all_int_max'] + +# Create a folder to store graphs (if it hasn't already been created when ran previously) +g_path = f'{outputspath}graphs_{scenario}' + +info = 
get_scenario_info(results_folder) +draws = [x for x in range(info['number_of_draws'])] + +if not os.path.isdir(g_path): + os.makedirs(f'{outputspath}graphs_{scenario}') + + +TARGET_PERIOD = (Date(2011, 1, 1), Date(2015, 12, 31)) + +def get_num_dalys(_df): + """Return total number of DALYS (Stacked) by label (total within the TARGET_PERIOD). + Throw error if not a record for every year in the TARGET PERIOD (to guard against inadvertently using + results from runs that crashed mid-way through the simulation. + """ + # TO DO: this isnt outputting all dalys (missing 2013 onwards) + years_needed = [i.year for i in TARGET_PERIOD] + assert set(_df.year.unique()).issuperset(years_needed), "Some years are not recorded." + return pd.Series( + data=_df + .loc[_df.year.between(*years_needed)] + .drop(columns=['date', 'sex', 'age_range', 'year']) + .sum().sum() + ) + +num_dalys = extract_results( + results_folder, + module='tlo.methods.healthburden', + key='dalys_stacked', + custom_generate_series=get_num_dalys, + do_scaling=False + ) + +idx = pd.IndexSlice +total_dalys_dfs = {k: num_dalys.loc[:, idx[d, :]] for k, d in zip (int_names, draws)} + +def get_diff_multi_index(df, int_name, draw): + diff = df[int_name][draw] - df['status_quo'][0] + diff.columns=df[int_name].columns + return diff + +total_dalys_diff_dfs = {k: get_diff_multi_index(total_dalys_dfs, k, d) for k, d in zip(int_names, draws)} + +total_dalys_summ = {k:compute_summary_statistics(total_dalys_dfs[k]) for k in int_names} +total_dalys_diff_summ = {k:compute_summary_statistics(total_dalys_diff_dfs[k]) for k in int_names} + +all_dalys_dfs = extract_results( + results_folder, + module="tlo.methods.healthburden", + key="dalys_stacked", + custom_generate_series=( + lambda df: df.drop( + columns=['date', 'sex', 'age_range']).groupby(['year']).sum().stack()), + do_scaling=False) +all_dalys_dfs.index.names = ['year', 'cause'] +years_to_sum = list(range(2011, 2016)) + +# Filter the DataFrame to include only those years 
+df_subset = all_dalys_dfs.loc[all_dalys_dfs.index.get_level_values('year').isin(years_to_sum)] + +# Group by 'cause' and sum +cause_totals = df_subset.groupby('cause').sum() +total_cause_dfs = {k: cause_totals.loc[:, idx[d, :]] for k, d in zip (int_names, draws)} +total_cause_summ = {k:compute_summary_statistics(total_cause_dfs[k]) for k in int_names} + +total_cause_diff_dfs = {k: get_diff_multi_index(total_cause_dfs, k, d) for k, d in zip(int_names, draws)} +total_cause_summ_diff = {k:compute_summary_statistics(total_cause_diff_dfs[k]) for k in int_names} + + +# GRAPHS AND CSV FILES + +for k in total_cause_diff_dfs: + total_cause_diff_dfs[k].to_csv(f'{g_path}/{k}_diffs.csv') + +for k, d in zip(total_cause_diff_dfs, draws): + labels = total_cause_summ_diff[k].index + median = total_cause_summ_diff[k][d]['central'].values + lower_errors = total_cause_summ_diff[k][d]['lower'].values + upper_errors = total_cause_summ_diff[k][d]['upper'].values + + # lower_errors = [data[k].loc[0, 'lower'] for k in labels] + # upper_errors = [data[k].loc[0, 'upper'] for k in labels] + + # lower_errors = [data[k][d].loc[0, 'lower'] - data[k][d].loc[0, 'central']for k, d in zip(labels, draws)] + # upper_errors = [data[k][d].loc[0, 'upper'] - data[k][d].loc[0, 'lower'] for k, d in zip(labels, draws)] + # errors = [lower_errors, upper_errors] + + # Compute distances from mean to bounds (must be non-negative) + yerr_lower = [mean - low for mean, low in zip(median, lower_errors)] + yerr_upper = [up - mean for mean, up in zip(median, upper_errors)] + + # Create bar chart with error bars + fig, ax = plt.subplots() + ax.bar(labels, median, yerr=[yerr_lower, yerr_upper], capsize=5, alpha=0.7, ecolor='black') + ax.axhline(0, color='gray', linestyle='--', linewidth=1) + ax.set_ylabel('Difference in DALYs from SQ') + ax.set_title(f'{k} Vs status_quo: Difference in DALYs by cause') + + # Adjust label size + plt.xticks(fontsize=8, rotation=90) + plt.tight_layout() + 
plt.savefig(f'{g_path}/{k}_diff_dalys_cause.png', bbox_inches='tight') + plt.show() + + +def barcharts(data, y_label, title): + + # Extract means and errors + labels = data.keys() + median = [data[k][d].loc[0, 'central'] for k, d in zip(labels, draws)] + lower_errors = [data[k][d].loc[0, 'lower'] for k, d in zip(labels, draws)] + upper_errors = [data[k][d].loc[0, 'upper'] for k, d in zip(labels, draws)] + + # lower_errors = [data[k].loc[0, 'lower'] for k in labels] + # upper_errors = [data[k].loc[0, 'upper'] for k in labels] + + # lower_errors = [data[k][d].loc[0, 'lower'] - data[k][d].loc[0, 'central']for k, d in zip(labels, draws)] + # upper_errors = [data[k][d].loc[0, 'upper'] - data[k][d].loc[0, 'lower'] for k, d in zip(labels, draws)] + # errors = [lower_errors, upper_errors] + + # Compute distances from mean to bounds (must be non-negative) + yerr_lower = [mean - low for mean, low in zip(median, lower_errors)] + yerr_upper = [up - mean for mean, up in zip(median, upper_errors)] + + # Create bar chart with error bars + fig, ax = plt.subplots() + ax.bar(labels, median, yerr=[yerr_lower, yerr_upper], capsize=5, alpha=0.7, ecolor='black') + ax.set_ylabel(y_label) + ax.set_title(title) + + # Adjust label size + plt.xticks(fontsize=8, rotation=90) + plt.tight_layout() + plt.savefig(f'{g_path}/{title}.png', bbox_inches='tight') + plt.show() + +barcharts(total_dalys_diff_summ, 'Difference in DALYs', 'Total Difference in Total DALYs from Status Quo by ' + 'Scenario') + +barcharts(total_dalys_summ, 'DALYs', ' Total DALYs from Status Quo by Scenario') + + +keys = list(total_cause_summ.keys()) +baseline_key = keys[0] +baseline_df = total_cause_summ[baseline_key] + +categories = baseline_df.index +x = np.arange(len(categories)) +width = 0.35 # width of each bar + +for key, draw in zip(keys[1:], draws[1:]): + comp_df = total_cause_summ[key] + + fig, ax = plt.subplots(figsize=(10, 6)) + + # Extract data and compute asymmetric error bars + # Baseline + baseline_central = 
baseline_df[0]['central'] + baseline_err_lower = baseline_central - baseline_df[0]['lower'] + baseline_err_upper = baseline_df[0]['upper'] - baseline_central + + # Comparison + comp_central = comp_df[draw]['central'] + comp_err_lower = comp_central - comp_df[draw]['lower'] + comp_err_upper = comp_df[draw]['upper'] - comp_central + + # Plot bars with asymmetric error bars + ax.bar(x - width/2, baseline_central, width, + yerr=[baseline_err_lower, baseline_err_upper], + capsize=5, label=baseline_key, alpha=0.8) + + ax.bar(x + width/2, comp_central, width, + yerr=[comp_err_lower, comp_err_upper], + capsize=5, label=key, alpha=0.8) + + ax.axhline(0, color='gray', linestyle='--', linewidth=1) + + ax.set_title(f"Comparison: {key} vs {baseline_key}") + ax.set_ylabel("DALYs") + ax.set_xticks(x) + ax.set_xticklabels(categories, rotation=45, ha='right') + ax.legend() + ax.grid(axis='y', linestyle='--', alpha=0.4) + plt.tight_layout() + plt.savefig(f'{g_path}/{key}_dalys_cause.png', bbox_inches='tight') + plt.show() + + + + +def get_dalys_by_period_sex_agegrp_label(df): + """Sum the dalys by period, sex, age-group and label""" + df['age_grp'] = df['age_range'].astype(make_age_grp_types()) + df = df.drop(columns=['date', 'age_range', 'sex']) + df = df.groupby(by=["year", "age_grp"]).sum().stack() + df.index = df.index.set_names('label', level=2) + return df + +dalys = extract_results( + results_folder, + module="tlo.methods.healthburden", + key="dalys_stacked_by_age_and_time", # <-- for DALYS stacked by age and time + custom_generate_series=get_dalys_by_period_sex_agegrp_label, + do_scaling=False + ) +dalys.index = dalys.index.set_names('age_group', level=1) + +def get_pop_by_agegrp_label(df): + """Sum the dalys by period, sex, age-group and label""" + df['year'] = df['date'].dt.year + df_melted = df.melt(id_vars=['year'], value_vars=[col for col in df.columns if col not in ['date', 'year']], + var_name='age_group', value_name='count') + series_multi = 
df_melted.set_index(['year', 'age_group'])['count'].sort_index() + + return series_multi + + +pop_f = extract_results( + results_folder, + module="tlo.methods.demography", + key="age_range_f", # <-- for DALYS stacked by age and time + custom_generate_series=get_pop_by_agegrp_label, + do_scaling=False + ) + +pop_m = extract_results( + results_folder, + module="tlo.methods.demography", + key="age_range_m", # <-- for DALYS stacked by age and time + custom_generate_series=get_pop_by_agegrp_label, + do_scaling=False + ) + +pop = pop_f + pop_m + +pop_summ = compute_summary_statistics(pop) +dalys_summ = compute_summary_statistics(dalys) + + +# TODO OTHER OUTPUTS +# Notes from epi meeting with Tim C/Andrew +# - consider splitting appointments by those which could be done by staff member with sufficient training (e.g. refills) and +# those that could only be done by a specialist (e.g. initial HIV care) +# - start modelling at 2025 +# - present % of total DALYs attributable to each scenario +# - can we look at TB dalys in people with HIV +# - for contraception, present met need instead of DALYs? 
(although could present maternal dalys only) +# they were unsure about age standardizaton + +# =============================================== CONSUMABLES ========================================================= +def drop_outside_period(_df): + """Return a dataframe which only includes for which the date is within the limits defined by TARGET_PERIOD""" + return _df.drop(index=_df.index[~_df['date'].between(*TARGET_PERIOD)]) + +def get_quantity_of_consumables_dispensed(results_folder): + def get_counts_of_items_requested(_df): + _df = drop_outside_period(_df) + counts_of_used = defaultdict(lambda: defaultdict(int)) + counts_of_not_available = defaultdict(lambda: defaultdict(int)) + + for _, row in _df.iterrows(): + date = row['date'] + for item, num in row['Item_Used'].items(): + counts_of_used[date][item] += num + for item, num in row['Item_NotAvailable'].items(): + counts_of_not_available[date][item] += num + used_df = pd.DataFrame(counts_of_used).fillna(0).astype(int).stack().rename('Used') + not_available_df = pd.DataFrame(counts_of_not_available).fillna(0).astype(int).stack().rename('Not_Available') + + # Combine the two dataframes into one series with MultiIndex (date, item, availability_status) + combined_df = pd.concat([used_df, not_available_df], axis=1).fillna(0).astype(int) + + # Convert to a pd.Series, as expected by the custom_generate_series function + return combined_df.stack() + + cons_req = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='Consumables', + custom_generate_series=get_counts_of_items_requested, + do_scaling=False) + + cons_dispensed = cons_req.xs("Used", level=2) # only keep actual dispensed amount, i.e. 
when available + return cons_dispensed + +consumables_dispensed = get_quantity_of_consumables_dispensed(results_folder) +consumables_dispensed = consumables_dispensed.groupby(level=0).sum() + +base = consumables_dispensed.loc[:, 0] + +# Calculate percentage difference +percent_diff = consumables_dispensed.copy() +for col in consumables_dispensed.columns: + if col[0] != 0: + # Get corresponding (0, col[1]) for comparison + base_col = (0, col[1]) + percent_diff[col] = (consumables_dispensed[col] - consumables_dispensed[base_col]) / consumables_dispensed[base_col] * 100 + else: + percent_diff[col] = 0 # or np.nan if you prefer + +pdiff_sum = compute_summary_statistics(percent_diff) + + +# todo: what about converting this back to a normal dose/size/measure? + +ic = {'htn': {'Hydralazine (oral)': 221}, + + 'dm': {'Metformin (oral)': 233, + 'Blood glucose test': 216,}, + + 'hiv': {'HIV test': 196, + 'First-line ART regimen (adult)': 2671, + 'Cotrimoxizole, 960mg': 204, + 'First line ART regimen (older child)': 2672, + 'Cotrimoxazole 120mg' : 203, + 'First line ART regimen (young child)': 2673}, + + + 'tb': {'ZN Stain': 186, + 'Xpert': 187, + 'X-ray': 175, + 'MGIT960 Culture and DST': 188, + 'Cat. I & III Patient Kit A': 176, + 'Cat. I & III Patient Kit B': 178, + 'Cat. II Patient Kit A1': 177, + 'Cat. 
II Patient Kit A2': 179, + 'Treatment (second-line drugs)': 181, + 'Isoniazid/Pyridoxine, tablet 300 mg': 192, + 'Isoniazid/Rifapentine': 2678}, + + 'mal': {'Supplementary spread, sachet': 1221, + 'Complementary feeding': 1171}, + + 'fp': {'Levonorgestrel': 1, + 'Condom, male': 2, + 'IUD, Copper': 7, + 'Depot': 3, + 'Jadelle (implant)': 12, + 'Pregnancy test kit': 2019}, + + 'pnc': {'Hydralazine' : 60, + 'Methyldopa' : 222, + 'Magnesium sulfate' : 61, + 'Benzylpenicillin' : 99, + 'Gentamycin' : 28, + 'Oxytocin, injection' : 56, + 'Blood, one unit' : 141, + 'Haemoglobin test (HB)' : 50, + 'Ferrous Salt + Folic Acid' : 140}, + + 'chronic_care': {'Phenobarbital': 278, + 'Carbamazepine': 276, + 'Phenytoin sodium': 279, + 'Amitriptyline': 267}} + +draw_numbs = {'htn': [1, 2], + 'dm': [3, 4], + } + + +def get_data_as_list_for_bc(draw_numbs): + nc = draw_numbs[0] + mc = draw_numbs[1] + + def get_med_and_error(draw): + med = [pdiff_sum.at[f'{ic}', (draw, 'central')] for ic in item_codes] + lq = [pdiff_sum.at[f'{ic}', (draw, 'lower')] for ic in item_codes] + uq = [pdiff_sum.at[f'{ic}', (draw, 'upper')] for ic in item_codes] + int_err_lower = [a - b for a, b in zip(med, lq)] + int_err_upper = [a - b for a, b in zip(uq, med)] + + return [med, int_err_lower, int_err_upper] + + return [get_med_and_error(nc), get_med_and_error(mc)] + +filtered_int_name = [s for s in int_names if not s.endswith('_max')] +filtered_int_name.remove('status_quo') + +for scen in filtered_int_name: + if scen.startswith('htn'): + item_codes = list(ic['htn'].values()) + labels = list(ic['htn'].keys()) + data = get_data_as_list_for_bc(draw_numbs['htn']) + title = scen + + elif scen.startswith('dm'): + item_codes = list(ic['dm'].values()) + labels = list(ic['dm'].keys()) + data = get_data_as_list_for_bc(draw_numbs['dm']) + title = scen + + else: + pass + + fig, ax = plt.subplots(figsize=(10, 6)) + x = np.arange(len(labels)) + width = 0.35 + ax.bar(x - width / 2, data[0][0], width, + yerr=[data[0][1], 
data[0][2]], + capsize=5, label='Normal Cons.', alpha=0.8) + + ax.bar(x + width / 2, data[1][0], width, + yerr=[data[1][1], data[1][2]], + capsize=5, label='Max Con.s', alpha=0.8) + + ax.set_title(f"Percentage Difference in Consumable Use from SQ - {title}") + ax.set_ylabel("Percentage Difference") + ax.set_xticks(x) + ax.set_xticklabels(labels, rotation=45, ha='right') + ax.legend() + plt.tight_layout() + plt.savefig(f'{g_path}/{scenario}_cons_pdiff.png', bbox_inches='tight') + plt.show() + + + +# ========================================= APPOINTMENTS/HCW TIME ===================================================== +# # NUMBER OF APPOINTMENTS + +def compute_service_statistics(counters_by_draw_and_run): + grouped_data = defaultdict(lambda: defaultdict(list)) + + # Step 1: Group counts by first key and service name + for (group_idx, _), counter in counters_by_draw_and_run.items(): + for service_name, count in counter.items(): + grouped_data[group_idx][service_name].append(count) + + # Step 2: Compute statistics + result = defaultdict(dict) + width_of_range = 0.95 + lower_quantile = (1. - width_of_range) / 2. 
+ for group_idx, service_dict in grouped_data.items(): + for service_name, counts in service_dict.items(): + arr = np.array(counts) + result[group_idx][service_name] = { + "median": float(np.median(arr)), + "lower_quartile": float(np.quantile(arr, lower_quantile)), + "upper_quartile": float(np.quantile(arr, 1 - lower_quantile)) + } + + return result + +counts_by_treatment_id = bin_hsi_event_details( + results_folder, + lambda event_details, count: sum( + [ + Counter({ + ( + event_details["treatment_id"] + ): + count * appt_number + }) + for appt_type, appt_number in event_details["appt_footprint"] + ], + Counter() + ), + *TARGET_PERIOD, + True + ) + +# TODO - what about other HSIs that might be impacted (anc, pnc etc), should we do this more generally + +hsi_results = compute_service_statistics(counts_by_treatment_id) +hsi_results = {k:v for k, v in zip(int_names, hsi_results.values())} + +hsi_by_scen = { + 'htn':['CardioMetabolicDisorders_Prevention_CommunityTestingForHypertension', + 'CardioMetabolicDisorders_Investigation', + 'CardioMetabolicDisorders_Prevention_WeightLoss'], + + 'htn_max':['CardioMetabolicDisorders_Prevention_CommunityTestingForHypertension', + 'CardioMetabolicDisorders_Investigation', + 'CardioMetabolicDisorders_Prevention_WeightLoss'], + + 'dm':[ 'CardioMetabolicDisorders_Investigation', + 'CardioMetabolicDisorders_Prevention_WeightLoss'], + + 'dm_max':['CardioMetabolicDisorders_Investigation', + 'CardioMetabolicDisorders_Prevention_WeightLoss'], + + 'hiv': ['Hiv_Test', 'Hiv_Treatment'], + + 'hiv_max': ['Hiv_Test', 'Hiv_Treatment'], + + 'tb': ['Tb_Test_Screening', + 'Tb_Test_Clinical', + 'Tb_Test_Xray', + 'Tb_Treatment'], + + 'tb_max': ['Tb_Test_Screening', + 'Tb_Test_Clinical', + 'Tb_Test_Xray', + 'Tb_Treatment'], + + 'mal': ['Undernutrition_Feeding'], + + 'mal_max': ['Undernutrition_Feeding'], + + 'fp_scr': ['Contraception_Routine'], + + 'fp_scr_max': ['Contraception_Routine'], + + 'pnc': ['PostnatalCare_Neonatal', + 
'PostnatalCare_Maternal'], + + 'pnc_max': ['PostnatalCare_Neonatal', + 'PostnatalCare_Maternal'], + + 'fp_pn': ['Contraception_Routine'], + + 'fp_pn_max': ['Contraception_Routine'], + + 'chronic_care': ['CardioMetabolicDisorders_Investigation', + 'CardioMetabolicDisorders_Prevention_WeightLoss', + 'Hiv_Test', + 'Hiv_Treatment', + 'Tb_Test_Screening', + 'Tb_Test_Clinical', + 'Tb_Test_Xray', + 'Tb_Treatment', + 'Depression_TalkingTherapy', + 'Depression_Treatment', + 'Epilepsy_Treatment_Start', + 'Epilepsy_Treatment_Followup'], + + 'chronic_care_max': ['CardioMetabolicDisorders_Investigation', + 'CardioMetabolicDisorders_Prevention_WeightLoss', + 'Hiv_Test', + 'Hiv_Treatment', + 'Tb_Test_Screening', + 'Tb_Test_Clinical', + 'Tb_Test_Xray', + 'Tb_Treatment', + 'Depression_TalkingTherapy', + 'Depression_Treatment', + 'Epilepsy_Treatment_Start', + 'Epilepsy_Treatment_Followup'], + + 'all_screening': ['CardioMetabolicDisorders_Prevention_CommunityTestingForHypertension', + 'CardioMetabolicDisorders_Investigation', + 'CardioMetabolicDisorders_Prevention_WeightLoss', + 'Contraception_Routine', + 'Undernutrition_Feeding', + 'Hiv_Test', + 'Hiv_Treatment', + 'Tb_Test_Screening', + 'Tb_Test_Clinical', + 'Tb_Test_Xray', + 'Tb_Treatment'], + + 'all_screening_max': ['CardioMetabolicDisorders_Prevention_CommunityTestingForHypertension', + 'CardioMetabolicDisorders_Investigation', + 'CardioMetabolicDisorders_Prevention_WeightLoss', + 'Contraception_Routine', + 'Undernutrition_Feeding', + 'Hiv_Test', + 'Hiv_Treatment', + 'Tb_Test_Screening', + 'Tb_Test_Clinical', + 'Tb_Test_Xray', + 'Tb_Treatment'], + + 'all_mch': ['Undernutrition_Feeding', + 'PostnatalCare_Neonatal', + 'PostnatalCare_Maternal', + 'Contraception_Routine'], + + 'all_mch_max': ['Undernutrition_Feeding', + 'PostnatalCare_Neonatal', + 'PostnatalCare_Maternal', + 'Contraception_Routine'], + + 'all_int': ['CardioMetabolicDisorders_Prevention_CommunityTestingForHypertension', + 'CardioMetabolicDisorders_Investigation', + 
'CardioMetabolicDisorders_Prevention_WeightLoss', + 'Contraception_Routine', + 'Undernutrition_Feeding', + 'Hiv_Test', + 'Hiv_Treatment', + 'Tb_Test_Screening', + 'Tb_Test_Clinical', + 'Tb_Test_Xray', + 'Tb_Treatment', + 'PostnatalCare_Neonatal', + 'PostnatalCare_Maternal', + 'Depression_TalkingTherapy', + 'Depression_Treatment', + 'Epilepsy_Treatment_Start', + 'Epilepsy_Treatment_Followup'], + + 'all_int_max': ['CardioMetabolicDisorders_Prevention_CommunityTestingForHypertension', + 'CardioMetabolicDisorders_Investigation', + 'CardioMetabolicDisorders_Prevention_WeightLoss', + 'Contraception_Routine', + 'Undernutrition_Feeding', + 'Hiv_Test', + 'Hiv_Treatment', + 'Tb_Test_Screening', + 'Tb_Test_Clinical', + 'Tb_Test_Xray', + 'Tb_Treatment', + 'PostnatalCare_Neonatal', + 'PostnatalCare_Maternal', + 'Depression_TalkingTherapy', + 'Depression_Treatment', + 'Epilepsy_Treatment_Start', + 'Epilepsy_Treatment_Followup']} + + +def get_sq_hsi_data(data, labels): + med = [data[v]['median'] if v in data else 0 for v in labels] + lq = [data[v]['lower_quartile'] if v in data else 0 for v in labels] + uq = [data[v]['upper_quartile'] if v in data else 0 for v in labels] + + sq_err_lower = [a - b for a, b in zip(med, lq)] + sq_err_upper = [a - b for a, b in zip(uq, med)] + + return [med, sq_err_lower, sq_err_upper] + +for scenario in int_names: + if scenario == 'status_quo': + pass + else: + labels = hsi_by_scen[scenario] + int_data = hsi_results[scenario] + baseline_data = get_sq_hsi_data(hsi_results['status_quo'], labels) + + median = [int_data[v]['median'] if v in int_data else 0 for v in labels] + lq = [int_data[v]['lower_quartile'] if v in int_data else 0 for v in labels] + uq = [int_data[v]['upper_quartile'] if v in int_data else 0 for v in labels] + + fig, ax = plt.subplots(figsize=(10, 6)) + + int_err_lower = [a - b for a, b in zip(median, lq)] + int_err_upper = [a - b for a, b in zip(uq, median)] + + x = np.arange(len(labels)) + width = 0.35 + ax.bar(x - width / 2, 
baseline_data[0], width, + yerr=[baseline_data[1], baseline_data[2]], + capsize=5, label='Status Quo', alpha=0.8) + + ax.bar(x + width / 2, median, width, + yerr=[int_err_lower, int_err_upper], + capsize=5, label=scenario, alpha=0.8) + + # ax.axhline(0, color='gray', linestyle='--', linewidth=1) + + ax.set_title(f"Comparison: {scenario} vs Status Quo") + ax.set_ylabel("Number of HSIs") + ax.set_xticks(x) + ax.set_xticklabels(labels, rotation=45, ha='right') + ax.legend() + plt.tight_layout() + plt.savefig(f'{g_path}/{scenario}_hsi_counts.png', bbox_inches='tight') + plt.show() + + + + + +# def get_mean_pop_by_age_for_sex_and_year(sex): +# years_needed = [i.year for i in TARGET_PERIOD] +# +# if sex == 'F': +# key = "age_range_f" +# else: +# key = "age_range_m" +# +# num_by_age = compute_summary_statistics( +# extract_results(results_folder, +# module="tlo.methods.demography", +# key=key, +# custom_generate_series=( +# +# lambda df_: df_.drop( +# columns=['date'] +# ).melt( +# var_name='age_grp' +# ).set_index('age_grp')['value'] +# ), +# do_scaling=False +# ), +# collapse_columns=True, +# ) +# print(num_by_age.index[num_by_age.index.duplicated()]) +# # num_by_age = num_by_age.reindex(make_age_grp_types().categories) +# return num_by_age +# +# model_m = get_mean_pop_by_age_for_sex_and_year('M') +# model_f = get_mean_pop_by_age_for_sex_and_year('F') diff --git a/src/scripts/service_integration/service_integration_cost_analyses.py b/src/scripts/service_integration/service_integration_cost_analyses.py new file mode 100644 index 0000000000..f37505247e --- /dev/null +++ b/src/scripts/service_integration/service_integration_cost_analyses.py @@ -0,0 +1,347 @@ +from pathlib import Path + +from collections import Counter, defaultdict + +import os +import scipy.stats as st + +import pandas as pd + +import matplotlib.pyplot as plt +import math + +import matplotlib.cm as cm +from matplotlib.colors import TwoSlopeNorm +import matplotlib.ticker as mticker + +import 
matplotlib.patheffects as pe + + +import numpy as np +import ast # for safely parsing strings + +import seaborn as sns + +from tlo import Date +from tlo.analysis.utils import (bin_hsi_event_details, extract_results, extract_params, + get_scenario_outputs, compute_summary_statistics, + make_age_grp_types) + +from src.scripts.costing.cost_estimation import (estimate_input_cost_of_scenarios, + do_stacked_bar_plot_of_cost_by_category, + summarize_cost_data) + + +plt.style.use('seaborn-v0_8') + +# Get results folder +resourcefilepath = Path("./resources") +outputspath = './outputs/sejjj49@ucl.ac.uk/' +scenario = 'service_integration_scenario-2025-07-01T144012Z' +results_folder= get_scenario_outputs(scenario, outputspath)[-1] + +# Create a dict of {run: 'scenario'} from the updated parameters +params = extract_params(results_folder) +subset = params[params['module_param'] == ('ServiceIntegration:serv_integration')] +p_dict = subset.drop(columns='module_param').to_dict() +scen_draws = p_dict['value'] + +# create output folder for graphs +def make_folder(path): + folder_path = path + if not os.path.exists(folder_path): + os.makedirs(folder_path) + return folder_path + +g_path = make_folder(f'{outputspath}graphs_{scenario}_final') + +# create a dict with proper labels for each scenario +full_lab = {'htn':'Hypertension screening', + 'htn_max': 'Hypertension screening (max. cons)', + 'dm': 'Diabetes screening', + 'dm_max': 'Diabetes screening (max. cons)', + 'hiv': 'HIV screening', + 'hiv_max': 'HIV screening (max. cons)', + 'tb': 'Tb screening', + 'tb_max':'Tb screening (max. cons)', + 'mal':'Malnutrition screening', + 'mal_max':'Malnutrition screening (max. cons)', + 'fp_scr':'Family planning (WRA)', + 'fp_scr_max':'Family planning (WRA) (max. cons)', + 'anc': 'Antenatal care', + 'anc_max': 'Antenatal care (max.cons)', + 'pnc':'Postnatal care', + 'pnc_max':'Postnatal care (max. 
cons)', + 'fp_pn': 'Family planning (postnatal)', + 'fp_pn_max':'Family planning (postnatal) (max. cons)', + 'epi': 'EPI', + 'chronic_care': 'Chronic care services', + 'chronic_care_max': 'Chronic care services (max.)', + 'all_screening': 'All screening', + 'all_screening_max':'All screening (max. cons)', + 'all_mch': 'MCH services', + 'all_mch_max': 'MCH services (max. cons)', + 'all_int': 'All services', + 'all_int_max': 'All services (max. cons)'} + +def get_ratios(): + + # TODO DELETE WHEN MOVED INTO MAIN SCRIPT + + appointment_time_table = pd.read_csv( + resourcefilepath + / 'healthsystem' + / 'human_resources' + / 'definitions' + / 'ResourceFile_Appt_Time_Table.csv', + index_col=["Appt_Type_Code", "Facility_Level", "Officer_Category"] + ) + + appt_type_facility_level_officer_category_to_appt_time = ( + appointment_time_table.Time_Taken_Mins.to_dict() + ) + + officer_categories = appointment_time_table.index.levels[ + appointment_time_table.index.names.index("Officer_Category") + ].to_list() + + hcw_time_by_treatment_id = bin_hsi_event_details( + results_folder, + lambda event_details, count: sum( + [ + Counter({ + ( + officer_category, + event_details["treatment_id"] + ): + count + * appt_number + * appt_type_facility_level_officer_category_to_appt_time.get( + ( + appt_type, + event_details["facility_level"], + officer_category + ), + 0 + ) + for officer_category in officer_categories + }) + for appt_type, appt_number in event_details["appt_footprint"] + ], + Counter() + ), + *TARGET_PERIOD, + True + ) + + # First we calculate average change in pop size + def get_pop_by_agegrp_label(df): + """Sum the dalys by period, sex, age-group and label""" + df['year'] = df['date'].dt.year + df_melted = df.melt(id_vars=['year'], value_vars=[col for col in df.columns if col not in ['date', 'year']], + var_name='age_group', value_name='count') + series_multi = df_melted.set_index(['year', 'age_group'])['count'].sort_index() + + return series_multi + + pop_f = 
extract_results( + results_folder, + module="tlo.methods.demography", + key="age_range_f", + custom_generate_series=get_pop_by_agegrp_label, + do_scaling=True + ) + + pop_m = extract_results( + results_folder, + module="tlo.methods.demography", + key="age_range_m", + custom_generate_series=get_pop_by_agegrp_label, + do_scaling=True + ) + + pop = pop_f + pop_m + pop = pop.groupby(by='year').sum() + relative_increase_df = pop.pct_change() + avg_rel_increase = relative_increase_df.loc[2025:2054].mean(axis=0).to_frame().T + avg_rel_increase_summ = compute_summary_statistics(avg_rel_increase, use_standard_error=True) + + # Next we calculate the total HCW time use + hcw_time_by_treatment_id_df = pd.DataFrame.from_dict(hcw_time_by_treatment_id) + hcw_time_by_treatment_id_df = hcw_time_by_treatment_id_df.fillna(0) + hcw_time_by_treatment_id_df.index.names = ['first', 'second'] + hcw_time_by_cadre = hcw_time_by_treatment_id_df.groupby(level='first').sum() + + # Next we calculate HCW time by year + annual_hcw_time_by_cadre = hcw_time_by_cadre / 30 + + # Read in capabilities data and sum across facility levels etc. + daily_cap = pd.read_csv('./resources/healthsystem/human_resources/actual/ResourceFile_Daily_Capabilities.csv') + daily_mins = daily_cap.set_index('Officer_Category')[['Total_Mins_Per_Day']] + daily_mins = daily_mins.drop('Dental') + daily_mins = daily_mins.drop('Nutrition') + daily_mins = daily_mins.groupby(daily_mins.index).sum() + + # Next we calculate the average HCW capabilities assuming capabilities increase yearly in line with population growth + yrly_hcw_time_cap = daily_mins * 365.25 + value = 1 + avg_rel_increase_summ[(0, 'central')].values # TODO: replace with average annual pop growth (SQ?) 
+ n_times = 30 # number of times to multiply + steps = [(yrly_hcw_time_cap * (value ** i)) for i in range(n_times + 1)] # if you want to include original + pop_corrected_yearly_hcw_time_cap = sum(steps) / len(steps) + + # Now we calculate the ratio of time use to time available (by cadre) and summarise it + hcw_time_ratio_by_cadre = annual_hcw_time_by_cadre.div(pop_corrected_yearly_hcw_time_cap.iloc[:, 0], axis=0) + hcw_time_ratio_by_cadre.columns.names = ['draw', 'run'] + + hcw_time_ratio_by_cadre_summ = compute_summary_statistics(hcw_time_ratio_by_cadre, use_standard_error=True) + + return hcw_time_ratio_by_cadre_summ + +# ==================================== CONSUMABLE COST BY SCENARIO (AND DIFFS) ======================================== +# https://github.com/UCL/TLOmodel/blob/ec8929949c694b3a503d34051575f0dc7e7a32c3/src/scripts/comparison_of_horizontal_and_vertical_programs/economic_analysis_for_manuscript/roi_analysis_horizontal_vs_vertical.py#L45 +# 606 - 635, 1329-1348 + +TARGET_PERIOD = (Date(2025, 1, 1), Date(2054, 12, 31)) + +list_of_relevant_years_for_costing = list(range(TARGET_PERIOD[0].year, TARGET_PERIOD[-1].year + 1)) + +input_costs = estimate_input_cost_of_scenarios(results_folder, resourcefilepath, + _years=list_of_relevant_years_for_costing, + cost_only_used_staff=True, + _discount_rate = 0.03) + +# --------------------------- Adjust HCW costs based on average difference in HCW use --------------------------------- +# Get ratio of time use by cadre +hcw_ratios = get_ratios() + +# Multiply the HCW cost estimates by ratios +central_df = hcw_ratios.xs('central', axis=1, level=1) +# Function to safely get multiplier +def get_multiplier(row): + subgroup = row['cost_subgroup'] + draw = row['draw'] + if subgroup in central_df.index and draw in central_df.columns: + return central_df.loc[subgroup, draw] + else: + return 1.0 # or np.nan, or row['cost'] unmodified depending on your logic +input_costs['cost'] = input_costs.apply(lambda row: row['cost'] * 
get_multiplier(row), axis=1) + +# Sum the total +total_input_cost = input_costs.groupby(['draw', 'run'])['cost'].sum() +total_input_cost_annual = total_input_cost / 30 + +def find_difference_relative_to_comparison(_ser: pd.Series, + comparison: str, + scaled: bool = False, + drop_comparison: bool = True, + ): + """Find the difference in the values in a pd.Series with a multi-index, between the draws (level 0) + within the runs (level 1), relative to where draw = `comparison`. + The comparison is `X - COMPARISON`.""" + return _ser \ + .unstack(level=0) \ + .apply(lambda x: (x - x[comparison]) / (x[comparison] if scaled else 1.0), axis=1) \ + .drop(columns=([comparison] if drop_comparison else [])) \ + .stack() + +incremental_scenario_cost_annual = (pd.DataFrame( + find_difference_relative_to_comparison( + total_input_cost_annual, + comparison=0) # sets the comparator to 0 which is the Actual scenario +).T.iloc[0].unstack()).T + +# Plot incremental costs +incremental_scenario_cost_annual_summarized = summarize_cost_data(incremental_scenario_cost_annual) + + +def figure_avg_difference_in_cost_from_status_quo_per_year(cost_data): + name_of_plot = 'Incremental scenario cost relative to baseline during intervention period' + + # === Error bars === + yerr = np.array([ + (cost_data['mean'] - cost_data['lower']).values, + (cost_data['upper'] - cost_data['mean']).values, + ]) + + spacing = 1.55 # increase this value for more spacing + xticks = {(i * spacing): k for i, k in enumerate(cost_data.index)} + fig, ax = plt.subplots(figsize=(10, 5)) + + # === Color mapping === + scenario_ids = cost_data.index.tolist() + n_scenarios = len(scenario_ids) + + palette = sns.color_palette("husl", n_colors=n_scenarios) + step = 10 + spread_indices = [(i * step) % n_scenarios for i in range(n_scenarios)] + spread_palette = [palette[i] for i in spread_indices] + + color_map = {s: spread_palette[i] for i, s in enumerate(scenario_ids)} + colors = [color_map[s] for s in scenario_ids] + + # === 
Bar chart === + ax.bar( + xticks.keys(), + cost_data['mean'].values, + yerr=yerr, + ecolor='black', + capsize=10, + label=[str(s) for s in scenario_ids], + color=colors, + ) + + # === Format for currency annotation === + def format_currency(val): + if abs(val) >= 1e9: + return f"${val / 1e9:.1f}B" + else: + return f"${val / 1e6:.0f}M" + + # === Annotate bars === + for xpos, mean, lower, upper in zip( + xticks.keys(), + cost_data['mean'].values, + cost_data['lower'].values, + cost_data['upper'].values + ): + text = format_currency(mean) + if mean >= 0: + annotation_y = upper + 0.02 * 1e9 + valign = 'bottom' + else: + annotation_y = lower - 0.02 * 1e9 + valign = 'top' + + ax.text( + xpos, + annotation_y, + text, + ha='center', + va=valign, + fontsize='x-small', + rotation='horizontal' + ) + + # === Axis + Labels === + ax.set_xticks(list(xticks.keys())) + ax.set_xticklabels(full_lab.values()) # Assumes full_lab is defined externally + plt.xticks(rotation=90, fontsize=7) + + ax.grid(axis='both', linestyle='--', linewidth=0.5, alpha=0.7) + ax.axhline(0, color='gray', linewidth=0.5, linestyle='--') + ax.spines['top'].set_visible(False) + ax.spines['right'].set_visible(False) + + ax.set_ylabel('Difference in annual cost') + ax.set_ylim(bottom=-0.25 * 1e9) + + fig.tight_layout(pad=2.0) + plt.subplots_adjust(left=0.15, right=0.85) + + # === Save + Show === + fig.savefig(Path(g_path) / name_of_plot.replace(' ', '_').replace(',', ''), bbox_inches='tight') + plt.show() + + +figure_avg_difference_in_cost_from_status_quo_per_year(incremental_scenario_cost_annual_summarized) + diff --git a/src/scripts/service_integration/service_integration_scenario.py b/src/scripts/service_integration/service_integration_scenario.py new file mode 100644 index 0000000000..2da954d95d --- /dev/null +++ b/src/scripts/service_integration/service_integration_scenario.py @@ -0,0 +1,73 @@ +from tlo import Date, logging + +from tlo.methods import service_integration +from tlo.methods.fullmodel import 
fullmodel + +from tlo.scenario import BaseScenario + + +class ServiceIntegrationScenario(BaseScenario): + def __init__(self): + super().__init__() + self.seed = 537184 + self.start_date = Date(2010, 1, 1) + self.end_date = Date(2050, 1, 1) + self.pop_size = 100_000 + self.number_of_draws = 13 + self.runs_per_draw = 10 + + def log_configuration(self): + return { + 'filename': 'service_integration_scenario', 'directory': './outputs', + "custom_levels": { + "*": logging.WARNING, + "tlo.methods.contraception": logging.INFO, + "tlo.methods.cardio_metabolic_disorders": logging.INFO, + "tlo.methods.demography": logging.INFO, + "tlo.methods.depression": logging.INFO, + "tlo.methods.epilepsy": logging.INFO, + "tlo.methods.hiv": logging.INFO, + "tlo.methods.healthsystem.summary": logging.INFO, + "tlo.methods.healthburden": logging.INFO, + "tlo.methods.tb": logging.INFO, + "tlo.methods.labour": logging.INFO, + "tlo.methods.labour.detail": logging.INFO, + "tlo.methods.newborn_outcomes": logging.INFO, + "tlo.methods.care_of_women_during_pregnancy": logging.INFO, + "tlo.methods.pregnancy_supervisor": logging.INFO, + "tlo.methods.postnatal_supervisor": logging.INFO, + "tlo.methods.stunting": logging.INFO, + } + } + + def modules(self): + return [*fullmodel(resourcefilepath=self.resources), + service_integration.ServiceIntegration(resourcefilepath=self.resources)] + + def draw_parameters(self, draw_number, rng): + + params_all = {'ServiceIntegration':{'integration_year': 2020}} + params_oth = {1: {'serv_int_chronic': True}, + 2: {'serv_int_screening': ['htn']}, + 3: {'serv_int_screening': ['dm']}, + 4: {'serv_int_screening': ['hiv']}, + 5: {'serv_int_screening': ['tb']}, + 6: {'serv_int_screening': ['fp']}, + 7: {'serv_int_screening': ['mal']}, + 8: {'serv_int_screening': ['htn', 'dm', 'hiv', 'tb', 'fp', 'mal']}, + 9: {'serv_int_mch': ['pnc']}, + 10: {'serv_int_mch': ['fp']}, + 11: {'serv_int_mch': ['pnc', 'fp']}, + 12: {'serv_int_chronic': True, + 'serv_int_screening': ['htn', 
'dm', 'hiv', 'tb', 'fp', 'mal'], + 'serv_int_mch': ['pnc', 'fp']}} + + if draw_number == 0: + return params_all + else: + params_all['ServiceIntegration'].update(params_oth[draw_number]) + return params_all + +if __name__ == '__main__': + from tlo.cli import scenario_run + scenario_run([__file__]) diff --git a/src/tlo/methods/cardio_metabolic_disorders.py b/src/tlo/methods/cardio_metabolic_disorders.py index 2905da34e3..1a64bfb1eb 100644 --- a/src/tlo/methods/cardio_metabolic_disorders.py +++ b/src/tlo/methods/cardio_metabolic_disorders.py @@ -20,10 +20,10 @@ import numpy as np import pandas as pd -from tlo import DAYS_IN_YEAR, DateOffset, Module, Parameter, Property, Types, logging +from tlo import Date, DAYS_IN_YEAR, DateOffset, Module, Parameter, Property, Types, logging from tlo.events import Event, IndividualScopeEventMixin, PopulationScopeEventMixin, RegularEvent from tlo.lm import LinearModel, LinearModelType, Predictor -from tlo.methods import Metadata +from tlo.methods import Metadata, hiv from tlo.methods import demography as de from tlo.methods.causes import Cause from tlo.methods.dxmanager import DxTest @@ -1473,6 +1473,15 @@ def __init__(self, module, person_id, conditions_to_investigate: List, has_any_c super().__init__(module, person_id=person_id) self.TREATMENT_ID = "CardioMetabolicDisorders_Investigation" + + if conditions_to_investigate: + if 'hypertension' in conditions_to_investigate and 'diabetes' not in conditions_to_investigate: + self.TREATMENT_ID = "CardioMetabolicDisorders_Investigation_hypertension" + elif'hypertension' not in conditions_to_investigate and 'diabetes' in conditions_to_investigate: + self.TREATMENT_ID = "CardioMetabolicDisorders_Investigation_diabetes" + elif 'hypertension' in conditions_to_investigate and 'diabetes' in conditions_to_investigate: + self.TREATMENT_ID = "CardioMetabolicDisorders_Investigation_hypertension_and_diabetes" + self.EXPECTED_APPT_FOOTPRINT = self.make_appt_footprint({"Over5OPD": 1}) 
self.ACCEPTED_FACILITY_LEVEL = '1b' self.conditions_to_investigate = conditions_to_investigate @@ -1587,6 +1596,10 @@ def __init__(self, module, person_id, condition): super().__init__(module, person_id=person_id) self.TREATMENT_ID = 'CardioMetabolicDisorders_Prevention_WeightLoss' + + if condition: + self.TREATMENT_ID = 'CardioMetabolicDisorders_Prevention_WeightLoss' + f'_{condition}' + self.EXPECTED_APPT_FOOTPRINT = self.make_appt_footprint({'Over5OPD': 1}) self.ACCEPTED_FACILITY_LEVEL = '1b' @@ -1670,6 +1683,10 @@ def __init__(self, module, person_id, condition): super().__init__(module, person_id=person_id) self.TREATMENT_ID = 'CardioMetabolicDisorders_Treatment' + + if condition: + self.TREATMENT_ID = 'CardioMetabolicDisorders_Treatment' + f'_{condition}' + self.EXPECTED_APPT_FOOTPRINT = self.make_appt_footprint({'Over5OPD': 1}) self.ACCEPTED_FACILITY_LEVEL = '1b' @@ -1732,6 +1749,39 @@ def apply(self, person_id, squeeze_factor): priority=1 ) + # todo additional screening if chronic care implemented + if 'ServiceIntegration' in self.sim.modules: + if (self.sim.date >= Date(self.sim.modules['ServiceIntegration'].parameters['integration_year'], 1, 1)) and \ + self.sim.modules['ServiceIntegration'].parameters['serv_integration'].startswith(("chronic_care", "all_int")): + self.additional_screening(person_id) + + # todo - linkage to HIV testing, depression + def additional_screening(self, person_id): + df = self.sim.population.props + + # link to HIV testing + # do not run if already HIV diagnosed or had test in last week + if 'Hiv' in self.sim.modules: + + if not df.at[person_id, "hv_diagnosed"] or (df.at[person_id, "hv_last_test_date"] >= (self.sim.date - DateOffset(days=7))): + self.sim.modules["HealthSystem"].schedule_hsi_event( + hsi_event=hiv.HSI_Hiv_TestAndRefer( + person_id=person_id, + module=self.sim.modules["Hiv"], + referred_from="Integrated_CMD", + ), + priority=1, + topen=self.sim.date, + tclose=None, + ) + + # link to depression screening + if 
'Depression' in self.sim.modules: + # if not dx and currently on anti-depressants + # call for depression check which + if not df.at[person_id, 'de_on_antidepr']: + self.sim.modules['Depression'].do_on_presentation_to_care(person_id, hsi_event=self) + def did_not_run(self): # If this HSI event did not run, then the persons ceases to be taking medication person_id = self.target @@ -1751,7 +1801,7 @@ def __init__(self, module, person_id, events_to_investigate: List): super().__init__(module, person_id=person_id) assert isinstance(module, CardioMetabolicDisorders) - self.TREATMENT_ID = 'CardioMetabolicDisorders_Treatment' + self.TREATMENT_ID = 'CardioMetabolicDisorders_Treatment_Emergency' self.EXPECTED_APPT_FOOTPRINT = self.make_appt_footprint({'AccidentsandEmerg': 1}) self.ACCEPTED_FACILITY_LEVEL = '2' self.events_to_investigate = events_to_investigate diff --git a/src/tlo/methods/consumables.py b/src/tlo/methods/consumables.py index e51a95fe74..a51929a374 100644 --- a/src/tlo/methods/consumables.py +++ b/src/tlo/methods/consumables.py @@ -34,7 +34,9 @@ def __init__(self, availability_data: pd.DataFrame = None, item_code_designations: pd.DataFrame = None, rng: np.random = None, - availability: str = 'default' + availability: str = 'default', + treatment_ids_overridden: list = None, + treatment_ids_overridden_avail: float = None, ) -> None: self._options_for_availability = { @@ -69,6 +71,10 @@ def __init__(self, # Create (and save pointer to) the `ConsumablesSummaryCounter` helper class self._summary_counter = ConsumablesSummaryCounter() + # save treatment ids to override consumable availability + self._treatment_ids_overridden = treatment_ids_overridden + self._treatment_ids_overridden_avail = treatment_ids_overridden_avail + @property def availability(self): """Returns the internally stored value for the assumption of availability of consumables.""" @@ -197,12 +203,32 @@ def _determine_default_return_value(cons_availability, default_return_value): else: return 
default_return_value + def _update_internal_cons_list_for_treat_id_avail(self, + treatment_ids: list, + avail: float): + """This is a private function called by _set_availability_for_treatment_ids in health system. It updates the + self._treatment_ids_overridden which is the list of treatment ids for which consumable availability is being + overridden. Also updates _override_avail_treatment_ids which stores the probability of availability for + those treatment_ids + + :param treatment_ids: The treatment ids which should have availability overridden (list) + :param avail: The probability of availability in those treatment_ids (float) + """ + if not treatment_ids: + self._treatment_ids_overridden = [] + else: + for treatment_id in treatment_ids: + if treatment_id not in self._treatment_ids_overridden: + self._treatment_ids_overridden.append(treatment_id) + + self._override_avail_treatment_ids = avail + def _request_consumables(self, facility_info: 'FacilityInfo', # noqa: F821 essential_item_codes: dict, optional_item_codes: Optional[dict] = None, to_log: bool = True, - treatment_id: Optional[str] = None + treatment_id: Optional[str] = None, ) -> dict: """This is a private function called by 'get_consumables` in the `HSI_Event` base class. It queries whether item_codes are currently available at a particular Facility_ID and logs the request. @@ -212,6 +238,7 @@ def _request_consumables(self, :param optional_item_codes: dict of the form {: } for the optional items requested :param to_log: whether the request is logged. :param treatment_id: the TREATMENT_ID of the HSI (which is entered to the log, if provided). + :param override_hsi: list of treatment IDs for which consumable availability is set at 100%. :return: dict of the form {: } indicating the availability of each item requested. 
""" # If optional_item_codes is None, treat it as an empty dictionary @@ -223,8 +250,15 @@ def _request_consumables(self, if len(not_recognised_item_codes) > 0: self._not_recognised_item_codes[treatment_id] |= not_recognised_item_codes - # Look-up whether each of these items is available in this facility currently: - available = self._lookup_availability_of_consumables(item_codes=_all_item_codes, facility_info=facility_info) + # Check if the availability of consumables for this treatment id has been overridden + avail_overridden = False + if treatment_id: + if treatment_id in self._treatment_ids_overridden: + avail_overridden = True + + # Look-up whether each of these items is available in this facility currently.: + available = self._lookup_availability_of_consumables(item_codes=_all_item_codes, facility_info=facility_info, + avail_overridden=avail_overridden) # Log the request and the outcome: if to_log: @@ -253,9 +287,23 @@ def _request_consumables(self, # Return the result of the check on availability return available + def _return_available(self, + item_codes: dict, + forced_avail: Optional[bool] = None) -> dict: + """Returns a dictionary with availability of item codes when availability probability is being overridden. If + set_avail is not predetermined as True/False then availability is determined by + _treatment_ids_overridden_avail""" + + if forced_avail is not None: + return {_i: forced_avail for _i in item_codes} + else: + check_avail = self._rng.random_sample() < self._treatment_ids_overridden_avail + return {_i: check_avail for _i in item_codes} + def _lookup_availability_of_consumables(self, facility_info: 'FacilityInfo', # noqa: F821 - item_codes: dict + item_codes: dict, + avail_overridden: bool, ) -> dict: """Lookup whether a particular item_code is in the set of available items for that facility (in `self._is_available`). 
If any code is not recognised, use the `_is_unknown_item_available`.""" @@ -266,15 +314,23 @@ def _lookup_availability_of_consumables(self, # is running with `disable=True`. Therefore, assume the consumable is available if the overall # availability assumption is 'all' or 'default', and not otherwise. if self.availability in ('all', 'default'): - return {_i: True for _i in item_codes} + # returns true for all item codes + return self._return_available(item_codes, forced_avail=True) else: - return {_i: False for _i in item_codes} + # returns false for all item codes + return self._return_available(item_codes, forced_avail=False) + + # If availability is overridden for this treatment id then all items will be set as available. + if avail_overridden: + # Checks if item codes will be available using random draw against set availability prob + return self._return_available(item_codes, forced_avail=None) + else: + for _i in item_codes.keys(): + if _i in self.item_codes: + avail.update({_i: _i in self._is_available[facility_info.id]}) + else: + avail.update({_i: self._is_unknown_item_available[facility_info.id]}) - for _i in item_codes.keys(): - if _i in self.item_codes: - avail.update({_i: _i in self._is_available[facility_info.id]}) - else: - avail.update({_i: self._is_unknown_item_available[facility_info.id]}) return avail def on_simulation_end(self): diff --git a/src/tlo/methods/contraception.py b/src/tlo/methods/contraception.py index 6f56a55cc7..447588accf 100644 --- a/src/tlo/methods/contraception.py +++ b/src/tlo/methods/contraception.py @@ -4,6 +4,7 @@ import numpy as np import pandas as pd +from numpy.core.numeric import True_ from tlo import Date, DateOffset, Module, Parameter, Property, Types, logging from tlo.analysis.utils import flatten_multi_index_series_into_dict_for_logging @@ -592,7 +593,7 @@ def pregnancy_with_contraception(): return processed_params - def update_params_for_interventions(self): + def update_params_for_interventions(self, initiation, 
after_birth): """Updates process parameters to enable FP interventions.""" processed_params = self.processed_params @@ -617,17 +618,23 @@ def contraception_initiation_after_birth_with_interv(p_start_after_birth_without p_start_after_birth_with_interv.mul(self.parameters['Interventions_PPFP'].loc[0]) # Return reduced prob of 'not_using' - p_start_after_birth_with_interv = pd.Series((1.0 - p_start_after_birth_with_interv.sum()), - index=['not_using']).append(p_start_after_birth_with_interv) + # p_start_after_birth_with_interv = pd.Series((1.0 - p_start_after_birth_with_interv.sum()), + # index=['not_using']).append(p_start_after_birth_with_interv) + + p_start_after_birth_with_interv = pd.concat([pd.Series((1.0 - p_start_after_birth_with_interv.sum()), + index=['not_using']), p_start_after_birth_with_interv]) return p_start_after_birth_with_interv - processed_params['p_start_per_month'] = \ - contraception_initiation_with_interv(processed_params['p_start_per_month']) - processed_params['p_start_after_birth_below30'] = \ - contraception_initiation_after_birth_with_interv(processed_params['p_start_after_birth_below30']) - processed_params['p_start_after_birth_30plus'] = \ - contraception_initiation_after_birth_with_interv(processed_params['p_start_after_birth_30plus']) + if initiation: + processed_params['p_start_per_month'] = \ + contraception_initiation_with_interv(processed_params['p_start_per_month']) + + if after_birth: + processed_params['p_start_after_birth_below30'] = \ + contraception_initiation_after_birth_with_interv(processed_params['p_start_after_birth_below30']) + processed_params['p_start_after_birth_30plus'] = \ + contraception_initiation_after_birth_with_interv(processed_params['p_start_after_birth_30plus']) return processed_params @@ -643,7 +650,8 @@ def select_contraceptive_following_birth(self, mother_id, mother_age): new_contraceptive = self.rng.choice(probs_30plus.index, p=probs_30plus.values) # Do the change in contraceptive - 
self.schedule_batch_of_contraceptive_changes(ids=[mother_id], old=['not_using'], new=[new_contraceptive]) + self.schedule_batch_of_contraceptive_changes(ids=[mother_id], old=['not_using'], new=[new_contraceptive], + on_birth=True) def get_item_code_for_each_contraceptive(self): """Get the item_code for each contraceptive and for contraceptive initiation.""" @@ -704,11 +712,12 @@ def get_item_code_for_each_contraceptive(self): return _cons_codes - def schedule_batch_of_contraceptive_changes(self, ids, old, new): + def schedule_batch_of_contraceptive_changes(self, ids, old, new, on_birth: bool = False): """Enact the change in contraception, either through editing properties instantaneously or by scheduling HSI. ids: pd.Index of the woman for whom the contraceptive state is changing old: iterable giving the corresponding contraceptive state being switched from new: iterable giving the corresponding contraceptive state being switched to + on_birth: bool. true if change sheduled postnatally It is assumed that even with the option `self.use_healthsystem=True` that switches to certain methods do not require the use of HSI (these are not in `states_that_may_require_HSI_to_switch_to`).""" @@ -742,7 +751,8 @@ def schedule_batch_of_contraceptive_changes(self, ids, old, new): hsi_event=HSI_Contraception_FamilyPlanningAppt( person_id=_woman_id, module=self, - new_contraceptive=_new + new_contraceptive=_new, + on_birth=on_birth ), # select start_date for 0 max day delay; start_date or later for >=1 max day delay: topen=random_date( @@ -916,7 +926,7 @@ def initiate(self, individuals_not_using: pd.Index): self.module.schedule_batch_of_contraceptive_changes( ids=list(will_initiate), old=['not_using'] * len(will_initiate), - new=list(will_initiate.values()) + new=list(will_initiate.values()), ) def discontinue_switch_or_continue(self, individuals_using: pd.Index): @@ -945,7 +955,7 @@ def discontinue_switch_or_continue(self, individuals_using: pd.Index): 
self.module.schedule_batch_of_contraceptive_changes( ids=will_stop_idx, old=df.loc[will_stop_idx, 'co_contraception'].values, - new=['not_using'] * len(will_stop_idx) + new=['not_using'] * len(will_stop_idx), ) # 2) -- Switches and Continuations for those who do not Discontinue: @@ -977,7 +987,7 @@ def discontinue_switch_or_continue(self, individuals_using: pd.Index): self.module.schedule_batch_of_contraceptive_changes( ids=new_co.index, old=df.loc[new_co.index, 'co_contraception'].values, - new=new_co.values + new=new_co.values, ) # Do the contraceptive "change" for those not switching (this is so that an HSI may be logged and if the HSI @@ -987,7 +997,7 @@ def discontinue_switch_or_continue(self, individuals_using: pd.Index): self.module.schedule_batch_of_contraceptive_changes( ids=continue_idx, old=current_contraception, - new=current_contraception + new=current_contraception, ) def update_pregnancy(self): @@ -1137,7 +1147,7 @@ class HSI_Contraception_FamilyPlanningAppt(HSI_Event, IndividualScopeEventMixin) """HSI event for the starting a contraceptive method, maintaining use of a method of a contraceptive, or switching between contraceptives.""" - def __init__(self, module, person_id, new_contraceptive): + def __init__(self, module, person_id, new_contraceptive, on_birth): super().__init__(module, person_id=person_id) _facility_level = '2' if new_contraceptive in ('implant', 'female_sterilization') else '1a' @@ -1145,7 +1155,11 @@ def __init__(self, module, person_id, new_contraceptive): self.new_contraceptive = new_contraceptive self._number_of_times_run = 0 - self.TREATMENT_ID = "Contraception_Routine" + if on_birth: + self.TREATMENT_ID = "Contraception_Routine_Postnatal" + else: + self.TREATMENT_ID = "Contraception_Routine" + self.ACCEPTED_FACILITY_LEVEL = _facility_level current_method = self.sim.population.props.loc[person_id].co_contraception self.EXPECTED_APPT_FOOTPRINT = self._get_appt_footprint(current_method) @@ -1309,8 +1323,15 @@ def __init__(self, 
module): def apply(self, population): - # Update module parameters to enable interventions - self.module.processed_params = self.module.update_params_for_interventions() + # if ('fp' in self.sim.modules['ServiceIntegration'].parameters['serv_int_screening'] and 'fp' + # not in self.sim.modules['ServiceIntegration'].parameters['serv_int_mch']): + # after_birth = False + # if ('fp' in self.sim.modules['ServiceIntegration'].parameters['serv_int_mch'] and 'fp' not + # in self.sim.modules['ServiceIntegration'].parameters['serv_int_screening']): + # initiation = False + + self.module.processed_params = self.module.update_params_for_interventions(initiation=True, + after_birth=True) # ----------------------------------------------------------------------------------------------------------- diff --git a/src/tlo/methods/depression.py b/src/tlo/methods/depression.py index f198c4699c..a1cfe9946c 100644 --- a/src/tlo/methods/depression.py +++ b/src/tlo/methods/depression.py @@ -9,10 +9,10 @@ import numpy as np import pandas as pd -from tlo import DateOffset, Module, Parameter, Property, Types, logging +from tlo import Date, DateOffset, Module, Parameter, Property, Types, logging from tlo.events import Event, IndividualScopeEventMixin, PopulationScopeEventMixin, RegularEvent from tlo.lm import LinearModel, LinearModelType, Predictor -from tlo.methods import Metadata +from tlo.methods import Metadata, hiv from tlo.methods.causes import Cause from tlo.methods.dxmanager import DxTest from tlo.methods.hsi_event import HSI_Event @@ -583,6 +583,10 @@ def _check_for_suspected_depression( "pr_assessed_for_depression_for_perinatal_female" ]: # module labour return True + elif treatment_id.startswith(("Hiv_Treatment", "CardioMetabolicDisorders_Treatment", + "Epilepsy_Treatment_Followup")): + # this is only scheduled if integrated chronic care clinics are operationalised + return True else: raise NotImplementedError return False @@ -633,6 +637,7 @@ def do_when_suspected_depression( 
:param diagnosis_function: A function capable of running diagnosis checks on the population. :param hsi_event: The HSI_Event that triggered this call. """ + if diagnosis_function is None: assert isinstance( hsi_event, HSI_Event @@ -696,6 +701,36 @@ def do_at_generic_first_appt_emergency( schedule_hsi_event=schedule_hsi_event, ) + def additional_screening(self, person_id): + df = self.sim.population.props + + # link to HIV testing + # do not run if already HIV diagnosed or had test in last week + if 'Hiv' in self.sim.modules: + + if not df.at[person_id, "hv_diagnosed"] or ( + df.at[person_id, "hv_last_test_date"] >= (self.sim.date - DateOffset(days=7))): + self.sim.modules["HealthSystem"].schedule_hsi_event( + hsi_event=hiv.HSI_Hiv_TestAndRefer( + person_id=person_id, + module=self.sim.modules["Hiv"], + referred_from="Integrated_Depression", + ), + priority=1, + topen=self.sim.date, + tclose=None, + ) + + # link to CMD screening + if "CardioMetabolicDisorders" in self.sim.modules: + individual_properties = df.loc[person_id] + symptoms = self.sim.modules["SymptomManager"].has_what(person_id) + schedule_hsi_event = self.sim.modules["HealthSystem"].schedule_hsi_event + + self.sim.modules['CardioMetabolicDisorders'].do_at_generic_first_appt(person_id=person_id, + individual_properties=individual_properties, + symptoms=symptoms, + schedule_hsi_event=schedule_hsi_event) # --------------------------------------------------------------------------------------------------------- # DISEASE MODULE EVENTS @@ -1020,6 +1055,15 @@ def apply(self, person_id, squeeze_factor): # If medication was not available, the persons ceases to be taking antidepressants df.at[person_id, 'de_on_antidepr'] = False + if 'ServiceIntegration' in self.sim.modules: + if (self.sim.date >= Date(self.sim.modules['ServiceIntegration'].parameters['integration_year'], 1, + 1)) and \ + self.sim.modules['ServiceIntegration'].parameters['serv_integration'].startswith( + ("chronic_care", "all_int")): + 
self.module.additional_screening(person_id=person_id) + + + def did_not_run(self): # If this HSI event did not run, then the persons ceases to be taking antidepressants person_id = self.target diff --git a/src/tlo/methods/enhanced_lifestyle.py b/src/tlo/methods/enhanced_lifestyle.py index 11de3286e1..4872e64baf 100644 --- a/src/tlo/methods/enhanced_lifestyle.py +++ b/src/tlo/methods/enhanced_lifestyle.py @@ -659,13 +659,13 @@ def update_all_properties(self, df): :param df: The population dataframe """ # get months since last poll now = self.module.sim.date - months_since_last_poll = round((now - self.date_last_run) / np.timedelta64(1, "M")) + days_since_last_poll = round((now - self.date_last_run) / np.timedelta64(1, "D")) # loop through linear models dictionary and initialise each property in the population dataframe for _property_name, _model in self._models.items(): if _model['update'] is not None: df.loc[df.is_alive, _property_name] = _model['update'].predict( df.loc[df.is_alive], rng=self.rng, other=self.module.sim.date, - months_since_last_poll=months_since_last_poll) + months_since_last_poll=round(days_since_last_poll / 30.5)) # update date last event run self.date_last_run = now diff --git a/src/tlo/methods/epi.py b/src/tlo/methods/epi.py index 4bc298aefc..0cbd6e489e 100644 --- a/src/tlo/methods/epi.py +++ b/src/tlo/methods/epi.py @@ -190,7 +190,7 @@ def initialise_simulation(self, sim): # Look up item codes for consumables self.get_item_codes() - # Check that the values enetered for 'prob_facility_level_for_vaccine' sum to 1.0 + # Check that the values entered for 'prob_facility_level_for_vaccine' sum to 1.0 probs = self.parameters['prob_facility_level_for_vaccine'] assert all(np.isfinite(probs)) and np.isclose(sum(probs), 1.0) diff --git a/src/tlo/methods/epilepsy.py b/src/tlo/methods/epilepsy.py index 0639e6efd0..8c61f602ad 100644 --- a/src/tlo/methods/epilepsy.py +++ b/src/tlo/methods/epilepsy.py @@ -6,9 +6,9 @@ import numpy as np import pandas as pd 
-from tlo import DateOffset, Module, Parameter, Property, Types, logging +from tlo import Date, DateOffset, Module, Parameter, Property, Types, logging from tlo.events import IndividualScopeEventMixin, PopulationScopeEventMixin, RegularEvent -from tlo.methods import Metadata +from tlo.methods import Metadata, hiv from tlo.methods.causes import Cause from tlo.methods.demography import InstantaneousDeath from tlo.methods.hsi_event import HSI_Event @@ -424,6 +424,43 @@ def do_at_generic_first_appt_emergency( event = HSI_Epilepsy_Start_Anti_Epileptic(person_id=person_id, module=self) schedule_hsi_event(event, priority=0, topen=self.sim.date) + def additional_screening(self, person_id, hsi_event): + df = self.sim.population.props + + # link to HIV testing + # do not run if already HIV diagnosed or had test in last week + if 'Hiv' in self.sim.modules: + + if not df.at[person_id, "hv_diagnosed"] or ( + df.at[person_id, "hv_last_test_date"] >= (self.sim.date - DateOffset(days=7))): + self.sim.modules["HealthSystem"].schedule_hsi_event( + hsi_event=hiv.HSI_Hiv_TestAndRefer( + person_id=person_id, + module=self.sim.modules["Hiv"], + referred_from="Integrated_Depression", + ), + priority=1, + topen=self.sim.date, + tclose=None, + ) + + # link to CMD screening + if "CardioMetabolicDisorders" in self.sim.modules: + individual_properties = df.loc[person_id] + symptoms = self.sim.modules["SymptomManager"].has_what(person_id) + schedule_hsi_event = self.sim.modules["HealthSystem"].schedule_hsi_event + + self.sim.modules['CardioMetabolicDisorders'].do_at_generic_first_appt(person_id=person_id, + individual_properties=individual_properties, + symptoms=symptoms, + schedule_hsi_event=schedule_hsi_event) + + # link to depression screening + if 'Depression' in self.sim.modules: + # if not dx and currently on anti-depressants + # call for depression check which + if not df.at[person_id, 'de_on_antidepr']: + self.sim.modules['Depression'].do_on_presentation_to_care(person_id, 
hsi_event=hsi_event) class EpilepsyEvent(RegularEvent, PopulationScopeEventMixin): """The regular event that actually changes individuals' epilepsy status @@ -753,3 +790,10 @@ def apply(self, person_id, squeeze_factor): # No medicine is available and the maximum number of repeats has been reached: The person will default # to being off the anti-epileptics and no further follow-ups are scheduled. df.at[person_id, 'ep_antiep'] = False + + if 'ServiceIntegration' in self.sim.modules: + if (self.sim.date >= Date(self.sim.modules['ServiceIntegration'].parameters['integration_year'], 1, + 1)) and \ + self.sim.modules['ServiceIntegration'].parameters['serv_integration'].startswith( + ("chronic_care", "all_int")): + self.module.additional_screening(person_id=person_id, hsi_event=self) diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py index 3fda2f09a6..c4c980e50b 100644 --- a/src/tlo/methods/healthsystem.py +++ b/src/tlo/methods/healthsystem.py @@ -191,6 +191,14 @@ class HealthSystem(Module): " When using 'all' or 'none', requests for consumables are not logged. NB. This parameter is over-ridden" "if an argument is provided to the module initialiser." "Note that other options are also available: see the `Consumables` class."), + 'cons_override_treatment_ids': Parameter( + Types.LIST, + "Consumable availability within any treatment ids listed in this parameter will be set at to a " + "given probabilty stored in override_treatment_ids_avail. 
By default this list is empty"), + 'override_treatment_ids_avail': Parameter( + Types.REAL, + "Probability that consumables for treatment ids listed in cons_override_treatment_ids will be " + "available"), # Infrastructure and Equipment 'BedCapacity': Parameter( @@ -670,7 +678,10 @@ def pre_initialise_population(self): self.parameters['availability_estimates']), item_code_designations=self.parameters['consumables_item_designations'], rng=rng_for_consumables, - availability=self.get_cons_availability() + availability=self.get_cons_availability(), + treatment_ids_overridden=self.parameters['cons_override_treatment_ids'], + treatment_ids_overridden_avail=self.parameters['override_treatment_ids_avail'], + ) # We don't need to hold onto this large dataframe del self.parameters['availability_estimates'] @@ -1931,6 +1942,36 @@ def override_availability_of_consumables(self, item_codes) -> None: """ self.consumables.override_availability(item_codes) + def set_availability_for_treatment_ids(self, treatment_ids: list, + availability: float = None) -> None: + """ + This function can be called by any module to update the treatment ids for which consumable availability should + be overridden and to provide a probability of availability (set at 1.0 if unchanged) + + :param treatment_ids: The treatment ids which should have availability overridden (list) + :param avail: The probability of availability in those treatment_ids (float) + :return: None + """ + + # If an empty list is passed to this function then treatment ids for which cons availability was previously + # being overriden are removed + if not treatment_ids: + self.parameters['cons_override_treatment_ids'] = [] + else: + # Otherwise the parameter is updated + for treatment_id in treatment_ids: + if treatment_id not in self.parameters['cons_override_treatment_ids']: + self.parameters['cons_override_treatment_ids'].append(treatment_id) + + # If an availability probability is provided that parameter is updated + if 
availability: + self.parameters['override_treatment_ids_avail'] = availability + + # Call internal cons function to update the cons 'owned' lists in which this information is stored + self.consumables._update_internal_cons_list_for_treat_id_avail(treatment_ids=self.parameters['cons_override_treatment_ids'], + avail=self.parameters['override_treatment_ids_avail']) + + def _write_hsi_event_counts_to_log_and_reset(self): logger_summary.info( key="hsi_event_counts", diff --git a/src/tlo/methods/hiv.py b/src/tlo/methods/hiv.py index ea9b0f50d3..fa50a2e530 100644 --- a/src/tlo/methods/hiv.py +++ b/src/tlo/methods/hiv.py @@ -80,6 +80,7 @@ def __init__(self, name=None, run_with_checks=False): OPTIONAL_INIT_DEPENDENCIES = {"HealthBurden"} + # ADDITIONAL_DEPENDENCIES = {'Tb', 'NewbornOutcomes', 'CardioMetabolicDisorders'} ADDITIONAL_DEPENDENCIES = {'Tb', 'NewbornOutcomes'} METADATA = { @@ -2848,6 +2849,13 @@ def apply(self, person_id, squeeze_factor): priority=0, ) + # todo additional screening if chronic care implemented + if 'ServiceIntegration' in self.sim.modules: + if (self.sim.date >= Date(self.sim.modules['ServiceIntegration'].parameters['integration_year'], 1, 1)) and \ + self.sim.modules['ServiceIntegration'].parameters['serv_integration'].startswith(("chronic_care", + "all_int")): + self.additional_screening(person_id) + def do_at_initiation(self, person_id): """Things to do when this the first appointment ART""" df = self.sim.population.props @@ -2992,6 +3000,30 @@ def consider_tb(self, person_id): person_id=person_id ) + # linkage to NCD, depression + def additional_screening(self, person_id): + df = self.sim.population.props + + individual_properties = df.loc[person_id] + symptoms = self.sim.modules["SymptomManager"].has_what(person_id) + schedule_hsi_event = self.sim.modules["HealthSystem"].schedule_hsi_event + + # link to CMD screening - this will automatically check for current diagnoses before scheduling + if "CardioMetabolicDisorders" in self.sim.modules: + 
+ self.sim.modules['CardioMetabolicDisorders'].do_at_generic_first_appt(person_id=person_id, + individual_properties=individual_properties, + symptoms=symptoms, + schedule_hsi_event=schedule_hsi_event) + + # link to depression screening + if 'Depression' in self.sim.modules: + # if not dx and currently on anti-depressants + # call for depression check which + if not df.at[person_id, 'de_on_antidepr']: + + self.sim.modules['Depression'].do_on_presentation_to_care(person_id, hsi_event=self) + def never_ran(self): """This is called if this HSI was never run. * Default the person to being off ART. diff --git a/src/tlo/methods/labour.py b/src/tlo/methods/labour.py index 1fa531af4c..c85d0461a3 100644 --- a/src/tlo/methods/labour.py +++ b/src/tlo/methods/labour.py @@ -2242,6 +2242,76 @@ def do_at_generic_first_appt_emergency( tclose=self.sim.date + pd.DateOffset(days=1), ) + def update_labour_or_postnatal_coverage_for_analysis(self): + params = self.current_parameters + df = self.sim.population.props + mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info + mni_df = pd.DataFrame.from_dict(mni, orient='index') + pn_params = self.sim.modules['PostnatalSupervisor'].current_parameters + nb_params = self.sim.modules['NewbornOutcomes'].current_parameters + + # Check to see if analysis is being conducted when this event runs + if params['alternative_bemonc_availability'] or params['alternative_cemonc_availability'] or \ + params['alternative_pnc_coverage'] or params['alternative_pnc_quality'] or params['sba_sens_analysis_max'] \ + or params['pnc_sens_analysis_max'] or params['pnc_sens_analysis_min']: + + params['la_analysis_in_progress'] = True + + # If PNC analysis is being conducted we reset the intercept parameter of the equation determining care + # seeking for PNC and scale the model + if params['alternative_pnc_coverage']: + target = params['pnc_availability_odds'] + params['odds_will_attend_pnc'] = 1 + + women = df.loc[df.is_alive & (df.sex == 'F') & 
(df.age_years > 14) & (df.age_years < 50)] + mode_of_delivery = pd.Series(False, index=women.index) + delivery_setting = pd.Series(False, index=women.index) + + if 'mode_of_delivery' in mni_df.columns: + mode_of_delivery = pd.Series(mni_df['mode_of_delivery'], index=women.index) + if 'delivery_setting' in mni_df.columns: + delivery_setting = pd.Series(mni_df['delivery_setting'], index=women.index) + + mean = self.la_linear_models['postnatal_check'].predict( + df.loc[df.is_alive & (df.sex == 'F') & (df.age_years > 14) & (df.age_years < 50)], + year=self.sim.date.year, + mode_of_delivery=mode_of_delivery, + delivery_setting=delivery_setting).mean() + + mean = mean / (1.0 - mean) + scaled_intercept = 1.0 * (target / mean) if (target != 0 and mean != 0 and not np.isnan(mean)) else 1.0 + + params['odds_will_attend_pnc'] = scaled_intercept + + # Then override the parameters which control neonatal care seeking + cov_prob = params['pnc_availability_odds'] / (params['pnc_availability_odds'] + 1) + params['prob_timings_pnc'] = [1.0, 0] + + nb_params['prob_pnc_check_newborn'] = cov_prob + nb_params['prob_timings_pnc_newborns'] = [1.0, 0] + + if params['alternative_pnc_quality']: + nb_params['prob_kmc_available'] = params['pnc_availability_probability'] + params['prob_intervention_delivered_anaemia_assessment_pnc'] = params['pnc_availability_probability'] + + if params['pnc_sens_analysis_max'] or params['pnc_sens_analysis_min']: + self.la_linear_models['postnatal_check'] = LinearModel( + LinearModelType.MULTIPLICATIVE, + params['pnc_availability_probability']) + params['prob_timings_pnc'] = [params['pnc_availability_probability'], + 1 - params['pnc_availability_probability']] + params['prob_careseeking_for_complication_pn'] = params['pnc_availability_probability'] + pn_params['prob_care_seeking_postnatal_emergency'] = params['pnc_availability_probability'] + + nb_params['prob_pnc_check_newborn'] = params['pnc_availability_probability'] + nb_params['prob_timings_pnc_newborns'] 
= [params['pnc_availability_probability'], + 1 - params['pnc_availability_probability']] + nb_params['prob_care_seeking_for_complication'] = params['pnc_availability_probability'] + pn_params['prob_care_seeking_postnatal_emergency_neonate'] = params['pnc_availability_probability'] + + if params['sba_sens_analysis_max']: + params['odds_deliver_at_home'] = 0.0 + class LabourOnsetEvent(Event, IndividualScopeEventMixin): """ This is the LabourOnsetEvent. It is scheduled by the set_date_of_labour function for all women who are newly @@ -3236,74 +3306,8 @@ def __init__(self, module): super().__init__(module) def apply(self, population): - params = self.module.current_parameters - df = self.sim.population.props - mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info - mni_df = pd.DataFrame.from_dict(mni, orient='index') - pn_params = self.sim.modules['PostnatalSupervisor'].current_parameters - nb_params = self.sim.modules['NewbornOutcomes'].current_parameters - - # Check to see if analysis is being conducted when this event runs - if params['alternative_bemonc_availability'] or params['alternative_cemonc_availability'] or \ - params['alternative_pnc_coverage'] or params['alternative_pnc_quality'] or params['sba_sens_analysis_max'] \ - or params['pnc_sens_analysis_max'] or params['pnc_sens_analysis_min']: - - params['la_analysis_in_progress'] = True - - # If PNC analysis is being conducted we reset the intercept parameter of the equation determining care - # seeking for PNC and scale the model - if params['alternative_pnc_coverage']: - target = params['pnc_availability_odds'] - params['odds_will_attend_pnc'] = 1 - - women = df.loc[df.is_alive & (df.sex == 'F') & (df.age_years > 14) & (df.age_years < 50)] - mode_of_delivery = pd.Series(False, index=women.index) - delivery_setting = pd.Series(False, index=women.index) - - if 'mode_of_delivery' in mni_df.columns: - mode_of_delivery = pd.Series(mni_df['mode_of_delivery'], index=women.index) - if 
'delivery_setting' in mni_df.columns: - delivery_setting = pd.Series(mni_df['delivery_setting'], index=women.index) - - mean = self.module.la_linear_models['postnatal_check'].predict( - df.loc[df.is_alive & (df.sex == 'F') & (df.age_years > 14) & (df.age_years < 50)], - year=self.sim.date.year, - mode_of_delivery=mode_of_delivery, - delivery_setting=delivery_setting).mean() - - mean = mean / (1.0 - mean) - scaled_intercept = 1.0 * (target / mean) if (target != 0 and mean != 0 and not np.isnan(mean)) else 1.0 - - params['odds_will_attend_pnc'] = scaled_intercept - - # Then override the parameters which control neonatal care seeking - cov_prob = params['pnc_availability_odds'] / (params['pnc_availability_odds'] + 1) - params['prob_timings_pnc'] = [1.0, 0] - - nb_params['prob_pnc_check_newborn'] = cov_prob - nb_params['prob_timings_pnc_newborns'] = [1.0, 0] - - if params['alternative_pnc_quality']: - params['prob_intervention_delivered_anaemia_assessment_pnc'] = params['pnc_availability_probability'] - if params['pnc_sens_analysis_max'] or params['pnc_sens_analysis_min']: - - self.module.la_linear_models['postnatal_check'] = LinearModel( - LinearModelType.MULTIPLICATIVE, - params['pnc_availability_probability']) - params['prob_timings_pnc'] = [params['pnc_availability_probability'], - 1 - params['pnc_availability_probability']] - params['prob_careseeking_for_complication_pn'] = params['pnc_availability_probability'] - pn_params['prob_care_seeking_postnatal_emergency'] = params['pnc_availability_probability'] - - nb_params['prob_pnc_check_newborn'] = params['pnc_availability_probability'] - nb_params['prob_timings_pnc_newborns'] = [params['pnc_availability_probability'], - 1 - params['pnc_availability_probability']] - nb_params['prob_care_seeking_for_complication'] = params['pnc_availability_probability'] - pn_params['prob_care_seeking_postnatal_emergency_neonate'] = params['pnc_availability_probability'] - - if params['sba_sens_analysis_max']: - 
params['odds_deliver_at_home'] = 0.0 + self.module.update_labour_or_postnatal_coverage_for_analysis() class LabourLoggingEvent(RegularEvent, PopulationScopeEventMixin): diff --git a/src/tlo/methods/pregnancy_helper_functions.py b/src/tlo/methods/pregnancy_helper_functions.py index 76076a5dcf..01531fa9c4 100644 --- a/src/tlo/methods/pregnancy_helper_functions.py +++ b/src/tlo/methods/pregnancy_helper_functions.py @@ -101,6 +101,40 @@ def check_int_deliverable(self, int_name, hsi_event, p_params = self.sim.modules['PregnancySupervisor'].current_parameters l_params = self.sim.modules['Labour'].current_parameters + def check_int_can_run_no_analysis(): + # If analysis is not being conducted, intervention delivery is dependent on quality parameters, consumable + # availability and dx_test results + quality = False + consumables = False + test = False + + if ((q_param is None) or + all([self.rng.random_sample() < value for value in q_param])): + quality = True + + # todo: should this only be if qual and cons are also true? 
+ if equipment is not None: + hsi_event.add_equipment(equipment) + + if ((cons is None) or + (hsi_event.get_consumables(item_codes=cons if not None else [], + optional_item_codes=opt_cons if not None else []))): + consumables = True + + if cons is None and opt_cons is not None: + hsi_event.get_consumables(item_codes=[], optional_item_codes=opt_cons) + + if ((dx_test is None) or + (self.sim.modules['HealthSystem'].dx_manager.run_dx_test(dx_tests_to_run=dx_test, hsi_event=hsi_event))): + test = True + + if quality and consumables and test: + return True + + else: + return False + + # assert int_name in p_params['all_interventions'] # Firstly, we determine if an analysis is currently being conducted during which the probability of intervention @@ -160,43 +194,14 @@ def check_int_deliverable(self, int_name, hsi_event, if (hsi_event.TREATMENT_ID == k) and params[analysis_dict[k][0]]: if self.rng.random_sample() < params[analysis_dict[k][1]]: return True - else: return False - else: + return check_int_can_run_no_analysis() - # If analysis is not being conducted, intervention delivery is dependent on quality parameters, consumable - # availability and dx_test results - quality = False - consumables = False - test = False - - if ((q_param is None) or - all([self.rng.random_sample() < value for value in q_param])): - quality = True - - # todo: should this only be if qual and cons are also true? 
- if equipment is not None: - hsi_event.add_equipment(equipment) - - if ((cons is None) or - (hsi_event.get_consumables(item_codes=cons if not None else [], - optional_item_codes=opt_cons if not None else []))): - consumables = True - - if cons is None and opt_cons is not None: - hsi_event.get_consumables(item_codes=[], optional_item_codes=opt_cons) - - if ((dx_test is None) or - (self.sim.modules['HealthSystem'].dx_manager.run_dx_test(dx_tests_to_run=dx_test, hsi_event=hsi_event))): - test = True - - if quality and consumables and test: - return True + else: + return check_int_can_run_no_analysis() - else: - return False def scale_linear_model_at_initialisation(self, model, parameter_key): diff --git a/src/tlo/methods/pregnancy_supervisor.py b/src/tlo/methods/pregnancy_supervisor.py index 1be38175f7..867a4e26ac 100644 --- a/src/tlo/methods/pregnancy_supervisor.py +++ b/src/tlo/methods/pregnancy_supervisor.py @@ -1692,6 +1692,77 @@ def do_at_generic_first_appt_emergency( ) schedule_hsi_event(event, **scheduling_options) + def update_antenatal_care_coverage_for_analysis(self): + params = self.current_parameters + df = self.sim.population.props + + # Check if either of the analysis parameters are set to True + if params['alternative_anc_coverage'] or \ + params['alternative_anc_quality'] or \ + params['alternative_ip_anc_quality'] or \ + params['sens_analysis_max'] or \ + params['sens_analysis_min'] or \ + params['interventions_analysis']: + + # Update this parameter which is a signal used in the pregnancy_helper_function_file to ensure that + # alternative functionality for determining availability of interventions only occurs when analysis is + # occurring + params['ps_analysis_in_progress'] = True + + # When this parameter is set as True, the following parameters are overridden when the event is called. + # Otherwise no parameters are updated. 
+ if params['alternative_anc_coverage']: + # Reset the intercept parameter of the equation determining care seeking for ANC4+ and scale the model + target = params['anc_availability_odds'] + params['odds_early_init_anc4'] = 1 + mean = self.ps_linear_models['early_initiation_anc4'].predict( + df.loc[df.is_alive & (df.sex == 'F') & (df.age_years > 14) & (df.age_years < 50)], + year=self.sim.date.year).mean() + + mean = mean / (1.0 - mean) + scaled_intercept = 1.0 * (target / mean) if (target != 0 and mean != 0 and not np.isnan(mean)) else 1.0 + + # Update parameters that also control when women will initiate visits + params['odds_early_init_anc4'] = scaled_intercept + params['prob_anc1_months_2_to_4'] = [1.0, 0, 0] + params['prob_late_initiation_anc4'] = 0 + + # Finally, remove squeeze factor threshold for ANC attendance to ensure that higher levels of ANC + # coverage can be reached with current logic + self.sim.modules['CareOfWomenDuringPregnancy'].current_parameters['squeeze_factor_threshold_anc'] = \ + 10_000 + + if params['alternative_anc_quality'] or params['sens_analysis_max']: + + # Override the availability of IPTp consumables with the set level of coverage + if 'Malaria' in self.sim.modules: + iptp = self.sim.modules['Malaria'].item_codes_for_consumables_required['malaria_iptp'] + self.sim.modules['HealthSystem'].override_availability_of_consumables( + {iptp: params['anc_availability_probability']}) + + # And then override the quality parameters in the model + for parameter in ['prob_intervention_delivered_urine_ds', 'prob_intervention_delivered_bp', + 'prob_intervention_delivered_syph_test', 'prob_intervention_delivered_gdm_test']: + self.sim.modules['CareOfWomenDuringPregnancy'].current_parameters[parameter] = \ + params['anc_availability_probability'] + + if params['alternative_ip_anc_quality']: + self.sim.modules['CareOfWomenDuringPregnancy'].current_parameters['squeeze_factor_threshold_an'] = \ + 10_000 + + if params['sens_analysis_max']: + for 
parameter in ['prob_seek_anc5', 'prob_seek_anc6', 'prob_seek_anc7', 'prob_seek_anc8']: + self.sim.modules['CareOfWomenDuringPregnancy'].current_parameters[parameter] = 1.0 + + self.sim.modules['CareOfWomenDuringPregnancy'].current_parameters['squeeze_factor_threshold_anc'] = \ + 10_000 + + params['prob_seek_care_pregnancy_complication'] = 1.0 + self.sim.modules['CareOfWomenDuringPregnancy'].current_parameters['prob_adherent_ifa'] = 1.0 + + if params['sens_analysis_min']: + params['prob_seek_care_pregnancy_complication'] = 0.0 + class PregnancySupervisorEvent(RegularEvent, PopulationScopeEventMixin): """ This is the PregnancySupervisorEvent, it is a weekly event which has four primary functions. 1.) It updates the gestational age (in weeks) of all women who are pregnant @@ -2129,58 +2200,8 @@ def __init__(self, module): super().__init__(module) def apply(self, population): - params = self.module.current_parameters - df = self.sim.population.props - - # Check if either of the analysis parameters are set to True - if params['alternative_anc_coverage'] or \ - params['alternative_anc_quality'] or \ - params['alternative_ip_anc_quality'] or \ - params['sens_analysis_max'] or \ - params['sens_analysis_min'] or \ - params['interventions_analysis']: - - # Update this parameter which is a signal used in the pregnancy_helper_function_file to ensure that - # alternative functionality for determining availability of interventions only occurs when analysis is - # occurring - params['ps_analysis_in_progress'] = True - - # When this parameter is set as True, the following parameters are overridden when the event is called. - # Otherwise no parameters are updated. 
- if params['alternative_anc_coverage']: - - # Reset the intercept parameter of the equation determining care seeking for ANC4+ and scale the model - target = params['anc_availability_odds'] - params['odds_early_init_anc4'] = 1 - mean = self.module.ps_linear_models['early_initiation_anc4'].predict( - df.loc[df.is_alive & (df.sex == 'F') & (df.age_years > 14) & (df.age_years < 50)], - year=self.sim.date.year).mean() - mean = mean / (1.0 - mean) - scaled_intercept = 1.0 * (target / mean) if (target != 0 and mean != 0 and not np.isnan(mean)) else 1.0 - - # Update parameters that also control when women will initiate visits - params['odds_early_init_anc4'] = scaled_intercept - params['prob_anc1_months_2_to_4'] = [1.0, 0, 0] - params['prob_late_initiation_anc4'] = 0 - - if params['alternative_anc_quality'] or params['sens_analysis_max']: - - # Override the availability of IPTp consumables with the set level of coverage - if 'Malaria' in self.sim.modules: - iptp = self.sim.modules['Malaria'].item_codes_for_consumables_required['malaria_iptp'] - self.sim.modules['HealthSystem'].override_availability_of_consumables( - {iptp: params['anc_availability_probability']}) - - if params['sens_analysis_max']: - for parameter in ['prob_seek_anc5', 'prob_seek_anc6', 'prob_seek_anc7', 'prob_seek_anc8']: - self.sim.modules['CareOfWomenDuringPregnancy'].current_parameters[parameter] = 1.0 - - params['prob_seek_care_pregnancy_complication'] = 1.0 - self.sim.modules['CareOfWomenDuringPregnancy'].current_parameters['prob_adherent_ifa'] = 1.0 - - if params['sens_analysis_min']: - params['prob_seek_care_pregnancy_complication'] = 0.0 + self.module.update_antenatal_care_coverage_for_analysis() class PregnancyLoggingEvent(RegularEvent, PopulationScopeEventMixin): diff --git a/src/tlo/methods/service_integration.py b/src/tlo/methods/service_integration.py new file mode 100644 index 0000000000..0dc59ba073 --- /dev/null +++ b/src/tlo/methods/service_integration.py @@ -0,0 +1,307 @@ +from 
__future__ import annotations + +from typing import TYPE_CHECKING, List, Optional +from pathlib import Path + +import numpy as np +import pandas as pd + +from tlo import DAYS_IN_YEAR, Date, DateOffset, Module, Parameter, Property, Types, logging +from tlo.events import Event, IndividualScopeEventMixin, PopulationScopeEventMixin, RegularEvent +from tlo.methods import Metadata +from tlo.methods.hsi_generic_first_appts import GenericFirstAppointmentsMixin +from tlo.util import read_csv_files +from tlo.lm import LinearModel, LinearModelType + +from tlo.methods.labour import LabourAndPostnatalCareAnalysisEvent +from tlo.methods.contraception import StartInterventions + +if TYPE_CHECKING: + from tlo.methods.hsi_generic_first_appts import HSIEventScheduler + +logger = logging.getLogger(__name__) +logger.setLevel(logging.DEBUG) + + +class ServiceIntegration(Module, GenericFirstAppointmentsMixin): + """ + """ + + # Declare modules that need to be registered in simulation and initialised before + # this module + INIT_DEPENDENCIES = {'Demography'} + + + # Declare Metadata + METADATA = { + Metadata.DISEASE_MODULE, + Metadata.USES_SYMPTOMMANAGER, + Metadata.USES_HEALTHSYSTEM, + Metadata.USES_HEALTHBURDEN + } + + # Declare Causes of Death + CAUSES_OF_DEATH = {} + + # Declare Causes of Disability + CAUSES_OF_DISABILITY = { + } + + PARAMETERS = { + 'integration_year': Parameter(Types.INT, 'year on which parameters are overwritten for integration ' + 'modelling'), + 'serv_integration': + Parameter(Types.STRING, + 'name of the integration scenario to be enacted in a given run'), + } + + PROPERTIES = { + } + + def __init__(self, name=None, resourcefilepath=None): + # NB. Parameters passed to the module can be inserted in the __init__ definition. 
+ + super().__init__(name) + self.resourcefilepath = resourcefilepath + + self.accepted_scenarios = ['htn', 'htn_max', 'dm', 'dm_max', 'hiv', 'hiv_max', 'tb', 'tb_max', + 'mal', 'mal_max', 'fp_scr', 'fp_scr_max', 'anc', 'anc_max', 'pnc', + 'pnc_max', 'fp_pn', 'fp_pn_max', 'epi', 'chronic_care', + 'chronic_care_max', 'all_screening', 'all_screening_max', + 'all_mch', 'all_mch_max', 'all_int', 'all_int_max'] + + def read_parameters(self, resourcefilepath: Optional[Path] = None): + parameter_dataframe = read_csv_files(resourcefilepath / 'service integration', + files='parameter_values') + self.load_parameters_from_dataframe(parameter_dataframe) + + def initialise_population(self, population): + """Set our property values for the initial population. + + This method is called by the simulation when creating the initial population, and is + responsible for assigning initial values, for every individual, of those properties + 'owned' by this module, i.e. those declared in the PROPERTIES dictionary above. + + :param population: the population of individuals + """ + + pass + + def initialise_simulation(self, sim): + + """Get ready for simulation start. + + This method is called just before the main simulation loop begins, and after all + modules have read their parameters and the initial population has been created. + It is a good place to add initial events to the event queue. + """ + + params = self.parameters + + event = ServiceIntegrationParameterUpdateEvent(self) + sim.schedule_event(event, Date(params['integration_year'], 1, 1)) + + def on_birth(self, mother_id, child_id): + """Initialise our properties for a newborn individual. + + This is called by the simulation whenever a new person is born. + + :param mother_id: the ID for the mother for this child + :param child_id: the ID for the new child + """ + + pass + + def on_hsi_alert(self, person_id, treatment_id): + """ + This is called whenever there is an HSI event commissioned by one of the other disease modules. 
+ """ + + pass + + def report_daly_values(self): + # This must send back a pd.Series or pd.DataFrame that reports on the average daly-weights that have been + # experienced by persons in the previous month. Only rows for alive-persons must be returned. + # The names of the series of columns is taken to be the label of the cause of this disability. + # It will be recorded by the healthburden module as _. + + pass + + +class ServiceIntegrationParameterUpdateEvent(Event, PopulationScopeEventMixin): + def __init__(self, module): + super().__init__(module) + assert isinstance(module, ServiceIntegration) + + def apply(self, population): + params = self.module.parameters + treat_ids_to_override = [] + + # TODO: make this a class of the health system module instead of its own module that needs to be registered? + + logger.info(key='event_runs', data='ServiceIntegrationParameterUpdateEvent is running') + + if params['serv_integration'] == 'no_integration': + logger.info(key='event_cancelled', data='ServiceIntegrationParameterUpdateEvent did not run') + return + else: + assert params['serv_integration'] in self.module.accepted_scenarios + + def update_cons_override_treatment_ids(treatment_ids): + for treatment_id in treatment_ids: + if treatment_id not in treat_ids_to_override: + treat_ids_to_override.append(treatment_id) + + # ---------------------------------------------- SCREENING --------------------------------------------------- + if params['serv_integration'].startswith(("htn", "all_screening", "all_int")): + # Probability of screening when presenting to any generic first appointment set to 100% + self.sim.modules['CardioMetabolicDisorders'].parameters[ + 'hypertension_hsi']['pr_assessed_other_symptoms'] = 1.0 + + # Annual community screening in over 50s increased to 100% + self.sim.modules['CardioMetabolicDisorders'].lms_testing['hypertension'] = \ + LinearModel(LinearModelType.MULTIPLICATIVE, 1.0) + + # Now ensure consumables are always available for the relevant 
treatment ids + if params['serv_integration'].endswith('_max'): + update_cons_override_treatment_ids([ + 'CardioMetabolicDisorders_Prevention_CommunityTestingForHypertension', + 'CardioMetabolicDisorders_Investigation_hypertension', + 'CardioMetabolicDisorders_Investigation_hypertension_and_diabetes', + 'CardioMetabolicDisorders_Prevention_WeightLoss_hypertension', + 'CardioMetabolicDisorders_Treatment_hypertension']) + + if params['serv_integration'].startswith(("dm", "all_screening", "all_int")): + # Probability of screening when presenting to any generic first appointment and not sympotmatic set to 100% + self.sim.modules['CardioMetabolicDisorders'].parameters['diabetes_hsi'][ + 'pr_assessed_other_symptoms'] = 1.0 + + if params['serv_integration'].endswith('_max'): + update_cons_override_treatment_ids([ + 'CardioMetabolicDisorders_Investigation_diabetes', + 'CardioMetabolicDisorders_Investigation_hypertension_and_diabetes', + 'CardioMetabolicDisorders_Prevention_WeightLoss_diabetes', + 'CardioMetabolicDisorders_Treatment_diabetes']) + + if params['serv_integration'].startswith(("fp_scr", "all_screening", "all_int")): + # Here we use the in-built functionality of the contraception model to increase the coverage of modern + # methods of contraception. When 'fp' is listed in params['serv_int_screening'] the probability of + # initiation in the general female population is increased. 
See updates to contraception.py + + self.sim.modules['Contraception'].update_params_for_interventions(initiation=True, + after_birth=False) + + if params['serv_integration'].endswith('_max'): + update_cons_override_treatment_ids(['Contraception_Routine']) + + if params['serv_integration'].startswith(("mal", "all_screening", "all_int", "all_mch")): + + self.sim.modules['Stunting'].parameters['prob_stunting_diagnosed_at_generic_appt'] = 1.0 + + if params['serv_integration'].endswith('_max'): + update_cons_override_treatment_ids(['Undernutrition_Feeding']) + + if params['serv_integration'].startswith(("hiv", "all_screening", "all_int")): + # annual testing rate used in HIV scale-up scenarios, default average (2010-2020) is 0.25 + self.sim.modules['Hiv'].parameters["hiv_testing_rates"]["annual_testing_rate_adults"] = 0.4 + # update exising linear models to use new scaled-up parameters + + if params['serv_integration'].endswith('_max'): + update_cons_override_treatment_ids([ + 'Hiv_Test', 'Hiv_Treatment']) + + if params['serv_integration'].startswith(("tb", "all_screening", "all_int")): + + # increase treatment coverage rate used to infer rate testing for active tb, default is 0.75 + self.sim.modules['Tb'].parameters["rate_testing_active_tb"]["treatment_coverage"] = 90 + + if params['serv_integration'].endswith('_max'): + update_cons_override_treatment_ids( + ['Tb_Test_Screening', + 'Tb_Test_Clinical', + 'Tb_Test_Culture', + 'Tb_Test_Xray', + 'Tb_Treatment', + 'Tb_Test_FollowUp' + ]) + + # ------------------------------------ MATERNAL AND CHILD HEALTH CLINIC --------------------------------------- + if params['serv_integration'].startswith(("anc", "all_mch", "all_int")): + self.sim.modules['PregnancySupervisor'].current_parameters['alternative_anc_coverage'] = True + self.sim.modules['PregnancySupervisor'].current_parameters['anc_availability_odds'] = 9.0 + self.sim.modules['PregnancySupervisor'].update_antenatal_care_coverage_for_analysis() + + if 
params['serv_integration'].endswith('_max'): + update_cons_override_treatment_ids(['AntenatalCare_Outpatient', 'AntenatalCare_FollowUp']) + + + if params['serv_integration'].startswith(("pnc", "all_mch", "all_int")): + self.sim.modules['Labour'].current_parameters['alternative_pnc_coverage'] = True + self.sim.modules['Labour'].current_parameters['pnc_availability_odds'] = 15.0 + self.sim.modules['Labour'].update_labour_or_postnatal_coverage_for_analysis() + + if params['serv_integration'].endswith('_max'): + update_cons_override_treatment_ids(['PostnatalCare_Neonatal', 'PostnatalCare_Maternal']) + + if params['serv_integration'].startswith(("fp_pn", "all_mch", "all_int")): + # Here we use the in-built functionality of the contraception model to increase the coverage of modern + # methods of contraception. When 'fp' is listed in params['serv_int_mch'] the probability of + # initiation following birth is increased. See updates to contraception.py + + self.sim.modules['Contraception'].update_params_for_interventions(initiation=False, + after_birth=True) + if params['serv_integration'].endswith('_max'): + update_cons_override_treatment_ids(['Contraception_Routine_Postnatal']) + + # no parameter governing prob of receiving vaccine + # child's prob of vax entirely dependent on vaccine being available (cons required) + # can manipulate this to induce 100% coverage rate - will need to look up the vaccines required for each + if params['serv_integration'].startswith(("epi", "all_mch", "all_int")): + update_cons_override_treatment_ids(['Epi_Childhood_Bcg', + 'Epi_Childhood_Opv', + 'Epi_Childhood_DtpHibHep', + 'Epi_Childhood_Rota', + 'Epi_Childhood_Pneumo', + 'Epi_Childhood_MeaslesRubella', + 'Epi_Pregnancy_Td' + ]) + + # ------------------------------------- CHRONIC CARE CLINIC --------------------------------------------------- + if params['serv_integration'].startswith(("chronic_care", "all_int")): + + self.sim.modules['Hiv'].parameters['virally_suppressed_on_art'] = 1.0 + 
self.sim.modules['Tb'].parameters['tb_prob_tx_success_ds'] = 0.9 + self.sim.modules['Tb'].parameters['tb_prob_tx_success_mdr'] = 0.9 + self.sim.modules['Epilepsy'].parameters[ + 'prob_start_anti_epilep_when_seizures_detected_in_generic_first_appt'] = 1.0 + self.sim.modules['Depression'].parameters['pr_assessed_for_depression_in_generic_appt_level1'] = 1.0 + + # commented out because tx_success higher than 0.9 already in these groups + # self.sim.modules['Tb'].parameters['tb_prob_tx_success_0_4'] = 0.9 + # self.sim.modules['Tb'].parameters['tb_prob_tx_success_5_14'] = 0.9 + + if params['serv_integration'].endswith('_max'): + update_cons_override_treatment_ids( + ['CardioMetabolicDisorders_Investigation_diabetes', + 'CardioMetabolicDisorders_Investigation_hypertension', + 'CardioMetabolicDisorders_Investigation_hypertension_and_diabetes', + 'CardioMetabolicDisorders_Prevention_WeightLoss_diabetes', + 'CardioMetabolicDisorders_Prevention_WeightLoss_hypertension', + 'CardioMetabolicDisorders_Treatment_hypertension', + 'CardioMetabolicDisorders_Treatment_diabetes', + 'Hiv_Test', + 'Hiv_Treatment', + 'Tb_Test_Screening', + 'Tb_Test_Clinical', + 'Tb_Test_Culture', + 'Tb_Test_Xray', + 'Tb_Treatment', + 'Tb_Test_FollowUp', + 'Depression_TalkingTherapy', + 'Depression_Treatment', + 'Epilepsy_Treatment_Start', + 'Epilepsy_Treatment_Followup']) + + self.sim.modules['HealthSystem'].set_availability_for_treatment_ids( + treatment_ids=treat_ids_to_override, + availability=1.0) diff --git a/tests/test_consumables.py b/tests/test_consumables.py index 92040b6578..b2498cb0c6 100644 --- a/tests/test_consumables.py +++ b/tests/test_consumables.py @@ -315,7 +315,6 @@ def test_items_used_includes_only_available_items(seed, p_known_items, expected_ items_used = getattr(cons._summary_counter, '_items', {}).get('Used') assert items_used == expected_items_used, f"Expected items_used to be {expected_items_used}, but got {items_used}" - def get_sim_with_dummy_module_registered(tmpdir=None, 
run=True, data=None): """Return an initialised simulation object with a Dummy Module registered. If the `data` argument is provided, the parameter in HealthSystem that holds the data on consumables availability is over-written.""" @@ -466,6 +465,24 @@ def test_use_get_consumables_by_hsi_method_get_consumables(): return_individual_results=True ) + # Check that providing a treatment id within the following health system parameter sets treatment availability to + # 100% + sim.modules['HealthSystem'].parameters['override_treatment_ids_avail'] = 0.5 + sim.modules['HealthSystem'].set_availability_for_treatment_ids( + treatment_ids=[hsi_event.TREATMENT_ID], + availability=1.0) + + assert True is hsi_event.get_consumables(item_codes=item_code_not_available[0]) + assert hsi_event.TREATMENT_ID in sim.modules['HealthSystem'].parameters['cons_override_treatment_ids'] + assert sim.modules['HealthSystem'].parameters['override_treatment_ids_avail'] == 1.0 + + # check that when the parameter is blank that availability is not overridden + sim.modules['HealthSystem'].set_availability_for_treatment_ids( + treatment_ids=[]) + + assert False is hsi_event.get_consumables(item_codes=item_code_not_available[0]) + assert not sim.modules['HealthSystem'].parameters['cons_override_treatment_ids'] + def test_outputs_to_log(tmpdir): """Check that logging from Consumables is as expected.""" diff --git a/tests/test_contraception.py b/tests/test_contraception.py index 37a2ab3fee..f9cd6364c3 100644 --- a/tests/test_contraception.py +++ b/tests/test_contraception.py @@ -438,7 +438,8 @@ def get_appt_footprints(switch_from, switch_to, consumables_available) -> List[s hsi_event = HSI_Contraception_FamilyPlanningAppt( module=sim.modules['Contraception'], person_id=person_id, - new_contraceptive=switch_to + new_contraceptive=switch_to, + on_birth=False, ) sim.modules['HealthSystem'].schedule_hsi_event(hsi_event=hsi_event, topen=sim.start_date, priority=0) diff --git a/tests/test_dxmanager.py 
b/tests/test_dxmanager.py index 507c9eec85..b2b0451102 100644 --- a/tests/test_dxmanager.py +++ b/tests/test_dxmanager.py @@ -89,7 +89,9 @@ def apply(self, person_id, squeeze_factor): facility_ids=[0], months=[sim.date.month]), rng=sim.modules['HealthSystem'].rng, - availability='default' + availability='default', + treatment_ids_overridden = [], + treatment_ids_overridden_avail = 1.0 ) sim.modules['HealthSystem'].consumables.on_start_of_day(sim.date) diff --git a/tests/test_healthsystem.py b/tests/test_healthsystem.py index 62c6970196..913d6bd767 100644 --- a/tests/test_healthsystem.py +++ b/tests/test_healthsystem.py @@ -914,7 +914,9 @@ def apply(self, person_id, squeeze_factor): months=list(range(1, 13)), facility_ids=list(all_fac_ids)), rng=sim.modules['HealthSystem'].rng, - availability='default' + availability='default', + treatment_ids_overridden=[], + treatment_ids_overridden_avail=1.0 ) sim.simulate(end_date=start_date + pd.DateOffset(years=2)) diff --git a/tests/test_service_integration.py b/tests/test_service_integration.py new file mode 100644 index 0000000000..03d4c77fd7 --- /dev/null +++ b/tests/test_service_integration.py @@ -0,0 +1,443 @@ +import os + +import pandas as pd + +from pathlib import Path + +from tlo import Date, Simulation, logging +from tlo.methods import service_integration +from tlo.methods.fullmodel import fullmodel +from tlo.analysis.utils import parse_log_file + +# The resource files +try: + resourcefilepath = Path(os.path.dirname(__file__)) / '../resources' +except NameError: + # running interactively + resourcefilepath = Path('./resources') + +start_date = Date(2010, 1, 1) + + +def register_modules(sim): + """Defines sim variable and registers all modules that can be called when running the full suite of pregnancy + modules""" + + sim.register(*fullmodel(), + service_integration.ServiceIntegration()) + +def check_cons_processed_params_have_been_overridden(initial_p, updated_p, data): + mod_cons = ['pill', 'IUD', 'injections', 
'implant', 'male_condom', 'female_sterilization', + 'other_modern'] # update with your actual column names + + if data == 'series': + # Restrict to the relevant subset + s1_sub = initial_p.loc[mod_cons] + s2_sub = updated_p.loc[mod_cons] + + # Mask: where s1 > 0 + mask = s1_sub > 0 + + # Assert: in these positions, s2 > s1 + condition_ok = (s2_sub[mask] > s1_sub[mask]).all() + assert condition_ok, "In some positions where series1 > 0, series2 is not greater" + + else: + for key in initial_p: + df1 = initial_p[key] + df2 = updated_p[key] + # Ensure required columns are present + for col in mod_cons: + assert col in df1.columns and col in df2.columns, f"Column '{col}' missing in DataFrame '{key}'" + + # Extract relevant columns + df1_sub = df1[mod_cons] + df2_sub = df2[mod_cons] + + # Create mask where df1 > 0 + mask = df1_sub > 0 + + # Check df2 > df1 where mask is True + diff_check = df2_sub > df1_sub + + # Assert condition holds for all cells where df1 > 0 + condition_ok = diff_check[mask].all().all() + assert condition_ok, f"df2 is not greater than df1 in some cells of '{key}' where df1 > 0" + # Ensure shapes match + assert df1.shape == df2.shape, f"Shape mismatch in DataFrame '{key}'" + + +def test_parameter_update_event_runs_and_cancels_as_expected(tmpdir, seed): + """Test that when no scenarios are stored as parameters of the service integration module the event runs and then is + cancelled""" + sim = Simulation(start_date=start_date, seed=seed, log_config={"filename": "log", "custom_levels":{ + "*": logging.DEBUG},"directory": tmpdir}, resourcefilepath=resourcefilepath) + register_modules(sim) + sim.make_initial_population(n=50) + + # Set parameter update event to run before end of sim + sim.modules['ServiceIntegration'].parameters['integration_year'] = 2010 + sim.simulate(end_date=Date(2010, 1, 2)) + + # Because switches are unchanged check logging occurred as expected + output= parse_log_file(sim.log_filepath) + assert 'event_runs' in 
output['tlo.methods.service_integration'] + assert 'event_cancelled' in output['tlo.methods.service_integration'] + +def test_correct_treatment_ids_are_provided_to_hs_to_override_consumables(tmpdir, seed): + """Test that TREATMENT_IDs are correctly passed to the health system AND consumables class meaning that + consumable availability for these HSIs is overridden""" + sim = Simulation(start_date=start_date, seed=seed, log_config={"filename": "log", "custom_levels":{ + "*": logging.DEBUG},"directory": tmpdir}, resourcefilepath=resourcefilepath) + register_modules(sim) + sim.make_initial_population(n=50) + sim.simulate(end_date=Date(2010, 1, 2)) + + # Define the update event + serv_int_event = service_integration.ServiceIntegrationParameterUpdateEvent(module=sim.modules['ServiceIntegration']) + + # for each scenario in which cons availability will be overridden, check correct list of TREATMENT_IDs is passed + # to the health system + for scenario, treatment_ids in zip(['htn_max', + 'dm_max', + 'hiv_max', + 'tb_max', + 'fp_scr_max', + 'mal_max', + 'anc_max', + 'pnc_max', + 'fp_pn_max', + 'epi', + 'chronic_care_max', + 'all_screening_max', + 'all_mch_max', + 'all_int_max'], + + [['CardioMetabolicDisorders_Prevention_CommunityTestingForHypertension', + 'CardioMetabolicDisorders_Investigation_hypertension', + 'CardioMetabolicDisorders_Investigation_hypertension_and_diabetes', + 'CardioMetabolicDisorders_Prevention_WeightLoss_hypertension', + 'CardioMetabolicDisorders_Treatment_hypertension'], + + ['CardioMetabolicDisorders_Investigation_diabetes', + 'CardioMetabolicDisorders_Investigation_hypertension_and_diabetes', + 'CardioMetabolicDisorders_Prevention_WeightLoss_diabetes', + 'CardioMetabolicDisorders_Treatment_diabetes'], + + ['Hiv_Test', 'Hiv_Treatment'], + + ['Tb_Test_Screening', + 'Tb_Test_Clinical', + 'Tb_Test_Culture', + 'Tb_Test_Xray', + 'Tb_Treatment', + 'Tb_Test_FollowUp' + ], + + ['Contraception_Routine'], + + ['Undernutrition_Feeding'], + + 
['AntenatalCare_Outpatient', + 'AntenatalCare_FollowUp'], + + ['PostnatalCare_Neonatal', + 'PostnatalCare_Maternal'], + + ['Contraception_Routine_Postnatal'], + + ['Epi_Childhood_Bcg', + 'Epi_Childhood_Opv', + 'Epi_Childhood_DtpHibHep', + 'Epi_Childhood_Rota', + 'Epi_Childhood_Pneumo', + 'Epi_Childhood_MeaslesRubella', + 'Epi_Pregnancy_Td' + ], + + ['CardioMetabolicDisorders_Investigation_diabetes', + 'CardioMetabolicDisorders_Investigation_hypertension', + 'CardioMetabolicDisorders_Investigation_hypertension_and_diabetes', + 'CardioMetabolicDisorders_Prevention_WeightLoss_diabetes', + 'CardioMetabolicDisorders_Prevention_WeightLoss_hypertension', + 'CardioMetabolicDisorders_Treatment_hypertension', + 'CardioMetabolicDisorders_Treatment_diabetes', + 'Hiv_Test', + 'Hiv_Treatment', + 'Tb_Test_Screening', + 'Tb_Test_Clinical', + 'Tb_Test_Culture', + 'Tb_Test_Xray', + 'Tb_Treatment', + 'Tb_Test_FollowUp', + 'Depression_TalkingTherapy', + 'Depression_Treatment', + 'Epilepsy_Treatment_Start', + 'Epilepsy_Treatment_Followup'], + + ['CardioMetabolicDisorders_Prevention_CommunityTestingForHypertension', + 'CardioMetabolicDisorders_Investigation_hypertension', + 'CardioMetabolicDisorders_Investigation_hypertension_and_diabetes', + 'CardioMetabolicDisorders_Prevention_WeightLoss_hypertension', + 'CardioMetabolicDisorders_Treatment_hypertension', + 'CardioMetabolicDisorders_Investigation_diabetes', + 'CardioMetabolicDisorders_Prevention_WeightLoss_diabetes', + 'CardioMetabolicDisorders_Treatment_diabetes', + 'Contraception_Routine', + 'Undernutrition_Feeding', + 'Hiv_Test', + 'Hiv_Treatment', + 'Tb_Test_Screening', + 'Tb_Test_Clinical', + 'Tb_Test_Culture', + 'Tb_Test_Xray', + 'Tb_Treatment', + 'Tb_Test_FollowUp' + ], + + ['Undernutrition_Feeding', + 'AntenatalCare_Outpatient', + 'AntenatalCare_FollowUp', + 'PostnatalCare_Neonatal', + 'PostnatalCare_Maternal', + 'Contraception_Routine_Postnatal', + 'Epi_Childhood_Bcg', + 'Epi_Childhood_Opv', + 'Epi_Childhood_DtpHibHep', + 
'Epi_Childhood_Rota', + 'Epi_Childhood_Pneumo', + 'Epi_Childhood_MeaslesRubella', + 'Epi_Pregnancy_Td' + ], + + ['CardioMetabolicDisorders_Prevention_CommunityTestingForHypertension', + 'CardioMetabolicDisorders_Investigation_hypertension', + 'CardioMetabolicDisorders_Investigation_hypertension_and_diabetes', + 'CardioMetabolicDisorders_Prevention_WeightLoss_hypertension', + 'CardioMetabolicDisorders_Treatment_hypertension', + 'CardioMetabolicDisorders_Investigation_diabetes', + 'CardioMetabolicDisorders_Prevention_WeightLoss_diabetes', + 'CardioMetabolicDisorders_Treatment_diabetes', + 'Contraception_Routine', + 'Undernutrition_Feeding', + 'Hiv_Test', + 'Hiv_Treatment', + 'Tb_Test_Screening', + 'Tb_Test_Clinical', + 'Tb_Test_Culture', + 'Tb_Test_Xray', + 'Tb_Treatment', + 'Tb_Test_FollowUp', + 'AntenatalCare_Outpatient', + 'AntenatalCare_FollowUp', + 'PostnatalCare_Neonatal', + 'PostnatalCare_Maternal', + 'Contraception_Routine_Postnatal', + 'Epi_Childhood_Bcg', + 'Epi_Childhood_Opv', + 'Epi_Childhood_DtpHibHep', + 'Epi_Childhood_Rota', + 'Epi_Childhood_Pneumo', + 'Epi_Childhood_MeaslesRubella', + 'Epi_Pregnancy_Td', + 'Depression_TalkingTherapy', + 'Depression_Treatment', + 'Epilepsy_Treatment_Start', + 'Epilepsy_Treatment_Followup']]): + + sim.modules['ServiceIntegration'].parameters['serv_integration'] = scenario + serv_int_event.apply(sim.population.props) + + assert sim.modules['HealthSystem'].parameters['cons_override_treatment_ids'] == treatment_ids + assert sim.modules['HealthSystem'].consumables._treatment_ids_overridden == treatment_ids + + sim.modules['HealthSystem'].set_availability_for_treatment_ids( + treatment_ids=[], + availability=1.0) + + +def test_parameter_update_event_runs_as_expected_when_updates_required_screening_parameters(tmpdir, seed): + sim = Simulation(start_date=start_date, seed=seed, log_config={"filename": "log", "custom_levels": { + "*": logging.DEBUG}, "directory": tmpdir}, resourcefilepath=resourcefilepath) + 
register_modules(sim) + sim.make_initial_population(n=50) + + # Set parameter update event to run before end of sim + + sim.modules['ServiceIntegration'].parameters['serv_integration'] = 'all_screening' + sim.modules['ServiceIntegration'].parameters['integration_year'] = 2010 + + cons_params_init = sim.modules['Contraception'].processed_params['p_start_per_month'] + + sim.simulate(end_date=Date(2010, 1, 2)) + + output = parse_log_file(sim.log_filepath) + assert 'event_runs' in output['tlo.methods.service_integration'] + assert 'event_cancelled' not in output['tlo.methods.service_integration'] + + assert sim.modules['CardioMetabolicDisorders'].parameters['hypertension_hsi']['pr_assessed_other_symptoms'] == 1.0 + assert sim.modules['CardioMetabolicDisorders'].parameters['diabetes_hsi']['pr_assessed_other_symptoms'] == 1.0 + assert sim.modules['Stunting'].parameters['prob_stunting_diagnosed_at_generic_appt'] == 1.0 + + htn_test_lm = sim.modules['CardioMetabolicDisorders'].lms_testing['hypertension'] + assert htn_test_lm.intercept == 1.0 + assert not htn_test_lm.predictors + + cons_params_init_update = sim.modules['Contraception'].processed_params['p_start_per_month'] + check_cons_processed_params_have_been_overridden(cons_params_init, cons_params_init_update, 'dict') + + assert (sim.modules['Hiv'].parameters["hiv_testing_rates"]["annual_testing_rate_adults"] == 0.4).all() + assert (sim.modules['Tb'].parameters["rate_testing_active_tb"]["treatment_coverage"] == 90).all() + + +def test_parameter_update_event_runs_as_expected_when_updates_required_mch(tmpdir, seed): + sim = Simulation(start_date=start_date, seed=seed, log_config={"filename": "log", "custom_levels": { + "*": logging.DEBUG}, "directory": tmpdir}, resourcefilepath=resourcefilepath) + register_modules(sim) + sim.make_initial_population(n=50) + + # Set parameter update event to run before end of sim + sim.modules['ServiceIntegration'].parameters['serv_integration'] = 'all_mch' + 
sim.modules['ServiceIntegration'].parameters['integration_year'] = 2010 + + cons_p_params_b1 = sim.modules['Contraception'].processed_params['p_start_after_birth_below30'] + cons_p_params_b2 = sim.modules['Contraception'].processed_params['p_start_after_birth_30plus'] + + + sim.simulate(end_date=Date(2010, 1, 2)) + + output = parse_log_file(sim.log_filepath) + assert 'event_runs' in output['tlo.methods.service_integration'] + assert 'event_cancelled' not in output['tlo.methods.service_integration'] + + assert sim.modules['PregnancySupervisor'].current_parameters['alternative_anc_coverage'] + assert sim.modules['PregnancySupervisor'].current_parameters['anc_availability_odds'] == 9.0 + assert sim.modules['PregnancySupervisor'].current_parameters['ps_analysis_in_progress'] + assert (sim.modules['PregnancySupervisor'].current_parameters['prob_anc1_months_2_to_4'] == + [1.0, 0, 0]) + assert (sim.modules['PregnancySupervisor'].current_parameters['prob_late_initiation_anc4'] == + 0) + + assert sim.modules['Labour'].current_parameters['alternative_pnc_coverage'] + assert sim.modules['Labour'].current_parameters['pnc_availability_odds'] == 15.0 + assert sim.modules['Labour'].current_parameters['la_analysis_in_progress'] + cov_prob = sim.modules['Labour'].current_parameters['pnc_availability_odds'] / (sim.modules['Labour'].current_parameters['pnc_availability_odds'] + 1) + + assert sim.modules['Labour'].current_parameters['prob_timings_pnc'] == [1.0, 0] + assert sim.modules['NewbornOutcomes'].current_parameters['prob_pnc_check_newborn'] == cov_prob + assert sim.modules['NewbornOutcomes'].current_parameters['prob_timings_pnc_newborns'] == [1.0, 0] + + assert sim.modules['Stunting'].parameters['prob_stunting_diagnosed_at_generic_appt'] == 1.0 + + cons_params_b1_update = sim.modules['Contraception'].processed_params['p_start_after_birth_below30'] + cons_p_params_b2_update = sim.modules['Contraception'].processed_params['p_start_after_birth_30plus'] + 
check_cons_processed_params_have_been_overridden(cons_p_params_b1, cons_params_b1_update, 'series') + check_cons_processed_params_have_been_overridden(cons_p_params_b2, cons_p_params_b2_update, 'series') + +def test_parameter_update_event_runs_as_expected_when_updates_required_chronic(tmpdir, seed): + sim = Simulation(start_date=start_date, seed=seed, log_config={"filename": "log", "custom_levels": { + "*": logging.DEBUG}, "directory": tmpdir}, resourcefilepath=resourcefilepath) + register_modules(sim) + sim.make_initial_population(n=50) + + # Set parameter update event to run before end of sim + + # sim.modules['ServiceIntegration'].parameters['serv_int_chronic'] = True + sim.modules['ServiceIntegration'].parameters['serv_integration'] = 'chronic_care' + sim.modules['ServiceIntegration'].parameters['integration_year'] = 2010 + sim.simulate(end_date=Date(2010, 1, 2)) + + output = parse_log_file(sim.log_filepath) + assert 'event_runs' in output['tlo.methods.service_integration'] + assert 'event_cancelled' not in output['tlo.methods.service_integration'] + + assert sim.modules['Hiv'].parameters['virally_suppressed_on_art'] == 1.0 + assert sim.modules['Tb'].parameters['tb_prob_tx_success_ds'] == 0.9 + assert sim.modules['Tb'].parameters['tb_prob_tx_success_mdr'] == 0.9 + assert sim.modules['Epilepsy'].parameters['prob_start_anti_epilep_when_seizures_detected_in_generic_first_appt'] == 1.0 + assert sim.modules['Depression'].parameters['pr_assessed_for_depression_in_generic_appt_level1'] == 1.0 + +def test_cons_params_all_updated_with_all_integration_scenario(tmpdir, seed): + sim = Simulation(start_date=start_date, seed=seed, log_config={"filename": "log", "custom_levels": { + "*": logging.DEBUG}, "directory": tmpdir}, resourcefilepath=resourcefilepath) + register_modules(sim) + sim.make_initial_population(n=50) + + cons_params_init = sim.modules['Contraception'].processed_params['p_start_per_month'] + cons_p_params_b1 = 
sim.modules['Contraception'].processed_params['p_start_after_birth_below30'] + cons_p_params_b2 = sim.modules['Contraception'].processed_params['p_start_after_birth_30plus'] + + sim.modules['ServiceIntegration'].parameters['serv_integration'] = 'all_int' + sim.modules['ServiceIntegration'].parameters['integration_year'] = 2010 + sim.simulate(end_date=Date(2010, 1, 2)) + + output = parse_log_file(sim.log_filepath) + assert 'event_runs' in output['tlo.methods.service_integration'] + assert 'event_cancelled' not in output['tlo.methods.service_integration'] + + cons_params_init_update = sim.modules['Contraception'].processed_params['p_start_per_month'] + cons_params_b1_update = sim.modules['Contraception'].processed_params['p_start_after_birth_below30'] + cons_p_params_b2_update = sim.modules['Contraception'].processed_params['p_start_after_birth_30plus'] + + check_cons_processed_params_have_been_overridden(cons_params_init, cons_params_init_update, 'dict') + check_cons_processed_params_have_been_overridden(cons_p_params_b1, cons_params_b1_update,'series') + check_cons_processed_params_have_been_overridden(cons_p_params_b2, cons_p_params_b2_update, 'series') + + +def test_long_run_screening_integration(tmpdir, seed): + sim = Simulation(start_date=start_date, seed=seed, log_config={"filename": "log", "custom_levels": { + "*": logging.DEBUG}, "directory": tmpdir}, resourcefilepath=resourcefilepath) + register_modules(sim) + sim.make_initial_population(n=1000) + + # Set parameter update event to run before end of sim + sim.modules['ServiceIntegration'].parameters['serv_integration'] = 'all_screening' + sim.modules['ServiceIntegration'].parameters['integration_year'] = 2010 + sim.simulate(end_date=Date(2015, 1, 1)) + + output = parse_log_file(sim.log_filepath) + assert 'event_runs' in output['tlo.methods.service_integration'] + assert 'event_cancelled' not in output['tlo.methods.service_integration'] + + +def test_long_run_mch_integration(tmpdir, seed): + sim = 
Simulation(start_date=start_date, seed=seed, log_config={"filename": "log", "custom_levels": { + "*": logging.DEBUG}, "directory": tmpdir}, resourcefilepath=resourcefilepath) + register_modules(sim) + sim.make_initial_population(n=1000) + + sim.modules['ServiceIntegration'].parameters['serv_integration'] = 'all_mch' + sim.modules['ServiceIntegration'].parameters['integration_year'] = 2010 + sim.simulate(end_date=Date(2015, 1, 1)) + + output = parse_log_file(sim.log_filepath) + assert 'event_runs' in output['tlo.methods.service_integration'] + assert 'event_cancelled' not in output['tlo.methods.service_integration'] + + +def test_long_run_chronic_integration(tmpdir, seed): + sim = Simulation(start_date=start_date, seed=seed, log_config={"filename": "log", "custom_levels": { + "*": logging.DEBUG}, "directory": tmpdir}, resourcefilepath=resourcefilepath) + register_modules(sim) + sim.make_initial_population(n=1000) + + sim.modules['ServiceIntegration'].parameters['serv_integration'] = 'chronic_care' + sim.modules['ServiceIntegration'].parameters['integration_year'] = 2010 + sim.simulate(end_date=Date(2015, 1, 1)) + + output = parse_log_file(sim.log_filepath) + assert 'event_runs' in output['tlo.methods.service_integration'] + assert 'event_cancelled' not in output['tlo.methods.service_integration'] + + +def test_long_run_no_integration(tmpdir, seed): + sim = Simulation(start_date=start_date, seed=seed, log_config={"filename": "log", "custom_levels": { + "*": logging.DEBUG}, "directory": tmpdir}, resourcefilepath=resourcefilepath) + register_modules(sim) + sim.make_initial_population(n=1000) + + sim.modules['ServiceIntegration'].parameters['integration_year'] = 2010 + sim.simulate(end_date=Date(2015, 1, 1)) + + output = parse_log_file(sim.log_filepath) + assert 'event_cancelled' in output['tlo.methods.service_integration']