diff --git a/.gitattributes b/.gitattributes index e8241c9d42..b52e66556c 100644 --- a/.gitattributes +++ b/.gitattributes @@ -9,3 +9,4 @@ *.xlsx filter=lfs diff=lfs merge=lfs -text *.pickle filter=lfs diff=lfs merge=lfs -text resources/** filter=lfs diff=lfs merge=lfs -text +resources/**/*.md !filter !diff !merge !text diff --git a/docs/write-ups/ChildhoodUndernutrition.docx b/docs/write-ups/ChildhoodUndernutrition.docx deleted file mode 100644 index 43a6e5c3d0..0000000000 --- a/docs/write-ups/ChildhoodUndernutrition.docx +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:409f98f9e6f045cc349f37366c44024eea3c2af30d555004f71e1d7629bcd5e1 -size 949266 diff --git a/docs/write-ups/Stunting.docx b/docs/write-ups/Stunting.docx new file mode 100644 index 0000000000..f44aac3d66 --- /dev/null +++ b/docs/write-ups/Stunting.docx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7377d88b85e99866dd34d5e8540bf53047456dd99fce05029cfe62fe0c507bf +size 801165 diff --git a/docs/write-ups/Wasting.docx b/docs/write-ups/Wasting.docx new file mode 100644 index 0000000000..2f406553b5 --- /dev/null +++ b/docs/write-ups/Wasting.docx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70b126e010732ce64a8d37524e86e6e6900e5730a7175f7e108bb0ab105201e8 +size 372537 diff --git a/docs/writeups.rst b/docs/writeups.rst index 12b9075c76..7e33536165 100644 --- a/docs/writeups.rst +++ b/docs/writeups.rst @@ -46,7 +46,7 @@ Conditions of Early Childhood * **Diarrhoea**: Childhood diarrhoea caused by virus or bacteria resulting in dehydration, and the treatments associated. :download:`.docx <./write-ups/Diarrhoea.docx>` -* **Childhood Undernutrition**: Acute and chronic undernutrition and its effects of Wasting and Stunting. 
:download:`.docx <./write-ups/ChildhoodUndernutrition.docx>` +* **Childhood Undernutrition**: Acute and chronic undernutrition and its effects of Wasting :download:`.docx <./write-ups/Wasting.docx>` and Stunting :download:`.docx <./write-ups/Stunting.docx>`. Communicable Diseases diff --git a/resources/ResourceFile_Stunting/Cover Sheet.csv b/resources/ResourceFile_Stunting/Cover Sheet.csv deleted file mode 100644 index e6593c8b83..0000000000 --- a/resources/ResourceFile_Stunting/Cover Sheet.csv +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7b380c25531d9812f741db00374dd542caf42a7710364e4681de089828403b00 -size 496 diff --git a/resources/ResourceFile_Wasting/parameters.csv b/resources/ResourceFile_Wasting/parameters.csv new file mode 100644 index 0000000000..c903716076 --- /dev/null +++ b/resources/ResourceFile_Wasting/parameters.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fea36eeb99b846c4632f771ded02a1fe400bbbcaaa0e145aa9062c596387589f +size 8582 diff --git a/resources/ResourceFile_Wasting/wasting_prevalence_and_sample_size.csv b/resources/ResourceFile_Wasting/wasting_prevalence_and_sample_size.csv new file mode 100644 index 0000000000..6f0bdf3e12 --- /dev/null +++ b/resources/ResourceFile_Wasting/wasting_prevalence_and_sample_size.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b09b28a442a4b9f7517425a1b01dcf7591a082949dbde9d5d6d813473d038e92 +size 1286 diff --git a/resources/costing/ResourceFile_Costing_Consumables.csv b/resources/costing/ResourceFile_Costing_Consumables.csv index dfa61f63e2..41c46734e6 100644 --- a/resources/costing/ResourceFile_Costing_Consumables.csv +++ b/resources/costing/ResourceFile_Costing_Consumables.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fca1834c38c657dbbd53616e3972b8909882faa1767a22f11349ed2bbcbca183 -size 26791 +oid sha256:bb4a72c041075dcba373d13521ab5a21c8ad7419d740e914a059a56639e4affe +size 26786 diff 
--git a/resources/healthsystem/consumables/ResourceFile_Consumables_availability_all.csv b/resources/healthsystem/consumables/ResourceFile_Consumables_availability_all.csv new file mode 100644 index 0000000000..f010c8b5e3 --- /dev/null +++ b/resources/healthsystem/consumables/ResourceFile_Consumables_availability_all.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0ac3c5436af2b2d609244ccd81b7ddcb2ebc0afe26c1597c88517ea30353590 +size 91401123 diff --git a/resources/healthsystem/priority_policies/ResourceFile_PriorityRanking_ALLPOLICIES/CVD.csv b/resources/healthsystem/priority_policies/ResourceFile_PriorityRanking_ALLPOLICIES/CVD.csv index ca43ac966e..c14cccf83d 100644 --- a/resources/healthsystem/priority_policies/ResourceFile_PriorityRanking_ALLPOLICIES/CVD.csv +++ b/resources/healthsystem/priority_policies/ResourceFile_PriorityRanking_ALLPOLICIES/CVD.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a2789315842d7b34bce2c4a34300d5b9eed104b1a4bbb93568c00ec5095adc6a -size 3946 +oid sha256:be19c64d84ed49ae79be28f6a8e7abb8e279f7cb441ee90d934753731c708e12 +size 4138 diff --git a/resources/healthsystem/priority_policies/ResourceFile_PriorityRanking_ALLPOLICIES/ClinicallyVulnerable.csv b/resources/healthsystem/priority_policies/ResourceFile_PriorityRanking_ALLPOLICIES/ClinicallyVulnerable.csv index 05cc5f6c12..ce5cf028a8 100644 --- a/resources/healthsystem/priority_policies/ResourceFile_PriorityRanking_ALLPOLICIES/ClinicallyVulnerable.csv +++ b/resources/healthsystem/priority_policies/ResourceFile_PriorityRanking_ALLPOLICIES/ClinicallyVulnerable.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:69a56437c2d302aecdb2ca97459cf756c72d349b471dc66ad30e1a6f8d7d47c3 -size 3566 +oid sha256:824c5882bf4f320e58e7cff622fd5abf5c2b581fd3ed4f2df2d8a2f919509cfd +size 3742 diff --git a/resources/healthsystem/priority_policies/ResourceFile_PriorityRanking_ALLPOLICIES/Default.csv 
b/resources/healthsystem/priority_policies/ResourceFile_PriorityRanking_ALLPOLICIES/Default.csv index 0d457a30f0..7a0dbc9d95 100644 --- a/resources/healthsystem/priority_policies/ResourceFile_PriorityRanking_ALLPOLICIES/Default.csv +++ b/resources/healthsystem/priority_policies/ResourceFile_PriorityRanking_ALLPOLICIES/Default.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2e5a056e2cfd1502b5a4e890c8d48120ae637aac7df147db18a91518b1747757 -size 3948 +oid sha256:6b87874bafc8f792822ef1962070c62aead74992e7856a7ca749a05b340eb20a +size 4140 diff --git a/resources/healthsystem/priority_policies/ResourceFile_PriorityRanking_ALLPOLICIES/EHP_III.csv b/resources/healthsystem/priority_policies/ResourceFile_PriorityRanking_ALLPOLICIES/EHP_III.csv index 45972b4124..4425679ae9 100644 --- a/resources/healthsystem/priority_policies/ResourceFile_PriorityRanking_ALLPOLICIES/EHP_III.csv +++ b/resources/healthsystem/priority_policies/ResourceFile_PriorityRanking_ALLPOLICIES/EHP_III.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:02bb2169513783163aad51bfddd02cc2196178f3f7b1a2c7c0e0a4eae6f51794 -size 3947 +oid sha256:f97a75bf186809ed5edc41f4542e61bf5ebefb131d7617bbccd74ef5b5a4f33b +size 4139 diff --git a/resources/healthsystem/priority_policies/ResourceFile_PriorityRanking_ALLPOLICIES/LCOA_EHP.csv b/resources/healthsystem/priority_policies/ResourceFile_PriorityRanking_ALLPOLICIES/LCOA_EHP.csv index 176bae4f7c..3403c49229 100644 --- a/resources/healthsystem/priority_policies/ResourceFile_PriorityRanking_ALLPOLICIES/LCOA_EHP.csv +++ b/resources/healthsystem/priority_policies/ResourceFile_PriorityRanking_ALLPOLICIES/LCOA_EHP.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:46f8983d320984a51aa078117b90a6a78ed3c604e9ebc96e2c0e10a75659e4c3 -size 3946 +oid sha256:50502e603cb50e6ea00f6df494b4f0cd39f216ce1dffff93ad9d136cd4592c50 +size 4138 diff --git 
a/resources/healthsystem/priority_policies/ResourceFile_PriorityRanking_ALLPOLICIES/Naive.csv b/resources/healthsystem/priority_policies/ResourceFile_PriorityRanking_ALLPOLICIES/Naive.csv index 12ff672d74..1f4d00fba8 100644 --- a/resources/healthsystem/priority_policies/ResourceFile_PriorityRanking_ALLPOLICIES/Naive.csv +++ b/resources/healthsystem/priority_policies/ResourceFile_PriorityRanking_ALLPOLICIES/Naive.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c3b3c34313cd97b0bc807e17e64e4997f25f2357c34a0f9ec1f6c834de7433d1 -size 3946 +oid sha256:c914b33a927d488ff650e7a39a1ba8e7b965c8cda73348595e74c2bb2a5fcf89 +size 4138 diff --git a/resources/healthsystem/priority_policies/ResourceFile_PriorityRanking_ALLPOLICIES/RMNCH.csv b/resources/healthsystem/priority_policies/ResourceFile_PriorityRanking_ALLPOLICIES/RMNCH.csv index 17e869fb79..5f753fabab 100644 --- a/resources/healthsystem/priority_policies/ResourceFile_PriorityRanking_ALLPOLICIES/RMNCH.csv +++ b/resources/healthsystem/priority_policies/ResourceFile_PriorityRanking_ALLPOLICIES/RMNCH.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:550ad48b191933d53c75d006e13140585d2563487c7a4d115a7320b506bb363c -size 3930 +oid sha256:838b3ccc7b5882631ae455634aa8d034065a928a63f5ecd412176556738e20b9 +size 4118 diff --git a/resources/healthsystem/priority_policies/ResourceFile_PriorityRanking_ALLPOLICIES/Test Mode 1.csv b/resources/healthsystem/priority_policies/ResourceFile_PriorityRanking_ALLPOLICIES/Test Mode 1.csv index d307f514b9..8e745a1cb0 100644 --- a/resources/healthsystem/priority_policies/ResourceFile_PriorityRanking_ALLPOLICIES/Test Mode 1.csv +++ b/resources/healthsystem/priority_policies/ResourceFile_PriorityRanking_ALLPOLICIES/Test Mode 1.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2bb18d408b2470f2a935a2f125e609d862bdd0d07455d84d0e6167d3f229daac -size 3948 +oid sha256:38a678d31cacc4cff04f54d9a916cd5ff2f5f76641ac7bad316087056b8df16c 
+size 4140 diff --git a/resources/healthsystem/priority_policies/ResourceFile_PriorityRanking_ALLPOLICIES/Test.csv b/resources/healthsystem/priority_policies/ResourceFile_PriorityRanking_ALLPOLICIES/Test.csv index a4bdd742a3..7e0fd99721 100644 --- a/resources/healthsystem/priority_policies/ResourceFile_PriorityRanking_ALLPOLICIES/Test.csv +++ b/resources/healthsystem/priority_policies/ResourceFile_PriorityRanking_ALLPOLICIES/Test.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bce2dcdade0e065b0f48f25edd7e32db45958309dedcf25872a55ee4d85a5bee -size 3948 +oid sha256:4645d100a2b19ca1b50888d2233f7752d360763eb53ec9e61e4355c411b5f18d +size 4140 diff --git a/resources/healthsystem/priority_policies/ResourceFile_PriorityRanking_ALLPOLICIES/VerticalProgrammes.csv b/resources/healthsystem/priority_policies/ResourceFile_PriorityRanking_ALLPOLICIES/VerticalProgrammes.csv index 9a8ccef424..39e26b7a1d 100644 --- a/resources/healthsystem/priority_policies/ResourceFile_PriorityRanking_ALLPOLICIES/VerticalProgrammes.csv +++ b/resources/healthsystem/priority_policies/ResourceFile_PriorityRanking_ALLPOLICIES/VerticalProgrammes.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0e0d8a699cc8a4a04c2b2b8dd4f7368a1572d14906ae8d8654d81456c69e6055 -size 3946 +oid sha256:191d303273c80fa7a1fe59488b5c63e718b9f92911ab45f70ada1c98db06e086 +size 4138 diff --git a/src/scripts/data_file_processing/healthsystem/consumables/consumable_resource_analyses_with_lmis/consumables_availability_estimation.py b/src/scripts/data_file_processing/healthsystem/consumables/consumable_resource_analyses_with_lmis/consumables_availability_estimation.py index c19114402b..c5dca65bcc 100644 --- a/src/scripts/data_file_processing/healthsystem/consumables/consumable_resource_analyses_with_lmis/consumables_availability_estimation.py +++ b/src/scripts/data_file_processing/healthsystem/consumables/consumable_resource_analyses_with_lmis/consumables_availability_estimation.py 
@@ -1,13 +1,19 @@ """ This script generates estimates of availability of consumables used by disease modules: -* ResourceFile_Consumables_availability_small.csv (estimate of consumable available - file for use in the - simulation). +* ResourceFile_Consumables_availability_small.csv (estimate of consumable availability for items with availability data + - smaller file). + * ResourceFile_Consumables_availability_all.csv (use estimates of consumable availability for items with availability + data from the RF..._small, and approximate availability for items without availability data, approximation by + average availability at the Facility_ID and month - smaller file). * ResourceFile_Consumables_Inflow_Outflow_Ratio.csv (a file that gives the ratio of inflow of consumables to outflow to * capture the extent of wastage as a proportion of use for each consumable by month, district and level. -N.B. The file uses `ResourceFile_Consumables_matched.csv` as an input. +N.B. +The script uses `ResourceFile_LMIS_2018.csv`, `ResourceFile_Consumables_matched.csv`, +`ResourceFile_hhfa_consumables.xlsx`, `ResourceFile_Master_Facilities_List.csv`, +`ResourceFile_Population_2010.csv`, and `ResourceFile_Consumables_Items_and_Packages.csv` as inputs. 
It creates one row for each consumable for availability at a specific facility and month when the data is extracted from the OpenLMIS dataset and one row for each consumable for availability aggregated across all facilities when the data is @@ -705,7 +711,7 @@ def get_inflow_to_outflow_ratio_by_item_and_facilitylevel(_df): print(stkout_df.loc[(~(stkout_df.available_prop >= 0.0)) | (~(stkout_df.available_prop <= 1.0))].available_prop) assert not stkout_df.duplicated(['fac_type_tlo', 'fac_name', 'district', 'month', 'item_code']).any(), "No duplicates" -# --- 6.7 Generate file for use in model run --- # +# --- 6.7 Generate file including only item_codes with availability data --- # # 1) Smaller file size # 2) Indexed by the 'Facility_ID' used in the model (which is an amalgmation of district and facility_level, defined in # the Master Facilities List. @@ -888,6 +894,61 @@ def interpolate_missing_with_mean(_ser): index=False ) +# --- 6.8 Generate file including both item_codes with and without availability data --- # +# - for use in model run +# 1) Smaller file size +# 2) Indexed by the 'Facility_ID' used in the model (which is an amalgamation of district and facility_level, defined in +# the Master Facilities List). 
+# 3) if item_code in ResourceFile_Consumables_availability_small.csv, use those availability estimates, +# if item_code in ResourceFile_Consumables_Items_and_Packages.csv, but not in +# ResourceFile_Consumables_availability_small.csv, approximate the availability by average availability at the +# 'Facility_ID' and 'month' + +# Load the CSV files +items_and_packages = pd.read_csv(path_for_new_resourcefiles /'ResourceFile_Consumables_Items_and_Packages.csv') +availability_small = pd.read_csv(path_for_new_resourcefiles /'ResourceFile_Consumables_availability_small.csv') + +# Extract all item codes from the RF Items_and_Packages +all_item_codes = set(items_and_packages['Item_Code']) + +# Extract item codes with availability data from RF availability_small +available_item_codes = set(availability_small['item_code']) + +# Find missing item codes +missing_item_codes = all_item_codes - available_item_codes + +# Calculate average availability for each combination of 'Facility_ID' and 'month' +avg_availability = ( + availability_small.groupby(['Facility_ID', 'month'])['available_prop'] + .mean() + .reset_index() + .rename(columns={'available_prop': 'avg_available_prop'}) +) + +# Create rows for missing item codes +missing_rows = [] +for item_code in missing_item_codes: + for _, row in avg_availability.iterrows(): + missing_rows.append({ + 'Facility_ID': row['Facility_ID'], + 'month': row['month'], + 'item_code': item_code, + 'available_prop': row['avg_available_prop'] + }) + +# Convert missing rows to a DataFrame +missing_df = pd.DataFrame(missing_rows) + +# Combine the original availability data with the missing rows +availability_all = pd.concat([availability_small, missing_df], ignore_index=True) + +# Ensure Facility_ID and month columns are integers before saving +availability_all['Facility_ID'] = availability_all['Facility_ID'].astype(int) +availability_all['month'] = availability_all['month'].astype(int) + +# Save the new file 
+availability_all.to_csv(path_for_new_resourcefiles / 'ResourceFile_Consumables_availability_all.csv', index=False) + # %% # 7. COMPARISON WITH HHFA DATA, 2018/19 ## ######################################################################################### diff --git a/src/scripts/undernutrition_analyses/stunting/stunting_analysis_plots.py b/src/scripts/stunting_analyses/stunting/stunting_analysis_plots.py similarity index 100% rename from src/scripts/undernutrition_analyses/stunting/stunting_analysis_plots.py rename to src/scripts/stunting_analyses/stunting/stunting_analysis_plots.py diff --git a/src/scripts/undernutrition_analyses/stunting/stunting_analysis_scenario.py b/src/scripts/stunting_analyses/stunting/stunting_analysis_scenario.py similarity index 92% rename from src/scripts/undernutrition_analyses/stunting/stunting_analysis_scenario.py rename to src/scripts/stunting_analyses/stunting/stunting_analysis_scenario.py index b1d558b6ec..daf72dcc7b 100644 --- a/src/scripts/undernutrition_analyses/stunting/stunting_analysis_scenario.py +++ b/src/scripts/stunting_analyses/stunting/stunting_analysis_scenario.py @@ -3,10 +3,10 @@ HealthSystem availability - including the effects of Diarrhoea and Alri and all the Labour modules. 
Run on the batch system using: -```tlo batch-submit src/scripts/undernutrition_analyses/stunting/stunting_analysis_scenario.py``` +```tlo batch-submit src/scripts/stunting_analyses/stunting/stunting_analysis_scenario.py``` Or locally using: -```tlo scenario-run src/scripts/undernutrition_analyses/stunting/stunting_analysis_scenario.py``` +```tlo scenario-run src/scripts/stunting_analyses/stunting/stunting_analysis_scenario.py``` """ from pathlib import Path diff --git a/src/scripts/wasting_analyses/add_pars_page_ToAllPDFs.py b/src/scripts/wasting_analyses/add_pars_page_ToAllPDFs.py new file mode 100644 index 0000000000..5790429adc --- /dev/null +++ b/src/scripts/wasting_analyses/add_pars_page_ToAllPDFs.py @@ -0,0 +1,94 @@ +import itertools +import re +import sys +from io import BytesIO +from pathlib import Path + +from PyPDF2 import PdfReader, PdfWriter +from reportlab.pdfgen import canvas + +# Define the parameter values in a dictionary +parameters = { + "base_death_rate_untreated_sam": [0.01, 0.03, 0.05, 0.08, 0.1], + "mod_wast_incidence__coef": [0.1, 0.3, 0.5, 0.7, 0.9], + "progression_to_sev_wast__coef": [1, 5, 10, 15, 20], + "prob_death_after_SAMcare__as_prop_of_death_rate_untreated_sam": [0.85, 0.7, 0.55, 0.4] +} + +# Create the parameter combinations +param_names = list(parameters.keys()) +param_values = list(parameters.values()) +pars_combinations = list(itertools.product(*param_values)) + +# Function to create a PDF with parameter values +def create_parameter_page(params): + buffer = BytesIO() + c = canvas.Canvas(buffer, pagesize=(200, 45)) # Smaller page size + c.setFont("Helvetica", 5) # Set font and size + y_position = 35 + for name, value in zip(param_names, params): + c.drawString(10, y_position, f"{name} = {value}") + y_position -= 10 # Move down for the next parameter + c.showPage() + c.save() + buffer.seek(0) + return buffer + +# Base directory path +BASE_PATH = Path("/home/eva/PycharmProjects/TLOmodel/outputs/sejjej5@ucl.ac.uk/wasting/") + +# 
Function to extract the indices from the file name +def extract_indices(file_name): + match = re.search(r'_(\d+)_(\d+)\.pdf$', file_name) + return (int(match.group(1)), int(match.group(2))) if match else (-1, -1) + +# Process each PDF +def process_pdfs(in_folder_name): + pdf_dir = BASE_PATH / in_folder_name / "_outcome_figures" + output_dir = BASE_PATH / in_folder_name / "_outputs_with_pars" + output_dir.mkdir(exist_ok=True) + print(f"Processing PDFs in directory: {pdf_dir}") + + pdf_files = list(pdf_dir.glob("*.pdf")) + if not pdf_files: + print("No PDF files found in the directory.") + return + + # Sort the PDF files by the extracted indices + pdf_files.sort(key=lambda x: extract_indices(x.name)) + + # Extract the draw indices from the file names + existing_draws = {extract_indices(pdf_file.name)[0] for pdf_file in pdf_files} + + # Filter out the parameter combinations for the missing draws + filtered_pars_combinations = [params for i, params in enumerate(pars_combinations) if i in existing_draws] + + for pdf_file, params in zip(pdf_files, filtered_pars_combinations): + print(f"Processing file: {pdf_file}") + reader = PdfReader(str(pdf_file)) + writer = PdfWriter() + + # Create the parameter page + parameter_page = create_parameter_page(params) + parameter_reader = PdfReader(parameter_page) + writer.add_page(parameter_reader.pages[0]) + + # Add the original pages + for page in reader.pages: + writer.add_page(page) + + # Save the new PDF with _pars added to the original name + output_pdf = output_dir / f"{pdf_file.stem}_pars.pdf" + with open(output_pdf, "wb") as f: + writer.write(f) + print(f"Saved updated PDF: {output_pdf}") + + print("PDFs have been updated with parameter pages.") + +if __name__ == "__main__": + if len(sys.argv) != 2: + print("Usage: python add_pars_page_ToAllPDFs.py ") + else: + folder_name = sys.argv[1] + process_pdfs(folder_name) + diff --git a/src/scripts/wasting_analyses/analysis_utility_functions_wast.py 
b/src/scripts/wasting_analyses/analysis_utility_functions_wast.py new file mode 100644 index 0000000000..42eb830e81 --- /dev/null +++ b/src/scripts/wasting_analyses/analysis_utility_functions_wast.py @@ -0,0 +1,2312 @@ +""" +A helping file that contains functions used for wasting analyses to extract data, derive outcomes and generate plots. +It is not to be run by itself. Functions are called from run_interventions_analysis_wasting.py, and +heatmaps_cons_wast.py. +""" + +import logging +import pickle +from decimal import ROUND_HALF_UP, Decimal +from pathlib import Path +from typing import Dict + +import numpy as np +import pandas as pd +import scipy.stats as st +import seaborn as sns +from matplotlib import lines as mpl_lines +from matplotlib import pyplot as plt +from matplotlib.patches import Rectangle +from matplotlib.ticker import FuncFormatter +from PIL import Image +from run_costing_analysis_wast import run_costing_analysis_wast as run_costing + +from src.scripts.costing.cost_estimation import apply_discounting_to_cost_data +from tlo.analysis.utils import create_pickles_locally, extract_results, get_scenario_outputs + +plt.style.use('seaborn-darkgrid') + +scenario_label_map = { + "SQ": "Status Quo", + "GM": "Growth Monitoring", + "CS": "Care-Seeking", + "FS": "Food Supplements", + "GM_CS": "Growth Monitoring and\nCare-Seeking", + "GM_FS": "Growth Monitoring and\nFood Supplements", + "CS_FS": "Care-Seeking and\nFood Supplements", + "GM_CS_FS": "Growth Monitoring and\nCare-Seeking and\nFood Supplements", +} + +def round_standard(val, decimals=0): + # Convert to string first to avoid floating point precision issues + d = Decimal(str(val)) + if decimals == 0: + res = d.quantize(Decimal('1'), rounding=ROUND_HALF_UP) + return int(res) + else: + # Creates a precision like Decimal('0.1') or Decimal('0.01') + precision = Decimal('10') ** -decimals + res = d.quantize(precision, rounding=ROUND_HALF_UP) + return float(res) + +def get_scenario_label(scen_abbr: str) -> 
str: + """Return display label for a scenario code; fall back to code if unknown.""" + return scenario_label_map.get(scen_abbr, scen_abbr) + +def map_scenario_labels(scen_abbrs_list: list) -> list: + """Map a list of scenario abbreviations to their labels to be displayed in figs.""" + return [get_scenario_label(s) for s in scen_abbrs_list] + +def apply_millions_formatter_to_ax(ax, axis: str = 'x, y', x_decimals: int = 1, y_decimals: int = 1): + if 'x' in axis: + x_fmt = FuncFormatter(lambda v, pos: f"{v/1e6:,.{x_decimals}f}" if v != 0 else "0") + ax.xaxis.set_major_formatter(x_fmt) + if 'y' in axis: + y_fmt = FuncFormatter(lambda v, pos: f"{v/1e6:,.{y_decimals}f}" if v != 0 else "0") + ax.yaxis.set_major_formatter(y_fmt) + +def return_mean_95_CI_across_runs(df: pd.DataFrame) -> pd.DataFrame: + """ + Returns a DataFrame with mean, lower CI, and upper CI for each year and each draw across runs. + The output DataFrame is structured with row index ['year'] and column index ['draw'], where each cell contains + a list of [mean, lower_ci, upper_ci]. + """ + result = pd.DataFrame(index=df.index, columns=df.columns.get_level_values('draw').unique()) + + for year in df.index: + row = df.loc[year] + ci = row.groupby(level='draw').apply( + lambda x: st.t.interval(0.95, len(x) - 1, loc=np.mean(x), scale=st.sem(x)) + ) + result.loc[year] = row.groupby(level='draw').mean().combine( + ci, lambda mean, ci_interval: [mean, ci_interval[0], ci_interval[1]] + ) + + return result + +def return_sum_95_CI_across_runs(df: pd.DataFrame) -> pd.DataFrame: + """ + Returns a DataFrame with sum, lower CI, and upper CI for each draw, mean across runs. + The output DataFrame is structured with column index ['draw'], where each cell contains + a list of [sum, lower_ci, upper_ci]. 
+ """ + result = pd.DataFrame(index=['sum'], columns=df.columns.get_level_values('draw').unique()) + + for draw in df.columns.get_level_values('draw').unique(): + draw_data = df.xs(draw, level='draw', axis=1).sum(axis=0) + ci = st.t.interval(0.95, len(draw_data) - 1, loc=np.mean(draw_data), scale=st.sem(draw_data)) + result.at['sum', draw] = [np.mean(draw_data), ci[0], ci[1]] + + return result + +def compute_scen_sum_and_averted( + interv: str, interv_df: pd.DataFrame, sq_sum_df_name: str, limit_to_zero: bool = False, sq_dict: dict = None +) -> tuple[pd.DataFrame, pd.DataFrame]: + """ + Sum `interv_df` across years into a single-row DataFrame labeled "start_year—end_year", then compute averted outcome + (DALYs or deaths) as compared to sum of outcome under SQ. If limit_to_zero True, averted outcome cannot be negative. + Returns (scen_sum_df, averted_mean_ci_df). + """ + # Sum Series -> single-row DataFrame and set new index 'year' as "first-last" + scen_sum = interv_df.sum(axis=0) + start_year = str(interv_df.index.min()) + end_year = str(interv_df.index.max()) + year_label = f"{start_year}-{end_year}" + scen_sum_df = scen_sum.to_frame().T + scen_sum_df.index.name = "year" + scen_sum_df.index = [year_label] + + # Obtain SQ sum DALYs + if interv != "SQ": + sq_sum_df = sq_dict[sq_sum_df_name] + else: + sq_sum_df = scen_sum_df + + # Compute averted DALYs, mean and CI across runs + if limit_to_zero: + sum_averted_df = (sq_sum_df - scen_sum_df).clip(lower=0) + assert (sum_averted_df >= 0).all().all(), "Negative averted outcome found in sum_averted_df, which should be limited to zero" + else: + sum_averted_df = sq_sum_df - scen_sum_df + averted_mean_ci_df = return_mean_95_CI_across_runs(sum_averted_df) + + return scen_sum_df, averted_mean_ci_df + +def extract_birth_data_frames_and_outcomes( + folder, + years_of_interest, + intervention_datayears, + interv +) -> Dict[str, pd.DataFrame]: + """ + Extracts and summarizes birth data. 
+ + :param folder: Path to the folder containing outcome data. + :param years_of_interest: List of years to extract data for. + :param intervention_datayears: List of years for which we need data to plot means over the interventions years, ie + from the year before interventions are implemented until the last year of interventions. + :param interv: Name or identifier of the intervention. + :return: Dictionary with DataFrames: + (1) 'births_df': Birth counts for years of interest (by draw and run), + (2) 'births_mean_ci_df': Mean and 95% CI for total births per year and draw, + (3) 'interv_births_df': Birth counts for intervention years, + (4) 'interv_births_mean_ci_df': Mean and 95% CI for births per year and draw for intervention_datayears. + """ + + print(f" -{interv=}") + + births_df = extract_results( + folder, + module="tlo.methods.demography", + key="on_birth", + custom_generate_series=( + lambda df: df.assign( + year=df['date'].dt.year).groupby(['year'])['year'].count()), + do_scaling=True + ).fillna(0) + births_df = births_df.loc[years_of_interest] + + births_mean_ci_per_year_per_draw_df = return_mean_95_CI_across_runs(births_df) + + interv_births_df = births_df.loc[intervention_datayears] + interv_births_per_year_per_draw_df = return_mean_95_CI_across_runs(interv_births_df) + + # report during which years interventions were implemented (if any) + interv_years = [year+1 for year in intervention_datayears[:-1]] + + return {'births_df': births_df, + 'births_mean_ci_df': births_mean_ci_per_year_per_draw_df, + 'interv_births_df': interv_births_df, + 'interv_births_mean_ci_df': interv_births_per_year_per_draw_df, + 'interv_years': interv_years} + +def extract_death_data_frames_and_outcomes( + folder, + births_df, + years_of_interest, + intervention_years, + interv, + sq_deaths: dict = None +) -> Dict[str, pd.DataFrame]: + """ + Extracts and summarizes death data (neonatal and under-5) by cause, year, and intervention period. 
+ + :param folder: Path to the folder containing outcome data. + :param births_df: DataFrame of births for the years of interest. + :param years_of_interest: List of years to extract data for. + :param intervention_years: List of years during which the intervention was implemented (if any). + :param interv: Name or identifier of the intervention. + :return: Dictionary with DataFrames for deaths by cause, mean and CI, and mortality rates + for both neonatal and under-5 cohorts. + """ + + print(f" -{interv=}") + # ### NEONATAL MORTALITY + # Extract all deaths occurring during the first 28 days of life + # differentiated by cause of death and acute malnutrition state + neonatal_deaths_by_cause_am_df = extract_results( + folder, + module="tlo.methods.demography.detail", + key="properties_of_deceased_persons", + custom_generate_series=( + lambda df: (filtered_by_age := df.loc[df['age_days'] < 29]) + .assign(year=filtered_by_age['date'].dt.year) + .groupby(['year', 'cause_of_death', 'un_clinical_acute_malnutrition'])['year'] + .count() + .reindex(pd.MultiIndex.from_product([ + df['date'].dt.year.unique(), df['cause_of_death'].unique(), + df['un_clinical_acute_malnutrition'].unique() + ], names=['year', 'cause_of_death', 'un_clinical_acute_malnutrition']), fill_value=0) + ), + do_scaling=True).fillna(0) + neonatal_deaths_by_cause_am_df = neonatal_deaths_by_cause_am_df.loc[years_of_interest] + + # number of deaths by any cause + neonatal_deaths_df = neonatal_deaths_by_cause_am_df.groupby(['year']).sum() + # number of deaths due to specific cause + neonatal_SAM_deaths_df = neonatal_deaths_by_cause_am_df.loc[ + neonatal_deaths_by_cause_am_df.index.get_level_values('cause_of_death') == 'SevereAcuteMalnutrition' + ].groupby(['year']).sum() + neonatal_ALRI_deaths_df = neonatal_deaths_by_cause_am_df.loc[ + neonatal_deaths_by_cause_am_df.index.get_level_values('cause_of_death').str.startswith('ALRI_') + ].groupby(['year']).sum() + neonatal_Diarrhoea_deaths_df = 
neonatal_deaths_by_cause_am_df.loc[ + neonatal_deaths_by_cause_am_df.index.get_level_values('cause_of_death').str.startswith('Diarrhoea_') + ].groupby(['year']).sum() + # number of deaths due to specific cause with SAM + neonatal_ALRI_deaths_with_SAM_df = neonatal_deaths_by_cause_am_df.loc[ + (neonatal_deaths_by_cause_am_df.index.get_level_values('un_clinical_acute_malnutrition') == 'SAM') & + (neonatal_deaths_by_cause_am_df.index.get_level_values('cause_of_death').str.startswith('ALRI_')) + ].groupby(['year']).sum() + neonatal_Diarrhoea_deaths_with_SAM_df = neonatal_deaths_by_cause_am_df.loc[ + (neonatal_deaths_by_cause_am_df.index.get_level_values('un_clinical_acute_malnutrition') == 'SAM') & + (neonatal_deaths_by_cause_am_df.index.get_level_values('cause_of_death').str.startswith('Diarrhoea_')) + ].groupby(['year']).sum() + + neo_deaths_mean_ci_per_year_per_draw_df = return_mean_95_CI_across_runs(neonatal_deaths_df) + neo_SAM_deaths_mean_ci_per_year_per_draw_df = return_mean_95_CI_across_runs(neonatal_SAM_deaths_df) + neo_ALRI_deaths_mean_ci_per_year_per_draw_df = return_mean_95_CI_across_runs(neonatal_ALRI_deaths_df) + neo_Diarrhoea_deaths_mean_ci_per_year_per_draw_df = return_mean_95_CI_across_runs(neonatal_Diarrhoea_deaths_df) + neo_ALRI_deaths_with_SAM_mean_ci_per_year_per_draw_df = \ + return_mean_95_CI_across_runs(neonatal_ALRI_deaths_with_SAM_df) + neo_Diarrhoea_deaths_with_SAM_mean_ci_per_year_per_draw_df = \ + return_mean_95_CI_across_runs(neonatal_Diarrhoea_deaths_with_SAM_df) + + # neo deaths for each year within intervention period + interv_neo_deaths_df = neonatal_deaths_df.loc[intervention_years] + interv_neo_SAM_deaths_df = neonatal_SAM_deaths_df.loc[intervention_years] + interv_neo_ALRI_deaths_df = neonatal_ALRI_deaths_df.loc[intervention_years] + interv_neo_Diarrhoea_deaths_df = neonatal_Diarrhoea_deaths_df.loc[intervention_years] + interv_neo_ALRI_deaths_with_SAM_df = neonatal_ALRI_deaths_with_SAM_df.loc[intervention_years] + 
interv_neo_Diarrhoea_deaths_with_SAM_df = neonatal_Diarrhoea_deaths_with_SAM_df.loc[intervention_years] + + # sum and CI of neo deaths over intervention period, mean across runs + interv_neo_deaths_sum_per_draw_CI_across_runs_df = return_sum_95_CI_across_runs(interv_neo_deaths_df) + interv_neo_SAM_deaths_sum_per_draw_CI_across_runs_df = return_sum_95_CI_across_runs(interv_neo_SAM_deaths_df) + interv_neo_ALRI_deaths_sum_per_draw_CI_across_runs_df = return_sum_95_CI_across_runs(interv_neo_ALRI_deaths_df) + interv_neo_Diarrhoea_deaths_sum_per_draw_CI_across_runs_df = \ + return_sum_95_CI_across_runs(interv_neo_Diarrhoea_deaths_df) + interv_neo_ALRI_deaths_with_SAM_sum_per_draw_CI_across_runs_df = \ + return_sum_95_CI_across_runs(interv_neo_ALRI_deaths_with_SAM_df) + interv_neo_Diarrhoea_deaths_with_SAM_sum_per_draw_CI_across_runs_df = \ + return_sum_95_CI_across_runs(interv_neo_Diarrhoea_deaths_with_SAM_df) + + # NEONATAL MORTALITY RATE (NMR), i.e. the number of deaths of infants up to 28 days old per 1,000 live births + nmr_df = (neonatal_deaths_df / births_df) * 1000 + nmr_per_year_per_draw_df = return_mean_95_CI_across_runs(nmr_df) + + # ### UNDER-5 MORTALITY + # Extract all deaths occurring during the first 5 years of life + # differentiated by cause of death and acute malnutrition state + under5_deaths_by_cause_am_df = extract_results( + folder, + module="tlo.methods.demography.detail", + key="properties_of_deceased_persons", + custom_generate_series=( + lambda df: (filtered_by_age := df.loc[df['age_exact_years'] < 5]) + .assign(year=filtered_by_age['date'].dt.year) + .groupby(['year', 'cause_of_death', 'un_clinical_acute_malnutrition'])['year'] + .count() + .reindex(pd.MultiIndex.from_product([ + df['date'].dt.year.unique(), df['cause_of_death'].unique(), + df['un_clinical_acute_malnutrition'].unique() + ], names=['year', 'cause_of_death', 'un_clinical_acute_malnutrition']), fill_value=0) + ), + do_scaling=True).fillna(0) + under5_deaths_by_cause_am_df = 
under5_deaths_by_cause_am_df.loc[years_of_interest] + + # number of deaths by any cause + under5_deaths_df = under5_deaths_by_cause_am_df.groupby(['year']).sum() + # number of deaths due to specific cause + under5_SAM_deaths_df = under5_deaths_by_cause_am_df.loc[ + under5_deaths_by_cause_am_df.index.get_level_values('cause_of_death') == 'SevereAcuteMalnutrition' + ].groupby(['year']).sum() + under5_ALRI_deaths_df = under5_deaths_by_cause_am_df.loc[ + under5_deaths_by_cause_am_df.index.get_level_values('cause_of_death').str.startswith('ALRI_') + ].groupby(['year']).sum() + under5_Diarrhoea_deaths_df = under5_deaths_by_cause_am_df.loc[ + under5_deaths_by_cause_am_df.index.get_level_values('cause_of_death').str.startswith('Diarrhoea_') + ].groupby(['year']).sum() + # number of deaths due to specific cause with SAM + under5_ALRI_deaths_with_SAM_df = under5_deaths_by_cause_am_df.loc[ + (under5_deaths_by_cause_am_df.index.get_level_values('un_clinical_acute_malnutrition') == 'SAM') & + (under5_deaths_by_cause_am_df.index.get_level_values('cause_of_death').str.startswith('ALRI_')) + ].groupby(['year']).sum() + under5_Diarrhoea_deaths_with_SAM_df = under5_deaths_by_cause_am_df.loc[ + (under5_deaths_by_cause_am_df.index.get_level_values('un_clinical_acute_malnutrition') == 'SAM') & + (under5_deaths_by_cause_am_df.index.get_level_values('cause_of_death').str.startswith('Diarrhoea_')) + ].groupby(['year']).sum() + + under5_deaths_mean_ci_per_year_per_draw_df = return_mean_95_CI_across_runs(under5_deaths_df) + under5_SAM_deaths_mean_ci_per_year_per_draw_df = return_mean_95_CI_across_runs(under5_SAM_deaths_df) + under5_ALRI_deaths_mean_ci_per_year_per_draw_df = return_mean_95_CI_across_runs(under5_ALRI_deaths_df) + under5_Diarrhoea_deaths_mean_ci_per_year_per_draw_df = return_mean_95_CI_across_runs(under5_Diarrhoea_deaths_df) + under5_ALRI_deaths_with_SAM_mean_ci_per_year_per_draw_df = \ + return_mean_95_CI_across_runs(under5_ALRI_deaths_with_SAM_df) + 
under5_Diarrhoea_deaths_with_SAM_mean_ci_per_year_per_draw_df = \ + return_mean_95_CI_across_runs(under5_Diarrhoea_deaths_with_SAM_df) + + # under 5 deaths for each year within intervention period + interv_under5_deaths_df = under5_deaths_df.loc[intervention_years] + interv_under5_SAM_deaths_df = under5_SAM_deaths_df.loc[intervention_years] + interv_under5_ALRI_deaths_df = under5_ALRI_deaths_df.loc[intervention_years] + interv_under5_Diarrhoea_deaths_df = under5_Diarrhoea_deaths_df.loc[intervention_years] + interv_under5_ALRI_deaths_with_SAM_df = under5_ALRI_deaths_with_SAM_df.loc[intervention_years] + interv_under5_Diarrhoea_deaths_with_SAM_df = under5_Diarrhoea_deaths_with_SAM_df.loc[intervention_years] + + # sum and CI of under 5 deaths over intervention period, mean across runs + interv_under5_deaths_sum_per_draw_CI_across_runs_df = return_sum_95_CI_across_runs(interv_under5_deaths_df) + interv_under5_SAM_deaths_sum_per_draw_CI_across_runs_df = return_sum_95_CI_across_runs(interv_under5_SAM_deaths_df) + interv_under5_ALRI_deaths_sum_per_draw_CI_across_runs_df = \ + return_sum_95_CI_across_runs(interv_under5_ALRI_deaths_df) + interv_under5_Diarrhoea_deaths_sum_per_draw_CI_across_runs_df = \ + return_sum_95_CI_across_runs(interv_under5_Diarrhoea_deaths_df) + interv_under5_ALRI_deaths_with_SAM_sum_per_draw_CI_across_runs_df = \ + return_sum_95_CI_across_runs(interv_under5_ALRI_deaths_with_SAM_df) + interv_under5_Diarrhoea_deaths_with_SAM_sum_per_draw_CI_across_runs_df = \ + return_sum_95_CI_across_runs(interv_under5_Diarrhoea_deaths_with_SAM_df) + + # sum and CI of under 5 SAM deaths over calibration period, mean across run + calibration_years = [2015, 2016, 2017, 2018, 2019] #TODO: later as an input parameter? 
+ calib_under5_SAM_deaths_df = under5_SAM_deaths_df.loc[calibration_years] + calib_under5_SAM_deaths_sum_per_draw_CI_across_runs_df = return_sum_95_CI_across_runs(calib_under5_SAM_deaths_df) + + under5_scen_sum_deaths_df, under5_averted_deaths_mean_ci_df = compute_scen_sum_and_averted( + interv, interv_under5_deaths_df, + 'under5_scen_sum_deaths_df', False, sq_deaths + ) + under5_scen_sum_SAM_deaths_df, under5_averted_SAM_deaths_mean_ci_df = compute_scen_sum_and_averted( + interv, interv_under5_SAM_deaths_df, + 'under5_scen_sum_SAM_deaths_df', False, sq_deaths + ) + under5_scen_sum_ALRI_deaths_df, under5_averted_ALRI_deaths_mean_ci_df = compute_scen_sum_and_averted( + interv, interv_under5_ALRI_deaths_df, + 'under5_scen_sum_ALRI_deaths_df',False, sq_deaths + ) + under5_scen_sum_Diarrhoea_deaths_df, under5_averted_Diarrhoea_deaths_mean_ci_df = compute_scen_sum_and_averted( + interv, interv_under5_Diarrhoea_deaths_df, + 'under5_scen_sum_Diarrhoea_deaths_df', False, sq_deaths + ) + + under5_scen_sum_ALRI_deaths_with_SAM_df, under5_averted_ALRI_deaths_with_SAM_mean_ci_df = \ + compute_scen_sum_and_averted( + interv, interv_under5_ALRI_deaths_with_SAM_df, + 'under5_scen_sum_ALRI_deaths_with_SAM_df', False, sq_deaths + ) + under5_scen_sum_Diarrhoea_deaths_with_SAM_df, under5_averted_Diarrhoea_deaths_with_SAM_mean_ci_df = \ + compute_scen_sum_and_averted( + interv, interv_under5_Diarrhoea_deaths_with_SAM_df, + 'under5_scen_sum_Diarrhoea_deaths_with_SAM_df', False, sq_deaths + ) + + # UNDER-5 MORTALITY RATE, i.e. 
the number of deaths of children under 5 years old per 1,000 live births + under5mr_df = (under5_deaths_df / births_df) * 1000 + under5mr_per_year_per_draw_df = return_mean_95_CI_across_runs(under5mr_df) + + return { + "neo_deaths_df": neonatal_deaths_df, + "neo_SAM_deaths_df": neonatal_SAM_deaths_df, + "neo_ALRI_deaths_df": neonatal_ALRI_deaths_df, + "neo_Diarrhoea_deaths_df": neonatal_Diarrhoea_deaths_df, + "neo_ALRI_deaths_with_SAM_df": neonatal_ALRI_deaths_with_SAM_df, + "neo_Diarrhoea_deaths_with_SAM_df": neonatal_Diarrhoea_deaths_with_SAM_df, + "neo_deaths_mean_ci_df": neo_deaths_mean_ci_per_year_per_draw_df, + "neo_SAM_deaths_mean_ci_df": neo_SAM_deaths_mean_ci_per_year_per_draw_df, + "neo_ALRI_deaths_mean_ci_df": neo_ALRI_deaths_mean_ci_per_year_per_draw_df, + "neo_Diarrhoea_deaths_mean_ci_df": neo_Diarrhoea_deaths_mean_ci_per_year_per_draw_df, + "neo_ALRI_deaths_with_SAM_mean_ci_df": neo_ALRI_deaths_with_SAM_mean_ci_per_year_per_draw_df, + "neo_Diarrhoea_deaths_with_SAM_mean_ci_df": neo_Diarrhoea_deaths_with_SAM_mean_ci_per_year_per_draw_df, + "interv_neo_deaths_df": interv_neo_deaths_df, + "interv_neo_deaths_sum_ci_df": interv_neo_deaths_sum_per_draw_CI_across_runs_df, + "interv_neo_SAM_deaths_df": interv_neo_SAM_deaths_df, + "interv_neo_SAM_deaths_sum_ci_df": interv_neo_SAM_deaths_sum_per_draw_CI_across_runs_df, + "interv_neo_ALRI_deaths_df": interv_neo_ALRI_deaths_df, + "interv_neo_ALRI_deaths_sum_ci_df": interv_neo_ALRI_deaths_sum_per_draw_CI_across_runs_df, + "interv_neo_Diarrhoea_deaths_df": interv_neo_Diarrhoea_deaths_df, + "interv_neo_Diarrhoea_deaths_sum_ci_df": interv_neo_Diarrhoea_deaths_sum_per_draw_CI_across_runs_df, + "interv_neo_ALRI_deaths_with_SAM_df": interv_neo_ALRI_deaths_with_SAM_df, + "interv_neo_ALRI_deaths_with_SAM_sum_ci_df": interv_neo_ALRI_deaths_with_SAM_sum_per_draw_CI_across_runs_df, + "interv_neo_Diarrhoea_deaths_with_SAM_df": interv_neo_Diarrhoea_deaths_with_SAM_df, + "interv_neo_Diarrhoea_deaths_with_SAM_sum_ci_df": 
interv_neo_Diarrhoea_deaths_with_SAM_sum_per_draw_CI_across_runs_df, + "neonatal_mort_rate_df": nmr_df, + "neo_mort_rate_mean_ci_df": nmr_per_year_per_draw_df, + "under5_deaths_df": under5_deaths_df, + "under5_SAM_deaths_df": under5_SAM_deaths_df, + "under5_ALRI_deaths_df": under5_ALRI_deaths_df, + "under5_Diarrhoea_deaths_df": under5_Diarrhoea_deaths_df, + "under5_ALRI_deaths_with_SAM_df": under5_ALRI_deaths_with_SAM_df, + "under5_Diarrhoea_deaths_with_SAM_df": under5_Diarrhoea_deaths_with_SAM_df, + "under5_deaths_mean_ci_df": under5_deaths_mean_ci_per_year_per_draw_df, + "under5_SAM_deaths_mean_ci_df": under5_SAM_deaths_mean_ci_per_year_per_draw_df, + "under5_ALRI_deaths_mean_ci_df": under5_ALRI_deaths_mean_ci_per_year_per_draw_df, + "under5_Diarrhoea_deaths_mean_ci_df": under5_Diarrhoea_deaths_mean_ci_per_year_per_draw_df, + "under5_ALRI_deaths_with_SAM_mean_ci_df": under5_ALRI_deaths_with_SAM_mean_ci_per_year_per_draw_df, + "under5_Diarrhoea_deaths_with_SAM_mean_ci_df": under5_Diarrhoea_deaths_with_SAM_mean_ci_per_year_per_draw_df, + "interv_under5_deaths_df": interv_under5_deaths_df, + "interv_under5_deaths_sum_ci_df": interv_under5_deaths_sum_per_draw_CI_across_runs_df, + "interv_under5_SAM_deaths_df": interv_under5_SAM_deaths_df, + "interv_under5_SAM_deaths_sum_ci_df": interv_under5_SAM_deaths_sum_per_draw_CI_across_runs_df, + "interv_under5_ALRI_deaths_df": interv_under5_ALRI_deaths_df, + "interv_under5_ALRI_deaths_sum_ci_df": interv_under5_ALRI_deaths_sum_per_draw_CI_across_runs_df, + "interv_under5_Diarrhoea_deaths_df": interv_under5_Diarrhoea_deaths_df, + "interv_under5_Diarrhoea_deaths_sum_ci_df": interv_under5_Diarrhoea_deaths_sum_per_draw_CI_across_runs_df, + "interv_under5_ALRI_deaths_with_SAM_df": interv_under5_ALRI_deaths_with_SAM_df, + "interv_under5_ALRI_deaths_with_SAM_sum_ci_df": interv_under5_ALRI_deaths_with_SAM_sum_per_draw_CI_across_runs_df, + "interv_under5_Diarrhoea_deaths_with_SAM_df": interv_under5_Diarrhoea_deaths_with_SAM_df, + 
"interv_under5_Diarrhoea_deaths_with_SAM_sum_ci_df": interv_under5_Diarrhoea_deaths_with_SAM_sum_per_draw_CI_across_runs_df, + "under5_mort_rate_df": under5mr_df, + "under5_mort_rate_mean_ci_df": under5mr_per_year_per_draw_df, + "under5_scen_sum_deaths_df": under5_scen_sum_deaths_df, + "under5_averted_deaths_mean_ci_df": under5_averted_deaths_mean_ci_df, + "under5_scen_sum_SAM_deaths_df": under5_scen_sum_SAM_deaths_df, + "under5_averted_SAM_deaths_mean_ci_df": under5_averted_SAM_deaths_mean_ci_df, + "under5_scen_sum_ALRI_deaths_df": under5_scen_sum_ALRI_deaths_df, + "under5_averted_ALRI_deaths_mean_ci_df": under5_averted_ALRI_deaths_mean_ci_df, + "under5_scen_sum_Diarrhoea_deaths_df": under5_scen_sum_Diarrhoea_deaths_df, + "under5_averted_Diarrhoea_deaths_mean_ci_df": under5_averted_Diarrhoea_deaths_mean_ci_df, + "under5_scen_sum_ALRI_deaths_with_SAM_df": under5_scen_sum_ALRI_deaths_with_SAM_df, + "under5_averted_ALRI_deaths_with_SAM_mean_ci_df": under5_averted_ALRI_deaths_with_SAM_mean_ci_df, + "under5_scen_sum_Diarrhoea_deaths_with_SAM_df": under5_scen_sum_Diarrhoea_deaths_with_SAM_df, + "under5_averted_Diarrhoea_deaths_with_SAM_mean_ci_df": under5_averted_Diarrhoea_deaths_with_SAM_mean_ci_df, + "interv_years": intervention_years, + "calib_under5_SAM_deaths_sum_ci_df": calib_under5_SAM_deaths_sum_per_draw_CI_across_runs_df, + "calib_years": calibration_years, + } + +def extract_daly_data_frames_and_outcomes( + folder, + years_of_interest, + intervention_years, + interv, + sq_dalys: dict = None, +) -> Dict[str, pd.DataFrame]: + """ + Extracts DALYs by cause for under-5s (age_range '0-4'), summed over both sexes, for the specified years. + :param folder: the folder from which the DALY data will be extracted + :param years_of_interest: years for which to extract the data + :param intervention_years: List of years during which the intervention was implemented (if any). + :param interv: Name or identifier of the intervention. 
+ :param sq_dalys: Dict of DataFrames with DALYs outcomes for SQ. + :return: Dict of DataFrames with index ['year'] and columns for each (draw, run), or columns for reach draw with + mean_ci across all runs. + """ + + print(f" -{interv=}") + # ### UNDER-5 DALYs + # Extract all DALYs assigned to children under 5 --- dalys_stacked_by_age_and_time, i.e. all the year of life lost + # are ascribed to the age of the death and the year of the death differentiated by cause of death / disability + + def extrapolate_dalys_data_from_logs(df: pd.DataFrame) -> pd.Series: + # Melt the DataFrame to have 'cause_of_dalys' as a variable + df_with_cause_of_dalys = df.melt( + id_vars=['age_range', 'sex', 'year'], + value_vars=[ + "AIDS", "COPD", "Cancer (Bladder)", "Cancer (Breast)", "Cancer (Oesophagus)", "Cancer (Other)", + "Cancer (Prostate)", "Childhood Diarrhoea", "Childhood Undernutrition", "Congenital birth defects", + "Depression / Self-harm", "Diabetes", "Epilepsy", "Heart Disease", "Kidney Disease", "Lower Back Pain", + "Lower respiratory infections", "Malaria", "Maternal Disorders", "Measles", "Neonatal Disorders", + "Other", "Schistosomiasis", "Stroke", "TB (non-AIDS)", "Transport Injuries" + ], + var_name='cause_of_dalys', + value_name='dalys' + ) + + # Keep only dalys for children under-5 by year and cause_of_dalys + under5_dalys_by_year_cause = \ + df_with_cause_of_dalys[ + (df_with_cause_of_dalys['year'].isin(years_of_interest)) & + (df_with_cause_of_dalys['age_range'] == '0-4') + ].groupby(['year', 'cause_of_dalys'],as_index=True)['dalys'].sum() + + return under5_dalys_by_year_cause + + under5_dalys_by_cause_df = extract_results( + folder, + module="tlo.methods.healthburden", + key="dalys_stacked_by_age_and_time", + custom_generate_series=lambda df: extrapolate_dalys_data_from_logs(df), + do_scaling=True + ).fillna(0) + + # Apply 3% discount rate to DALYs. Re-indexing is required to use the discounting function, + # so the MultiIndexes must be restored afterward. 
+ under5_dalys_by_cause_df__reset_index = under5_dalys_by_cause_df.reset_index() + under5_dalys_by_cause_df__reset_index.columns = [ + f"{col[0]}_{col[1]}" if col[1] != "" else f"{col[0]}" + for col in under5_dalys_by_cause_df__reset_index.columns.values + ] + for col in under5_dalys_by_cause_df__reset_index.columns: + if col.count('_') == 1 and all(part.isdigit() for part in col.split('_')): + under5_dalys_by_cause_df__reset_index[col] = apply_discounting_to_cost_data( + _df=under5_dalys_by_cause_df__reset_index, _discount_rate=0.03, _column_for_discounting=col + )[col] + # set MultiIndex for rows + under5_dalys_by_cause_df = under5_dalys_by_cause_df__reset_index.set_index(['year', 'cause_of_dalys']) + # create MultiIndex for columns + new_col_tuples = [tuple(map(int, col.split('_'))) for col in under5_dalys_by_cause_df.columns if '_' in col] + new_col_index = pd.MultiIndex.from_tuples(new_col_tuples, names=['draw', 'run']) + under5_dalys_by_cause_df = under5_dalys_by_cause_df[[f"{d}_{r}" for d, r in new_col_tuples]] + under5_dalys_by_cause_df.columns = new_col_index + + # number of dalys by any cause + under5_dalys_df = under5_dalys_by_cause_df.groupby(['year']).sum() + # number of dalys by specific causes + under5_SAM_dalys_df = under5_dalys_by_cause_df.xs("Childhood Undernutrition", level=1) + under5_ALRI_dalys_df = under5_dalys_by_cause_df.xs("Lower respiratory infections", level=1) + under5_Diarrhoea_dalys_df = under5_dalys_by_cause_df.xs("Childhood Diarrhoea", level=1) + + under5_dalys_mean_ci_per_year_per_draw_df = return_mean_95_CI_across_runs(under5_dalys_df) + under5_SAM_dalys_mean_ci_per_year_per_draw_df = return_mean_95_CI_across_runs(under5_SAM_dalys_df) + under5_ALRI_dalys_mean_ci_per_year_per_draw_df = return_mean_95_CI_across_runs(under5_ALRI_dalys_df) + under5_Diarrhoea_dalys_mean_ci_per_year_per_draw_df = return_mean_95_CI_across_runs(under5_Diarrhoea_dalys_df) + + # under 5 DALYs for each year within intervention period + interv_under5_dalys_df 
= under5_dalys_df.loc[intervention_years] + interv_under5_SAM_dalys_df = under5_SAM_dalys_df.loc[intervention_years] + interv_under5_ALRI_dalys_df = under5_ALRI_dalys_df.loc[intervention_years] + interv_under5_Diarrhoea_dalys_df = under5_Diarrhoea_dalys_df.loc[intervention_years] + + # sum and CI of under 5 DALYs over intervention period, mean across runs + interv_under5_dalys_sum_per_draw_CI_across_runs_df = return_sum_95_CI_across_runs(interv_under5_dalys_df) + interv_under5_SAM_dalys_sum_per_draw_CI_across_runs_df = return_sum_95_CI_across_runs(interv_under5_SAM_dalys_df) + interv_under5_ALRI_dalys_sum_per_draw_CI_across_runs_df = return_sum_95_CI_across_runs(interv_under5_ALRI_dalys_df) + interv_under5_Diarrhoea_dalys_sum_per_draw_CI_across_runs_df = \ + return_sum_95_CI_across_runs(interv_under5_Diarrhoea_dalys_df) + + under5_scen_sum_dalys_df, under5_averted_dalys_mean_ci_df = compute_scen_sum_and_averted( + interv, interv_under5_dalys_df, 'under5_scen_sum_dalys_df', + False, sq_dalys + ) + under5_scen_sum_SAM_dalys_df, under5_averted_SAM_dalys_mean_ci_df = compute_scen_sum_and_averted( + interv, interv_under5_SAM_dalys_df, 'under5_scen_sum_SAM_dalys_df', + False, sq_dalys + ) + under5_scen_sum_ALRI_dalys_df, under5_averted_ALRI_dalys_mean_ci_df = compute_scen_sum_and_averted( + interv, interv_under5_ALRI_dalys_df, 'under5_scen_sum_ALRI_dalys_df', + False, sq_dalys + ) + under5_scen_sum_Diarrhoea_dalys_df, under5_averted_Diarrhoea_dalys_mean_ci_df = compute_scen_sum_and_averted( + interv, interv_under5_Diarrhoea_dalys_df, 'under5_scen_sum_Diarrhoea_dalys_df', + False, sq_dalys + ) + + return { + "under5_dalys_df": under5_dalys_df, + "under5_SAM_dalys_df": under5_SAM_dalys_df, + "under5_ALRI_dalys_df": under5_ALRI_dalys_df, + "under5_Diarrhoea_dalys_df": under5_Diarrhoea_dalys_df, + "under5_dalys_mean_ci_df": under5_dalys_mean_ci_per_year_per_draw_df, + "under5_SAM_dalys_mean_ci_df": under5_SAM_dalys_mean_ci_per_year_per_draw_df, + 
"under5_ALRI_dalys_mean_ci_df": under5_ALRI_dalys_mean_ci_per_year_per_draw_df, + "under5_Diarrhoea_dalys_mean_ci_df": under5_Diarrhoea_dalys_mean_ci_per_year_per_draw_df, + "interv_under5_dalys_sum_ci_df": interv_under5_dalys_sum_per_draw_CI_across_runs_df, + "interv_under5_SAM_dalys_sum_ci_df": interv_under5_SAM_dalys_sum_per_draw_CI_across_runs_df, + "interv_under5_ALRI_dalys_sum_ci_df": interv_under5_ALRI_dalys_sum_per_draw_CI_across_runs_df, + "interv_under5_Diarrhoea_dalys_sum_ci_df": interv_under5_Diarrhoea_dalys_sum_per_draw_CI_across_runs_df, + "under5_scen_sum_dalys_df": under5_scen_sum_dalys_df, + "under5_averted_dalys_mean_ci_df": under5_averted_dalys_mean_ci_df, + "under5_scen_sum_SAM_dalys_df": under5_scen_sum_SAM_dalys_df, + "under5_averted_SAM_dalys_mean_ci_df": under5_averted_SAM_dalys_mean_ci_df, + "under5_scen_sum_ALRI_dalys_df": under5_scen_sum_ALRI_dalys_df, + "under5_averted_ALRI_dalys_mean_ci_df": under5_averted_ALRI_dalys_mean_ci_df, + "under5_scen_sum_Diarrhoea_dalys_df": under5_scen_sum_Diarrhoea_dalys_df, + "under5_averted_Diarrhoea_dalys_mean_ci_df": under5_averted_Diarrhoea_dalys_mean_ci_df, + "interv_years": intervention_years, + } + +def extract_pop_sizes_data_frames_and_outcomes( + folder, + years_of_interest, + intervention_years, + interv, +) -> Dict[str, pd.DataFrame]: + """ + Extracts and summarizes data on pop. sizes (total under-5, moderately/severely wasted under 5) year for intervention + period. + + :param folder: Path to the folder containing outcome data. + :param years_of_interest: List of years to extract data for. + :param intervention_years: List of years during which the intervention was implemented (if any). + :param interv: Name or identifier of the intervention. + :return: Dictionary with DataFrames with pop. 
sizes by year, mean and CI + """ + + print(f" -{interv=}") + + pop_size_wasted_df = extract_results( + folder, + module="tlo.methods.wasting", + key="pop sizes", + custom_generate_series=( + lambda df: df.assign(year=df["date"].dt.year) + .set_index("year") + .pipe(lambda d: d["mod__under5"] + d["sev__under5"]) + ), + do_scaling=True, + ).fillna(0) + + interv_pop_size_wasted_df = pop_size_wasted_df.loc[intervention_years] + interv_pop_size_per_year_per_draw_df = return_mean_95_CI_across_runs(interv_pop_size_wasted_df) + + return { + "pop_size_wasted_df": pop_size_wasted_df, + "pop_size_wasted_mean_ci_df": interv_pop_size_per_year_per_draw_df + } + +def regenerate_pickles_with_debug_logs(iterv_folders_dict) -> None: + for interv_folder_path in iterv_folders_dict.values(): + print(f"\n{interv_folder_path=} in regenerate_wasting_pickle_with_debug_logs") + log_to_pickle = 'wasting_analysis__full_model_' + create_pickles_locally(interv_folder_path, compressed_file_name_prefix=log_to_pickle, level=logging.DEBUG) + +def extract_tx_data_frames( + folder, + years_of_interest, + intervention_years, + interv +) -> Dict[str, pd.DataFrame]: + """ + Extracts and summarizes treatment data by age group and year. + + :param folder: Path to the folder containing outcome data. + :param years_of_interest: List of years to extract data for. + :param intervention_years: List of years for which data include the interventions if any implemented. + :param interv: Name or identifier of the intervention. 
+ :return: Dictionary with DataFrames: + (1) 'tx_by_age_group_df': Counts by year, treatment, age_group (by draw and run), + (2) 'tx_by_age_group_mean_ci_df': Mean and 95% CI for counts per year, treatment, age_group and draw, + (3) 'tx_mean_ci_df': Mean and 95% CI for total treatments per year and draw, + (4) 'interv_tx_by_age_group_df': Counts for intervention years, + (5) 'interv_tx_by_age_group_mean_ci_df': Mean and 95% CI for intervention years, + (6) 'interv_tx_mean_ci_df': Mean and 95% CI for total treatment in intervention years. + """ + print(f" -{interv=}") + + # Extract treatment data + tx_by_age_group_df = extract_results( + folder, + module="tlo.methods.wasting", + key="get-tx", + custom_generate_series=( + lambda df: ( + df.assign(year=df['date'].dt.year) + .groupby(['year', 'treatment', 'age_group'])['year'] + .count() + .reindex( + pd.MultiIndex.from_product([ + df['date'].dt.year.unique(), + df['treatment'].unique(), + df['age_group'].unique() + ], names=['year', 'treatment', 'age_group']) + ) + ) + ), + do_scaling=True + ).fillna(0) + tx_by_age_group_df = tx_by_age_group_df.loc[years_of_interest] + + # Mean and CI by year, treatment, age_group + tx_by_age_group_mean_ci_df = return_mean_95_CI_across_runs(tx_by_age_group_df) + + # Mean and CI by year and treatment (sum over age_group) + tx_mean_df = tx_by_age_group_df.groupby(['year', 'treatment']).sum() + tx_mean_ci_df = return_mean_95_CI_across_runs(tx_mean_df) + + # For intervention years + interv_tx_by_age_group_df = tx_by_age_group_df.loc[intervention_years] + interv_tx_by_age_group_mean_ci_df = return_mean_95_CI_across_runs(interv_tx_by_age_group_df) + interv_tx_mean_df = interv_tx_by_age_group_df.groupby(['year', 'treatment']).sum() + interv_tx_mean_ci_df = return_mean_95_CI_across_runs(interv_tx_mean_df) + + return { + 'tx_by_age_group_df': tx_by_age_group_df, + 'tx_by_age_group_mean_ci_df': tx_by_age_group_mean_ci_df, + 'tx_mean_ci_df': tx_mean_ci_df, + 'interv_tx_by_age_group_df': 
interv_tx_by_age_group_df, + 'interv_tx_by_age_group_mean_ci_df': interv_tx_by_age_group_mean_ci_df, + 'interv_tx_mean_ci_df': interv_tx_mean_ci_df, + 'interv_years': intervention_years + } + +def get_scen_colour(scen_name: str) -> str: + return { + "Status Quo": "#F12AE5", + "SQ": "#F12AE5", + # "GM_FullAttend": "#4575B4", + # "GM_all": "#BDEBF7", + # "GM_1-2": "#91BFDB", + "GM": "#4575B4", + # "CS_10": "#9FFD17", + # "CS_30": "#61B93C", + # "CS_50": "#2D945F", + # "CS_100": "#266714", + "CS": "#266714", + # "FS_50": "#D4898E", + # "FS_70": "#D4898E", + # "FS_Full": "#A90251", + "FS": "#A90251", + "GM_FS": "#FD7700", + "CS_FS": "#54DC5C", + "GM_CS_FS": "#350D90", + "GM_CS": "#689DEF", + }.get(scen_name) + +def plot_mortality_rate__by_interv_multiple_settings( + cohort: str, + interv_timestamps_dict: dict, + scenarios_dict: dict, + intervs_of_interest: list, + plot_years: list, + outcomes_dict: dict, + outputs_path: Path +) -> None: + """ + Plots mortality rates (neonatal or under-5) and their confidence intervals over time for multiple intervention + settings. For the 'SQ' (Status Quo) intervention, also overlays UNICEF and WPP reference data. 
+ + :param cohort: 'Neonatal' or 'Under-5' + :param interv_timestamps_dict: Dictionary mapping intervention names to their timestamp identifiers + :param scenarios_dict: Dictionary mapping interventions to scenario names and draw numbers + :param intervs_of_interest: List of interventions to plot + :param plot_years: List of years to plot on the x-axis + :param outcomes_dict: Nested dictionary with outcome data for each intervention and scenario + :param outputs_path: Path to save the generated plots + :return: None + """ + + def plot_scenarios(plot_interv, plot_outcome): + scenarios_to_plot = scenarios_dict[plot_interv] + for scen_name, draw in scenarios_to_plot.items(): + scen_colour = get_scen_colour(scen_name) + scen_data = outcomes_dict[plot_interv][plot_outcome][draw] + + means, ci_lower, ci_upper = zip(*scen_data.values.flatten()) + + ax.plot(plot_years, means, label=scen_name, color=scen_colour) + ax.fill_between(plot_years, ci_lower, ci_upper, + color=scen_colour, alpha=.1) + + # Outcome to plot, corresponding target, and y-axis limit + assert cohort in ['Neonatal', 'Under-5'],\ + f"Invalid value for 'cohort': expected 'Neonatal' or 'Under-5'. Received {cohort} instead." 
+ if cohort == 'Neonatal': + outcome = 'neo_mort_rate_mean_ci_df' + target = 12 + ylim_top = 40 #25 + else: #cohort == 'Under-5': + outcome = 'under5_mort_rate_mean_ci_df' + target = 25 + ylim_top = 100 #60 + + # Plots by intervention (multiple settings within each plot) + for interv in intervs_of_interest: + + fig, ax = plt.subplots() + plot_scenarios(interv, outcome) + + if interv == 'SQ': + + # Add UNICEF mortality rates data + # #### + # Load UNICEF mortality rates data from CSV + unicef_csv_path = Path(__file__).parent / "fusion_GLOBAL_DATAFLOW_UNICEF_1.0_MWI.CME_MRY0T4+CME_MRM0...csv" + unicef_df = pd.read_csv(unicef_csv_path) + + # Filter for neonatal and under-5 rates, total sex + neo_mask = ( + (unicef_df['INDICATOR:Indicator'] == 'CME_MRM0: Neonatal mortality rate') & + (unicef_df['SEX:Sex'] == '_T: Total') + ) + under5_mask = ( + (unicef_df['INDICATOR:Indicator'] == 'CME_MRY0T4: Under-five mortality rate') & + (unicef_df['SEX:Sex'] == '_T: Total') + ) + + unicef_neo = unicef_df.loc[neo_mask] + unicef_under5 = unicef_df.loc[under5_mask] + + # Extract years and rates (convert to int/float) + unicef_neo_years = unicef_neo['TIME_PERIOD:Time period'].astype(int).tolist() + unicef_neo_rates = unicef_neo['OBS_VALUE:Observation Value'].astype(float).tolist() + unicef_neo_lower = unicef_neo['LOWER_BOUND:Lower Bound'].astype(float).tolist() + unicef_neo_upper = unicef_neo['UPPER_BOUND:Upper Bound'].astype(float).tolist() + + unicef_under5_years = unicef_under5['TIME_PERIOD:Time period'].astype(int).tolist() + unicef_under5_rates = unicef_under5['OBS_VALUE:Observation Value'].astype(float).tolist() + unicef_under5_lower = unicef_under5['LOWER_BOUND:Lower Bound'].astype(float).tolist() + unicef_under5_upper = unicef_under5['UPPER_BOUND:Upper Bound'].astype(float).tolist() + + unicef_colour = '#1CABE2' + + # Filter data to include only years present in both plot_years and source years + unicef_filtered_neo = [(year, mort_rate, low, upper) for year, mort_rate, low, 
upper in \ + zip(unicef_neo_years, unicef_neo_rates, unicef_neo_lower, unicef_neo_upper) if \ + year in plot_years] + unicef_filtered_under5 = [(year, mort_rate, low, upper) for year, mort_rate, low, upper in + zip(unicef_under5_years, unicef_under5_rates, + unicef_under5_lower, unicef_under5_upper) if + year in plot_years] + + (unicef_filtered_neo_years, unicef_filtered_neo_rates, + unicef_filtered_neo_lower, unicef_filtered_neo_upper) = \ + zip(*unicef_filtered_neo) if unicef_filtered_neo else ([], [], [], []) + (unicef_filtered_under5_years, unicef_filtered_under5_rates, + unicef_filtered_under5_lower, unicef_filtered_under5_upper) = \ + zip(*unicef_filtered_under5) if unicef_filtered_under5 else ([], [], [], []) + + # Add WPP 2024 mortality rates estimates (past data) and medium projection variant (future predictions) + # #### + wpp_medium_under5_mort_rates = [ + 81, 76, 70, 65, 61, 57, 53, 51, 48, 46, 44, 42, 41, 39, 38, 37, 37, 36, 35, 34, 33, 33, 32, 31, 30, 30, + 29, 28, 27, 27, 26, 26, 25, 24, 24, 23, 23, 22, 22, 21, 21, 21, 20, 20, 19, 19, 19, 18, 18, 17, 17, 17, + 16, 16, 16, 16, 16, 15, 15, 15, 15, 15, 14, 14, 14, 14, 14, 13, 13, 13, 13, 13, 13, 12, 12, 12, 12, 12, + 12, 12, 12, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11 + ] + wpp_years = list(range(2010, 2101)) + wpp_colour = '#1D73F5' + wpp_filtered_years = [year for year in plot_years if year in wpp_years] + wpp_filtered_under5_rates = \ + [rate for year, rate in zip(wpp_years, wpp_medium_under5_mort_rates) if year in wpp_filtered_years] + + # Plot both data + # #### + if cohort == 'Neonatal': + ax.plot(unicef_filtered_neo_years, unicef_filtered_neo_rates, + label='UNICEF Data', color=unicef_colour, linestyle='--') + ax.fill_between( + unicef_filtered_neo_years, unicef_filtered_neo_lower, unicef_filtered_neo_upper, + color=unicef_colour, alpha=0.2 + ) + elif cohort == 'Under-5': + ax.plot(unicef_filtered_under5_years, unicef_filtered_under5_rates, + label='UNICEF Data', color=unicef_colour, 
def plot_mean_outcome_and_CIs__scenarios_comparison(
    cohort: str,
    scenarios_dict: dict,
    scenarios_to_compare: list,
    plot_years: list,
    outcome_type: str,
    outcomes_dict: dict,
    outputs_path: Path,
    scenarios_tocompare_prefix: str,
    timestamps_suffix: str
) -> None:
    """
    Plots mean deaths or DALYs and confidence intervals over time for the specified cohort for multiple scenarios.

    One PNG is saved per cause ('any cause', 'SAM', 'ALRI', 'Diarrhoea') for which an outcome name is defined for
    the given cohort and outcome type. Causes without a defined outcome (e.g. any DALYs for the 'Neonatal' cohort)
    are skipped. If there is nothing to plot (no scenarios to compare, or no outcome defined), the function returns
    without creating any figure.

    :param cohort: 'Neonatal' or 'Under-5'
    :param scenarios_dict: Dictionary mapping interventions to scenarios and their corresponding draw numbers
    :param scenarios_to_compare: List of scenarios to plot
    :param plot_years: List of years to plot
    :param outcome_type: 'deaths', 'deaths_with_SAM', or 'DALYs'
    :param outcomes_dict: Dictionary containing data for plotting nested as outcomes_dict[interv][outcome][draw][run]
    :param outputs_path: Path to save the plots
    :param scenarios_tocompare_prefix: Prefix for output files with names of scenarios that are compared in the plots
    :param timestamps_suffix: Timestamps to identify the log data from which the outcomes originated.
    :raises AssertionError: if `cohort` or `outcome_type` is not one of the accepted values
    :raises ValueError: if a scenario from `scenarios_to_compare` is not present in `scenarios_dict`
    """
    assert cohort in ['Neonatal', 'Under-5'], \
        f"Invalid value for 'cohort': expected 'Neonatal' or 'Under-5'. Received {cohort} instead."
    assert outcome_type in ['deaths', 'deaths_with_SAM', 'DALYs'], \
        f"Invalid value for 'outcome_type': expected 'deaths', 'deaths_with_SAM' or 'DALYs'. " \
        f"Received {outcome_type} instead."

    # Outcome names per cause; the position in each list matches the position in `causes`.
    # `None` marks a cause for which no outcome is available and hence no figure is produced.
    causes = ['any cause', 'SAM', 'ALRI', 'Diarrhoea']
    if outcome_type == "deaths":
        neonatal_outcomes = ['neo_deaths_mean_ci_df', 'neo_SAM_deaths_mean_ci_df',
                             'neo_ALRI_deaths_mean_ci_df', 'neo_Diarrhoea_deaths_mean_ci_df']
        under5_outcomes = ['under5_deaths_mean_ci_df', 'under5_SAM_deaths_mean_ci_df',
                           'under5_ALRI_deaths_mean_ci_df', 'under5_Diarrhoea_deaths_mean_ci_df']
    elif outcome_type == "deaths_with_SAM":
        neonatal_outcomes = [None, None,
                             'neo_ALRI_deaths_with_SAM_mean_ci_df', 'neo_Diarrhoea_deaths_with_SAM_mean_ci_df']
        under5_outcomes = [None, None,
                           'under5_ALRI_deaths_with_SAM_mean_ci_df', 'under5_Diarrhoea_deaths_with_SAM_mean_ci_df']
    else:  # outcome_type == "DALYs"
        neonatal_outcomes = [None, None, None, None]  # No data on DALYs for neonatal
        under5_outcomes = ['under5_dalys_mean_ci_df', 'under5_SAM_dalys_mean_ci_df',
                           'under5_ALRI_dalys_mean_ci_df', 'under5_Diarrhoea_dalys_mean_ci_df']
    cohort_outcomes = neonatal_outcomes if cohort == 'Neonatal' else under5_outcomes
    outcomes_to_plot = [(cause, outcome) for cause, outcome in zip(causes, cohort_outcomes) if outcome]

    # Nothing to draw: avoid creating empty figures (and referencing years that were never computed)
    if not outcomes_to_plot or not scenarios_to_compare:
        return

    # Find the intervention and draw number of each scenario once, before any figure is created, so that an
    # unknown scenario fails early and does not leave an open figure behind.
    # The first intervention (in dict order) that lists the scenario is used.
    resolved_scenarios = []
    for scenario in scenarios_to_compare:
        for interv, draws_by_scenario in scenarios_dict.items():
            if scenario in draws_by_scenario:
                resolved_scenarios.append((scenario, interv, draws_by_scenario[scenario]))
                break
        else:
            raise ValueError(f"Scenario '{scenario}' not found in scenarios_dict")

    for cause, outcome in outcomes_to_plot:
        # Initialize the plot
        fig, ax = plt.subplots()

        for scenario, interv, draw in resolved_scenarios:
            # Extract data for the scenario
            scen_data = outcomes_dict[interv][outcome][draw]

            # Split into means and confidence-interval bounds
            # NOTE(review): assumes each cell of scen_data holds a (mean, lower CI, upper CI) triple - confirm
            means, ci_lower, ci_upper = zip(*scen_data.values.flatten())

            # Plot the data
            # NOTE(review): a plotted year is matched against `year - 1` in the data index, i.e. the index is
            # presumably shifted by one year relative to the plotted years - confirm against the data producer
            years_to_plot = [year for year in plot_years if year - 1 in scen_data.index]
            scen_colour = get_scen_colour(scenario)
            ax.plot(years_to_plot, means, label=scenario, color=scen_colour)
            ax.fill_between(years_to_plot, ci_lower, ci_upper, color=scen_colour, alpha=0.2)

        # Add labels, title, and legend (x ticks follow the years of the last plotted scenario)
        plt.ylabel(f'{cohort} {outcome_type}')
        plt.xlabel('Year')
        plt.title(f'{cohort} Mean {outcome_type.replace("_", " ")} due to {cause} and 95% CI over time')
        plt.legend()
        plt.xticks(years_to_plot, labels=years_to_plot, rotation=45, fontsize=8)

        fig.savefig(
            outputs_path / (
                f"{cohort}_mean_{cause}_{outcome_type}_CI_scenarios_comparison__"
                f"{scenarios_tocompare_prefix}__{timestamps_suffix}.png"
            ),
            bbox_inches='tight'
        )
        plt.close(fig)
+ :param cohort: 'Neonatal' or 'Under-5' + :param scenarios_dict: Dictionary mapping interventions to scenarios and their corresponding draw numbers + :param scenarios_to_compare: List of scenarios to plot + :param outcome_type: 'deaths', 'deaths_with_SAM' or 'DALYs' + :param outcomes_dict: Dictionary containing data for plotting nested as outcomes_dict[interv][outcome][draw][run] + :param outputs_path: Path to save the plot + :param scenarios_tocompare_prefix: Prefix for output files with names of scenarios that are compared in the plots + :param timestamps_suffix: Suffix with timestamps to identify the log data from which the outcomes originated + :param interv_timestamps_dict: Dictionary with timestamps for all the interventions + (default: None, as needed only for outcome_type = 'DALYs' for cost-effectiveness analysis) + :param births_dict: Dataframe containing births data from simulations nested as + births_dict[interv][outcome][draw][run] + (default: None, as needed only for outcome_type = 'DALYs' for cost-effectiveness analysis) + """ + assert cohort in ['Neonatal', 'Under-5'], \ + f"Invalid value for 'cohort': expected 'Neonatal' or 'Under-5'. Received {cohort} instead." + assert outcome_type in ['deaths', 'deaths_with_SAM', 'DALYs'], \ + f"Invalid value for 'outcome_type': expected 'deaths' or 'DALYs'. Received {outcome_type} instead." 
+ + # Outcomes to plot + for i, cause in enumerate(['any cause', 'SAM', 'ALRI', 'Diarrhoea']): + + if outcome_type == "deaths": + neonatal_outcomes = ['interv_neo_deaths_sum_ci_df', 'interv_neo_SAM_deaths_sum_ci_df', + 'interv_neo_ALRI_deaths_sum_ci_df', 'interv_neo_Diarrhoea_deaths_sum_ci_df'] + under5_outcomes = ['interv_under5_deaths_sum_ci_df', 'interv_under5_SAM_deaths_sum_ci_df', + 'interv_under5_ALRI_deaths_sum_ci_df', 'interv_under5_Diarrhoea_deaths_sum_ci_df'] + under5_averted_outcomes = ['under5_averted_deaths_mean_ci_df', + 'under5_averted_SAM_deaths_mean_ci_df', + 'under5_averted_ALRI_deaths_mean_ci_df', + 'under5_averted_Diarrhoea_deaths_mean_ci_df'] + elif outcome_type == "deaths_with_SAM": + neonatal_outcomes = [None, None, + 'interv_neo_ALRI_deaths_with_SAM_sum_ci_df', + 'interv_neo_Diarrhoea_deaths_with_SAM_sum_ci_df'] + under5_outcomes = [None, None, + 'interv_under5_ALRI_deaths_with_SAM_sum_ci_df', + 'interv_under5_Diarrhoea_deaths_with_SAM_sum_ci_df'] + under5_averted_outcomes = [None, None, + 'under5_averted_ALRI_deaths_with_SAM_mean_ci_df', + 'under5_averted_Diarrhoea_deaths_with_SAM_mean_ci_df'] + else: # outcome_type == "DALYs" + neonatal_outcomes = [None, None, None, None] # No DALYs for neonatal + under5_outcomes = ['interv_under5_dalys_sum_ci_df', 'interv_under5_SAM_dalys_sum_ci_df', + 'interv_under5_ALRI_dalys_sum_ci_df', 'interv_under5_Diarrhoea_dalys_sum_ci_df'] + under5_averted_outcomes = ['under5_averted_dalys_mean_ci_df', + 'under5_averted_SAM_dalys_mean_ci_df', + 'under5_averted_ALRI_dalys_mean_ci_df', + 'under5_averted_Diarrhoea_dalys_mean_ci_df'] + outcome = under5_outcomes[i] if cohort == 'Under-5' else neonatal_outcomes[i] + averted_outcome = under5_averted_outcomes[i] if cohort =='Under-5' else None + + if outcome: + # Plot comparison of sum of outcome_type over intervention period (absolute numbers of outcome_type) + fig, ax = plt.subplots() + + # Iterate over scenarios to compare + for scenario in scenarios_to_compare: + # 
Find the corresponding intervention and draw number + interv, draw = next( + (interv, draw) + for interv, scenarios_for_interv_dict in scenarios_dict.items() + if scenario in scenarios_for_interv_dict + for scen_name, draw in scenarios_for_interv_dict.items() + if scen_name == scenario + ) + + # Extract data for the scenario + scen_data = outcomes_dict[interv][outcome][draw] + + # Calculate sum and confidence intervals + interv_sum, interv_ci_lower, interv_ci_upper = zip(*scen_data.values.flatten()) + interv_sum, interv_ci_lower, interv_ci_upper = \ + interv_sum[0], interv_ci_lower[0], interv_ci_upper[0] + + # Plot the data + ax.bar(scenario, interv_sum, + yerr=[[interv_sum - interv_ci_lower], [interv_ci_upper - interv_sum]], + label=scenario, color=get_scen_colour(scenario), capsize=5) + + y_top = ax.get_ylim()[1] + + # Add text label for the bar height (sum), above the bar + ax.text( + scenario, + interv_sum + (y_top * 0.02), # small offset above the bar + f"{interv_sum:,.0f}", + color='black', + ha='center', + va='bottom', + fontsize=12.5 + ) + + # Add text labels for interv_ci_lower and interv_ci_upper + text_color = 'black' if scenario in ['Status Quo'] else 'white' + ax.text(scenario, + interv_ci_upper / 2 + interv_ci_upper / 4 if \ + interv_ci_upper < y_top / 2 + y_top / 15 else y_top / 2 + y_top / 15, + f"{interv_ci_upper:,.0f}", color=text_color, ha='center', va='top', fontsize=12.5) + ax.text(scenario, + interv_ci_upper / 2 - interv_ci_upper / 4 if \ + interv_ci_upper < y_top / 2 + y_top / 15 else y_top / 2 - y_top / 15, + f"{interv_ci_lower:,.0f}", color=text_color, ha='center', va='bottom', fontsize=12.5) + + # Add horizontal lines for Status Quo scenario + if scenario == 'Status Quo': + ax.axhline(y=interv_ci_lower, color=get_scen_colour('Status Quo'), linestyle='--', linewidth=1) + ax.axhline(y=interv_ci_upper, color=get_scen_colour('Status Quo'), linestyle='--', linewidth=1) + + # Add labels, title, and legend + interv_years = 
outcomes_dict["SQ"]['interv_years'] + min_interv_year = min(interv_years) + max_interv_year = max(interv_years) + plt.ylabel(f'{cohort} {outcome_type} (Sum over intervention period)') + plt.xlabel('Scenario') + plt.title( + f'{cohort} Sum of {outcome_type.replace("_", " ")} due to {cause} and 95% CI over ' + f'intervention period ({min_interv_year}--{max_interv_year})') + plt.legend(loc='center left', bbox_to_anchor=(1, 0.5)) + plt.xticks(rotation=45, fontsize=8) + + fig.savefig( + outputs_path / ( + f"{cohort}_sum_{cause}_{outcome_type}_CI_intervention_period_scenarios_comparison__" + f"{scenarios_tocompare_prefix}__{timestamps_suffix}.png" + ), + bbox_inches='tight' + ) + plt.close(fig) + + # noinspection PyUnreachableCode + if averted_outcome: + fig2, ax2 = plt.subplots() + + averted_dict = {} + for scenario in scenarios_to_compare: + if scenario == 'Status Quo': + # Only horizontal lines, no bar + ax2.axhline(y=0, color=get_scen_colour('Status Quo'), linestyle='-', linewidth=2) + else: + # Iterate over scenarios to compare + interv, draw = next( + (interv, draw) + for interv, scenarios_for_interv_dict in scenarios_dict.items() + if scenario in scenarios_for_interv_dict + for scen_name, draw in scenarios_for_interv_dict.items() + if scen_name == scenario + ) + + # Extract data for the scenario + scen_averted_data = \ + outcomes_dict[interv][averted_outcome][draw][f'{min_interv_year}-{max_interv_year}'] + averted_dict[scenario] = scen_averted_data + + ax2.bar(scenario, scen_averted_data[0], + yerr=[[scen_averted_data[0] - scen_averted_data[1]], + [scen_averted_data[2] - scen_averted_data[0]]], + label=scenario, color=get_scen_colour(scenario), capsize=5) + y_top2 = ax2.get_ylim()[1] + s1 = y_top2 * 0.02 # space between bar and value of the bar + # compute number of spaces to pad to 6 digits for printing the value + try: + val = int(abs(scen_averted_data[0])) + except Exception: + val = 0 + digits = len(str(val)) if val != 0 else 1 + n = max(0, 6 - digits) + if n 
== 4: + n= n+4 + elif n == 3: + n= n+5 + elif n == 2: + if scen_averted_data[0] > 0: + n= n+2 + else: + n= n+1 + elif n == 1: + if scen_averted_data[0] > 0: + n= n+1 + h_space = " " * n + ax2.text(scenario, scen_averted_data[0] + s1 if scen_averted_data[0] >= 0 else 0 + s1, + f"{h_space}{scen_averted_data[0]:,.0f}", color=get_scen_colour(scenario), + ha='center', va='bottom', fontsize=12.5, fontweight='bold') + + # # Display lower and upper CI within the bars + # ax2.text(scenario, averted_ci_upper / 2 + averted_ci_upper / 4 if \ + # averted_ci_upper < y_top2 / 2 + y_top2 / 15 else y_top2 / 2 + y_top2 / 15, + # f"{averted_ci_upper:,.0f}", color='white', ha='center', va='top', fontsize=12.5) + # ax2.text(scenario, averted_ci_upper / 2 - averted_ci_upper / 4 if \ + # averted_ci_upper < y_top2 / 2 + y_top2 / 15 else y_top2 / 2 - y_top2 / 15, + # f"{averted_ci_lower:,.0f}", color='white', ha='center', va='bottom', fontsize=12.5) + + if cause in ['any cause', 'SAM']: + # Apply millions formatter to y-axis values for SAM outcomes + plt.ylabel(f"Averted {outcome_type}, millions (Cumulative: {min_interv_year}—{max_interv_year})") + apply_millions_formatter_to_ax(ax2, axis='y') + else: + if outcome_type == 'deaths_with_SAM': + outcome_type_plot = 'deaths' + else: + outcome_type_plot = outcome_type + plt.ylabel(f"Averted {outcome_type_plot} (Cumulative: {min_interv_year}—{max_interv_year})") + plt.xlabel('Scenario') + plt.title( + f'{cohort}: Sum of averted {outcome_type.replace("_", " ")} due to {cause} and 95% CI over ' + f'intervention period ({min_interv_year}—{max_interv_year})' + ) + # Update scenario abbreviations to full names for labels + handles2, labels2 = ax2.get_legend_handles_labels() + ax2.legend(handles2, map_scenario_labels(labels2), loc='center left', bbox_to_anchor=(1, 0.5), labelspacing=1.4) + ax2.set_xticks(list(range(len(labels2)))) + ax2.set_xticklabels(labels2, rotation=45, fontsize=8) + + fig2.savefig( + outputs_path / ( + 
f"{cohort}_sum_averted_{cause}_{outcome_type}_CI_intervention_period_scenarios_comparison__" + f"{scenarios_tocompare_prefix}__{timestamps_suffix}.png" + ), + bbox_inches='tight' + ) + plt.close(fig2) + + # path to outcome calculated data + cost_outcome_folder_path = outputs_path / "outcomes_data" + # SQ timestamp associated with scenarios for which we want the costs to be calculated + SQ_results_timestamp = interv_timestamps_dict["SQ"] + + def plot_and_table_cost_effectiveness( + in_averted_DALYs: dict, in_data_impl_cost_name: str, in_sharing_GM_CS: float, in_FS_multiplier: float, + in_medical_calc_done: bool + ) -> None: + ce_suffix = f"{in_data_impl_cost_name}_GM-CS-sharing{in_sharing_GM_CS}_FSmultiplier{in_FS_multiplier}" + print("###################") + print(f"\nCE SUFFIX: {ce_suffix}") + timestamps_and_ce_suffix = f"{timestamps_suffix}__{ce_suffix}" + # ----------- + # Implementation costs estimates based on number of births and unit costs from REFs, discounted by 3% + def calculate_implementation_costs(): + # Implementation costs for education/promotion interventions (ie GM & CS) + # note: start_up_unit_cost is assumed to be additional cost to unit cost for the first year of + # implementation + + # Gelli et al., 2021 (incl food, not the best choice as we already have food supplements modelled + # separately) + # start_up_cost_Gelli_etal2021 = 58.41 * 0.3 + # unit_cost_Gelli_etal2021 = 58.41 * 0.7 + + # Pearson et al., 2018 (seems to include purely implementation costs, and assume coverage for the + # interventions, however as we intend to reach 100% scale-up, we assume 100% coverage) + # coverage_educ_Pearson_etal2018 = 0.247 + # coverage_prom_Pearson_etal2018 = 0.61 + coverage = 1.0 # [coverage_educ_Pearson_etal2018, coverage_prom_Pearson_etal2018, 1.0] + # coverage with Margolies et al., 2021 + # 0.15 => 'CS_FS': 30,484,191.749434147 > 29,853,966 + # 0.14 => 'CS_FS': 28,451,912.299471878 < 29,853,966 + # 0.0017 => 'GM_CS': 259,464.976512309 > 246,824.02 
+ # 0.0016 => 'GM_CS': 244,202.33083511435 < 246,824.02 + + unit_cost_Pearson_etal2018 = 0.37 + start_up_Pearson_etal2018 = 0.37 / 7 * 3 # no start-up costs included in the estimate + discount_incl_bool_Pearson_etal2018 = False # no discounting (1 year long programme) + + unit_cost_Margolies_etal2021_min = 9.17 + start_up_Margolies_etal2021 = 0 # included in their estimate of unit cost + discount_incl_bool_Margolies_etal2021 = True # already includes 3% annual discounting (5 years) + + # Implementation costs GM, CS & FS: + # ##### + scenarios_without_SQ = [scen for scen in scenarios_to_compare if scen != 'Status Quo'] + + if in_data_impl_cost_name == 'Pearson_etal2018': + unit_cost = unit_cost_Pearson_etal2018 + start_up_unit_cost = start_up_Pearson_etal2018 + discount_incl_bool = discount_incl_bool_Pearson_etal2018 + + elif in_data_impl_cost_name == 'Margolies_etal2021': + unit_cost = unit_cost_Margolies_etal2021_min + start_up_unit_cost = start_up_Margolies_etal2021 + discount_incl_bool = discount_incl_bool_Margolies_etal2021 + + print(f"\nUNIT COST: {unit_cost}") + impl_costs = dict() + + def GM_CS_unit_cost_coef(in_scen: str): + return ( + (0 if in_scen == "FS" else 1) + if "_" not in in_scen + else (2 - in_sharing_GM_CS) + if in_scen.count("_") == 2 + else (2 - in_sharing_GM_CS) + if in_scen == "GM_CS" + else (1) + ) + + def FS_unit_cost_coef(in_scen: str): + return in_FS_multiplier if "FS" in in_scen else 0 + + for scen in scenarios_without_SQ: + impl_cost_df = pd.DataFrame( + { + "year": interv_years, + "impl_costs": [ + ( + births_dict[scen]["births_mean_ci_df"][0].loc[year][0] + * coverage + * ((unit_cost + start_up_unit_cost) if year == interv_years[0] else unit_cost) + * GM_CS_unit_cost_coef(scen) + ) + + ( + pop_size_dict[scen]["pop_size_wasted_mean_ci_df"][0].loc[year][0] + * coverage + * ((unit_cost + start_up_unit_cost) if year == interv_years[0] else unit_cost) + * FS_unit_cost_coef(scen) + ) + for year in interv_years + ], + } + ) + # 
print(f"\n--------\nscen {scen}\n--------") + # print("\nimpl_cost_df") + # print(impl_cost_df) + if not discount_incl_bool: + impl_cost_df = apply_discounting_to_cost_data( + _df=impl_cost_df, _discount_rate=0.03, _column_for_discounting='impl_costs' + ) + sum_impl_costs_discounted = sum(impl_cost_df['impl_costs']) + impl_costs[scen] = sum_impl_costs_discounted + # print("\nimpl_cost_discounted_df") + # print(impl_cost_discounted_df) + # print("\nimpl_costs") + # print(impl_costs) + + # Implementation costs FS: + # TODO: + # interv_tx_mean_ci_df + # salary [RF_Costing_HR Nutrition] at levels 1a, 1b and 2: 4573.11153 USD (annual??) + + # Add 0 implementation costs for SQ + impl_costs['SQ'] = 0.0 + return impl_costs + + def get_all_costs(): + # 1. Medical consumable costs - discounted by 3% + output_costs_medical_file_path = \ + cost_outcome_folder_path / f"output_costs_medical_outcomes_{SQ_results_timestamp}.pkl" + if output_costs_medical_file_path.exists() and (in_medical_calc_done or not force_calculation[4]): + print("\nloading output costs medical from file ...") + output_costs_medical_df = pd.read_pickle(output_costs_medical_file_path) + else: + print("\noutput costs medical calculation ...") + run_costing( + cost_outcome_folder_path, SQ_results_timestamp, timestamps_suffix, force_calculation + ) + output_costs_medical_df = pd.read_pickle(output_costs_medical_file_path) + incremental_consumable_costs = dict() + for scen in output_costs_medical_df.index: + incremental_consumable_costs[scen] = \ + output_costs_medical_df.loc[scen, 'total'] - output_costs_medical_df.loc['SQ', 'total'] + print("\nincremental_consumable_costs") + print(incremental_consumable_costs) + + # TODO: use also incremental_consumable_costs CIs + # (saved as output_costs_medical_df.loc[scen, ['lower_bound', 'upper_bound']]) + # How the net_health_benefit & net_monetary_benefit then will need to account for both uncertainty + # around Averted DALYs and around Incremental Costs (hence similar 
calculation as when calculated + # CI for Averted DALYs) + + # 2. Implementation costs estimates based on number of births and unit costs from REFs, + # discounted by 3% + implementation_costs = calculate_implementation_costs() + print(f"\nimplementation_costs: {in_data_impl_cost_name}; GM & CS shared costs: {in_sharing_GM_CS};" + f" FS multiplier: {in_FS_multiplier}") + print(implementation_costs) + + all_costs = pd.DataFrame( + { + "scenario": list(incremental_consumable_costs.keys()), + "incremental_consumable_costs": [ + incremental_consumable_costs[scen] for scen in incremental_consumable_costs.keys() + ], + "implementation_costs": [ + implementation_costs[scen] for scen in incremental_consumable_costs.keys() + ], + "total_cost": [ + incremental_consumable_costs[scen] + implementation_costs[scen] + for scen in incremental_consumable_costs.keys() + ], + } + ) + return all_costs + + output_all_costs_file_path = \ + cost_outcome_folder_path / f"all_costs_{SQ_results_timestamp}_{ce_suffix}.pkl" + if output_all_costs_file_path.exists() and not force_calculation[4] and not force_calculation[5] : + print("\nloading all costs from file ...") + all_costs_df = pd.read_pickle(output_all_costs_file_path) + print("-------------") + print(f"\nCE_SUFFIX={ce_suffix}") + # print(f"--all_costs_df:\n{all_costs_df}") + else: + print("\nall costs calculation ...") + all_costs_df = get_all_costs() + print("\nsaving all costs to file ...") + all_costs_df.to_pickle(output_all_costs_file_path) + + # Cost-effectiveness threshold (CET) = willingness to pay + CET = 76 + + # PLOT cost-effectiveness plane + # ##### + print("\nplotting CE plane ...") + scenarios_without_SQ = [scen for scen in all_costs_df['scenario'] if scen != "SQ"] + + fig_ce, ax_ce = plt.subplots(figsize=(10, 8)) + for scen in scenarios_without_SQ: + ax_ce.errorbar( + in_averted_DALYs[scen][0], + all_costs_df.loc[all_costs_df['scenario'] == scen, 'total_cost'].values[0], + xerr=[ + [in_averted_DALYs[scen][0] - 
in_averted_DALYs[scen][1]], + [in_averted_DALYs[scen][2] - in_averted_DALYs[scen][0]], + ], + fmt="o", + color=get_scen_colour(scen), + capsize=5, + ) + # add SQ point + ax_ce.plot(0, 0, marker="o", color=get_scen_colour('SQ')) + + # Create frontier and icer_domination labels for the points for all scenarios + # ----- + # 1. Initialize + final_frontier = [] + domination = {} + icer_domination = {} + + # 2. Create a mapping dictionary for costs + map_cost = all_costs_df.set_index("scenario")["total_cost"].to_dict() + + # 3. Sort by total_cost + all_costs_df_sorted = all_costs_df.sort_values("total_cost").reset_index(drop=True) + + # Helper function to calculate ICER + def get_icer(s1, s2): + cost1 = map_cost[s1] + cost2 = map_cost[s2] + daly1 = in_averted_DALYs[s1][0] if s1 != 'SQ' else 0 + daly2 = in_averted_DALYs[s2][0] if s2 != 'SQ' else 0 + # Handle division by zero if DALYs are identical + if (daly2 - daly1) == 0: + return float("inf") + return (cost2 - cost1) / (daly2 - daly1) + + # 3. First pass: Identify simple Domination + temp_frontier = [] + max_dalys = -float("inf") + + for i, row in all_costs_df_sorted.iterrows(): + s = row["scenario"] + current_dalys = in_averted_DALYs[s][0] if s != 'SQ' else 0 + if current_dalys > max_dalys: + temp_frontier.append(s) + max_dalys = current_dalys + domination[s] = "" + else: + domination[s] = "dominated" + icer_domination[s] = "dominated" + + # 4. Second pass: Identify Extended Domination and calculate ICERs + for s_next in temp_frontier: + while len(final_frontier) >= 2: + s_prev = final_frontier[-2] + s_curr = final_frontier[-1] + + icer1 = get_icer(s_prev, s_curr) + icer2 = get_icer(s_curr, s_next) + + if icer1 > icer2: + # s_curr is extendedly dominated + domination[s_curr] = "extendedly dominated" + icer_domination[s_curr] = "extendedly dominated" + final_frontier.pop() + else: + break + final_frontier.append(s_next) + + # 5. 
Populate ICER_domination for the final frontier with conditional rounding + for i in range(len(final_frontier)): + current_scen = final_frontier[i] + if i == 0: + icer_domination[current_scen] = "" + else: + prev_scen = final_frontier[i - 1] + val = get_icer(prev_scen, current_scen) + + # Check if the ICER is within +/- 1 unit of the CET + if abs(val - CET) < 1: + # Round to 1 decimal place if very close to CET + rounded = round_standard(val, decimals=1) + icer_domination[current_scen] = f"\\${rounded:.1f}" + print(f"\nICER close to CET: {val}") + else: + # Round to 0 decimal places otherwise + rounded = round_standard(val, decimals=0) + icer_domination[current_scen] = f"\\${rounded:.0f}" + + # 6. Define position of label in plot + if in_data_impl_cost_name == 'Pearson_etal2018': + space = 0.012 * map_cost['FS'] + ha_scen = { + "SQ": "left", + "GM": "center", + "CS": "center", + "FS": "left", + "GM_FS": "center", + "CS_FS": "left", + "GM_CS_FS": "left", + "GM_CS": "center", + } # ['right', 'left', 'center'] + va_scen = { + "SQ": "top", + "GM": "bottom", + "CS": "top", + "FS": "bottom", + "GM_FS": "bottom", + "CS_FS": "top", + "GM_CS_FS": "top", + "GM_CS": "bottom", + } # ['bottom', 'top', 'bottom'] + + elif in_data_impl_cost_name == 'Margolies_etal2021': + space = 0.024 * map_cost['FS'] + # positions of the labels + if in_FS_multiplier > 0.4: + ha_scen = { + "SQ": "left", + "GM": "center", + "CS": "right", + "FS": "right", + "GM_FS": "right", + "CS_FS": "left", + "GM_CS_FS": "center", + "GM_CS": "right", + } # ['right', 'left', 'center'] + va_scen = { + "SQ": "top", + "GM": "bottom", + "CS": "top", + "FS": "bottom", + "GM_FS": "bottom", + "CS_FS": "top", + "GM_CS_FS": "bottom", + "GM_CS": "bottom", + } # ['bottom', 'top', 'bottom'] + else: + ha_scen = { + "SQ": "left", + "GM": "center", + "CS": "right", + "FS": "left", + "GM_FS": "right", + "CS_FS": "right", + "GM_CS_FS": "center", + "GM_CS": "center", + } # ['right', 'left', 'center'] + va_scen = { + "SQ": "top", 
+ "GM": "bottom", + "CS": "top", + "FS": "top", + "GM_FS": "bottom", + "CS_FS": "top", + "GM_CS_FS": "bottom", + "GM_CS": "bottom", + } # ['bottom', 'top', 'bottom'] + + # 7. Add the labels to scenario points + for scen in all_costs_df['scenario']: + ax_ce.text( + in_averted_DALYs[scen][0] if scen != 'SQ' else 0, + (map_cost[scen] + (space * 0.8)) if va_scen[scen] == "bottom" + else (map_cost[scen] - space), + icer_domination[scen], + fontsize=12, ha=ha_scen[scen], va=va_scen[scen], color=get_scen_colour(scen), + ) + + # Add cost-effectiveness frontier (dotted line connecting non-dominated scenarios) + frontier_x = [in_averted_DALYs[scen][0] if scen != 'SQ' else 0 for scen in final_frontier] + frontier_y = [map_cost[scen] for scen in final_frontier] + ax_ce.plot( + frontier_x, + frontier_y, + linestyle=":", + color="black", + linewidth=1.5, + label="Cost-effectiveness frontier", + ) + + # Add axis labels + ax_ce.set_xlabel("DALYs Averted, millions") + ax_ce.set_ylabel("Total Incremental Costs (2023 USD), millions") + # Format both axis to millions; x rounded to 1 decimal, but y to 0 decimals + apply_millions_formatter_to_ax(ax_ce, y_decimals=0) + # #TODO: uncomment only when creating this, otherwise it messes with other figures axis + # ax_ce.set_title( + # f"$\\bf{{unit\\ cost:}}$ {in_data_impl_cost_name}; " + # f"$\\bf{{CS\\ &\\ GM\\ sharing:}}$ {in_sharing_GM_CS} prop of implem. 
costs; " + # f"$\\bf{{FS\\ multiplier:}}$ {in_FS_multiplier}", + # pad=12, + # ) + + # # Add dashed black line for 1 DALY averted per CET + # y_vals = np.array(ax_ce.get_ylim()) + # x_vals = y_vals / CET + # # x_vals = np.array(ax_ce.get_xlim()) + # # y_vals = x_vals * CET + # ax_ce.plot(x_vals, y_vals, color="black", linestyle="--") + # ax_ce.text( + # x_vals[-1] + 10, y_vals[-1] + 1e6, f"ICER = ${CET}/DALY", + # color="black", fontsize=9, ha="left", va="top", + # ) + + # Add a legend box with scenario and CEF labels + # order legend labels by y (total_cost) + # so the largest value appears at the top and smallest at the bottom + ordered_scenarios = list( + all_costs_df.sort_values("total_cost", ascending=False)["scenario"].tolist() + ) + # Create proxy handles with matching colours for the ordered scenarios + proxy_handles = [ + mpl_lines.Line2D([], [], marker="o", color=get_scen_colour(scen), linestyle="-", markersize=8) + for scen in ordered_scenarios + ] + # add a proxy handle for the cost-effectiveness frontier (dotted black line) + frontier_handle = mpl_lines.Line2D([], [], linestyle=":", color="black", linewidth=1.5) + proxy_handles.append(frontier_handle) + + # map scenario short codes to display full names in legend + ax_ce.legend(proxy_handles, map_scenario_labels(ordered_scenarios) + ['cost-effectiveness\nfrontier'], loc="center left", + bbox_to_anchor=(1, 0.5), fontsize=12, labelspacing=1.4) + + plt.tight_layout() + fig_ce.savefig( + outputs_path + / ( + f"cost_effectiveness_scatter_DALYsAverted_vs_TotalCosts__" + f"{scenarios_tocompare_prefix}__{timestamps_and_ce_suffix}.png" + ), + bbox_inches="tight", + ) + plt.close(fig_ce) + + # TABLE cost-effectiveness + # ##### + print("\ncreating table with CE metrics ...") + + # Create a cost-effectiveness summary table for each scenario + ce_table_rows = [] + for scen in all_costs_df["scenario"]: + averted_dalys = in_averted_DALYs[scen][0] if scen != 'SQ' else 0 + averted_dalys_lower = 
in_averted_DALYs[scen][1] if scen != 'SQ' else 0 + averted_dalys_upper = in_averted_DALYs[scen][2] if scen != 'SQ' else 0 + incremental_costs = \ + all_costs_df.loc[all_costs_df['scenario'] == scen, 'incremental_consumable_costs'].values[0] + impl_cost = all_costs_df.loc[all_costs_df['scenario'] == scen, 'implementation_costs'].values[0] + total_cost = incremental_costs + impl_cost + + net_health_benefit = averted_dalys - (total_cost / CET) + net_health_benefit_lower = averted_dalys_lower - (total_cost / CET) + net_health_benefit_upper = averted_dalys_upper - (total_cost / CET) + + net_monetary_benefit = (averted_dalys * CET) - total_cost + net_monetary_benefit_lower = (averted_dalys_lower * CET) - total_cost + net_monetary_benefit_upper = (averted_dalys_upper * CET) - total_cost + + max_allow_total_costs = (averted_dalys * CET) + max_allow_total_costs_lower = (averted_dalys_lower * CET) + max_allow_total_costs_upper = (averted_dalys_upper * CET) + + ce_table_rows.append( + { + "Scenario": scen, + "Total costs (2023 USD)": f"{total_cost:,.0f}", + "Averted DALYs (95% CI)": f"{averted_dalys:,.0f} " + f"({averted_dalys_lower:,.0f}; {averted_dalys_upper:,.0f})", + "Incremental consumable-related costs (2023 USD)": f"{incremental_costs:,.0f}", + "Implementation costs estimate (2023 USD)": f"{impl_cost:,.0f}", + "ICER (2023 USD)": f"{icer_domination[scen]}", + "Incremental net health benefit (95% CI)": + f"{net_health_benefit:,.0f} " + f"({net_health_benefit_lower:,.0f}; {net_health_benefit_upper:,.0f})", + "Incremental net monetary benefit (95% CI)": + f"{net_monetary_benefit:,.0f} " + f"({net_monetary_benefit_lower:,.0f}; {net_monetary_benefit_upper:,.0f})", + "Maximum allowable total costs (2023 USD, 95% CI)": + f"{max_allow_total_costs:,.0f} " + f"({max_allow_total_costs_lower:,.0f}; {max_allow_total_costs_upper:,.0f})", + } + ) + ce_table_df = pd.DataFrame(ce_table_rows) + ce_table_df = ce_table_df.sort_values( + by="Total costs (2023 USD)", key=lambda x: 
x.str.replace(",", "").astype(float) + ) + ce_table_df.to_csv( + outputs_path / f"cost_effectiveness_summary_table__{timestamps_and_ce_suffix}.csv", + index=False, + ) + + def table_effectiveness(averted_outcome: dict, outcome_cause_suffix:str) -> None: + + eff_table = pd.DataFrame( + { + "Scenario": list(averted_outcome.keys()), + f"Averted {outcome_type}": [ + f"{round_standard(vals[0]):,} ({round_standard(vals[1]):,}; {round_standard(vals[2]):,})" + for vals in averted_outcome.values() + ], + } + ) + eff_table.to_csv(outputs_path / f"effectiveness_table_{outcome_cause_suffix}_{timestamps_suffix}.csv", + index=False) + + if outcome_type == "DALYs": + if cause == "any cause": + # sensitivity to unit cost + data_impl_cost_name = ['Pearson_etal2018', 'Margolies_etal2021'] + # sensitivity to sharing implementation costs for GM and CS interventions + sharing_GM_CS = [0.5, 0] + # sensitivity to FS intervention multiplier + FS_multiplier = [10, 2, 1, 0.5] + + # Individual CE planes + # medical consumables are given by model, hence do not change within the sensitivity analyses + medical_calc_done = False + for unit_cost in data_impl_cost_name: + for GM_CS__multiplier in sharing_GM_CS: + for FS__multiplier in FS_multiplier: + plot_and_table_cost_effectiveness( + averted_dict, unit_cost, GM_CS__multiplier, FS__multiplier, medical_calc_done + ) + medical_calc_done = True + + ######################################## + # Sensitivity plot - create grid of CE figures for all unit_cost / GM_CS / FS combinations + # (highlight the grid with the value assumed in main analysis) + ######################################## + print("\nplotting sensitivity CE plot ...") + + # 1. Define the target for the "Main Analysis" highlight + target_unit_cost = "Pearson_etal2018" + target_gm_cs = 0.5 + target_fs_mult = 1.0 + + # 2. 
Build grid dims + n_rows = len(data_impl_cost_name) * len(sharing_GM_CS) + n_cols = len(FS_multiplier) + + # Create figure at higher resolution (DPI) to improve exported image quality + # use gridspec spacing controls to reduce large gaps between columns/rows + fig, axes = plt.subplots( + n_rows, n_cols, figsize=(4 * n_cols, 3 * n_rows), dpi=200, + constrained_layout=False, + gridspec_kw={'wspace': 0.12, 'hspace': 0.18} + ) + + # Tighten left/right margins and avoid large automatic padding caused by negative text positions + fig.subplots_adjust(left=0.14, right=0.98, top=0.96, bottom=0.06) + + # Ensure axes is a 2D array for consistent indexing + axes = np.atleast_2d(axes) + + # 3. Calculate target indices + r1_idx_target = data_impl_cost_name.index(target_unit_cost) + r2_idx_target = sharing_GM_CS.index(target_gm_cs) + row_idx_target = r1_idx_target * len(sharing_GM_CS) + r2_idx_target + col_idx_target = list(FS_multiplier).index(target_fs_mult) + + # 4. Iterate rows and columns in desired order to create all grids: + # rows: for each unit_cost in data_impl_cost_name -> for each GM_CS in sharing_GM_CS + for r1_idx, unit_cost in enumerate(data_impl_cost_name): + for r2_idx, gm_cs in enumerate(sharing_GM_CS): + row_idx = r1_idx * len(sharing_GM_CS) + r2_idx + for col_idx, fs_mult in enumerate(FS_multiplier): + ax = axes[row_idx, col_idx] + ce_suffix = f"{unit_cost}_GM-CS-sharing{gm_cs}_FSmultiplier{fs_mult}" + timestamps_and_ce_suffix = f"{timestamps_suffix}__{ce_suffix}" + img_path = outputs_path / ( + f"cost_effectiveness_scatter_DALYsAverted_vs_TotalCosts__" + f"{scenarios_tocompare_prefix}__{timestamps_and_ce_suffix}.png" + ) + + if img_path.exists(): + # Open image and display with preserved aspect ratio; turn axes off + img = Image.open(img_path).convert("RGB") + ax.imshow(img, aspect='auto', interpolation='bilinear') + else: + # Clear axes and show placeholder text + ax.clear() + ax.text(0.5, 0.5, f"Missing:\\n{img_path.name}", ha="center", va="center", + 
fontsize=8) + # Clean up axes for all cells + ax.set_xticks([]) + ax.set_yticks([]) + ax.set_axis_off() + + # Apply the Green Rectangle Highlight to the target cell + # expansions for the highlight so it does not cover label or legend + padding_x = 0.03 # Extra space on right side + padding_y = 0.04 # Extra space on bottom + if row_idx == row_idx_target and col_idx == col_idx_target: + # Add thick green border + rect = Rectangle( + (-0.6 * padding_x , -0.5 * padding_y), + 1 + padding_x, + 1 + padding_y, + linewidth=10, # Very thick to be visible over image edges + edgecolor="#c6e0b4", + facecolor="none", + transform=ax.transAxes, + zorder=10, + clip_on=False, + ) + ax.add_patch(rect) + + # Add identifying label + ( + ax.text( + 0.63, + 0.98, + "MAIN ANALYSIS", + transform=ax.transAxes, + color="white", + weight="bold", + fontsize=10, + ha="left", + va="top", + bbox=dict(facecolor="#c6e0b4", edgecolor="none", pad=4), + ), + ) + + # Create a unit cost mapping dictionary + unit_cost_mapping = { + "Pearson_etal2018": r'\$0.53 (1st y), \$0.37 (subsequent ys)', + "Margolies_etal2021": r'\$9.17 (each year)', + } + + # add row labels on the left (unit_cost + gm_cs) using figure text to avoid expanding axes margins + for r in range(n_rows): + unit_idx = r // len(sharing_GM_CS) + gm_idx = r % len(sharing_GM_CS) + + # column titles: only for first row + if r == 0: + for c in range(n_cols): + axes[0, c].set_title(f"$\\bf{{FS\\ multiplier:}}$ {FS_multiplier[c]}", + loc="center", x=0.43, pad=10, fontsize=8) + # row labels: place in left margin of the row; use transform to align with axes coordinates + row_label = ( + f"$\\bf{{unit\\ cost:}}$ {unit_cost_mapping[data_impl_cost_name[unit_idx]]};\n" + f"$\\bf{{GM\\ &\\ CS\\ shared\\ implem. 
costs\\ prop.:}}$ {sharing_GM_CS[gm_idx]}" + ) + axes[r, 0].text(-0.02, 0.5, row_label, transform=axes[r, 0].transAxes, + rotation=90, ha="right", va="center", fontsize=8) + + plt.tight_layout() + cohort_prefix = \ + "Neo" if cohort == "Neonatal" else "Under5" if cohort == "Under-5" else "unknown_cohort" + out_file = \ + outputs_path / (f"{cohort_prefix}_cost_effectiveness_sensitivity_grid__" + f"{scenarios_tocompare_prefix}__{timestamps_suffix}.png") + # Save at higher DPI for better quality + fig.savefig(out_file, bbox_inches="tight", dpi=300) + plt.close(fig) + + ######################################## + # Total cost mapping table + ######################################## + print("\ncreating cost mapping tables ...") + print("\n per intervention period & per 1 year & per 1 year per intervention") + # Define the number of years within the intervention period + interv_period_lenght = 5 + + # includes total_cost ranges (min—max) for the same grid used in the CE sensitivity plot + table_interv_period_rows = [] + table_1_rows = [] + table_1_per_interv_rows = [] + row_labels = [] + for unit_cost_ref_name in data_impl_cost_name: + unit_cost = unit_cost_mapping[unit_cost_ref_name] + for gm_cs in sharing_GM_CS: + row_label = f"{unit_cost}; GM & CS: {gm_cs}" + row_labels.append(row_label) + row_interv_period_cells = [] + row_1_cells = [] + row_1_per_interv_cells = [] + for fs_mult in FS_multiplier: + ce_suffix = f"{unit_cost_ref_name}_GM-CS-sharing{gm_cs}_FSmultiplier{fs_mult}" + all_costs_path = outputs_path / "outcomes_data" / f"all_costs_{SQ_results_timestamp}_{ce_suffix}.pkl" + all_costs_df_local = pd.read_pickle(all_costs_path) + # Exclude SQ scenario + df_nonSQ_local = all_costs_df_local[all_costs_df_local["scenario"] != "SQ"].copy() + lo = df_nonSQ_local["total_cost"].min() + hi = df_nonSQ_local["total_cost"].max() + cell_interv_period = f"{lo:,.0f}; {hi:,.0f}" + cell_1 = f"{lo/interv_period_lenght:,.0f}; {hi/interv_period_lenght:,.0f}" + # Create new column 
total cost per 1 year per intervention + df_nonSQ_local["total_cost_per1y_perInterv"] = df_nonSQ_local.apply( + lambda row: (row["total_cost"] / interv_period_lenght) + / (row["scenario"].count("_") + 1), + axis=1, + ) + lo_per1y_per_interv = df_nonSQ_local["total_cost_per1y_perInterv"].min() + hi_per1y_per_interv = df_nonSQ_local["total_cost_per1y_perInterv"].max() + cell_1_per_interv = f"{lo_per1y_per_interv:,.0f}; {hi_per1y_per_interv:,.0f}" + row_interv_period_cells.append(cell_interv_period) + row_1_cells.append(cell_1) + row_1_per_interv_cells.append(cell_1_per_interv) + table_interv_period_rows.append(row_interv_period_cells) + table_1_rows.append(row_1_cells) + table_1_per_interv_rows.append(row_1_per_interv_cells) + + # Columns labelled by FS multiplier values + col_labels = [str(x) for x in FS_multiplier] + total_cost_range_table_interv_period = pd.DataFrame(table_interv_period_rows, index=row_labels, columns=col_labels) + total_cost_range_table_1 = pd.DataFrame(table_1_rows, index=row_labels, columns=col_labels) + total_cost_range_table_1_per_interv = pd.DataFrame(table_1_per_interv_rows, index=row_labels, columns=col_labels) + + # Save summary as CSVs + out_tables_path_without_per_suffix = \ + outputs_path / f"total_cost_mapping_table__{scenarios_tocompare_prefix}__{timestamps_suffix}" + + # per intervention period + total_cost_range_table_interv_period.to_csv(Path(str(out_tables_path_without_per_suffix) + + f"_per{interv_period_lenght}years.csv"), + index=True) + # per 1 year + total_cost_range_table_1.to_csv(Path(str(out_tables_path_without_per_suffix) + + "_per1year.csv"), + index=True) + # per 1 year per interv + total_cost_range_table_1_per_interv.to_csv(Path(str(out_tables_path_without_per_suffix) + + "_per1year_perInterv.csv"), + index=True) + + else: # cause != "any cause": + table_effectiveness(averted_dict, f"{outcome_type}_{cause}") + # if outcome_type == "deaths": + # table_effectiveness(averted_deaths, f"{outcome_type}_{cause}") + +# 
def calc_calibration_annual_death_CIs(calib_outputs_path: Path) -> None:
    """
    Print the average annual number of direct under-5 deaths due to SAM over the calibration period.

    The latest status-quo (SQ) scenario output is located, the matching pickled death outcomes are loaded, and the
    calibration-period sum with its CI bounds is turned into an annual average. The printed list has the form
    [mean, mean - lower, upper - mean].

    :param calib_outputs_path: currently not used by this function (the input locations are hard-coded below);
        kept so that existing callers keep working.
    :raises FileNotFoundError: if no SQ scenario output, or no death-outcomes file for its timestamp, is found.
    :raises ValueError: if more than one death-outcomes file matches the SQ timestamp.
    """
    print("\nannual direct deaths, average over calibration period calculation ...")
    # Get latest SQ timestamp
    SQ_outcomes_path = Path("./outputs/sejjej5@ucl.ac.uk/wasting/scenarios/SQ")
    SQ_outcomes_file_prefix = 'wasting_analysis__full_model_SQ-'
    SQ_scenario_outputs = get_scenario_outputs(SQ_outcomes_file_prefix, SQ_outcomes_path)
    if len(SQ_scenario_outputs) == 0:
        # Fail with a message naming what was searched for, rather than a bare IndexError on [-1]
        raise FileNotFoundError(
            f"No scenario outputs with prefix '{SQ_outcomes_file_prefix}' found in {SQ_outcomes_path}"
        )
    SQ_timestamp = SQ_scenario_outputs[-1].name.split(SQ_outcomes_file_prefix)[-1]

    # Get the file with death outcomes
    outcomes_data_dir = Path("./outputs/sejjej5@ucl.ac.uk/wasting/scenarios/_outcomes/outcomes_data")

    # Find the file with death outcomes for this SQ timestamp (should be exactly one)
    search_pattern = f"death_outcomes_*{SQ_timestamp}*.pkl"
    matches = list(outcomes_data_dir.glob(search_pattern))
    if len(matches) == 0:
        raise FileNotFoundError(f"No file found matching pattern: {search_pattern} in {outcomes_data_dir}")
    if len(matches) > 1:
        # List the found files in the error message to help debugging
        files_found = [f.name for f in matches]
        raise ValueError(f"Ambiguity Error: Found multiple files matching pattern '{search_pattern}': {files_found}")
    death_outcomes_path = matches[0]

    # Load modelled under 5 death outcomes due to SAM for calibration period
    # ###
    print("\nloading death outcomes from file ...")
    # NOTE(review): pickle.load can execute arbitrary code; only load files written by this analysis pipeline.
    with death_outcomes_path.open("rb") as f:
        death_outcomes_dict = pickle.load(f)
    # SAM deaths over calibration period Sum and 95% CI
    SQ_calib_period_under5_SAM_deaths_sum_ci_list = \
        death_outcomes_dict['SQ']['calib_under5_SAM_deaths_sum_ci_df'].loc['sum', 0]
    calib_period = death_outcomes_dict['SQ']['calib_years']

    # Average and 95% CI errors: Annual nmb of SAM deaths (avg over calib period)
    # NOTE(review): the divisor is hard-coded; presumably it should equal the number of years in `calib_period`
    # -- confirm against the code that writes 'calib_years' before deriving it from the data.
    nmb_of_calib_years = 5
    SQ_calib_period_avg_annual_under5_SAM_deaths_avg_ci_list = \
        [val / nmb_of_calib_years for val in SQ_calib_period_under5_SAM_deaths_sum_ci_list]
    print(f"\ncalibration period: {calib_period}")
    mean, low, upp = SQ_calib_period_avg_annual_under5_SAM_deaths_avg_ci_list
    # Express the bounds as distances from the mean
    SQ_calib_annual_under5_SAM_deaths_mean_errors_list = [mean, mean - low, upp - mean]
    print("\nSQ_calib_annual_under5_SAM_deaths_mean_errors_list:")
    print(SQ_calib_annual_under5_SAM_deaths_mean_errors_list)
def plot_calibration_prevalence_outputs(calib_outputs_path: Path) -> None:
    """
    Placeholder: this function currently has no body besides this docstring and does nothing when called.

    Intended to create plots comparing
    calibration outputs (mean and CI over runs) and data (reported value and CI derived from reported age group sizes):
    * prevalence of moderate and severe wasting among age groups in 2016,
    * prevalence of moderate and severe wasting among age groups in 2020,

    NOTE(review): the original text announced 3 plots but lists only the two above -- confirm what the third
    one should be once this is implemented.

    :param calib_outputs_path: not used yet; presumably the folder the plots will be saved to -- TODO confirm.
    """
def _save_and_close_figure(fig, file_path) -> None:
    """Save `fig` to `file_path` (300 dpi, tight bounding box) and close it so it does not stay open."""
    fig.savefig(file_path, dpi=300, bbox_inches='tight')
    plt.close(fig)


def _label_month_panel(ax, is_bottom_row: bool, is_first_col: bool, y_label: str) -> None:
    """Keep x tick labels only on the bottom row and y tick labels only in the first column of a panel grid."""
    if is_bottom_row:
        ax.set_xlabel("Facility Level")
        ax.set_xticklabels(ax.get_xticklabels(), rotation=90)
    else:
        ax.set_xlabel("")
        ax.set_xticklabels([])
    if is_first_col:
        ax.set_ylabel(y_label)
        ax.set_yticklabels(ax.get_yticklabels(), rotation=0)
    else:
        ax.set_ylabel("")
        ax.set_yticklabels([])


def _treatment_availability_table(item_availability: pd.DataFrame, treatment_items: dict, columns) -> pd.DataFrame:
    """
    Build a treatments x `columns` table in which each cell is the product of the availabilities of all items
    the treatment needs (rows of `item_availability`), plus an 'Average' column holding the row mean.
    """
    availability = {
        treatment: {
            column: np.prod([item_availability.loc[item, column] for item in items])
            for column in columns
        }
        for treatment, items in treatment_items.items()
    }
    table = pd.DataFrame.from_dict(availability, orient='index', columns=columns)
    table['Average'] = table.mean(axis=1)
    return table


def plot_availability_heatmaps(outputs_path: Path) -> None:
    """
    Creates the following heatmaps of average availabilities:
    A) HEATMAP OF ESSENTIAL CONSUMABLES AVAILABILITY
        A1) average over the entire year at each facility level
        A2) average for each month at each facility level
        A3) average for each month at requested facility level
    B) HEATMAP OF TREATMENTS AVAILABILITY, i.e. probability all essential consumables for the treatments are available
        B1) average over the entire year at each facility level
        B2) average for each month at each facility level
        B3) average for each month at requested facility level

    Availability data and the master facilities list are read from fixed files under ./resources.
    Every figure is closed right after it has been saved. The seaborn theme is changed globally
    (font_scale=1.5) as a side effect.

    :param outputs_path: Path to save the plots as PNG files.
    """
    resourcefilepath = Path("./resources")

    tlo_availability_df = pd.read_csv(
        resourcefilepath / 'healthsystem' / 'consumables' / "ResourceFile_Consumables_availability_all.csv")

    # Master Facilities List (district, facility level, region, facility id, and facility name)
    mfl = pd.read_csv(resourcefilepath / "healthsystem" / "organisation" / "ResourceFile_Master_Facilities_List.csv")
    tlo_availability_df = tlo_availability_df.merge(mfl[['District', 'Facility_Level', 'Facility_ID']],
                                                    on=['Facility_ID'], how='left')

    # fac_levels = {'0': 'Health Post', '1a': 'Health Centers', '1b': 'Rural/Community \n Hospitals',
    #               '2': 'District Hospitals', '3': 'Central Hospitals', '4': 'Mental Hospital'}
    correct_order_of_fac_levels = ['0', '1a', '1b', '2', '3', '4']
    chosen_item_codes = [208, 1227, 1220]
    item_names_to_map = {208: 'CSB++*', 1227: 'RUTF', 1220: 'F-75\ntherapeutic\nmilk'}

    tlo_availability_df = tlo_availability_df[tlo_availability_df.Facility_Level.isin(correct_order_of_fac_levels)]

    # Month labels used for any plot showing months (A3, B3)
    month_labels = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
    # Mapping integer month columns (1-12) to month labels
    month_map = {i + 1: month_labels[i] for i in range(12)}
    # Column order for the per-month heatmaps: months Jan-Dec, empty spacer column, then Average
    ordered_cols = month_labels + ["", "Average"]

    # A) HEATMAP OF ESSENTIAL CONSUMABLES AVAILABILITY
    # A1) Essential consumables: average over the entire year at each facility level
    # ###
    print("Heathmap A1...")
    aggregated_df = \
        tlo_availability_df.groupby(['Facility_Level', 'item_code'])[['available_prop']].mean().reset_index()
    heatmap_data = aggregated_df.pivot(columns='Facility_Level', index='item_code', values='available_prop')
    # Keep chosen items, order the facility levels
    heatmap_data = heatmap_data.loc[chosen_item_codes]
    heatmap_data = heatmap_data.reindex(columns=correct_order_of_fac_levels)
    # Add average column (availability across all facility levels); aligned on item_code
    heatmap_data['Average'] = aggregated_df.groupby('item_code')['available_prop'].mean()
    # Map item codes to names
    heatmap_data.index = heatmap_data.index.map(item_names_to_map)

    sns.set_theme(font_scale=1.5)
    fig_a1 = plt.figure(figsize=(10, 8))
    sns.heatmap(heatmap_data, annot=True, cmap='RdYlGn', vmin=0, vmax=1,
                cbar_kws={'label': 'Proportion of days on which consumable is available'})
    plt.title('Availability of essential consumables\n for acute malnutrition treatments', fontweight='bold')
    plt.xlabel('Facility Level')
    plt.ylabel('Consumable')
    plt.xticks(rotation=90)
    plt.yticks(rotation=0)
    _save_and_close_figure(fig_a1, outputs_path / 'consumable_availability_heatmap.png')

    # A2) Essential consumables: average for each month at each facility level
    # ###
    print("Heathmaps A2...")
    monthly_agg_df = \
        tlo_availability_df.groupby(["Facility_Level", "item_code", "month"])[['available_prop']].mean().reset_index()
    months = range(1, 13)

    fig_a2, axes_a2 = plt.subplots(3, 4, figsize=(24, 18))
    for i, month in enumerate(months):
        row, col = divmod(i, 4)
        ax = axes_a2[row, col]
        month_df = monthly_agg_df[monthly_agg_df["month"] == month]
        heatmap_data_month = month_df.pivot(
            columns="Facility_Level", index="item_code", values="available_prop"
        ).reindex(index=chosen_item_codes, columns=correct_order_of_fac_levels)
        # Add average column (across all facility levels)
        heatmap_data_month['Average'] = month_df.groupby('item_code')['available_prop'].mean()
        heatmap_data_month.index = heatmap_data_month.index.map(item_names_to_map)
        sns.heatmap(heatmap_data_month, annot=True, cmap="RdYlGn", cbar=False, ax=ax, vmin=0, vmax=1)
        ax.set_title(f"Month {month}", fontweight="bold")
        _label_month_panel(ax, is_bottom_row=(row == 2), is_first_col=(col == 0), y_label="Consumable")
    fig_a2.tight_layout()
    _save_and_close_figure(fig_a2, outputs_path / "consumable_monthly_availability_heatmaps.png")

    # A3) Essential consumables: average for each month at requested facility level
    # ###
    print("Heathmap A3...")
    ess_cons_requested_at = ['208_1a', '1227_1a', '1227_1b', '1220_1b']

    def split_item_level(item_level: str) -> tuple[int, str]:
        """
        :param item_level: string with item left from '_', and level right from '_'
        :return: takes the item_level, and returns what is before '_' as integer (it is the item_code)
            and what is after '_' as string (it is fac_level)
        """
        assert item_level.count('_') == 1, "the argument of split_item_level fnc must have a structure of: item_level"
        before, after = item_level.split('_', 1)
        return int(before), after

    # Collect one record per (item_level, month) and build the frame once, instead of growing it with
    # pd.concat inside the loop; missing combinations are stored as NaN so the column stays numeric.
    requested_records = []
    for item_level in ess_cons_requested_at:
        item_code, fac_level = split_item_level(item_level)
        for month in months:
            val = monthly_agg_df.loc[
                (monthly_agg_df["item_code"] == item_code)
                & (monthly_agg_df["Facility_Level"] == fac_level)
                & (monthly_agg_df["month"] == month),
                "available_prop"
            ]
            requested_records.append({'item_level': item_level, "month": month,
                                      "available_prop": val.values[0] if not val.empty else np.nan})
    monthly_aggregated_data_requested_fac_level = \
        pd.DataFrame(requested_records, columns=['item_level', 'month', 'available_prop'])

    # Pivot for heatmap
    heatmap_data_requested_fac_level_raw = \
        monthly_aggregated_data_requested_fac_level.pivot(
            index="item_level", columns="month", values="available_prop"
        )
    # Map item codes to names
    item_level_labels_to_map = {"208_1a": "CSB++*\nprimary level", "1227_1a": "RUTF\nprimary level",
                                "1227_1b": "RUTF\nsecondary level",
                                "1220_1b": "F-75 therapeutic milk\nsecondary level"}
    heatmap_data_requested_fac_level = heatmap_data_requested_fac_level_raw.copy()
    heatmap_data_requested_fac_level.index = \
        heatmap_data_requested_fac_level.index.map(item_level_labels_to_map)
    heatmap_data_requested_fac_level = heatmap_data_requested_fac_level.reindex(
        [item_level_labels_to_map[item_level] for item_level in ess_cons_requested_at]
    )
    heatmap_data_requested_fac_level["Average"] = heatmap_data_requested_fac_level.mean(axis=1)
    heatmap_data_requested_fac_level[""] = np.nan  # Add empty column for spacing
    heatmap_data_requested_fac_level = heatmap_data_requested_fac_level.rename(columns=month_map)
    heatmap_data_requested_fac_level = heatmap_data_requested_fac_level[ordered_cols]

    fig_a3 = plt.figure(figsize=(12, 4))
    sns.heatmap(
        heatmap_data_requested_fac_level,
        annot=True, cmap="RdYlGn", vmin=0, vmax=1, cbar_kws={"label": "Proportion of days available"}
    )
    # plt.title("Monthly average availability of consumables at requested facility levels", fontweight="bold")
    plt.xlabel("Month")
    plt.ylabel("Consumable")
    plt.xticks(rotation=0)
    plt.yticks(rotation=0)
    _save_and_close_figure(fig_a3, outputs_path / "consumable_availability_heatmap_requested_fac_level.png")

    # B) HEATMAP OF TREATMENTS AVAILABILITY, i.e. probability all essential consumables for the treatments are available
    # B1) Treatments: average over the entire year at each facility level
    # ###
    print("Heathmap B1...")
    treatment_item_map = {
        "SFP": ["CSB++*"],  # 208
        "OTP": ["RUTF"],  # 1227
        "ITC": ["F-75\ntherapeutic\nmilk", "RUTF"]  # 1220, 1227
    }
    treatment_heatmap_data = \
        _treatment_availability_table(heatmap_data, treatment_item_map, correct_order_of_fac_levels)

    fig_b1 = plt.figure(figsize=(10, 8))
    sns.heatmap(treatment_heatmap_data, annot=True, cmap='RdYlGn', vmin=0, vmax=1,
                cbar_kws={'label': 'Proportion of days on which treatment is available'})
    plt.title('Availability of treatments\n for acute malnutrition', fontweight='bold')
    plt.xlabel('Facility Level')
    plt.ylabel('Treatment')
    plt.xticks(rotation=90)
    plt.yticks(rotation=0)
    _save_and_close_figure(fig_b1, outputs_path / 'treatment_availability_heatmap.png')

    # B2) Treatments: average for each month at each facility level
    # ###
    print("Heathmaps B2...")
    monthly_treatment_availability = {}
    for month in months:
        month_df = monthly_agg_df[monthly_agg_df["month"] == month]
        heatmap_data_month = month_df.pivot(
            columns="Facility_Level", index="item_code", values="available_prop"
        ).reindex(index=chosen_item_codes, columns=correct_order_of_fac_levels)
        heatmap_data_month.index = heatmap_data_month.index.map(item_names_to_map)
        # Treatment availability for each facility level in this month
        monthly_treatment_availability[month] = \
            _treatment_availability_table(heatmap_data_month, treatment_item_map, correct_order_of_fac_levels)

    fig_b2, axes_b2 = plt.subplots(3, 4, figsize=(24, 18))
    for i, month in enumerate(months):
        row, col = divmod(i, 4)
        ax = axes_b2[row, col]
        sns.heatmap(monthly_treatment_availability[month], annot=True, cmap="RdYlGn", cbar=False, ax=ax,
                    vmin=0, vmax=1)
        ax.set_title(f"Month {month}", fontweight="bold")
        _label_month_panel(ax, is_bottom_row=(row == 2), is_first_col=(col == 0), y_label="Treatment")
    fig_b2.tight_layout()
    _save_and_close_figure(fig_b2, outputs_path / "treatment_monthly_availability_heatmaps.png")

    # B3) Treatments: average for each month at requested facility level
    # ###
    print("Heathmap B3...")
    treatment_item_level_map = {
        "SFP\nprimary level": ['208_1a'],
        "OTP\nprimary level": ['1227_1a'],
        "ITC\nsecondary level": ['1227_1b', '1220_1b']
    }
    treatment_heatmap_data_requested_fac_level = \
        _treatment_availability_table(heatmap_data_requested_fac_level_raw, treatment_item_level_map, months)
    treatment_heatmap_data_requested_fac_level[''] = np.nan  # Add empty column for spacing
    treatment_heatmap_data_requested_fac_level = treatment_heatmap_data_requested_fac_level.rename(columns=month_map)
    treatment_heatmap_data_requested_fac_level = treatment_heatmap_data_requested_fac_level[ordered_cols]

    fig_b3 = plt.figure(figsize=(12, 4))
    sns.heatmap(
        treatment_heatmap_data_requested_fac_level,
        annot=True, cmap="RdYlGn", vmin=0, vmax=1, cbar_kws={"label": "Proportion of days available"}
    )
    # plt.title("Monthly average availability of treatments at requested facility levels", fontweight="bold")
    plt.xlabel("Month")
    plt.ylabel("Treatment")
    plt.xticks(rotation=0)
    plt.yticks(rotation=0)
    _save_and_close_figure(fig_b3, outputs_path / "treatment_availability_heatmap_requested_fac_level.png")
def __init__(self, sim_results_folder_path_str, in_datestamp, in_draw_nmb, in_run_nmb, in_png=False):
    """
    Locate, decompress and parse the wasting log of one simulation run, and set up plotting look-ups.

    :param sim_results_folder_path_str: folder with the scenario results; the gzipped log is looked for in its
        '<in_draw_nmb>/<in_run_nmb>/' sub-folder
    :param in_datestamp: datestamp used in names of the output folder and figures
    :param in_draw_nmb: draw number of the run to analyse
    :param in_run_nmb: run number of the run to analyse
    :param in_png: bool indicating whether we want to save all figures not only as pdf, but also as png
    :raises FileNotFoundError: if no '<scenario_filename>*.log.gz' file exists in the run's folder
    """
    self.outcomes_folder_path = sim_results_folder_path_str
    self.datestamp = in_datestamp
    self.draw_nmb = in_draw_nmb
    self.run_nmb = in_run_nmb
    # bool indicating whether we want to save all figures not only as pdf, but also as png.
    # (Previously written as `in_png, "<description>"`, which stored an always-truthy tuple.)
    self.png = bool(in_png)

    # Use the draw/run passed to this instance, not same-named module-level variables
    sim_results_folder_draw_x_run_x_path_str = \
        os.path.join(str(sim_results_folder_path_str), str(in_draw_nmb), str(in_run_nmb))
    sim_results_file_name_prefix = scenario_filename
    sim_results_file_name_extension = '.log.gz'
    log_file_pattern = os.path.join(sim_results_folder_draw_x_run_x_path_str,
                                    f"{sim_results_file_name_prefix}*{sim_results_file_name_extension}")
    gz_results_files = glob.glob(log_file_pattern)
    if not gz_results_files:
        raise FileNotFoundError(f"No log file found matching pattern: {log_file_pattern}")
    gz_results_file_path = Path(gz_results_files[0])

    # Path to the decompressed .log file
    self.__log_file_path = gz_results_file_path.with_suffix('')
    # Decompress the .log.gz file
    with gzip.open(gz_results_file_path, 'rb') as f_in:
        with open(self.__log_file_path, 'wb') as f_out:
            shutil.copyfileobj(f_in, f_out)

    # parse wasting logs
    self.__w_logs_dict = parse_log_file(self.__log_file_path)['tlo.methods.wasting']
    # TODO: Why it prints the messages from parse_log_file() twice?
    # parse scaling factor log
    # self.__scaling_factor = \
    #     parse_log_file(self.__log_file_path)['tlo.methods.population']['scaling_factor'].set_index('date').loc[
    #         '2010-01-01', 'scaling_factor'
    #     ]

    # wasting types description
    self.__wasting_types_desc = {'WHZ<-3': 'severe wasting',
                                 '-3<=WHZ<-2': 'moderate wasting',
                                 'WHZ>=-2': 'no wasting'}

    # paths of the pdf figures saved so far
    self.fig_files = []

    cycle = plt.rcParams['axes.prop_cycle'].by_key()['color']
    # # define colo(u)rs to use:
    self.__colors_model = {
        'severe wasting': cycle[0],  # #1f77b4
        'moderate wasting': cycle[1],  # #ff7f0e
        'SAM': '#B372B7',
        'MAM': '#D1BCD2',
    }
    self.__colors_data = {
        'severe wasting': '#82C1EC',
        'moderate wasting': '#C71E1E',
    }
    self.__colors_init_data = {
        'severe wasting': '#0E53EA',
        'moderate wasting': '#FFA783',
    }
    self.__data_name = {
        2010: 'DHS 2010',
        2014: 'MICS 2013-2014',
        2016: 'DHS 2015–2016',
        2020: 'MICS 2019–2020',
    }

    # Output folder, one level above the results folder; created if missing
    self.calib_outcomes_folder_path_name = (
        Path(sim_results_folder_path_str).parent / f"calib_outcome_figures_SQ-{self.datestamp}"
    )
    self.calib_outcomes_folder_path_name.mkdir(parents=True, exist_ok=True)
def create_calib_outcome_csv(self):
    """
    Write the header line of the calibration-outcome csv, unless the file is already there.

    The file is placed in `self.calib_outcomes_folder_path_name`; its name is the results folder path with the
    module-level `outputs_path` prefix stripped, followed by '_model_calib-data_intersect_bool'.
    :return: None
    """
    relative_results_folder = str(self.outcomes_folder_path).replace(str(outputs_path), "").lstrip("/")
    target_csv = (
        self.calib_outcomes_folder_path_name / f"{relative_results_folder}_model_calib-data_intersect_bool.csv"
    )

    # Nothing to do when the file was created earlier
    if target_csv.exists():
        return

    calibration_years = [2016, 2020]
    age_bounds_in_months = [(0, 5), (6, 11), (12, 23), (24, 35), (36, 47), (48, 59)]

    # one column per year x wasting type x age group
    prevalence_columns = []
    for year in calibration_years:
        for wasting_type in ["any", "sev"]:
            for lower, upper in age_bounds_in_months:
                prevalence_columns.append(f"{year}__{wasting_type}_wast__{lower}_{upper}mo")
    # one summary column per year
    yearly_sum_columns = [f"{year}__sum_prev_calib_points" for year in calibration_years]

    header_columns = (
        ["draw", "run"]
        + prevalence_columns
        + ["deaths_2010_2014", "deaths_2015_2019"]
        + yearly_sum_columns
        + ["sum_prev_calib_points", "sum_all_calib_points"]
    )
    with open(target_csv, "w") as header_file:
        header_file.write(",".join(header_columns) + "\n")
def save_fig__store_pdf_file(self, fig, fig_output_name: str) -> None:
    """
    Save `fig` as pdf (and as png when requested) and remember the pdf path in `self.fig_files`.

    Figures are saved one folder up from the results folder, into 'calib_outcome_figures_SQ-{datestamp}';
    the file name is `fig_output_name` followed by '_{draw_nmb}_{run_nmb}'.

    :param fig: figure object providing `savefig(path, format=...)`
    :param fig_output_name: file name stem of the figure (without draw/run suffix and extension)
    """
    output_folder = Path(self.outcomes_folder_path).parent / f'calib_outcome_figures_SQ-{self.datestamp}'
    output_folder.mkdir(parents=True, exist_ok=True)
    full_path_and_file_name = str(output_folder / (fig_output_name + f'_{self.draw_nmb}_{self.run_nmb}'))

    # `self.png` may arrive as a (flag, description) tuple, which is truthy even when the flag is False;
    # look at the flag itself so that png files are only written when actually requested.
    save_png = self.png
    if isinstance(save_png, tuple):
        save_png = save_png[0] if save_png else False
    if save_png:
        fig.savefig(full_path_and_file_name + '.png', format='png')

    pdf_file_name = full_path_and_file_name + '.pdf'
    fig.savefig(pdf_file_name, format='pdf')
    self.fig_files.append(pdf_file_name)
def plot_wasting_incidence(self):
    """
    Plot the annual incidence of moderate and severe wasting as stacked bars, one panel per age group.

    Incidence counts are indexed by the logged year minus one, divided by the age group's total population
    size, and restricted to years 2015-2030. The figure is passed to `save_fig__store_pdf_file`.
    """
    severe, moderate = "WHZ<-3", "-3<=WHZ<-2"

    incidence_log = self.__w_logs_dict['wasting_incidence_count']
    incidence = incidence_log.set_index(incidence_log.date.dt.year-1).drop(columns='date')
    # check no incidence of well-nourished
    no_cases_flags = incidence['WHZ>=-2'].apply(lambda counts: all(value == 0 for value in counts.values()))
    assert all(no_cases_flags)
    incidence = incidence[[severe, moderate]]

    pop_log = self.__w_logs_dict['pop sizes']
    pop_sizes = pop_log.set_index(pop_log.date.dt.year).drop(columns='date')
    total_cols = [
        col for col in pop_sizes.columns
        if col.startswith('total__') and col not in ['total__under5']
    ]
    pop_by_age = pop_sizes[total_cols].copy()
    # the two under-1 groups are merged into a single '0y' group
    pop_by_age['0y'] = pop_by_age['total__0_5mo'] + pop_by_age['total__6_11mo']
    pop_by_age = pop_by_age.drop(columns=['total__0_5mo', 'total__6_11mo']).rename(columns={
        'total__12_23mo': '1y',
        'total__24_35mo': '2y',
        'total__36_47mo': '3y',
        'total__48_59mo': '4y',
        'total__5y+': '5+y'
    })

    # get age_years, doesn't matter what wasting category you choose,
    # they all have same age groups
    age_years = list(incidence.loc[incidence.index[0], severe].keys())

    # plot setup
    fig, axes = plt.subplots(nrows=2, ncols=3, sharex=True, sharey=True, figsize=(10, 6))
    axes[1, 2].axis('off')  # 5+y has no data (no new cases in 5+y), its space is used to display the label
    for panel_nmb, age in enumerate(age_years):
        # panels are filled row by row, three per row
        panel_row, panel_col = divmod(panel_nmb, 3)

        props = pd.DataFrame()
        for state in incidence.columns:
            props[state] = incidence[state].map(lambda counts: counts[age])
        # remove sev cases from mod cases (all sev cases went through mod state)
        props[moderate] = (props[moderate] - props[severe]).clip(lower=0)
        # calculate props within the age group
        props = props.div(pop_by_age[age], axis=0)
        props = props.rename(columns=self.__wasting_types_desc)
        # filter data to include only years from 2015 up to 2030
        props = props.loc[props.index >= 2015]
        props = props.loc[props.index <= 2030]
        # check for invalid values
        if (props < 0).any().any() or (props > 1).any().any():
            print(f"Warning plot_wasting_incidence: Invalid values detected in plotting data for age group {age}:")
            print(props)

        ax = props.plot(kind='bar', stacked=True, ax=axes[panel_row, panel_col], title=f"{age} old")
        if panel_row == 1 and panel_col == 2:
            # the switched-off last panel only carries the legend
            ax.legend(loc='center', fontsize=legend_fontsize)
            ax.set_title('')
        else:
            ax.get_legend().remove()
        ax.set_xlabel('year')
        ax.set_ylabel('proportion (within age group)')

    fig.suptitle('Annual incidence of wasting among the age group', fontsize=title_fontsize)
    fig.tight_layout()
    self.save_fig__store_pdf_file(fig, 'wasting_incidence__' + self.datestamp)
    # plt.show()
# fig.delaxes(axes[1, 2]) + # for _age in age_years: + # new_df = pd.DataFrame() + # for state in w_inc_df.columns: + # new_df[state] = \ + # w_inc_df.apply(lambda row: row[state][_age], axis=1) + # # convert into proportions + # new_df = new_df.apply(lambda _row: _row / _row.sum(), axis=1) + # plotting = new_df.rename(columns=self.__wasting_types_desc) + # ax = plotting.plot(kind='bar', stacked=True, + # ax=axes[_row_counter, _col_counter], + # title=f"incidence of wasting in {_age} old", + # ylim=[0, 1]) + # ax.legend(loc='lower right') + # ax.set_xlabel('year') + # ax.set_ylabel('proportion') + # # move to another row + # if _col_counter == 2: + # _row_counter += 1 + # _col_counter = -1 + # _col_counter += 1 # increment column counter + # + # handles, labels = axes[1, 1].get_legend_handles_labels() + # fig.legend(handles, labels, loc='center left', bbox_to_anchor=(1.05, 0.5)) + # fig_output_name = ('wasting_incidence_mod_to_sev_props__' + self.datestamp) + # fig.tight_layout() + # self.save_fig__store_pdf_file(fig, fig_output_name) + # # plt.show() + + def plot_wasting_length(self): + """ plot the average length of wasting over time """ + + if 'wasting_length_avg' in self.__w_logs_dict: + w_length_df = self.__w_logs_dict['wasting_length_avg'] + w_length_df = w_length_df.set_index(w_length_df.date.dt.year) + w_length_df = w_length_df.drop(columns='date') + # get age_years, doesn't matter from which dict + age_years = list(w_length_df.loc[w_length_df.index[0], 'mod_MAM_tx_full_recov'].keys()) + # age_years.remove('5+y') + w_length_df = w_length_df.loc[:, ['mod_MAM_nat_full_recov', + 'mod_SAM_nat_full_recov', 'mod_SAM_nat_recov_to_MAM', + 'sev_SAM_nat_full_recov', 'sev_SAM_nat_recov_to_MAM', + 'mod_MAM_tx/nat_full_recov', + 'mod_SAM_tx_full_recov', 'mod_SAM_tx/nat_recov_to_MAM', + 'sev_SAM_tx_full_recov', 'sev_SAM_tx/nat_recov_to_MAM', + 'mod_not_yet_recovered', + 'sev_not_yet_recovered']] + + for recov_opt in w_length_df.columns: + _row_counter = 0 + _col_counter 
= 0 + # plot setup + fig, axes = plt.subplots(nrows=2, ncols=3, sharex=True, sharey=True, figsize=(10, 7)) + # axes[1, 2].axis('off') # 5+y has no data (no new cases in 5+y), its space is used to display the label + for _age in age_years: + plotting = pd.DataFrame() + # dict to dataframe + plotting[recov_opt] = \ + w_length_df.apply(lambda row: row[recov_opt][_age], axis=1) + + if recov_opt.startswith("mod_"): + colour_to_use = self.__colors_model['moderate wasting'] + y_upper_lim = 355 + else: + colour_to_use = self.__colors_model['severe wasting'] + y_upper_lim = 1000 + if recov_opt.endswith("not_yet_recovered"): + y_upper_lim = 4000 + + ax = plotting.plot(kind='bar', stacked=False, + ax=axes[_row_counter, _col_counter], + title=f"length of wasting in {_age} old", + color=colour_to_use, + ylim=[0, y_upper_lim]) + # show_legend = (_row_counter == 0 and _col_counter == 0) + # # show_x_axis_label = (_row_counter == 0 and _col_counter == 2) + # if show_legend: + # ax.legend(loc='upper right', bbox_to_anchor=(0.5, 1.2), + # fancybox=True, shadow=True, ncol=5) + # else: + ax.get_legend().remove() + # if show_x_axis_label: + # ax.set_xlabel('Year') # TODO: this is not working + ax.set_xlabel('year') + ax.set_ylabel('avg length of wasting (days)') + # move to another row + if _col_counter == 2: + _row_counter += 1 + _col_counter = -1 + _col_counter += 1 # increment column counter + + fig.suptitle(f'{recov_opt}', fontsize=16) + # Adjust layout to make room for the suptitle + fig.tight_layout(rect=[0, 0, 1, 0.95]) + fig_output_name = ('wasting_length__' + recov_opt + self.datestamp) + self.save_fig__store_pdf_file(fig, fig_output_name) + # plt.show(`) + + # def plot_wasting_initial_overall_prevalence(self): + # """ plot wasting prevalence of all age groups for the year 2010. 
Proportions are obtained by getting a total + # number of children wasted (moderately and severely) divided by the total number of children less than 5 years""" + # + # ## Prevalence at 2010, ie data from the same source used to draw initial prevalence by age group + # w_prev_calib_data_years_only_df = pd.DataFrame({ + # 'sev_wast_calib': [0.015], + # 'mod_wast_calib': [0.025] + # }, index=[2010]) + # date_range = pd.Index([2010], name='date') + # w_prev_calib = pd.DataFrame(index=date_range) + # # filling missing values with 0 + # w_prev_calib_df = w_prev_calib.merge( + # w_prev_calib_data_years_only_df, left_index=True, right_index=True, how='left' + # ).fillna(0) + # + # ## Initial prevalence at the beginning of 2010 - model + # init_w_prev_2010_only_df = self.__w_logs_dict["wasting_init_prevalence_props"] + # init_w_prev_2010_only_df = init_w_prev_2010_only_df[['date', 'total_sev_under5_prop', 'total_mod_under5_prop']].rename( + # columns={'total_sev_under5_prop': 'total_init_sev_under5_prop', 'total_mod_under5_prop': 'total_init_mod_under5_prop'} + # ) + # init_w_prev_2010_only_df = init_w_prev_2010_only_df.set_index(init_w_prev_2010_only_df.date.dt.year) + # init_w_prev_2010_only_df = init_w_prev_2010_only_df.drop(columns='date') + # init_w_prev_2010_only_df = init_w_prev_2010_only_df.loc[[2010]] + # init_w_prev_df = pd.DataFrame(index=date_range) + # # filling missing values with 0 + # init_w_prev_df = init_w_prev_df.merge( + # init_w_prev_2010_only_df, left_index=True, right_index=True, how='left' + # ).fillna(0) + # + # w_prev_calib_and_init_df = pd.merge(init_w_prev_df, w_prev_calib_df, on='date') + # columns_to_plot = [ + # ['total_init_sev_under5_prop', 'total_init_mod_under5_prop'], + # ['sev_wast_calib', 'mod_wast_calib'], + # ] + # colors_to_plot = { + # 'total_init_sev_under5_prop': self.__colors_init_data['severe wasting'], + # 'total_init_mod_under5_prop': self.__colors_init_data['moderate wasting'], + # 'sev_wast_calib': 
self.__colors_data['severe wasting'], + # 'mod_wast_calib': self.__colors_data['moderate wasting'], + # } + # labels_to_plot = { + # 'total_init_sev_under5_prop': 'severe wasting (initial)', + # 'total_init_mod_under5_prop': 'moderate wasting (initial)', + # 'sev_wast_calib': 'severe wasting (data)', + # 'mod_wast_calib': 'moderate wasting (data)', + # } + # + # fig, ax = plt.subplots() + # bar_spots = len(columns_to_plot) + # bar_width = 0.3 / bar_spots + # pos = np.arange(len(w_prev_calib_and_init_df)) + # dodge_offsets = np.linspace(-bar_spots * bar_width / 2, bar_spots * bar_width / 2, bar_spots, endpoint=False) + # for columns, offset in zip(columns_to_plot, dodge_offsets): + # bottom = 0 + # for col in ([columns] if isinstance(columns, str) else columns): + # ax.bar(pos + offset, w_prev_calib_and_init_df[col], bottom=bottom, width=bar_width, align='edge', + # label=labels_to_plot[col], color=colors_to_plot[col]) + # bottom += w_prev_calib_and_init_df[col] + # ax.set_xticks(pos) + # ax.set_xticklabels(w_prev_calib_and_init_df.index, rotation=90) + # ax.set_title(r"Overall wasting prevalence $\bf{at}$ $\bf{initiation}$ (2010)", fontsize=title_fontsize-1) + # ax.set_ylabel('proportion of wasted children in the year') + # ax.set_xlabel('year') + # ax.set_ylim([0, 0.131]) + # ax.legend(fontsize=legend_fontsize) + # plt.tight_layout() + # fig_output_name = ('wasting_initial_overall_prevalence__' + self.datestamp) + # self.save_fig__store_pdf_file(fig, fig_output_name) + # # plt.show() + # + # def plot_wasting_initial_prevalence_by_age_group(self): + # """ Plot wasting prevalence per each age group. 
Proportions are obtained by getting a total number of + # children wasted in a particular age-group divided by the total number of children per that age-group""" + # + # # Initial prevalence at the beginning of 2010 - model + # w_prev_df = self.__w_logs_dict["wasting_init_prevalence_props"] + # w_prev_df = w_prev_df.drop(columns={'total_mod_under5_prop', 'total_sev_under5_prop'}) + # w_prev_df = w_prev_df.set_index(w_prev_df.date.dt.year) + # w_prev_df = w_prev_df.drop(columns='date') + # + # # 2010 prevalence calibration data + # # TODO: load data_2010 from the resource file: + # # resources_path / 'ResourceFile_Wasting/wasting_prevalence_and_sample_size.csv' + # data_2010 = { + # 'wasted_calib': [7.0, 13.0, 12.7, 2.4, 2.7, 1.9, 0.0], + # 'sev_wast_calib': [2.1, 7.1, 4.7, 0.9, 0.7, 0.6, 0.0] + # } + # data_2010['mod_wast_calib'] = \ + # [(w - s)/100 for w, s in zip(data_2010['wasted_calib'], data_2010['sev_wast_calib'])] + # data_2010['sev_wast_calib'] = \ + # [s/100 for s in data_2010['sev_wast_calib']] + # + # # Prepare plotting data + # plotting_model = {'severe wasting': {}, 'moderate wasting': {}} + # for col in w_prev_df.columns: + # prefix, age_group = col.split('__') + # if prefix == 'sev': + # plotting_model['severe wasting'][age_group] = w_prev_df[col].values[0] + # elif prefix == 'mod': + # plotting_model['moderate wasting'][age_group] = w_prev_df[col].values[0] + # plotting_model = pd.DataFrame(plotting_model) + # + # plotting_calib = {'severe wasting': {}, 'moderate wasting': {}} + # age_groups = ['0_5mo', '6_11mo', '12_23mo', '24_35mo', '36_47mo', '48_59mo', '5y+'] + # for i, age_group in enumerate(age_groups): + # plotting_calib['severe wasting'][age_group] = data_2010['sev_wast_calib'][i] + # plotting_calib['moderate wasting'][age_group] = data_2010['mod_wast_calib'][i] + # plotting_calib = pd.DataFrame(plotting_calib) + # + # plotting_model = plotting_model.reindex(age_groups) + # plotting_calib = plotting_calib.reindex(age_groups) + # + # # Plot 
wasting prevalence + # fig, ax = plt.subplots(figsize=(10, 6)) + # bar_width = 0.35 + # # Set positions of bars on x-axis + # r1 = range(len(plotting_model)) + # r2 = [x + bar_width for x in r1] + # + # # Plot the first set of bars (model data) + # ax.bar(r1, plotting_model['severe wasting'], + # color=self.__colors_init_data['severe wasting'], width=bar_width, + # label='severe wasting (initial)') + # ax.bar(r1, plotting_model['moderate wasting'], bottom=plotting_model['severe wasting'], + # color=self.__colors_init_data['moderate wasting'], width=bar_width, + # label='moderate wasting (initial)') + # + # # Plot the second set of bars (calibration data) + # ax.bar(r2, plotting_calib['severe wasting'], + # color=self.__colors_data['severe wasting'], width=bar_width, + # label='severe wasting (data)') + # ax.bar(r2, plotting_calib['moderate wasting'], bottom=plotting_calib['severe wasting'], + # color=self.__colors_data['moderate wasting'], width=bar_width, + # label='moderate wasting (data)') + # + # ax.set_xlabel('age group') + # ax.set_ylabel('proportion') + # ax.set_title( + # r"Wasting prevalence in children 0-59 months per each age group $\bf{at}$ $\bf{initiation}$ (2010)", + # fontsize=title_fontsize-1) + # ax.set_xticks([r + bar_width / 2 for r in range(len(plotting_model))]) + # ax.set_xticklabels(age_groups) + # ax.set_ylim([0, 0.16]) + # ax.legend(fontsize=legend_fontsize) + # plt.tight_layout() + # fig_output_name = ('wasting_initial_prevalence_per_each_age_group__' + self.datestamp) + # self.save_fig__store_pdf_file(fig, fig_output_name) + # # plt.show() + + # def plot_wasting_prevalence_per_year(self): + # """ plot wasting prevalence of all age groups per year. 
Proportions are obtained by getting a total number of + # children wasted divide by the total number of children less than 5 years""" + # + # ## Prevalence at some years - data (2010 are the data used to draw initial prevalence) + # # TODO: add calibration data into the resource file: + # # resources_path / 'ResourceFile_Wasting/wasting_prevalence_and_sample_size.csv' + # # and load here and for initial overall prev from the RF + # # w_prev_calib_data_years_only_df = pd.DataFrame({ + # # 'sev_wast_calib': [0.015, 0.011, 0.006, 0.007], + # # 'mod_wast_calib': [0.025, 0.027, 0.021, 0.019] + # # }, index=[2010, 2014, 2016, 2020]) + # # date_range = pd.Index(range(2010, 2031), name='date') #TODO 2032 with new sims + # w_prev_calib_data_years_only_df = pd.DataFrame({ + # 'sev_wast_calib': [0.006, 0.007], + # 'mod_wast_calib': [0.021, 0.019] + # }, index=[2016, 2020]) + # date_range = pd.Index(range(2015, 2032), name='date') + # w_prev_calib = pd.DataFrame(index=date_range) + # # filling missing values with 0 + # w_prev_calib_df = w_prev_calib.merge( + # w_prev_calib_data_years_only_df, left_index=True, right_index=True, how='left' + # ).fillna(0) + # + # # ## Initial prevalence at the beginning of 2010 - model + # # init_w_prev_2010_only_df = self.__w_logs_dict["wasting_init_prevalence_props"] + # # init_w_prev_2010_only_df = init_w_prev_2010_only_df[['date', 'total_sev_under5_prop', 'total_mod_under5_prop']].rename( + # # columns={'total_sev_under5_prop': 'total_init_sev_under5_prop', 'total_mod_under5_prop': 'total_init_mod_under5_prop'} + # # ) + # # init_w_prev_2010_only_df = init_w_prev_2010_only_df.set_index(init_w_prev_2010_only_df.date.dt.year) + # # init_w_prev_2010_only_df = init_w_prev_2010_only_df.drop(columns='date') + # # init_w_prev_2010_only_df = init_w_prev_2010_only_df.loc[[2010]] + # # init_w_prev_df = pd.DataFrame(index=date_range) + # # # filling missing values with 0 + # # init_w_prev_df = init_w_prev_df.merge( + # # init_w_prev_2010_only_df, 
left_index=True, right_index=True, how='left' + # # ).fillna(0) + # + # ## Prevalence at the beginning of years - model + # w_prev_df = self.__w_logs_dict["wasting_prevalence_props"] + # w_prev_df = w_prev_df[['date', 'total_sev_under5_prop', 'total_mod_under5_prop']] + # w_prev_df = w_prev_df.set_index(w_prev_df.date.dt.year) + # w_prev_df = w_prev_df.drop(columns='date') + # + # # w_prev_calib_and_init_df = pd.merge(init_w_prev_df, w_prev_calib_df, on='date') + # # w_prev_plot_df = pd.merge(w_prev_df, w_prev_calib_and_init_df, on='date').loc[lambda df: df.index >= 2015] + # w_prev_plot_df = w_prev_df.loc[lambda df: df.index >= 2015] + # columns_to_plot = [ + # ['total_init_sev_under5_prop', 'total_init_mod_under5_prop'], + # ['total_sev_under5_prop', 'total_mod_under5_prop'], + # ['sev_wast_calib', 'mod_wast_calib'], + # ] + # colors_to_plot = { + # 'total_sev_under5_prop': self.__colors_model['severe wasting'], + # 'total_mod_under5_prop': self.__colors_model['moderate wasting'], + # 'sev_wast_calib': self.__colors_data['severe wasting'], + # 'mod_wast_calib': self.__colors_data['moderate wasting'], + # 'total_init_sev_under5_prop': self.__colors_init_data['severe wasting'], + # 'total_init_mod_under5_prop': self.__colors_init_data['moderate wasting'] + # + # } + # labels_to_plot = { + # 'total_sev_under5_prop': 'severe wasting (model)', + # 'total_mod_under5_prop': 'moderate wasting (model)', + # 'sev_wast_calib': 'severe wasting (data)', + # 'mod_wast_calib': 'moderate wasting (data)', + # 'total_init_sev_under5_prop': 'severe wasting (initial)', + # 'total_init_mod_under5_prop': 'moderate wasting (initial)' + # } + # + # fig, ax = plt.subplots() + # bar_spots = len(columns_to_plot) + # bar_width = 0.8 / bar_spots + # pos = np.arange(len(w_prev_plot_df)) + # dodge_offsets = np.linspace(-bar_spots * bar_width / 2, bar_spots * bar_width / 2, bar_spots, endpoint=False) + # for columns, offset in zip(columns_to_plot, dodge_offsets): + # bottom = 0 + # for col in 
([columns] if isinstance(columns, str) else columns): + # ax.bar(pos + offset, w_prev_plot_df[col], bottom=bottom, width=bar_width, align='edge', + # label=labels_to_plot[col], color=colors_to_plot[col]) + # bottom += w_prev_plot_df[col] + # ax.set_xticks(pos) + # ax.set_xticklabels(w_prev_plot_df.index, rotation=90) + # # ax.set_title("Wasting prevalence in children 0-59 months per year", fontsize=title_fontsize-6) + # ax.set_ylabel('proportion of wasted children in the year') + # ax.set_xlabel('year') + # ax.set_ylim([0, 0.06]) + # ax.legend(fontsize=legend_fontsize-4) + # plt.tight_layout() + # fig_output_name = ('wasting_prevalence_per_year__' + self.datestamp) + # self.save_fig__store_pdf_file(fig, fig_output_name) + # # plt.show() + + def plot_wasting_prevalence_by_age_group(self): + """ Plot wasting prevalence per each age group. Proportions are obtained by getting a total number of + children wasted in a particular age-group divided by the total number of children per that age-group""" + + age_groups = ['0_5mo', '6_11mo', '12_23mo', '24_35mo', '36_47mo', '48_59mo', '5y+'] + + # ### Calibration Data + # Load calibration data from CSV file + wasting_calib_data_path = resources_path / 'ResourceFile_Wasting/wasting_prevalence_and_sample_size.csv' + wasting_calib_data_df = pd.read_csv(wasting_calib_data_path, index_col='year') + + # Recalculate data to proportions (0 to 1) and separate mod wast as (wasted - sev wast) + wasting_calib_data_df['mod_wast_calib'] = \ + (wasting_calib_data_df['prev any wast (%)'] - wasting_calib_data_df['prev severe wast (%)']) / 100 + wasting_calib_data_df['sev_wast_calib'] = wasting_calib_data_df['prev severe wast (%)'] / 100 + + # Pivot the data to get the required format + w_prev_calib_data_df = wasting_calib_data_df.pivot(columns='age_group (months)', + values=['mod_wast_calib', 'sev_wast_calib']) + w_prev_calib_data_df.columns = [f'{col[0][:3]}__{col[1]}' for col in w_prev_calib_data_df.columns] + + # Load calibration sample 
sizes from CSV file + sample_sizes_calib_data_df = wasting_calib_data_df.pivot(columns='age_group (months)', values='sample_size') + sample_sizes_calib_data_df = sample_sizes_calib_data_df.reindex(columns=age_groups) + + # ### Model Outcomes + # Load modelled prevalence proportions + # # year 2010 + # init_w_prev_2010_only_df = self.__w_logs_dict["wasting_init_prevalence_props"] + # init_w_prev_2010_only_df = init_w_prev_2010_only_df.drop(columns={'total_mod_under5_prop', 'total_sev_under5_prop'}) + # init_w_prev_2010_only_df = init_w_prev_2010_only_df.set_index(init_w_prev_2010_only_df.date.dt.year) + # init_w_prev_2010_only_df = init_w_prev_2010_only_df.drop(columns='date') + # from year 2011-- now all from 2010 here + w_prev_model_df = self.__w_logs_dict["wasting_prevalence_props"] + w_prev_model_df = w_prev_model_df.drop(columns={'total_mod_under5_prop', 'total_sev_under5_prop'}) + w_prev_model_df = w_prev_model_df.set_index(w_prev_model_df.date.dt.year) + w_prev_model_df = w_prev_model_df.drop(columns='date') + # # merge 2010 and 2011+ data frames + # w_prev_model_df = pd.concat([init_w_prev_2010_only_df, w_prev_model_df], axis=0) + + # Load modelled population sizes + # # year 2010 + # init_pop_sizes_2010_only_df = self.__w_logs_dict["init pop sizes"] + # init_pop_sizes_2010_only_df = \ + # init_pop_sizes_2010_only_df.set_index(init_pop_sizes_2010_only_df.date.dt.year).rename_axis('year') + # init_pop_sizes_2010_only_df = init_pop_sizes_2010_only_df.drop(columns='date') + # init_pop_sizes_2010_only_df = init_pop_sizes_2010_only_df.filter(like='total__').rename( + # lambda x: x.replace('total__', ''), axis=1 + # )[age_groups] + # from year 2011 -- now all from 2010 + pop_sizes_model_df = self.__w_logs_dict['pop sizes'] + pop_sizes_model_df = pop_sizes_model_df.set_index(pop_sizes_model_df.date.dt.year).rename_axis('year') + pop_sizes_model_df = pop_sizes_model_df.drop(columns='date') + pop_sizes_model_df = pop_sizes_model_df.filter(like='total__').rename( + 
lambda x: x.replace('total__', ''), axis=1 + )[age_groups] + # # merge 2010 and 2011+ data frames + # pop_sizes_model_df = pd.concat([init_pop_sizes_2010_only_df, pop_sizes_model_df], axis=0) + + for year_calib in w_prev_calib_data_df.index: + w_prev_calib_data_year_df = w_prev_calib_data_df.loc[w_prev_calib_data_df.index == year_calib] + w_prev_model_year_df = w_prev_model_df.loc[w_prev_model_df.index == year_calib] + + def create_plotting_data(df, df_name): + plotting = {'severe wasting': {}, 'moderate wasting': {}, 'any wasting': {}} + for col in df.columns: + prefix, agegp = col.split('__') + if prefix == 'sev': + plotting['severe wasting'][agegp] = df[col].values[0] + elif prefix == 'mod': + plotting['moderate wasting'][agegp] = df[col].values[0] + plotting['any wasting'][agegp] = df[col].values[0] + df[f'sev__{agegp}'].values[0] + plotting_df = pd.DataFrame(plotting) + assert set(plotting_df.index) == set(age_groups),\ + f"age groups in {df_name} are not in line with the age_groups." 
+ plotting_df = plotting_df.reindex(age_groups) + return plotting_df + + # Create plotting data for both dataframes + plotting_model = create_plotting_data(w_prev_model_year_df, 'w_prev_model_year_df') + plotting_calib = create_plotting_data(w_prev_calib_data_year_df, 'w_prev_calib_data_year_df') + + # Calculate 95% confidence intervals for both + sample_sizes_calib_data_year = sample_sizes_calib_data_df.loc[year_calib, :] + sample_sizes_model_year = pop_sizes_model_df.loc[year_calib, :] + + confidence_level = 0.95 + z_score = stats.norm.ppf(1 - (1 - confidence_level) / 2) + + calib_data_margin_of_error_any_wast = [] + calib_data_margin_of_error_sev_wast = [] + for p, n in zip(plotting_calib['any wasting'].reindex(age_groups[:-1]), sample_sizes_calib_data_year[:-1]): + calib_data_margin_of_error_any_wast.append(z_score * np.sqrt((p * (1 - p)) / n)) + for p, n in \ + zip(plotting_calib['severe wasting'].reindex(age_groups[:-1]),sample_sizes_calib_data_year[:-1]): + calib_data_margin_of_error_sev_wast.append(z_score * np.sqrt((p * (1 - p)) / n)) + model_margin_of_error_any_wast = [] + model_margin_of_error_sev_wast = [] + for p, n in zip(plotting_model['any wasting'].reindex(age_groups[:-1]), sample_sizes_model_year[:-1]): + model_margin_of_error_any_wast.append(z_score * np.sqrt((p * (1 - p)) / n)) + for p, n in zip(plotting_model['severe wasting'].reindex(age_groups[:-1]), sample_sizes_model_year[:-1]): + model_margin_of_error_sev_wast.append(z_score * np.sqrt((p * (1 - p)) / n)) + + # ##### + # Plot wasting prevalence + fig, ax = plt.subplots(figsize=(10, 6)) + bar_width = 0.35 + # Set positions of bars on x-axis + r1 = range(len(plotting_model)) + r2 = [x + bar_width for x in r1] + + # Plot the first set of bars (model data) + ax.bar(r1, plotting_model['severe wasting'], + color=self.__colors_model['severe wasting'], width=bar_width, + label='severe wasting (model)') + ax.bar(r1, plotting_model['moderate wasting'], bottom=plotting_model['severe wasting'], + 
color=self.__colors_model['moderate wasting'], width=bar_width, + label='moderate wasting (model)') + + # Add the confidence intervals + for i, age_group in enumerate(age_groups[0:len(age_groups)-1]): + ax.errorbar(r1[i], plotting_model['any wasting'][age_group], + yerr=[model_margin_of_error_any_wast[i]], + capsize=5, fmt='none', color='black') + ax.errorbar(r1[i], plotting_model['severe wasting'][age_group], + yerr=[model_margin_of_error_sev_wast[i]], + capsize=5, fmt='none', color='white') + + # Plot the second set of bars (calibration data) + ax.bar( + r2, + plotting_calib["severe wasting"], + color=self.__colors_data["severe wasting"], + width=bar_width, + label=f"severe wasting ({self.__data_name[year_calib]})", + ) + ax.bar( + r2, + plotting_calib["moderate wasting"], + bottom=plotting_calib["severe wasting"], + color=self.__colors_data["moderate wasting"], + width=bar_width, + label=f"moderate wasting ({self.__data_name[year_calib]})", + ) + + # Add the confidence intervals + for i, age_group in enumerate(age_groups[0:len(age_groups)-1]): + ax.errorbar(r2[i], plotting_calib['any wasting'][age_group], + yerr=[calib_data_margin_of_error_any_wast[i]], + capsize=5, fmt='none', color='black') + ax.errorbar(r2[i], plotting_calib['severe wasting'][age_group], + yerr=[calib_data_margin_of_error_sev_wast[i]], + capsize=5, fmt='none', color='white') + + ax.set_xlabel('age group') + ax.set_ylabel('proportion') + # ax.set_title(f"Wasting prevalence in children 0-59 months per each age group in {year_calib}", + # fontsize=title_fontsize-1) + ax.set_xticks([r + bar_width / 2 for r in range(len(plotting_model))]) + ax.set_xticklabels(age_groups) + ax.set_ylim([0, 0.12]) + ax.legend(fontsize=legend_fontsize) + plt.tight_layout() + fig_output_name = (f'wasting_prevalence_per_each_age_group_{year_calib}__' + self.datestamp) + if year_calib in [2016, 2020]: + self.save_fig__store_pdf_file(fig, fig_output_name) + # plt.show() + + def 
plot_model_gbd_deaths_incl_burnin_period(self): + """ compare model and GBD deaths 2010-2014 & 2015-2019 """ + death_compare = \ + compare_number_of_deaths(self.__log_file_path, resources_path) + fig, ax = plt.subplots(figsize=(10, 6)) + # cause of death as of GBD 2019 'Protein-energy malnutrition' was labeled as 'Childhood Undernutrition' in + # wasting module + plot_df = death_compare.loc[(['2010-2014', '2015-2019'], + slice(None), ['0-4'], 'Childhood Undernutrition' + )].groupby('period').sum() + plotting = plot_df.loc[['2010-2014', '2015-2019']] + ax = plotting['model'].plot.bar(label='TLO model', ax=ax, rot=0) + ax.errorbar(x=plotting['model'].index, y=plotting.GBD_mean, + yerr=[plotting.GBD_lower, plotting.GBD_upper], + fmt='o', color='#000', label="GBD 2019 estimates") + + # ax.set_title('Average annual under-five direct deaths due to severe acute malnutrition', fontsize=title_fontsize-1) + ax.set_xlabel("time period") + ax.set_ylabel("number of deaths") + ax.legend(loc='upper center', fontsize=legend_fontsize) + fig.tight_layout() + fig_output_name = ('model_gbd_deaths_incl_burnin__' + self.datestamp) + self.save_fig__store_pdf_file(fig, fig_output_name) + # plt.show() + + def plot_model_gbd_deaths_excl_burnin_period(self): + """ compare model and GBD deaths 2015-2019 """ + # cause of death as of GBD 2019 'Protein-energy malnutrition' was labeled as 'Childhood Undernutrition' in + # wasting module + death_compare = \ + compare_number_of_deaths(self.__log_file_path, resources_path) + calib_period = '2015-2019' + plot_df = death_compare.loc[([calib_period], + slice(None), ['0-4'], 'Childhood Undernutrition' + )].groupby('period').sum() + gbd_data = plot_df.loc[calib_period, ['GBD_mean', 'GBD_lower', 'GBD_upper']].tolist() + + # as these need mean outcomes across all the runs, we calculate these with the script + # plot_calib_outputs__using_analysis_utility_fncs_wast.py, and add these figures here by hand + # (could be automatised in future) + model_data = 
[2972.865386866667, 96.43464129549511, 96.43464129549557] + + # Setup labels and values + labels = ["model", "GBD 2019"] + means = [model_data[0], gbd_data[0]] + yerr = [ + [model_data[1], gbd_data[1]], + [model_data[2], gbd_data[2]], + ] + + # Specify colors + custom_colors = [self.__colors_model["severe wasting"], self.__colors_data["severe wasting"]] + + # Create the plot + fig, ax = plt.subplots(figsize=(10, 6)) + ax.bar(labels, means, yerr=yerr, capsize=8, color=custom_colors, edgecolor="black") + + # ax.set_title('Average direct deaths per year due to severe acute malnutrition in children under 5', + # fontsize=title_fontsize - 1) + ax.set_xlabel(calib_period) + ax.set_ylabel("number of deaths due to SAM") + fig.tight_layout() + fig_output_name = ('model_gbd_deaths_excl_burnin__' + self.datestamp) + self.save_fig__store_pdf_file(fig, fig_output_name) + # plt.show() + + def plot_all_figs_in_one_pdf(self): + + output_file_path = (self.calib_outcomes_folder_path_name / + f'wasting_all_figures__{self.datestamp}_{self.draw_nmb}_{self.run_nmb}.pdf') + # Remove the existing output file if it exists to ensure a clean start + if os.path.exists(output_file_path): + os.remove(output_file_path) + + # Assert that the file doesn't exist anymore after removal + assert not os.path.exists(output_file_path), "The file was not successfully removed." 
+ + # Merge the PDF files + # Create a PDF writer object + pdf_writer = PdfWriter() + + # Iterate through the figure files and add each to the writer + for fig_file in self.fig_files: + pdf_reader = PdfReader(fig_file) + for page_num in range(len(pdf_reader.pages)): + page = pdf_reader.pages[page_num] + pdf_writer.add_page(page) + + # Write the merged PDF to a file + with open(output_file_path, 'wb') as out_file: + pdf_writer.write(out_file) + + +if __name__ == "__main__": + + # Path to the resource files used by the disease and intervention methods + resources_path = Path("./resources") + + # Find sim_results_folder_path associated with a given batch_file (and get most recent [-1]) + sim_results_folder_path = get_scenario_outputs(scenario_filename, outputs_path)[-1] + sim_results_folder_name = sim_results_folder_path.name + print(f"Calibrating {sim_results_folder_name=}") + # Get the datestamp + assert sim_results_folder_name.startswith(scenario_filename + '-'),\ + "The scenario output name does not correspond with the set scenario_filename." 
if __name__ == "__main__":

    # Path to the resource files used by the disease and intervention methods
    resources_path = Path("./resources")

    # Find sim_results_folder_path associated with a given batch_file (and get most recent [-1])
    sim_results_folder_path = get_scenario_outputs(scenario_filename, outputs_path)[-1]
    sim_results_folder_name = sim_results_folder_path.name
    print(f"Calibrating {sim_results_folder_name=}")
    # Get the datestamp
    assert sim_results_folder_name.startswith(scenario_filename + '-'),\
        "The scenario output name does not correspond with the set scenario_filename."
    datestamp = sim_results_folder_name[(len(scenario_filename) + 1):]

    # draw folders are the sub-folders named by a number
    folders = [name for name in os.listdir(sim_results_folder_path) if
               os.path.isdir(os.path.join(sim_results_folder_path, name)) and name.isdigit()]
    # take the draw numbers from the folder names, so that a missing draw folder does not shift the numbering
    draw_numbers = sorted(int(name) for name in folders)

    # Analyse each draw
    # for now, we always have just one run, run 0
    run_nmb = 0
    for draw_nmb in draw_numbers:
        print(f"Analysing {draw_nmb=} ...")
        time_start = time.time()

        # initialise the wasting class
        wasting_analyses = WastingAnalyses(str(sim_results_folder_path), datestamp, draw_nmb, run_nmb)

        # Create a csv to write down calibration outputs
        # as bool values indicating whether model outcomes and calibration data intersect
        wasting_analyses.create_calib_outcome_csv()

        # plot wasting incidence
        wasting_analyses.plot_wasting_incidence()

        # plot wasting incidence mod:sev proportions
        # wasting_analyses.plot_wasting_incidence_mod_to_sev_props()

        # plot wasting length
        # wasting_analyses.plot_wasting_length()

        # # plot initial wasting prevalence
        # wasting_analyses.plot_wasting_initial_overall_prevalence()
        # wasting_analyses.plot_wasting_initial_prevalence_by_age_group()

        # plot prevalence through simulation
        # wasting_analyses.plot_wasting_prevalence_per_year()
        wasting_analyses.plot_wasting_prevalence_by_age_group()

        # plot wasting deaths as compared to GBD deaths
        # wasting_analyses.plot_model_gbd_deaths_incl_burnin_period()
        wasting_analyses.plot_model_gbd_deaths_excl_burnin_period()

        # ### Save all figures in one pdf
        wasting_analyses.plot_all_figs_in_one_pdf()

        time_end = time.time()
        print(f"... finished in (s): {(time_end - time_start)}")

    total_time_end = time.time()
    print(f"total running time (s): {(total_time_end - total_time_start)}")
def __init__(self, sim_results_folder_path_str, in_datestamp, in_draw_nmb, in_run_nmb, in_png=False):
    """
    Locate, decompress and parse the log of one draw/run, and set up the plotting configuration.

    :param sim_results_folder_path_str: path (as str) of the scenario output folder holding the
        `<draw>/<run>/` sub-folders
    :param in_datestamp: datestamp of the scenario outputs; used in the name of the figures folder
    :param in_draw_nmb: number of the draw to analyse
    :param in_run_nmb: number of the run to analyse
    :param in_png: bool indicating whether we want to save all figures not only as pdf, but also as png
    :raises FileNotFoundError: if the draw/run folder holds no `<scenario_filename>*.log.gz` file
    """
    self.outcomes_folder_path = sim_results_folder_path_str
    self.datestamp = in_datestamp
    self.draw_nmb = in_draw_nmb
    self.run_nmb = in_run_nmb
    # Store the flag itself (a trailing ", <string>" here would turn it into an always-truthy tuple)
    self.png = in_png

    # Build the draw/run folder from the constructor arguments, not from same-named module globals
    sim_results_folder_draw_x_run_y_path_str = os.path.join(
        self.outcomes_folder_path, str(self.draw_nmb), str(self.run_nmb)
    )
    sim_results_file_name_prefix = scenario_filename
    sim_results_file_name_extension = '.log.gz'
    matching_log_files = glob.glob(
        os.path.join(sim_results_folder_draw_x_run_y_path_str,
                     f"{sim_results_file_name_prefix}*{sim_results_file_name_extension}")
    )
    if not matching_log_files:
        raise FileNotFoundError(
            f"No `{sim_results_file_name_prefix}*{sim_results_file_name_extension}` file found in "
            f"`{sim_results_folder_draw_x_run_y_path_str}`"
        )
    gz_results_file_path = Path(matching_log_files[0])

    # Path to the decompressed .log file (same name without the trailing `.gz`)
    self.__log_file_path = gz_results_file_path.with_suffix('')
    # Decompress the .log.gz file
    with gzip.open(gz_results_file_path, 'rb') as f_in, open(self.__log_file_path, 'wb') as f_out:
        shutil.copyfileobj(f_in, f_out)

    # parse wasting logs
    self.__w_logs_dict = parse_log_file(self.__log_file_path)['tlo.methods.wasting']
    # TODO: Why it prints the messages from parse_log_file() twice?
    # parse scaling factor log
    # self.__scaling_factor = \
    #     parse_log_file(self.__log_file_path)['tlo.methods.population']['scaling_factor'].set_index('date').loc[
    #         '2010-01-01', 'scaling_factor'
    #     ]

    # wasting types description
    self.__wasting_types_desc = {'WHZ<-3': 'severe wasting',
                                 '-3<=WHZ<-2': 'moderate wasting',
                                 'WHZ>=-2': 'no wasting'}

    # paths of the figures created so far
    self.fig_files = []

    cycle = plt.rcParams['axes.prop_cycle'].by_key()['color']
    # define colo(u)rs to use:
    self.__colors_model = {
        'severe wasting': cycle[0],  # #1f77b4
        'moderate wasting': cycle[1],  # #ff7f0e
        'SAM': '#B372B7',
        'MAM': '#D1BCD2',
    }
    self.__colors_data = {
        'severe wasting': '#82C1EC',
        'moderate wasting': '#C71E1E',
    }
    self.__colors_init_data = {
        'severe wasting': '#0E53EA',
        'moderate wasting': '#FFA783',
    }
    # name of the calibration data source per calibration year (used in legends)
    self.__data_name = {
        2010: 'DHS 2010',
        2014: 'MICS 2013-2014',
        2016: 'DHS 2015–2016',
        2020: 'MICS 2019–2020',
    }

    # folder for the figures, created next to the scenario output folder
    self.calib_outcomes_folder_path_name = (
        Path(self.outcomes_folder_path).parent / f"calib_outcome_figures_SQ-{self.datestamp}"
    )
    self.calib_outcomes_folder_path_name.mkdir(parents=True, exist_ok=True)
wasting over time """ + # w_inc_df = self.__w_logs_dict['wasting_incidence_count'] + # w_inc_df = w_inc_df.set_index(w_inc_df.date.dt.year-1) + # w_inc_df = w_inc_df.drop(columns='date') + # # check no incidence of well-nourished + # all_zeros = w_inc_df['WHZ>=-2'].apply(lambda x: all(value == 0 for value in x.values())) + # assert all(all_zeros) + # w_inc_df = w_inc_df[["WHZ<-3", "-3<=WHZ<-2"]] + # + # pop_sizes_df = self.__w_logs_dict['pop sizes'] + # pop_sizes_df = pop_sizes_df.set_index(pop_sizes_df.date.dt.year) + # pop_sizes_df = pop_sizes_df.drop(columns='date') + # po_sizes_to_keep = [col for col in pop_sizes_df.columns if + # col.startswith('total__') and col not in ['total__under5']] + # age_gps_total_pop_sizes_df = pop_sizes_df[po_sizes_to_keep].copy() + # age_gps_total_pop_sizes_df['0y'] = \ + # age_gps_total_pop_sizes_df['total__0_5mo'] + age_gps_total_pop_sizes_df['total__6_11mo'] + # age_gps_total_pop_sizes_df = age_gps_total_pop_sizes_df.drop(columns=['total__0_5mo', 'total__6_11mo']) + # age_gps_total_pop_sizes_df = age_gps_total_pop_sizes_df.rename(columns={ + # 'total__12_23mo': '1y', + # 'total__24_35mo': '2y', + # 'total__36_47mo': '3y', + # 'total__48_59mo': '4y', + # 'total__5y+': '5+y' + # }) + # + # # get age_years, doesn't matter what wasting category you choose, + # # they all have same age groups + # age_years = list(w_inc_df.loc[w_inc_df.index[0], 'WHZ<-3'].keys()) + # # age_years.remove('5+y') + # + # _row_counter = 0 + # _col_counter = 0 + # # plot setup + # fig, axes = plt.subplots(nrows=2, ncols=3, sharex=True, sharey=True, figsize=(10, 6)) + # axes[1, 2].axis('off') # 5+y has no data (no new cases in 5+y), its space is used to display the label + # for age in age_years: + # plotting = pd.DataFrame() + # for state in w_inc_df.columns: + # plotting[state] = \ + # w_inc_df.apply(lambda row: row[state][age], axis=1) + # # remove sev cases from mod cases (all sev cases went through mod state) + # plotting["-3<=WHZ<-2"] = 
plotting.apply(lambda row: max(row["-3<=WHZ<-2"] - row["WHZ<-3"], 0), axis=1) + # # calculate props within the age group + # plotting = plotting.div(age_gps_total_pop_sizes_df[age], axis=0) + # plotting = plotting.rename(columns=self.__wasting_types_desc) + # # filter data to include only years from 2015 onwards + # plotting = plotting.loc[plotting.index >= 2015] + # # and up to 2030 + # plotting = plotting.loc[plotting.index <= 2030] + # # check for invalid values + # if (plotting < 0).any().any() or (plotting > 1).any().any(): + # print(f"Warning plot_wasting_incidence: Invalid values detected in plotting data for age group {age}:") + # print(plotting) + # + # ax = plotting.plot(kind='bar', stacked=True, + # ax=axes[_row_counter, _col_counter], + # title=f"{age} old")#, + # #ylim=[0, 1]) + # show_legend = (_row_counter == 1 and _col_counter == 2) + # # show_x_axis_label = (_row_counter == 0 and _col_counter == 2) + # if show_legend: + # ax.legend(loc='center', fontsize=legend_fontsize) + # ax.set_title('') + # else: + # ax.get_legend().remove() + # # if show_x_axis_label: + # # ax.set_xlabel('Year') # TODO: this is not working + # ax.set_xlabel('year') + # ax.set_ylabel('proportion (within age group)') + # # move to another row + # if _col_counter == 2: + # _row_counter += 1 + # _col_counter = -1 + # _col_counter += 1 # increment column counter + # fig.suptitle('Annual incidence of wasting among the age group', fontsize=title_fontsize) #, weight='bold') + # fig.tight_layout() + # fig_output_name = ('wasting_incidence__' + self.datestamp) + # self.save_fig__store_pdf_file(fig, fig_output_name) + # # plt.show() + + # def plot_wasting_prevalence_per_year(self): + # """ plot wasting prevalence of all age groups per year. 
Proportions are obtained by getting a total number of + # children wasted divide by the total number of children less than 5 years""" + # + # ## Prevalence at some years - data (2010 are the data used to draw initial prevalence) + # # TODO: add calibration data into the resource file: + # # resources_path / 'ResourceFile_Wasting/wasting_prevalence_and_sample_size.csv' + # # and load here and for initial overall prev from the RF + # # w_prev_calib_data_years_only_df = pd.DataFrame({ + # # 'sev_wast_calib': [0.015, 0.011, 0.006, 0.007], + # # 'mod_wast_calib': [0.025, 0.027, 0.021, 0.019] + # # }, index=[2010, 2014, 2016, 2020]) + # # date_range = pd.Index(range(2010, 2031), name='date') #TODO 2032 with new sims + # w_prev_calib_data_years_only_df = pd.DataFrame({ + # 'sev_wast_calib': [0.006, 0.007], + # 'mod_wast_calib': [0.021, 0.019] + # }, index=[2016, 2020]) + # date_range = pd.Index(range(2015, 2032), name='date') + # w_prev_calib = pd.DataFrame(index=date_range) + # # filling missing values with 0 + # w_prev_calib_df = w_prev_calib.merge( + # w_prev_calib_data_years_only_df, left_index=True, right_index=True, how='left' + # ).fillna(0) + # + # # ## Initial prevalence at the beginning of 2010 - model + # # init_w_prev_2010_only_df = self.__w_logs_dict["wasting_init_prevalence_props"] + # # init_w_prev_2010_only_df = init_w_prev_2010_only_df[['date', 'total_sev_under5_prop', 'total_mod_under5_prop']].rename( + # # columns={'total_sev_under5_prop': 'total_init_sev_under5_prop', 'total_mod_under5_prop': 'total_init_mod_under5_prop'} + # # ) + # # init_w_prev_2010_only_df = init_w_prev_2010_only_df.set_index(init_w_prev_2010_only_df.date.dt.year) + # # init_w_prev_2010_only_df = init_w_prev_2010_only_df.drop(columns='date') + # # init_w_prev_2010_only_df = init_w_prev_2010_only_df.loc[[2010]] + # # init_w_prev_df = pd.DataFrame(index=date_range) + # # # filling missing values with 0 + # # init_w_prev_df = init_w_prev_df.merge( + # # init_w_prev_2010_only_df, 
left_index=True, right_index=True, how='left' + # # ).fillna(0) + # + # ## Prevalence at the beginning of years - model + # w_prev_df = self.__w_logs_dict["wasting_prevalence_props"] + # w_prev_df = w_prev_df[['date', 'total_sev_under5_prop', 'total_mod_under5_prop']] + # w_prev_df = w_prev_df.set_index(w_prev_df.date.dt.year) + # w_prev_df = w_prev_df.drop(columns='date') + # + # # w_prev_calib_and_init_df = pd.merge(init_w_prev_df, w_prev_calib_df, on='date') + # # w_prev_plot_df = pd.merge(w_prev_df, w_prev_calib_and_init_df, on='date').loc[lambda df: df.index >= 2015] + # w_prev_plot_df = w_prev_df.loc[lambda df: df.index >= 2015] + # columns_to_plot = [ + # ['total_init_sev_under5_prop', 'total_init_mod_under5_prop'], + # ['total_sev_under5_prop', 'total_mod_under5_prop'], + # ['sev_wast_calib', 'mod_wast_calib'], + # ] + # colors_to_plot = { + # 'total_sev_under5_prop': self.__colors_model['severe wasting'], + # 'total_mod_under5_prop': self.__colors_model['moderate wasting'], + # 'sev_wast_calib': self.__colors_data['severe wasting'], + # 'mod_wast_calib': self.__colors_data['moderate wasting'], + # 'total_init_sev_under5_prop': self.__colors_init_data['severe wasting'], + # 'total_init_mod_under5_prop': self.__colors_init_data['moderate wasting'] + # + # } + # labels_to_plot = { + # 'total_sev_under5_prop': 'severe wasting (model)', + # 'total_mod_under5_prop': 'moderate wasting (model)', + # 'sev_wast_calib': 'severe wasting (data)', + # 'mod_wast_calib': 'moderate wasting (data)', + # 'total_init_sev_under5_prop': 'severe wasting (initial)', + # 'total_init_mod_under5_prop': 'moderate wasting (initial)' + # } + # + # fig, ax = plt.subplots() + # bar_spots = len(columns_to_plot) + # bar_width = 0.8 / bar_spots + # pos = np.arange(len(w_prev_plot_df)) + # dodge_offsets = np.linspace(-bar_spots * bar_width / 2, bar_spots * bar_width / 2, bar_spots, endpoint=False) + # for columns, offset in zip(columns_to_plot, dodge_offsets): + # bottom = 0 + # for col in 
def plot_wasting_prevalence_by_age_group(self):
    """ Plot wasting prevalence per each age group. Proportions are obtained by getting a total number of
    children wasted in a particular age-group divided by the total number of children per that age-group.

    One figure is drawn for every calibration year in the resource file, showing stacked severe + moderate
    prevalence for the model next to the calibration data. Error bars are 95% normal-approximation intervals
    based on the modelled population sizes and the calibration sample sizes respectively, drawn for all age
    groups except the last one ('5y+'). Only the figures for 2016 and 2020 are saved; the others are closed.

    NOTE(review): `save_fig__store_pdf_file` is not defined in the part of this class visible here -- confirm
    it is provided before running.
    """

    age_groups = ['0_5mo', '6_11mo', '12_23mo', '24_35mo', '36_47mo', '48_59mo', '5y+']
    # confidence intervals are drawn for all age groups but the last one
    ci_age_groups = age_groups[:-1]

    confidence_level = 0.95
    z_score = stats.norm.ppf(1 - (1 - confidence_level) / 2)

    def create_plotting_data(df, df_name):
        """Reshape a one-row frame with `sev__<age>` / `mod__<age>` columns into an age-group indexed frame."""
        plotting = {'severe wasting': {}, 'moderate wasting': {}, 'any wasting': {}}
        for col in df.columns:
            prefix, agegp = col.split('__')
            if prefix == 'sev':
                plotting['severe wasting'][agegp] = df[col].values[0]
            elif prefix == 'mod':
                plotting['moderate wasting'][agegp] = df[col].values[0]
                plotting['any wasting'][agegp] = df[col].values[0] + df[f'sev__{agegp}'].values[0]
        plotting_df = pd.DataFrame(plotting)
        assert set(plotting_df.index) == set(age_groups),\
            f"age groups in {df_name} are not in line with the age_groups."
        return plotting_df.reindex(age_groups)

    def margins_of_error(proportions, sizes):
        """Half-widths of the normal-approximation CI for each age group in `ci_age_groups`."""
        return [
            z_score * np.sqrt((p * (1 - p)) / n)
            for p, n in zip(proportions.reindex(ci_age_groups), sizes[:-1])
        ]

    # ### Calibration Data
    # Load calibration data from CSV file
    wasting_calib_data_path = resources_path / 'ResourceFile_Wasting/wasting_prevalence_and_sample_size.csv'
    wasting_calib_data_df = pd.read_csv(wasting_calib_data_path, index_col='year')

    # Recalculate data to proportions (0 to 1) and separate mod wast as (wasted - sev wast)
    wasting_calib_data_df['mod_wast_calib'] = \
        (wasting_calib_data_df['prev any wast (%)'] - wasting_calib_data_df['prev severe wast (%)']) / 100
    wasting_calib_data_df['sev_wast_calib'] = wasting_calib_data_df['prev severe wast (%)'] / 100

    # Pivot the data to get one row per year and one `mod__<age>` / `sev__<age>` column per age group
    w_prev_calib_data_df = wasting_calib_data_df.pivot(columns='age_group (months)',
                                                       values=['mod_wast_calib', 'sev_wast_calib'])
    w_prev_calib_data_df.columns = [f'{col[0][:3]}__{col[1]}' for col in w_prev_calib_data_df.columns]

    # Calibration sample sizes, columns ordered as age_groups
    sample_sizes_calib_data_df = wasting_calib_data_df.pivot(columns='age_group (months)', values='sample_size')
    sample_sizes_calib_data_df = sample_sizes_calib_data_df.reindex(columns=age_groups)

    # ### Model Outcomes
    # Modelled prevalence proportions, indexed by year
    w_prev_model_df = self.__w_logs_dict["wasting_prevalence_props"]
    w_prev_model_df = w_prev_model_df.drop(columns={'total_mod_under5_prop', 'total_sev_under5_prop'})
    w_prev_model_df = w_prev_model_df.set_index(w_prev_model_df.date.dt.year)
    w_prev_model_df = w_prev_model_df.drop(columns='date')

    # Modelled population sizes, indexed by year, columns ordered as age_groups
    pop_sizes_model_df = self.__w_logs_dict['pop sizes']
    pop_sizes_model_df = pop_sizes_model_df.set_index(pop_sizes_model_df.date.dt.year).rename_axis('year')
    pop_sizes_model_df = pop_sizes_model_df.drop(columns='date')
    pop_sizes_model_df = pop_sizes_model_df.filter(like='total__').rename(
        lambda x: x.replace('total__', ''), axis=1
    )[age_groups]

    for year_calib in w_prev_calib_data_df.index:
        w_prev_calib_data_year_df = w_prev_calib_data_df.loc[w_prev_calib_data_df.index == year_calib]
        w_prev_model_year_df = w_prev_model_df.loc[w_prev_model_df.index == year_calib]

        # Create plotting data for both dataframes
        plotting_model = create_plotting_data(w_prev_model_year_df, 'w_prev_model_year_df')
        plotting_calib = create_plotting_data(w_prev_calib_data_year_df, 'w_prev_calib_data_year_df')

        # Calculate 95% confidence intervals for both
        sample_sizes_calib_data_year = sample_sizes_calib_data_df.loc[year_calib, :]
        sample_sizes_model_year = pop_sizes_model_df.loc[year_calib, :]

        calib_data_margin_of_error_any_wast = \
            margins_of_error(plotting_calib['any wasting'], sample_sizes_calib_data_year)
        calib_data_margin_of_error_sev_wast = \
            margins_of_error(plotting_calib['severe wasting'], sample_sizes_calib_data_year)
        model_margin_of_error_any_wast = \
            margins_of_error(plotting_model['any wasting'], sample_sizes_model_year)
        model_margin_of_error_sev_wast = \
            margins_of_error(plotting_model['severe wasting'], sample_sizes_model_year)

        # #####
        # Plot wasting prevalence
        fig, ax = plt.subplots(figsize=(10, 6))
        bar_width = 0.35
        # Set positions of bars on x-axis
        r1 = range(len(plotting_model))
        r2 = [x + bar_width for x in r1]

        # Plot the first set of bars (model data)
        ax.bar(r1, plotting_model['severe wasting'],
               color=self.__colors_model['severe wasting'], width=bar_width,
               label='severe wasting (model)')
        ax.bar(r1, plotting_model['moderate wasting'], bottom=plotting_model['severe wasting'],
               color=self.__colors_model['moderate wasting'], width=bar_width,
               label='moderate wasting (model)')

        # Add the confidence intervals (black on top of the stack, white inside the severe bar)
        for i, age_group in enumerate(ci_age_groups):
            ax.errorbar(r1[i], plotting_model['any wasting'][age_group],
                        yerr=[model_margin_of_error_any_wast[i]],
                        capsize=5, fmt='none', color='black')
            ax.errorbar(r1[i], plotting_model['severe wasting'][age_group],
                        yerr=[model_margin_of_error_sev_wast[i]],
                        capsize=5, fmt='none', color='white')

        # Plot the second set of bars (calibration data)
        ax.bar(
            r2,
            plotting_calib["severe wasting"],
            color=self.__colors_data["severe wasting"],
            width=bar_width,
            label=f"severe wasting ({self.__data_name[year_calib]})",
        )
        ax.bar(
            r2,
            plotting_calib["moderate wasting"],
            bottom=plotting_calib["severe wasting"],
            color=self.__colors_data["moderate wasting"],
            width=bar_width,
            label=f"moderate wasting ({self.__data_name[year_calib]})",
        )

        # Add the confidence intervals
        for i, age_group in enumerate(ci_age_groups):
            ax.errorbar(r2[i], plotting_calib['any wasting'][age_group],
                        yerr=[calib_data_margin_of_error_any_wast[i]],
                        capsize=5, fmt='none', color='black')
            ax.errorbar(r2[i], plotting_calib['severe wasting'][age_group],
                        yerr=[calib_data_margin_of_error_sev_wast[i]],
                        capsize=5, fmt='none', color='white')

        ax.set_xlabel('age group')
        ax.set_ylabel('proportion')
        # ax.set_title(f"Wasting prevalence in children 0-59 months per each age group in {year_calib}",
        #              fontsize=title_fontsize-1)
        ax.set_xticks([r + bar_width / 2 for r in range(len(plotting_model))])
        ax.set_xticklabels(age_groups)
        ax.set_ylim([0, 0.12])
        ax.legend(fontsize=legend_fontsize)
        plt.tight_layout()
        fig_output_name = (f'wasting_prevalence_per_each_age_group_{year_calib}__' + self.datestamp)
        if year_calib in [2016, 2020]:
            self.save_fig__store_pdf_file(fig, fig_output_name)
        else:
            # figures that are not saved would otherwise stay open and accumulate
            plt.close(fig)
        # plt.show()
def plot_model_gbd_deaths_excl_burnin_period(self):
    """ compare model and GBD deaths 2015-2019

    Draws the modelled number of deaths as a bar and the GBD estimate as a point with an error bar, for
    age group '0-4' and cause 'Childhood Undernutrition', and saves the figure.

    NOTE(review): assumes `GBD_lower` / `GBD_upper` returned by `compare_number_of_deaths` are absolute bounds
    (not distances from `GBD_mean`) -- confirm.
    NOTE(review): `save_fig__store_pdf_file` is not defined in the part of this class visible here -- confirm
    it is provided before running.
    """
    death_compare = \
        compare_number_of_deaths(self.__log_file_path, resources_path)
    fig, ax = plt.subplots(figsize=(10, 6))
    # cause of death as of GBD 2019 'Protein-energy malnutrition' was labeled as 'Childhood Undernutrition' in
    # wasting module
    plot_df = death_compare.loc[(['2015-2019'],
                                 slice(None), ['0-4'], 'Childhood Undernutrition'
                                 )].groupby('period').sum()
    plotting = plot_df.loc[['2015-2019']]
    ax = plotting['model'].plot.bar(label='deaths due to SAM (model)', ax=ax, rot=0)
    # `yerr` takes distances from `y` (lower row, upper row), so convert the bounds to offsets from the mean
    gbd_err_below = plotting.GBD_mean - plotting.GBD_lower
    gbd_err_above = plotting.GBD_upper - plotting.GBD_mean
    ax.errorbar(x=plotting['model'].index, y=plotting.GBD_mean,
                yerr=[gbd_err_below, gbd_err_above],
                fmt='o', color='#000', label="deaths due to SAM (GBD 2019)")

    # ax.set_title('Average direct deaths per year due to severe acute malnutrition in children under 5',
    #              fontsize=title_fontsize - 1)
    ax.set_xlabel("time period")
    ax.set_ylabel("number of deaths")
    ax.legend(loc='upper right', fontsize=legend_fontsize)
    fig.tight_layout()
    fig_output_name = ('model_gbd_deaths_excl_burnin__' + self.datestamp)
    self.save_fig__store_pdf_file(fig, fig_output_name)
    # plt.show()
+ datestamp = sim_results_folder_name[(len(scenario_filename) + 1):] + + folders = [name for name in os.listdir(sim_results_folder_path) if \ + os.path.isdir(os.path.join(sim_results_folder_path, name)) and name.isdigit()] + + # Analyse each draw + # for now, we always have just one draw, draw 0 + draw_nmb = 0 + for run_nmb in range(0, len(folders)): + print(f"Analysing {run_nmb=} ...") + time_start = time.time() + + print(f"\n(str(sim_results_folder_path): {str(sim_results_folder_path)}") + # initialise the wasting class + wasting_analyses = WastingAnalyses(str(sim_results_folder_path), datestamp, draw_nmb, run_nmb) + + # # plot wasting incidence + # wasting_analyses.plot_wasting_incidence() + + # # plot initial wasting prevalence + # wasting_analyses.plot_wasting_initial_overall_prevalence() + # wasting_analyses.plot_wasting_initial_prevalence_by_age_group() + + # plot prevalence + # wasting_analyses.plot_wasting_prevalence_per_year() + wasting_analyses.plot_wasting_prevalence_by_age_group() + + # plot wasting deaths as compared to GBD deaths + # wasting_analyses.plot_model_gbd_deaths_incl_burnin_period() + wasting_analyses.plot_model_gbd_deaths_excl_burnin_period() + + # ### Save all figures in one pdf + wasting_analyses.plot_all_figs_in_one_pdf() + + time_end = time.time() + print(f"... 
def find_pars_draw_nmb(in_parameters, in_pars_set_searched):
    """
    Print (and return) the position of a parameter set within all combinations of the parameter values.

    The combinations are the Cartesian product of the value lists of `in_parameters`, taken in the dict's key
    order with the last key varying fastest.
    NOTE(review): presumably this position is the draw number of the scenario -- confirm it matches the order
    in which the scenario enumerates its draws.

    :param in_parameters: dict mapping each parameter name to the list of values used for it
    :param in_pars_set_searched: sequence with one value per parameter, in the same order as the dict keys
    :return: zero-based position of the searched set, or None if it is not among the combinations
    """
    # Use the dict that was passed in (not a module-level global), whatever parameters it contains
    pars_combinations = list(itertools.product(*in_parameters.values()))
    searched = tuple(in_pars_set_searched)

    if searched in pars_combinations:
        position = pars_combinations.index(searched)
        print(f"Position of searched parameters: {position}")
        return position

    print(f"Searched parameters {in_pars_set_searched} not found in combinations: {pars_combinations}")
    return None
a/src/scripts/wasting_analyses/fusion_GLOBAL_DATAFLOW_UNICEF_1.0_MWI.CME_MRY0T4+CME_MRM0...csv b/src/scripts/wasting_analyses/fusion_GLOBAL_DATAFLOW_UNICEF_1.0_MWI.CME_MRY0T4+CME_MRM0...csv new file mode 100644 index 0000000000..4c17a2fa16 --- /dev/null +++ b/src/scripts/wasting_analyses/fusion_GLOBAL_DATAFLOW_UNICEF_1.0_MWI.CME_MRY0T4+CME_MRM0...csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6008b5a3e3c37a75bf5e2b447ea783a7a336319fcb424cf5990b269a802201a3 +size 15015 diff --git a/src/scripts/wasting_analyses/heatmaps_cons_wast.py b/src/scripts/wasting_analyses/heatmaps_cons_wast.py new file mode 100644 index 0000000000..a3de08e0cc --- /dev/null +++ b/src/scripts/wasting_analyses/heatmaps_cons_wast.py @@ -0,0 +1,16 @@ +""" +An analysis file for the wasting module to visualise availability of: + * essential consumables, + * treatments (i.e., probability of all consumables essential for the treatment being available) +""" + +from pathlib import Path + +import analysis_utility_functions_wast + +# ####### TO SET ####################################################################################################### +# Where to save the outcomes +outputs_path = Path("./outputs/sejjej5@ucl.ac.uk/wasting/scenarios/_outcomes/heatmaps") +######################################################################################################################## + +analysis_utility_functions_wast.plot_availability_heatmaps(outputs_path) diff --git a/src/scripts/wasting_analyses/implem_costs.py b/src/scripts/wasting_analyses/implem_costs.py new file mode 100644 index 0000000000..2b49eb296d --- /dev/null +++ b/src/scripts/wasting_analyses/implem_costs.py @@ -0,0 +1,341 @@ +from pathlib import Path + +import pandas as pd + + +def load_rm_database(path: Path | str | None = None, out_path: Path | str | None = None) -> pd.DataFrame: + """ + Load the RM_Database sheet from RM Round 7_20220714.ods and save it as out_path (CSV) + If the CSV already exists at 
out_path, load it instead. + """ + if path is None: + # default to the directory containing this script (same folder as the ods file) + path = Path(__file__).parent / "RM Round 7_20220714_onelineheader.ods" + path = Path(path) + if out_path is None: + # default to the directory containing this script (same folder as the ods file) + out_path = Path(__file__).parent / "RM_Database.csv" + out_path = Path(out_path) + + def try_load_df(): + if not path.exists(): + raise FileNotFoundError(f"`{path}` not found") + try: + return pd.read_excel(path, sheet_name="RM_Database", engine="odf") + except Exception as e: + raise RuntimeError("Failed to read `RM_Database` from `RM Round 7_20220714.ods`.") from e + + if not out_path.exists(): + df = try_load_df() + df.to_csv(out_path, index=False) + print(f"Saved {len(df)} rows to `{out_path}`") + else: + df = pd.read_csv(out_path) + print(f"Loaded {len(df)} rows from `{out_path}`") + # print(f"\ncolumns:\n{df.columns}") + + return df + +def load_rm_database_nutrition(path: Path | str | None = None, out_path: Path | str | None = None) -> pd.DataFrame: + if path is None: + # default to the directory containing this script (same folder as the ods file) + path = Path(__file__).parent / "RM_Database.csv" + path = Path(path) + def try_load_df(): + if not path.exists(): + load_rm_database() + try: + return pd.read_csv(path) + except Exception as e: + raise RuntimeError("Failed to read `RM_Database` from `RM_Database.csv`.") from e + + if out_path is None: + # default to the directory containing this script (same folder as the ods file) + out_path = Path(__file__).parent / "RM_Database_Nutrition.csv" + out_path = Path(out_path) + + if not out_path.exists(): + df = try_load_df() + col = "Programmatic Function" + if col not in df.columns: + raise KeyError(f"Column `{col}` not found in loaded DataFrame") + df_nutr = df[df[col] == "Nutrition"].copy() + # drop the column since it has the same value in all rows + df_nutr = 
df_nutr.drop(columns=[col]) + df_nutr.to_csv(out_path, index=False) + print(f"Saved {len(df_nutr)} rows to `{out_path}`") + else: + df_nutr = pd.read_csv(out_path) + print(f"Loaded {len(df_nutr)} rows from `{out_path}`") + # print(f"\ncolumns:\n{df_nutr.columns}") + + return df_nutr + +def load_rm_database_interv_level(interv_level_name: str, interv_level: str, path: Path | str | None = None) -> pd.DataFrame: + if path is None: + # default to the directory containing this script (same folder as the csv) + path = Path(__file__).parent / "RM_Database_Nutrition.csv" + path = Path(path) + + def try_load_df_nutr(): + if not path.exists(): + # ensure the nutrition CSV exists + load_rm_database_nutrition() + try: + return pd.read_csv(path) + except Exception as e: + raise RuntimeError("Failed to read `RM_Database_Nutrition` from `RM_Database_Nutrition.csv`.") from e + + out_path = Path(__file__).parent / f"RM_Database_{interv_level_name}.csv" + if not out_path.exists(): + df = try_load_df_nutr() + col = "Programmatic Intervention Level 1 [NEW]" + if col not in df.columns: + raise KeyError(f"Column `{col}` not found in loaded DataFrame") + df_interv_level = df[df[col] == interv_level].copy() + # drop the column since it has the same value in all rows + df_interv_level = df_interv_level.drop(columns=[col]) + df_interv_level.to_csv(out_path, index=False) + print(f"Saved {len(df_interv_level)} rows to `{out_path}`") + else: + df_interv_level = pd.read_csv(out_path) + print(f"Loaded {len(df_interv_level)} rows from `{out_path}`") + # print(f"\ncolumns:\n{df_interv_level.columns}") + + return df_interv_level + +if __name__ == "__main__": + # load_rm_database() + df_nutr = load_rm_database_nutrition() + df_preven_undernutr = load_rm_database_interv_level( + interv_level_name="Prevention_of_Undernutrition", interv_level="Prevention of Undernutrition" + ) + df_behavior_change_nutr = load_rm_database_interv_level( + interv_level_name="BehaviorChange_Nutri", interv_level="Behavior 
Change Communication for Nutrition" + ) + + def print_unique_vals(df:pd.DataFrame, df_name:str, col_name:str) -> None: + if col_name in df.columns: + vals = df[col_name].dropna().unique() + vals = sorted(vals, key=lambda x: str(x)) + print(f"\n{df_name}--unique values in {col_name} ({len(vals)}):") + for v in vals: + print(v) + else: + print(f"\nColumn {col_name} not found in `df_preven_undernutr`. Columns:\n{df.columns}") + + print_unique_vals(df_preven_undernutr, "df_preven_undernutr", "Description of Activity") + print_unique_vals(df_preven_undernutr, "df_preven_undernutr", "Cost Sub-Type") + print_unique_vals(df_preven_undernutr, "df_preven_undernutr", "Project Name ") + + print_unique_vals(df_behavior_change_nutr, "df_behavior_change_nutr", "Description of Activity") + print_unique_vals(df_behavior_change_nutr, "df_behavior_change_nutr", "Cost Sub-Type") + print_unique_vals(df_behavior_change_nutr, "df_behavior_change_nutr", "Project Name ") + + print_unique_vals(df_nutr, "df_nutr", "Programmatic Intervention Level 1 [NEW]") + print_unique_vals(df_nutr, "df_nutr", "Cost Sub-Type") + print_unique_vals(df_nutr, "df_nutr", "Project Name ") + + def print_structure(df: pd.DataFrame, df_name: str) -> None: + proj_col = "Project Name " + interv_level_col = "Programmatic Intervention Level 1 [NEW]" + activ_descrip_col = "Description of Activity" + cost_col = "Cost Sub-Type" + expend_col = "FY Ending 2019 EXPENDITURE (USD)(Jul 2018 - Jun 2019)" + budg_col = "FY Ending 2020 BUDGETS (USD)(Jul 2019 - Jun 2020)" + + for c in (proj_col, interv_level_col, activ_descrip_col, cost_col): + if c not in df.columns: + print(f"\nColumn `{c}` not found in `{df_name}`. 
Available columns:\n{df.columns}") + return + + projects = sorted(df[proj_col].dropna().unique(), key=lambda x: str(x)) + for proj in projects: + proj_mask = df[proj_col] == proj + levels = sorted(df.loc[proj_mask, interv_level_col].dropna().unique(), key=lambda x: str(x)) + print(f"\nProject: {proj} (levels: {len(levels)})") + for level in levels: + print(f" {interv_level_col}: {level}") + subset = df[proj_mask & (df[interv_level_col] == level)] + # prepare grouped combos with optional sums + cols_to_sum = [] + if expend_col in subset.columns: + cols_to_sum.append(expend_col) + if budg_col in subset.columns: + cols_to_sum.append(budg_col) + + if cols_to_sum: + work = subset[[activ_descrip_col, cost_col] + cols_to_sum].copy() + work[[activ_descrip_col, cost_col]] = work[[activ_descrip_col, cost_col]].fillna("") + combos = work.groupby([activ_descrip_col, cost_col], as_index=False)[cols_to_sum].sum(numeric_only=True) + else: + combos = subset[[activ_descrip_col, cost_col]].drop_duplicates().fillna("") + + if combos.empty: + print(" (no Description/Cost entries)") + continue + for _, row in combos.iterrows(): + desc = row.get(activ_descrip_col, "") or "(empty)" + cost = row.get(cost_col, "") or "(empty)" + if cols_to_sum: + expend_total = row.get(expend_col) + budg_total = row.get(budg_col) + expend_str = f"{expend_total:,.2f}" if pd.notna(expend_total) else "(no data)" + budg_str = f"{budg_total:,.2f}" if pd.notna(budg_total) else "(no data)" + print(f" - Description: {desc} | Cost Sub-Type: {cost} | FY2019 Exp: {expend_str} | FY2020 Budget: {budg_str}") + else: + print(f" - Description: {desc} | Cost Sub-Type: {cost}") + + def print_cost_by_programme(df: pd.DataFrame) -> None: + proj_col = "Project Name " + expend_col = "FY Ending 2019 EXPENDITURE (USD)(Jul 2018 - Jun 2019)" + budg_col = "FY Ending 2020 BUDGETS (USD)(Jul 2019 - Jun 2020)" + + for c in (proj_col, expend_col, budg_col): + if c not in df.columns: + print(f"\nColumn `{c}` not found in dataframe. 
Available columns:\n{df.columns}") + return + + grouped = df.groupby(proj_col, dropna=False)[[expend_col, budg_col]].sum(numeric_only=True) + + # summary statistics across projects (use only projects with numeric values) + expend_series = grouped[expend_col].dropna() + budg_series = grouped[budg_col].dropna() + + def fmt(v): + return f"{v:,.0f}" if pd.notna(v) else "(no data)" + + ex_min = expend_series.min() if not expend_series.empty else float("nan") + ex_max = expend_series.max() if not expend_series.empty else float("nan") + ex_median = expend_series.median() if not expend_series.empty else float("nan") + ex_mean = expend_series.mean() if not expend_series.empty else float("nan") + ex_sum = expend_series.sum() if not expend_series.empty else float("nan") + + bd_min = budg_series.min() if not budg_series.empty else float("nan") + bd_max = budg_series.max() if not budg_series.empty else float("nan") + bd_median = budg_series.median() if not budg_series.empty else float("nan") + bd_mean = budg_series.mean() if not budg_series.empty else float("nan") + bd_sum = budg_series.sum() if not budg_series.empty else float("nan") + + print("\nAcross-project statistics (per-project totals) 2018 USD:") + ex_low = expend_series.sort_values().head(5) + ex_high = expend_series.sort_values().tail(5) + ex_low_str = "; ".join(fmt(val) for val in ex_low) if not ex_low.empty else "(no data)" + ex_high_str = "; ".join(fmt(val) for val in ex_high) if not ex_high.empty else "(no data)" + print(f"FY 2018/19 Expenditure per project — | min: {fmt(ex_min)} | median: {fmt(ex_median)} | max {fmt(ex_max)} | mean: {fmt(ex_mean)} " + f"| sum over all projects: {fmt(ex_sum)}\n" + f" — | lowest 5: {ex_low_str} | highest 5: {ex_high_str} ") + bd_low = budg_series.sort_values().head(5) + bd_high = budg_series.sort_values().tail(5) + bd_low_str = "; ".join(fmt(val) for val in bd_low) if not bd_low.empty else "(no data)" + bd_high_str = "; ".join(fmt(val) for val in bd_high) if not bd_high.empty else 
"(no data)" + print(f"\nFY 2019/20 Budget per project — | min: {fmt(bd_min)} | median: {fmt(bd_median)} | max {fmt(bd_max)} | mean: {fmt(bd_mean)} " + f"| sum over all projects: {fmt(bd_sum)}\n" + f" — | lowest 5: {bd_low_str} | highest 5: {bd_high_str} ") + + # 2023 USD equivalents (apply multiplier) + multiplier = 1.0165 * 1.0133 * 1.0457 * 1.0713 * 1.0360 + ex_low_2023 = ex_low * multiplier if not ex_low.empty else ex_low + ex_high_2023 = ex_high * multiplier if not ex_high.empty else ex_high + ex_low_2023_str = "; ".join(fmt(val) for val in ex_low_2023) if not ex_low_2023.empty else "(no data)" + ex_high_2023_str = "; ".join(fmt(val) for val in ex_high_2023) if not ex_high_2023.empty else "(no data)" + ex_min_2023 = ex_min * multiplier if pd.notna(ex_min) else float("nan") + ex_max_2023 = ex_max * multiplier if pd.notna(ex_max) else float("nan") + ex_median_2023 = ex_median * multiplier if pd.notna(ex_median) else float("nan") + ex_mean_2023 = ex_mean * multiplier if pd.notna(ex_mean) else float("nan") + ex_sum_2023 = ex_sum * multiplier if pd.notna(ex_sum) else float("nan") + + bd_low_2023 = bd_low * multiplier if not bd_low.empty else bd_low + bd_high_2023 = bd_high * multiplier if not bd_high.empty else bd_high + bd_low_2023_str = "; ".join(fmt(val) for val in bd_low_2023) if not bd_low_2023.empty else "(no data)" + bd_high_2023_str = "; ".join(fmt(val) for val in bd_high_2023) if not bd_high_2023.empty else "(no data)" + bd_min_2023 = bd_min * multiplier if pd.notna(bd_min) else float("nan") + bd_max_2023 = bd_max * multiplier if pd.notna(bd_max) else float("nan") + bd_median_2023 = bd_median * multiplier if pd.notna(bd_median) else float("nan") + bd_mean_2023 = bd_mean * multiplier if pd.notna(bd_mean) else float("nan") + bd_sum_2023 = bd_sum * multiplier if pd.notna(bd_sum) else float("nan") + + print(f"\nIn 2023 USD (multiplier = {multiplier:.6f}):") + print(f"FY 2018/19 Expenditure per project — | min: {fmt(ex_min_2023)} | median: {fmt(ex_median_2023)} 
| max {fmt(ex_max_2023)} | mean: {fmt(ex_mean_2023)} " + f"| sum over all projects: {fmt(ex_sum_2023)}\n" + f" — | lowest 5: {ex_low_2023_str} | highest 5: {ex_high_2023_str} ") + print(f"\nFY 2019/20 Budget per project — | min: {fmt(bd_min_2023)} | median: {fmt(bd_median_2023)} | max {fmt(bd_max_2023)} | mean: {fmt(bd_mean_2023)} " + f"| sum over all projects: {fmt(bd_sum_2023)}\n" + f" — | lowest 5: {bd_low_2023_str} | highest 5: {bd_high_2023_str} ") + + # Additional statistics considering only projects with positive (> 0) expenditure/budget + pos_expend = expend_series[expend_series > 0] + pos_budg = budg_series[budg_series > 0] + + def make_stats(series): + if series.empty: + empty_series = pd.Series(dtype=float) + return { + "min": float("nan"), "max": float("nan"), "median": float("nan"), "mean": float("nan"), + "sum": float("nan"), "low5": empty_series, "high5": empty_series + } + return { + "min": series.min(), + "max": series.max(), + "median": series.median(), + "mean": series.mean(), + "sum": series.sum(), + "low5": series.sort_values().head(5), + "high5": series.sort_values().tail(5) + } + + pos_ex_stats = make_stats(pos_expend) + pos_bd_stats = make_stats(pos_budg) + + pos_ex_low = pos_ex_stats["low5"] + pos_ex_high = pos_ex_stats["high5"] + pos_bd_low = pos_bd_stats["low5"] + pos_bd_high = pos_bd_stats["high5"] + + pos_ex_low_str = "; ".join(fmt(val) for val in pos_ex_low) if not pos_ex_low.empty else "(no data)" + pos_ex_high_str = "; ".join(fmt(val) for val in pos_ex_high) if not pos_ex_high.empty else "(no data)" + pos_bd_low_str = "; ".join(fmt(val) for val in pos_bd_low) if not pos_bd_low.empty else "(no data)" + pos_bd_high_str = "; ".join(fmt(val) for val in pos_bd_high) if not pos_bd_high.empty else "(no data)" + + print("\nStatistics for projects with positive (>0) values:") + print(f"FY 2018/19 Expenditure per project (positive only) — | min: {fmt(pos_ex_stats['min'])} | median: {fmt(pos_ex_stats['median'])} | max: 
{fmt(pos_ex_stats['max'])} | mean: {fmt(pos_ex_stats['mean'])} " + f"| sum over all projects: {fmt(pos_ex_stats['sum'])}\n" + f" — | lowest 5: {pos_ex_low_str} | highest 5: {pos_ex_high_str}") + print(f"\nFY 2019/20 Budget per project (positive only) — | min: {fmt(pos_bd_stats['min'])} | median: {fmt(pos_bd_stats['median'])} | max: {fmt(pos_bd_stats['max'])} | mean: {fmt(pos_bd_stats['mean'])} " + f"| sum over all projects: {fmt(pos_bd_stats['sum'])}\n" + f" — | lowest 5: {pos_bd_low_str} | highest 5: {pos_bd_high_str}") + + # 2023 USD equivalents for positive-only stats (apply multiplier) + # reuse `multiplier` defined earlier + pos_ex_low_2023 = pos_ex_low * multiplier if not getattr(pos_ex_low, "empty", False) else pos_ex_low + pos_ex_high_2023 = pos_ex_high * multiplier if not getattr(pos_ex_high, "empty", False) else pos_ex_high + pos_bd_low_2023 = pos_bd_low * multiplier if not getattr(pos_bd_low, "empty", False) else pos_bd_low + pos_bd_high_2023 = pos_bd_high * multiplier if not getattr(pos_bd_high, "empty", False) else pos_bd_high + + pos_ex_min_2023 = pos_ex_stats["min"] * multiplier if pd.notna(pos_ex_stats["min"]) else float("nan") + pos_ex_max_2023 = pos_ex_stats["max"] * multiplier if pd.notna(pos_ex_stats["max"]) else float("nan") + pos_ex_median_2023 = pos_ex_stats["median"] * multiplier if pd.notna(pos_ex_stats["median"]) else float("nan") + pos_ex_mean_2023 = pos_ex_stats["mean"] * multiplier if pd.notna(pos_ex_stats["mean"]) else float("nan") + pos_ex_sum_2023 = pos_ex_stats["sum"] * multiplier if pd.notna(pos_ex_stats["sum"]) else float("nan") + + pos_bd_min_2023 = pos_bd_stats["min"] * multiplier if pd.notna(pos_bd_stats["min"]) else float("nan") + pos_bd_max_2023 = pos_bd_stats["max"] * multiplier if pd.notna(pos_bd_stats["max"]) else float("nan") + pos_bd_median_2023 = pos_bd_stats["median"] * multiplier if pd.notna(pos_bd_stats["median"]) else float("nan") + pos_bd_mean_2023 = pos_bd_stats["mean"] * multiplier if 
pd.notna(pos_bd_stats["mean"]) else float("nan") + pos_bd_sum_2023 = pos_bd_stats["sum"] * multiplier if pd.notna(pos_bd_stats["sum"]) else float("nan") + + pos_ex_low_2023_str = "; ".join(fmt(val) for val in pos_ex_low_2023) if not getattr(pos_ex_low_2023, "empty", True) else "(no data)" + pos_ex_high_2023_str = "; ".join(fmt(val) for val in pos_ex_high_2023) if not getattr(pos_ex_high_2023, "empty", True) else "(no data)" + pos_bd_low_2023_str = "; ".join(fmt(val) for val in pos_bd_low_2023) if not getattr(pos_bd_low_2023, "empty", True) else "(no data)" + pos_bd_high_2023_str = "; ".join(fmt(val) for val in pos_bd_high_2023) if not getattr(pos_bd_high_2023, "empty", True) else "(no data)" + + print(f"\nIn 2023 USD (multiplier = {multiplier:.6f}) for positive-only projects:") + print(f"FY 2018/19 Expenditure per project (positive only) — | min: {fmt(pos_ex_min_2023)} | median: {fmt(pos_ex_median_2023)} | max {fmt(pos_ex_max_2023)} | mean: {fmt(pos_ex_mean_2023)} " + f"| sum over all projects: {fmt(pos_ex_sum_2023)}\n" + f" — | lowest 5: {pos_ex_low_2023_str} | highest 5: {pos_ex_high_2023_str}") + print(f"\nFY 2019/20 Budget per project (positive only) — | min: {fmt(pos_bd_min_2023)} | median: {fmt(pos_bd_median_2023)} | max {fmt(pos_bd_max_2023)} | mean: {fmt(pos_bd_mean_2023)} " + f"| sum over all projects: {fmt(pos_bd_sum_2023)}\n" + f" — | lowest 5: {pos_bd_low_2023_str} | highest 5: {pos_bd_high_2023_str}") + + + print_cost_by_programme(df_nutr) diff --git a/src/scripts/wasting_analyses/plot_calib_outputs__using_analysis_utility_fncs_wast.py b/src/scripts/wasting_analyses/plot_calib_outputs__using_analysis_utility_fncs_wast.py new file mode 100644 index 0000000000..57a60fb258 --- /dev/null +++ b/src/scripts/wasting_analyses/plot_calib_outputs__using_analysis_utility_fncs_wast.py @@ -0,0 +1,16 @@ +""" +Visualise the calibration outcomes (i.e. 
comparison of modelled outcomes to data): + * prevalence of moderate and severe wasting among age groups in 2016 & 2020, +Prints modelled average annual direct deaths due to SAM, which are then used in the calib_analysis_wasting.py script. +""" + +from pathlib import Path + +import analysis_utility_functions_wast + +# ####### TO SET ####################################################################################################### +# Where to save the outcomes +calib_outputs_path = Path("./outputs/sejjej5@ucl.ac.uk/wasting/scenarios/_outcomes/calibration") +######################################################################################################################## + +analysis_utility_functions_wast.calc_calibration_annual_death_CIs(calib_outputs_path) diff --git a/src/scripts/wasting_analyses/run_costing_analysis_wast.py b/src/scripts/wasting_analyses/run_costing_analysis_wast.py new file mode 100644 index 0000000000..4dc1a053c3 --- /dev/null +++ b/src/scripts/wasting_analyses/run_costing_analysis_wast.py @@ -0,0 +1,378 @@ +""" +Produces cost analysis outputs for wasting paper +TODO: add more details +""" + +# %% Import statements +import time +from pathlib import Path + +import pandas as pd + +from src.scripts.costing.cost_estimation import ( + do_stacked_bar_plot_of_cost_by_category, + estimate_input_cost_of_scenarios, +) +from tlo import Date +from tlo.analysis.utils import ( + get_scenario_info, + get_scenario_outputs, + load_pickled_dataframes, +) + + +def run_costing_analysis_wast(cost_outcome_folderpath: Path, SQ_timestamp: str, scen_timestamps_suffix: str, + force_calculation: list): + # `start time of the analysis + total_time_start = time.time() + + # Save resource, output, outcome data, and figure output paths + resourcefilepath = Path("./resources") # resources (parameters etc) + outputfilepath = Path('./outputs/sejjej5@ucl.ac.uk/wasting/scenarios/costing_outputs') # simulated data + figurespath = 
Path('./outputs/sejjej5@ucl.ac.uk/wasting/scenarios/_outcomes') # figures + + # Load result files + # ------------------------------------------------------------------------------------------------------------------ + results_folder = get_scenario_outputs(f'costing-{SQ_timestamp}.py', outputfilepath)[0] + + # Check can read results from draw=0, run=0 + load_pickled_dataframes(results_folder, 0, 0) # look at one log (so can decide what to extract) + # params = extract_params(results_folder) + get_scenario_info(results_folder) + + # Declare default parameters for cost analysis + # ------------------------------------------------------------------------------------------------------------------ + # Period relevant for costing + TARGET_PERIOD = (Date(2026, 1, 1), Date(2030, 12, 31)) + relevant_period_for_costing = [i.year for i in TARGET_PERIOD] + list_of_relevant_years_for_costing = list(range(relevant_period_for_costing[0], relevant_period_for_costing[1] + 1)) + list_of_years_for_plot = list(range(2026, 2031)) + # number_of_years_costed = relevant_period_for_costing[1] - relevant_period_for_costing[0] + 1 + + # Scenarios + cost_scenarios = {0: "SQ", 1: "GM", 2: "CS", 3: "FS", 4:"GM_FS", 5:"CS_FS", 6:"GM_CS_FS", 7:"GM_CS"} + + # Costing parameters + discount_rate = 0.03 + # discount_rate_lomas = {2023: 0.0036, 2024: 0.0040, 2025: 0.0039, 2026: 0.0042, 2027: 0.0042, 2028: 0.0041, + # 2029: 0.0041, 2030: 0.0040}# get the list of discount rates from 2023 until 2030 + + # Estimate standard input costs of scenario + # ----------------------------------------------------------------------------------------------------------------------- + cost_scenarios_draw_nmbs = list(cost_scenarios.keys()) + + input_costs_file_path = cost_outcome_folderpath / f"input_cost_outcomes_{SQ_timestamp}.pkl" + # Remove the file if already exists + if input_costs_file_path.exists(): + try: + input_costs_file_path.unlink() + except Exception as e: + print(f"warning: could not remove existing file 
{input_costs_file_path}: {e}") + print("\ninput cost outcomes calculation ...") + # Standard 3% discount rate + input_costs = estimate_input_cost_of_scenarios( + results_folder, resourcefilepath, _draws=cost_scenarios_draw_nmbs, + _years=list_of_relevant_years_for_costing, cost_only_used_staff=True, + _discount_rate=discount_rate, summarize=True + ) + print("saving input cost outcomes to file ...") + input_costs.to_pickle(input_costs_file_path) + + # pd.set_option('display.max_columns', None) # Show all columns + # pd.set_option('display.max_rows', None) # Show all rows + # pd.set_option('display.max_colwidth', None) # Show full content of each row + # print(f"\ninput_costs:\n{input_costs}") + # print(f"\ninput_costs index:\n{input_costs.index}," + # f"\ninput_costs columns:\n{input_costs.columns}") + # print(f"\nUnique cost_category values:\n{input_costs['cost_category'].unique()}") + # + # print(f"\ninput_costs (medical consumables only):\n{input_costs[input_costs['cost_category'] == 'medical consumables']}") + + + + # # Undiscounted costs + # input_costs_undiscounted = estimate_input_cost_of_scenarios(results_folder, resourcefilepath, _draws = cost_scenarios_draw_nmbs, + # _years=list_of_relevant_years_for_costing, cost_only_used_staff=True, + # _discount_rate = 0, summarize = True) + # + # # Cost with variable discount rate based on Lomas et al (2021) + # input_costs_variable_discounting = estimate_input_cost_of_scenarios(results_folder, resourcefilepath, _draws = cost_scenarios_draw_nmbs, + # _years=list_of_relevant_years_for_costing, cost_only_used_staff=True, + # _discount_rate = discount_rate_lomas, summarize = True) + + # Get overall estimates for main text + # ----------------------------------------------------------------------------------------------------------------------- + # cost_by_draw = input_costs.groupby(['draw', 'stat'])['cost'].sum() + # undiscounted_cost_by_draw = input_costs_undiscounted.groupby(['draw', 'stat'])['cost'].sum() + + # 
Abstract + # consumable_cost_by_draw = input_costs[(input_costs.cost_category == 'medical consumables') & (input_costs.stat == 'mean')].groupby(['draw'])['cost'].sum() + # print(f"Under current system capacity, total healthcare delivery costs for 2023–2030 were estimated at \$" + # f"{cost_by_draw[0,'mean']/1e9:,.2f} billion [95\% confidence interval (CI), \${cost_by_draw[0,'lower']/1e9:,.2f}b - \${cost_by_draw[0,'upper']/1e9:,.2f}b], averaging \$" + # f"{undiscounted_cost_by_draw[0,'mean']/1e6/number_of_years_costed:,.2f} million [\${undiscounted_cost_by_draw[0,'lower']/1e6/number_of_years_costed:,.2f}m - \${undiscounted_cost_by_draw[0,'upper']/1e6/number_of_years_costed:,.2f}m] annually." + # f" Scenario analysis revealed the importance of health system interdependencies: improving consumable availability alone led to a modest " + # f"{(consumable_cost_by_draw[5]/consumable_cost_by_draw[0] - 1) * 100:.2f}\%" + # f" increase in consumables cost due to constraints in the health workforce. 
In contrast, expanding human resources for health (HRH) increased consumables costs by " + # f"{(consumable_cost_by_draw[3]/consumable_cost_by_draw[0] - 1) * 100:.2f}\%" + # f", while jointly expanding HRH and consumable availability raised consumables costs by " + # f"{(consumable_cost_by_draw[8]/consumable_cost_by_draw[0] - 1) * 100:.2f}\%, " + # f"illustrating how bottlenecks in one component limit the effect of changes in another.") + # Results 1 + # print(f"The total cost of healthcare delivery in Malawi between 2023 and 2030 was estimated to be " + # f"\${cost_by_draw[0,'mean']/1e9:,.2f} billion [95\% confidence interval (CI), \${cost_by_draw[0,'lower']/1e9:,.2f}b - \${cost_by_draw[0,'upper']/1e9:,.2f}b], under the actual scenario, and increased to " + # f"\${cost_by_draw[5,'mean']/1e9:,.2f} billion [\${cost_by_draw[5,'lower']/1e9:,.2f}b - \${cost_by_draw[5,'upper']/1e9:,.2f}b] under the improved consumable availability scenario, " + # f"followed by \${cost_by_draw[3,'mean']/1e9:,.2f} billion [\${cost_by_draw[3,'lower']/1e9:,.2f}b - \${cost_by_draw[3,'upper']/1e9:,.2f}b] under the expanded HRH scenario and finally " + # f"\${cost_by_draw[8,'mean']/1e9:,.2f} billion [\${cost_by_draw[8,'lower']/1e9:,.2f}b - \${cost_by_draw[8,'upper']/1e9:,.2f}b] under the expanded HRH + improved consumable availability scenario.") + # # Results 2 + # print(f"This translates to an average annual cost of " + # f"\${undiscounted_cost_by_draw[0,'mean']/1e6/number_of_years_costed:,.2f} million [\${undiscounted_cost_by_draw[0,'lower']/1e6/number_of_years_costed:,.2f}m - \${undiscounted_cost_by_draw[0,'upper']/1e6/number_of_years_costed:,.2f}m], under the actual scenario, " + # f"\${undiscounted_cost_by_draw[5,'mean']/1e6/number_of_years_costed:,.2f} million [\${undiscounted_cost_by_draw[5,'lower']/1e6/number_of_years_costed:,.2f}m - \${undiscounted_cost_by_draw[5,'upper']/1e6/number_of_years_costed:,.2f}m] under the improved consumable availability scenario, followed by " + # 
f"\${undiscounted_cost_by_draw[3,'mean']/1e6/number_of_years_costed:,.2f} million [\${undiscounted_cost_by_draw[3,'lower']/1e6/number_of_years_costed:,.2f}m - \${undiscounted_cost_by_draw[3,'upper']/1e6/number_of_years_costed:,.2f}m] under the expanded HRH scenario and finally " + # f"\${undiscounted_cost_by_draw[8,'mean']/1e6/number_of_years_costed:,.2f} million [\${undiscounted_cost_by_draw[8,'lower']/1e6/number_of_years_costed:,.2f}m - \${undiscounted_cost_by_draw[8,'upper']/1e6/number_of_years_costed:,.2f}m] under the expanded HRH + improved consumable availability scenario.") + # # Results 3 + # print(f"Notably, improving consumable availability alone increases the cost of medical consumables by just " + # f"{(consumable_cost_by_draw[5]/consumable_cost_by_draw[0] - 1) * 100:.2f}\% " + # f"because the limited health workforce (HRH) restricts the number of feasible appointments and, consequently, the quantity of consumables dispensed. " + # f"In contrast, expanding HRH alone raises consumable costs by " + # f"{(consumable_cost_by_draw[3]/consumable_cost_by_draw[0] - 1) * 100:.2f}\%" + # f". 
When both HRH and consumable availability are expanded together, consumable costs increase by " + # f"{(consumable_cost_by_draw[8]/consumable_cost_by_draw[0] - 1) * 100:.2f}\% " + # f"compared to the actual scenario.") + # # Results 4 + # cost_of_hiv_testing = input_costs[(input_costs.cost_subgroup == 'Test, HIV EIA Elisa') & (input_costs.stat == 'mean')].groupby(['draw'])['cost'].sum() + # print(f"For instance, the cost of HIV testing consumables increases by {(cost_of_hiv_testing[3]/cost_of_hiv_testing[0] - 1)*100:.2f}\% under the expanded HRH scenario and by " + # f"{(cost_of_hiv_testing[8]/cost_of_hiv_testing[0] - 1)*100:.2f}\% under the combined expanded HRH and improved consumable availability scenario, " + # f"while showing almost no change under the scenario with improved consumable availability alone") + + # Get figures for overview paper + # ----------------------------------------------------------------------------------------------------------------------- + # Figure 2: Estimated costs by cost category + # do_stacked_bar_plot_of_cost_by_category(_df = input_costs, _cost_category = 'all', _disaggregate_by_subgroup = False, + # _year = list_of_relevant_years_for_costing,show_title = False, + # _outputfilepath = figurespath, _scenario_dict = cost_scenarios) + + revised_consumable_subcategories = {'cost_of_separately_managed_medical_supplies_dispensed':'cost_of_consumables_dispensed', 'cost_of_excess_separately_managed_medical_supplies_stocked': 'cost_of_excess_consumables_stocked', 'supply_chain':'supply_chain'} + input_costs_new = input_costs.copy() + input_costs_new['cost_subcategory'] = input_costs_new['cost_subcategory'].map(revised_consumable_subcategories).fillna(input_costs_new['cost_subcategory']) + + # Figure 3: Estimated costs by cost sub-category + output_costs_medical = do_stacked_bar_plot_of_cost_by_category(_df = input_costs_new, _cost_category = 'medical consumables', _disaggregate_by_subgroup = False, + _year = list_of_years_for_plot, 
show_title = False, + _outputfilepath = figurespath, _scenario_dict = cost_scenarios, _add_figname_suffix=scen_timestamps_suffix) + output_costs_medical_file_path = cost_outcome_folderpath / f"output_costs_medical_outcomes_{SQ_timestamp}.pkl" + if output_costs_medical_file_path.exists(): + try: + output_costs_medical_file_path.unlink() + except Exception as e: + print(f"warning: could not remove existing file {output_costs_medical_file_path}: {e}") + print("saving output cost medical outcomes to file ...") + col_names = ['total', 'lower_bound', 'upper_bound'] + output_costs_medical_df = pd.DataFrame({name: t for name, t in zip(col_names, output_costs_medical)}) + output_costs_medical_df = output_costs_medical_df * 10 ** 6 + output_costs_medical_df['interv'] = output_costs_medical_df.index.map(cost_scenarios) + output_costs_medical_df = output_costs_medical_df.set_index('interv') + print("\noutput_costs_medical_df") + print(output_costs_medical_df) + output_costs_medical_df.to_pickle(output_costs_medical_file_path) + # do_stacked_bar_plot_of_cost_by_category(_df = input_costs, _cost_category = 'human resources for health', _disaggregate_by_subgroup = False, + # _year = list_of_years_for_plot, show_title = False, + # _outputfilepath = figurespath, _scenario_dict = cost_scenarios) + # do_stacked_bar_plot_of_cost_by_category(_df = input_costs, _cost_category = 'medical equipment', _disaggregate_by_subgroup = False, + # _year = list_of_years_for_plot, show_title = False, + # _outputfilepath = figurespath, _scenario_dict = cost_scenarios) + # do_stacked_bar_plot_of_cost_by_category(_df = input_costs, _cost_category = 'facility operating cost', _disaggregate_by_subgroup = False, + # _year = list_of_years_for_plot, show_title = False, + # _outputfilepath = figurespath, _scenario_dict = cost_scenarios) + + + # # Figure 4: Estimated costs by year + # do_line_plot_of_cost(_df = input_costs_undiscounted, _cost_category='all', + # _year=list_of_years_for_plot, _draws= [0], + # 
disaggregate_by= 'cost_category', + # _y_lim = 400, + # show_title = False, + # _outputfilepath = figurespath) + # do_line_plot_of_cost(_df = input_costs_undiscounted, _cost_category='all', + # _year=list_of_years_for_plot, _draws= [3], + # disaggregate_by= 'cost_category', + # _y_lim = 400, + # show_title = False, + # _outputfilepath = figurespath) + # do_line_plot_of_cost(_df = input_costs_undiscounted, _cost_category='all', + # _year=list_of_years_for_plot, _draws= [5], + # disaggregate_by= 'cost_category', + # _y_lim = 400, + # show_title = False, + # _outputfilepath = figurespath) + # do_line_plot_of_cost(_df = input_costs_undiscounted, _cost_category='all', + # _year=list_of_years_for_plot, _draws= [8], + # disaggregate_by= 'cost_category', + # _y_lim = 400, + # show_title = False, + # _outputfilepath = figurespath) + + # # Figure D1: Total cost by scenario assuming 0% discount rate + # do_stacked_bar_plot_of_cost_by_category(_df = input_costs_undiscounted, + # _cost_category = 'all', + # _year=list_of_years_for_plot, + # _disaggregate_by_subgroup = False, + # _outputfilepath = figurespath, + # _scenario_dict = cost_scenarios, + # _add_figname_suffix = '_UNDISCOUNTED') + # + # # Figure D2: Total cost by scenario assuming variable discount rates + # do_stacked_bar_plot_of_cost_by_category(_df = input_costs_variable_discounting, + # _cost_category = 'all', + # _year=list_of_years_for_plot, + # _disaggregate_by_subgroup = False, + # _outputfilepath = figurespath, + # _scenario_dict = cost_scenarios, + # _add_figname_suffix = '_VARIABLE_DISCOUNTING') + + + # Figure F1-F4: Cost by cost sub-group + #TODO: this might be useful + + # cost_categories = ['human resources for health', 'medical consumables', + # 'medical equipment', 'facility operating cost'] + # draws = input_costs.draw.unique().tolist() + # colourmap_for_consumables = {'First-line ART regimen: adult':'#1f77b4', + # 'Test, HIV EIA Elisa': '#ff7f0e', + # 'VL Test': '#2ca02c', + # 
'Depot-Medroxyprogesterone Acetate 150 mg - 3 monthly': '#d62728', + # 'Oxygen, 1000 liters, primarily with oxygen cylinders': '#9467bd', + # 'Phenobarbital, 100 mg': '#8c564b', + # 'Rotavirus vaccine': '#e377c2', + # 'Carbamazepine 200mg_1000_CMST': '#7f7f7f', + # 'Infant resuscitator, clear plastic + mask + bag_each_CMST': '#bcbd22', + # 'Dietary supplements (country-specific)': '#17becf', + # 'Tenofovir (TDF)/Emtricitabine (FTC), tablet, 300/200 mg': '#2b8cbe', + # 'Pneumococcal vaccine': '#fdae61', + # 'Pentavalent vaccine (DPT, Hep B, Hib)': '#d73027', + # 'male circumcision kit, consumables (10 procedures)_1_IDA': '#756bb1', + # 'Jadelle (implant), box of 2_CMST': '#ffdd44', + # 'Urine analysis': '#66c2a5'} + + # for _cat in cost_categories: + # for _d in draws: + # if _cat == 'medical consumables': + # create_summary_treemap_by_cost_subgroup(_df = input_costs, _year = list_of_years_for_plot, + # _cost_category = _cat, _draw = _d, _color_map=colourmap_for_consumables, + # show_title= False, _label_fontsize= 8, _outputfilepath=figurespath) + # else: + # create_summary_treemap_by_cost_subgroup(_df=input_costs, _year=list_of_years_for_plot, + # _cost_category=_cat, _draw=_d, show_title= False, + # _label_fontsize= 8.5, _outputfilepath=figurespath) + + + # # Get tables for overview paper + # # ----------------------------------------------------------------------------------------------------------------------- + # # Group data and aggregate cost for each draw and stat + # def generate_detail_cost_table(_groupby_var, _groupby_var_name, _longtable = False): + # edited_input_costs = input_costs.copy() + # edited_input_costs[_groupby_var] = edited_input_costs[_groupby_var].replace('_', ' ', regex=True) + # edited_input_costs[_groupby_var] = edited_input_costs[_groupby_var].replace('%', '\%', regex=True) + # edited_input_costs[_groupby_var] = edited_input_costs[_groupby_var].replace('&', '\&', regex=True) + # + # grouped_costs = 
edited_input_costs.groupby(['cost_category', _groupby_var, 'draw', 'stat'])['cost'].sum() + # # Format the 'cost' values before creating the LaTeX table + # grouped_costs = grouped_costs.apply(lambda x: f"{float(x):,.0f}") + # # Remove underscores from all column values + # + # # Create a pivot table to restructure the data for LaTeX output + # pivot_data = {} + # for draw in cost_scenarios_draw_nmbs: + # draw_data = grouped_costs.xs(draw, level='draw').unstack(fill_value=0) # Unstack to get 'stat' as columns + # # Concatenate 'mean' with 'lower-upper' in the required format + # pivot_data[draw] = draw_data['mean'].astype(str) + ' [' + \ + # draw_data['lower'].astype(str) + '-' + \ + # draw_data['upper'].astype(str) + ']' + # + # # Combine draw data into a single DataFrame + # table_data = pd.concat([pivot_data[0], pivot_data[3], pivot_data[5], pivot_data[8]], axis=1, keys=['draw=0', 'draw=3', 'draw=5', 'draw=8']).reset_index() + # + # # Rename columns for clarity + # table_data.columns = ['Cost Category', _groupby_var_name, 'Actual', 'Expanded HRH', 'Improved consumable availability', 'Expanded HRH +\n Improved consumable availability'] + # + # # Replace '\n' with '\\' for LaTeX line breaks + # #table_data['Real World'] = table_data['Real World'].apply(lambda x: x.replace("\n", "\\\\")) + # #table_data['Perfect Health System'] = table_data['Perfect Health System'].apply(lambda x: x.replace("\n", "\\\\")) + # + # # Convert to LaTeX format with horizontal lines after every row + # latex_table = table_data.to_latex( + # longtable=_longtable, # Use the longtable environment for large tables + # column_format='|R{3cm}|R{3cm}|R{2.2cm}|R{2.2cm}|R{2.2cm}|R{2.2cm}|', + # caption=f"Summarized Costs by Category and {_groupby_var_name}", + # label=f"tab:cost_by_{_groupby_var}", + # position="h", + # index=False, + # escape=False, # Prevent escaping special characters like \n + # header=True + # ) + # + # # Add \hline after the header and after every row for horizontal lines + 
# latex_table = latex_table.replace("\\\\", "\\\\ \\hline") # Add \hline after each row + # #latex_table = latex_table.replace("_", " ") # Add \hline after each row + # + # # Specify the file path to save + # latex_file_path = figurespath / f'cost_by_{_groupby_var}.tex' + # + # # Write to a file + # with open(latex_file_path, 'w') as latex_file: + # latex_file.write(latex_table) + # + # # Print latex for reference + # print(latex_table) + + # # Table F1: Cost by cost subcategory + # generate_detail_cost_table(_groupby_var = 'cost_subcategory', _groupby_var_name = 'Cost Subcategory', _longtable = True) + # # Table F2: Cost by cost subgroup + # generate_detail_cost_table(_groupby_var = 'cost_subgroup', _groupby_var_name = 'Category Subgroup', _longtable = True) + + # # Figure E1: Consumable inflow to outflow ratio figure + # # ----------------------------------------------------------------------------------------------------------------------- + # inflow_to_outflow_ratio = pd.read_csv(resourcefilepath / "costing/ResourceFile_Consumables_Inflow_Outflow_Ratio.csv") + # + # # Clean category names for plot + # clean_category_names = {'cancer': 'Cancer', 'cardiometabolicdisorders': 'Cardiometabolic Disorders', + # 'contraception': 'Contraception', 'general': 'General', 'hiv': 'HIV', 'malaria': 'Malaria', + # 'ncds': 'Non-communicable Diseases', 'neonatal_health': 'Neonatal Health', + # 'other_childhood_illnesses': 'Other Childhood Illnesses', 'reproductive_health': 'Reproductive Health', + # 'road_traffic_injuries': 'Road Traffic Injuries', 'tb': 'Tuberculosis', + # 'undernutrition': 'Undernutrition'} + # inflow_to_outflow_ratio['category'] = inflow_to_outflow_ratio['item_category'].map(clean_category_names) + # + # + # def plot_inflow_to_outflow_ratio(_df, groupby_var, _outputfilepath): + # # Plot the bar plot with gray bars + # plt.figure(figsize=(10, 6)) + # sns.barplot(data=_df, x=groupby_var, y='inflow_to_outflow_ratio', errorbar=None, color="gray") + # + # # Add 
points representing the distribution of individual values + # sns.stripplot(data=_df, x=groupby_var, y='inflow_to_outflow_ratio', color='black', size=5, alpha=0.2) + # + # # Wrap x-axis labels ONLY if they are strings and longer than 15 characters + # labels = [] + # for label in _df[groupby_var].unique(): + # if isinstance(label, str) and len(label) > 15: + # labels.append(textwrap.fill(label, width=15)) + # else: + # labels.append(label) + # plt.xticks(ticks=range(len(labels)), labels=labels, rotation=90, ha='center') + # + # # Set labels and title + # plt.xlabel(groupby_var) + # plt.ylabel('Inflow to Outflow Ratio') + # + # # Show and save plot + # plt.tight_layout() + # plt.savefig(_outputfilepath / f'inflow_to_outflow_ratio_by_{groupby_var}.png') + # plt.close() + # + # plot_inflow_to_outflow_ratio(inflow_to_outflow_ratio, 'fac_type_tlo', _outputfilepath = figurespath) + # plot_inflow_to_outflow_ratio(inflow_to_outflow_ratio, 'district', _outputfilepath = figurespath) + # plot_inflow_to_outflow_ratio(inflow_to_outflow_ratio, 'item_code', _outputfilepath = figurespath) + # plot_inflow_to_outflow_ratio(inflow_to_outflow_ratio, 'category', _outputfilepath = figurespath) + + total_time_end = time.time() + print(f"\ntotal running time (s) of cost calculations: {(total_time_end - total_time_start)}") diff --git a/src/scripts/wasting_analyses/run_interventions_analysis_wasting.py b/src/scripts/wasting_analyses/run_interventions_analysis_wasting.py new file mode 100644 index 0000000000..3480c189ca --- /dev/null +++ b/src/scripts/wasting_analyses/run_interventions_analysis_wasting.py @@ -0,0 +1,739 @@ +""" +An analysis file for the wasting module to compare outcomes of one intervention under multiple assumptions. 
+""" + +# %% Import statements +import pickle +import time +from pathlib import Path + +import analysis_utility_functions_wast as util_fncs +import numpy as np +import pandas as pd +from matplotlib import pyplot as plt +from matplotlib.backends.backend_pdf import PdfPages + +from tlo.analysis.utils import get_scenario_outputs + +# start time of the whole analysis +total_time_start = time.time() + +# ####### TO SET ####################################################################################################### +# Create dicts for the intervention scenarios. 'Interv_abbrev': {'Intervention scenario title/abbreviation': draw_nmb} +# scenarios_dict = { +# 'SQ': {'Status Quo': 0}, +# 'GM': {'GM_all': 0, 'GM_1-2': 1, 'GM_FullAttend': 2}, +# 'CS': {'CS_10': 0, 'CS_30': 1, 'CS_50': 2, 'CS_100': 3}, +# 'FS': {'FS_70':0, 'FS_Full': 1} +# } +scenarios_dict = { + 'SQ': {'Status Quo': 0}, + 'GM': {'GM': 0}, # 'GM_all': 0, 'GM_1-2': 1, 'GM_FullAttend': 2}, + 'CS': {'CS': 0}, # 'CS_10': 0 ,'CS_30': 1, 'CS_50': 2, 'CS_100': 3}, + 'FS': {'FS': 0}, # 'FS_70':0, 'FS_Full': 1} + 'GM_FS': {'GM_FS': 0}, + 'CS_FS': {'CS_FS': 0}, + 'GM_CS_FS': {'GM_CS_FS': 0}, + 'GM_CS': {'GM_CS': 0}, +} +# Set the intervention to be analysed, and for which years they were simulated +intervs_all = ['SQ', 'GM', 'CS', 'FS', 'GM_CS', 'GM_FS', 'CS_FS', 'GM_CS_FS'] +intervs_of_interest = ['GM', 'CS', 'FS', 'GM_CS', 'GM_FS', 'CS_FS', 'GM_CS_FS'] +intervention_years = list(range(2026, 2031)) +scenarios_to_compare = ['GM', 'CS', 'FS', 'GM_CS', 'GM_FS', 'CS_FS', 'GM_CS_FS'] +# Which years to plot (from post burn-in period) +plot_years = list(range(2015, 2032)) +# Plot settings +legend_fontsize = 12 +title_fontsize = 16 + +# Where to find the modelled intervention scenarios +interv_scenarios_folder_path = Path("./outputs/sejjej5@ucl.ac.uk/wasting/scenarios") +# Files names prefix +scenario_filename_prefix = 'wasting_analysis__full_model' +# Where to save the outcomes +outputs_path = 
Path("./outputs/sejjej5@ucl.ac.uk/wasting/scenarios/_outcomes")
+cohorts_to_plot = ['Under-5'] # ['Neonatal', 'Under-5'] #
+# force_calculation of [births_data, deaths_data, dalys_data, tx_data, medical_cost_data, all_cost_data, pop_sizes_data],
+# if True, enables to force recalculation of the corresponding data
+force_calculation = [False, False, False, False, False, False, False]
+# force_calculation = [False, False, False, False, False, True, False]
+# force_calculation = [True, True, True, True, True, True, True]
+############################################################## ########################################################
+assert all(interv in intervs_all for interv in intervs_of_interest), (
+    "Some interventions in intervs_of_interest are not in intervs_all"
+)
+# Ensure Status Quo is always included in both intervs_of_interest and scenarios_to_compare
+if 'SQ' not in intervs_of_interest:
+    intervs_of_interest = intervs_of_interest + ['SQ']
+if 'Status Quo' not in scenarios_to_compare:
+    scenarios_to_compare = scenarios_to_compare + ['Status Quo']
+
+
+def _load_or_compute_outcomes(pickle_path, force, load_label, calc_label, compute):
+    """
+    Return the outcomes cached in pickle_path, or calculate them with compute() and cache the result.
+
+    :param pickle_path: Path to the pickle file used as the cache
+    :param force: If True, an existing cache file is ignored and the outcomes are recalculated
+    :param load_label: Name of the outcomes printed when they are loaded from the cache
+    :param calc_label: Name of the outcomes printed when they are calculated and saved
+    :param compute: Callable without arguments returning the outcomes to be cached
+    :return: The loaded or newly calculated outcomes
+    """
+    if pickle_path.exists() and not force:
+        print(f"\nloading {load_label} from file ...")
+        # NOTE: unpickling can execute arbitrary code; load only cache files produced by this analysis
+        with pickle_path.open("rb") as f:
+            return pickle.load(f)
+    print(f"\n{calc_label} calculation ...")
+    outcomes = compute()
+    print(f"saving {calc_label} to file ...")
+    # the cache folder may not exist yet (e.g. on the first run)
+    pickle_path.parent.mkdir(parents=True, exist_ok=True)
+    with pickle_path.open("wb") as f:
+        pickle.dump(outcomes, f)
+    return outcomes
+
+
+def _save_png_grid_pages(pdf, png_dir, cohorts, causes, panel_stem, page_stem, cohort_prefix, name_tail,
+                         causes_per_page=2):
+    """
+    Arrange previously saved outcome plots (PNG files) into grid pages, append each page to the PDF and save it
+    also as a PNG file. Rows of the grid are causes, columns are cohorts; a panel stays empty when its PNG file
+    is not found in png_dir.
+
+    :param pdf: Opened PdfPages object to which the pages are appended
+    :param png_dir: Directory from which the panel PNG files are read and to which the page PNG files are saved
+    :param cohorts: Cohorts to be shown in the columns
+    :param causes: Causes (of deaths or DALYs) to be shown in the rows
+    :param panel_stem: Middle part of the panel file names, containing the '{cause}' placeholder
+    :param page_stem: Middle part of the page file names
+    :param cohort_prefix: Abbreviation of the plotted cohorts with which the page file names start
+    :param name_tail: End of both the panel and page file names (scenarios prefix, timestamps suffix, extension)
+    :param causes_per_page: Number of rows (causes) per page
+    """
+    if not cohorts:
+        # nothing to show; a grid without columns cannot be created
+        return
+    for page_start in range(0, len(causes), causes_per_page):
+        page_causes = causes[page_start:page_start + causes_per_page]
+        # squeeze=False keeps the axes in a 2D array for any number of rows and columns
+        fig, axes = plt.subplots(
+            causes_per_page, len(cohorts), figsize=(12, 6 * causes_per_page), squeeze=False
+        )
+        for i, cause in enumerate(page_causes):
+            for j, cohort in enumerate(cohorts):
+                panel_png_file_path = png_dir / f"{cohort}_{panel_stem.format(cause=cause)}{name_tail}"
+                if panel_png_file_path.exists():
+                    axes[i, j].imshow(plt.imread(panel_png_file_path))
+                    axes[i, j].axis('off')
+        plt.tight_layout()
+        pdf.savefig(fig)  # Save the current page to the PDF
+        page_png_file_path = png_dir / f"{cohort_prefix}_{page_stem}_{'_'.join(page_causes)}{name_tail}"
+        fig.savefig(page_png_file_path, dpi=300, bbox_inches='tight')  # Save as PNG
+        plt.close('all')
+
+
+def run_interventions_analysis_wasting(outputspath:Path, plotyears:list, interventionyears:list,
+                                       intervs_ofinterest:list, scenarios_tocompare, intervsall) -> None:
+    """
+    This function saves outcomes from analyses conducted for the Janoušková et al. (2025) paper on acute malnutrition.
+
+    The analyses examine the impact of improved screening or treatment coverage.
+    * Outcome 1:
+        line plots for each intervs_ofinterest to compare mortality rate over time under multiple settings of the
+        intervention and the status quo scenarios
+    * Outcome 2:
+        line plots to compare mean deaths over time for scenarios_tocompare to each other
+    * Outcome 3:
+        bars to compare sum of deaths over intervention period for scenarios_tocompare to each other
+    * Outcomes 4-8:
+        pages with mean DALYs, sum of DALYs, averted deaths, averted DALYs and the cost-effectiveness sensitivity
+        grid; these are only collected into the PDF if the corresponding PNG files are found in outputspath
+
+    Birth, death, DALYs and population size outcomes are loaded from pickle files when available (unless the
+    recalculation is forced by the module-level force_calculation), otherwise they are extracted and saved.
+    All pages are saved in one PDF file and each page also as a PNG file.
+
+    :param outputspath: Path to the directory to save output plots/tables;
+        Data calculated during analysis will be saved in outputspath/outcomes_data folder for later use
+    :param plotyears: The years to be included in the plots/tables
+    :param interventionyears: The years during which an intervention is implemented (if any)
+    :param intervs_ofinterest: List of interventions to plot scenarios with multiple settings of those interventions;
+        (SQ = status quo, GM = growth monitoring, CS = care-seeking, FS = food supplements);
+        'SQ' is expected to be included, as its results and timestamp are used for the comparisons
+    :param scenarios_tocompare: List of scenarios to be plotted together for comparison
+    :param intervsall: List of all interventions
+    """
+
+    # deaths and dalys data are extracted for the whole year, which means when plotted in discrete times, at the point
+    # of year xxxx, which is beginning of the year data from xxxx-1 year needs to be plotted
+    datayears = [year-1 for year in plotyears]
+    # when plotting means for intervention years, it needs to be plotted from the first year of interventions being
+    # implemented until the beginning of year after last year of interventions
+    interv_plotyears = interventionyears + [interventionyears[-1] + 1]
+    # to plot the mean for year xxxx, since it shows as in first day of the year, the data from the end of previous
+    # years need to be used
+    interv_datayears = [year-1 for year in interv_plotyears]
+
+    print("\n----------------------------")
+    print(" --- MAIN ANALYSES ---")
+    # Find the most recent folders containing results for each intervention
+    iterv_folders_dict = {
+        interv: get_scenario_outputs(
+            scenario_filename_prefix, Path(interv_scenarios_folder_path / interv)
+        )[-1] for interv in intervs_ofinterest
+    }
+    # The timestamp is the part of the folder name following "<scenario_filename_prefix>_<interv>-"
+    interv_timestamps_dict = {
+        interv: folder.name.split(f"{scenario_filename_prefix}_{interv}-")[-1]
+        for interv, folder in iterv_folders_dict.items()
+    }
+    print(f"\n{interv_timestamps_dict=}")
+    # Define folders for each scenario
+    scenario_folders = {
+        interv: {
+            scen_name: Path(iterv_folders_dict[interv] / str(scen_draw_nmb))
+            for scen_name, scen_draw_nmb in scenarios_dict[interv].items()
+        }
+        for interv in intervs_ofinterest
+    }
+
+    pd.set_option('display.max_columns', None)  # Show all columns
+    pd.set_option('display.max_rows', None)  # Show all rows
+    pd.set_option('display.max_colwidth', None)  # Show full content of each row
+
+    # --------------------------------- NEONATAL AND UNDER-5 BIRTH AND DEATH OUTCOMES -------------------------------- #
+    # Define paths for saving/loading outcomes
+    all_timestamps = '_'.join(interv_timestamps_dict.values())
+    outcomes_data_path = outputspath / "outcomes_data"
+    birth_outcomes_path = outcomes_data_path / f"birth_outcomes_{all_timestamps}.pkl"
+    death_outcomes_path = outcomes_data_path / f"death_outcomes_{all_timestamps}.pkl"
+    dalys_outcomes_path = outcomes_data_path / f"dalys_outcomes_{all_timestamps}.pkl"
+    pop_sizes_outcomes_path = outcomes_data_path / f"pop_sizes_outcomes_{all_timestamps}.pkl"
+
+    # Extract or load birth outcomes
+    birth_outcomes_dict = _load_or_compute_outcomes(
+        birth_outcomes_path, force_calculation[0], "birth outcomes", "birth outcomes",
+        lambda: {
+            interv: util_fncs.extract_birth_data_frames_and_outcomes(
+                iterv_folders_dict[interv], datayears, interv_datayears, interv
+            )
+            for interv in scenario_folders
+        }
+    )
+
+    # Extract or load death outcomes
+    def _extract_death_outcomes():
+        # status quo outcomes are extracted first, as they are passed on when extracting all interventions
+        sq_deaths = util_fncs.extract_death_data_frames_and_outcomes(
+            iterv_folders_dict['SQ'], birth_outcomes_dict['SQ']["births_df"], datayears, interventionyears, 'SQ'
+        )
+        return {
+            interv: util_fncs.extract_death_data_frames_and_outcomes(
+                iterv_folders_dict[interv], birth_outcomes_dict[interv]['births_df'], datayears, interventionyears,
+                interv, sq_deaths
+            ) for interv in scenario_folders
+        }
+    death_outcomes_dict = _load_or_compute_outcomes(
+        death_outcomes_path, force_calculation[1], "death outcomes", "death outcomes", _extract_death_outcomes
+    )
+
+    # Extract or load dalys outcomes
+    def _extract_dalys_outcomes():
+        sq_dalys = util_fncs.extract_daly_data_frames_and_outcomes(
+            iterv_folders_dict['SQ'], datayears, interventionyears, 'SQ'
+        )
+        return {
+            interv: util_fncs.extract_daly_data_frames_and_outcomes(
+                iterv_folders_dict[interv], datayears, interventionyears, interv, sq_dalys
+            ) for interv in scenario_folders
+        }
+    dalys_outcomes_dict = _load_or_compute_outcomes(
+        dalys_outcomes_path, force_calculation[2], "dalys outcomes", "dalys outcomes for intervention period",
+        _extract_dalys_outcomes
+    )
+
+    # Extract or load pop sizes outcomes
+    pop_sizes_outcomes_dict = _load_or_compute_outcomes(
+        pop_sizes_outcomes_path, force_calculation[6], "pop sizes outcomes",
+        "pop sizes outcomes for intervention period",
+        lambda: {
+            interv: util_fncs.extract_pop_sizes_data_frames_and_outcomes(
+                iterv_folders_dict[interv], datayears, interventionyears, interv
+            ) for interv in scenario_folders
+        }
+    )
+
+    # --------------------------------------------- Main Analyses Plots --------------------------------------------- #
+    # Prepare scenarios_tocompare_prefix
+    scenarios_tocompare_sq_shorten = [
+        'SQ' if scenario == 'Status Quo' else scenario for scenario in scenarios_tocompare
+    ]
+    if len(scenarios_tocompare_sq_shorten) > 4:
+        scenarios_tocompare_prefix = f"_{len(scenarios_tocompare_sq_shorten)}scenarios_inclSQ"
+    else:
+        scenarios_tocompare_prefix = "_".join(scenarios_tocompare_sq_shorten)
+    # Prepare timestamps_scenarios_comparison_suffix
+    if len(intervsall) > 4:
+        # too many interventions to list all timestamps, the status quo timestamp is used instead
+        suffix_timestamps = [interv_timestamps_dict['SQ']]
+    else:
+        suffix_timestamps = [
+            interv_timestamps_dict[interv] for interv in intervsall
+            if any(scenario.startswith(interv) for scenario in scenarios_tocompare)
+        ]
+    if 'Status Quo' in scenarios_tocompare:
+        suffix_timestamps.append(interv_timestamps_dict['SQ'])
+    timestamps_scenarios_comparison_suffix = "_".join(suffix_timestamps)
+
+    for cohort in cohorts_to_plot:
+        print(f"\nplotting {cohort} outcomes ...")
+        print(" plotting mortality rates ...")
+        util_fncs.plot_mortality_rate__by_interv_multiple_settings(
+            cohort, interv_timestamps_dict, scenarios_dict, intervs_ofinterest, plotyears, death_outcomes_dict,
+            outputspath
+        )
+        print(" plotting mean deaths ...")
+        util_fncs.plot_mean_outcome_and_CIs__scenarios_comparison(
+            cohort, scenarios_dict, scenarios_tocompare, plotyears, "deaths", death_outcomes_dict,
+            outputspath, scenarios_tocompare_prefix, timestamps_scenarios_comparison_suffix
+        )
+        util_fncs.plot_mean_outcome_and_CIs__scenarios_comparison(
+            cohort, scenarios_dict, scenarios_tocompare, plotyears, "deaths_with_SAM", death_outcomes_dict,
+            outputspath, scenarios_tocompare_prefix, timestamps_scenarios_comparison_suffix
+        )
+        print(" plotting sum of deaths ...")
+        util_fncs.plot_sum_outcome_and_CIs_intervention_period(
+            cohort, scenarios_dict, scenarios_tocompare, "deaths", death_outcomes_dict,
+            outputspath, scenarios_tocompare_prefix, timestamps_scenarios_comparison_suffix, interv_timestamps_dict,
+            birth_outcomes_dict, pop_sizes_outcomes_dict, force_calculation
+        )
+        util_fncs.plot_sum_outcome_and_CIs_intervention_period(
+            cohort, scenarios_dict, scenarios_tocompare, "deaths_with_SAM", death_outcomes_dict,
+            outputspath, scenarios_tocompare_prefix, timestamps_scenarios_comparison_suffix, interv_timestamps_dict,
+            birth_outcomes_dict, pop_sizes_outcomes_dict, force_calculation
+        )
+        print(" plotting mean DALYs ...")
+        util_fncs.plot_mean_outcome_and_CIs__scenarios_comparison(
+            cohort, scenarios_dict, scenarios_tocompare, plotyears, "DALYs", dalys_outcomes_dict,
+            outputspath, scenarios_tocompare_prefix, timestamps_scenarios_comparison_suffix
+        )
+        print(" plotting sum of DALYs ...")
+        util_fncs.plot_sum_outcome_and_CIs_intervention_period(
+            cohort, scenarios_dict, scenarios_tocompare, "DALYs", dalys_outcomes_dict,
+            outputspath, scenarios_tocompare_prefix, timestamps_scenarios_comparison_suffix, interv_timestamps_dict,
+            birth_outcomes_dict, pop_sizes_outcomes_dict, force_calculation
+        )
+
+    # --------------------- Create a PDF to save all figures and save each page also as PNG file --------------------- #
+    # Create cohort prefix
+    cohort_prefix = "_".join(
+        ["Neo" if cohort == "Neonatal" else "Under5" if cohort == "Under-5" else cohort for cohort in cohorts_to_plot]
+    )
+    # Create interventions prefix and timestamps_intervs_plotted suffix
+    intervs_ofinterest_prefix = "_".join(intervs_ofinterest)  # mortality rates - multiple settings of Interventions
+    intervs_plotted = [
+        interv for interv in intervsall
+        if interv in intervs_ofinterest
+        or any(scenario.startswith(interv) for scenario in scenarios_tocompare_sq_shorten)
+    ]
+    timestamps_intervs_plotted = "_".join(interv_timestamps_dict[interv] for interv in intervs_plotted)
+
+    # The plots were saved to outputspath by the plotting functions above, hence they are read from there and the
+    # PDF and the page PNG files are saved there as well
+    pdf_path = outputspath / (
+        f"{cohort_prefix}_{intervs_ofinterest_prefix}_interventions__{scenarios_tocompare_prefix}_scenarios_"
+        f"{timestamps_intervs_plotted}.pdf"
+    )
+    interv_titles = {
+        'GM': 'Growth Monitoring attendance (GM)',
+        'FS': 'Food Supplements availability (FS)',
+        'CS': 'Care-Seeking in MAM cases (CS)',
+        'SQ': 'SQ',
+    }
+    with PdfPages(pdf_path) as pdf:
+        # Outcome 1: figures with mortality rates for each interv of interest, comparing different settings
+        for page_start in range(0, len(intervs_ofinterest), 2):
+            page_intervs = intervs_ofinterest[page_start:page_start + 2]
+            # squeeze=False keeps axes1 a 2D array so indexing axes1[i, j] always works
+            fig1, axes1 = plt.subplots(len(page_intervs), len(cohorts_to_plot), figsize=(12, 12), squeeze=False)
+            for i, interv in enumerate(page_intervs):
+                for j, cohort in enumerate(cohorts_to_plot):
+                    if interv == 'SQ':
+                        mort_rate_png_file_path = outputspath / (
+                            f"{cohort}_mort_rate_{interv}_UNICEF_WPP__"
+                            f"{interv_timestamps_dict[interv]}.png"
+                        )
+                    else:
+                        mort_rate_png_file_path = outputspath / (
+                            f"{cohort}_mort_rate_{interv}_multiple_settings__"
+                            f"{interv_timestamps_dict[interv]}__{interv_timestamps_dict['SQ']}.png"
+                        )
+                    interv_title = interv_titles.get(interv, 'n/a')
+                    if mort_rate_png_file_path.exists():
+                        img = plt.imread(mort_rate_png_file_path)
+                        axes1[i, j].imshow(img)
+                        axes1[i, j].axis('off')
+                        axes1[i, j].set_title(f"{interv_title}", fontsize=10)
+            pdf.savefig(fig1)  # Save the current page to the PDF
+            fig1_png_file_path = outputspath / (
+                f"{cohort_prefix}_mortality_rates_{'_'.join(page_intervs)}__"
+                f"{'_'.join(interv_timestamps_dict[interv] for interv in page_intervs)}"
+                ".png"
+            )
+            fig1.savefig(fig1_png_file_path, dpi=300, bbox_inches='tight')  # Save as PNG
+            plt.close('all')
+
+        # Outcomes 2-7: pages composed of the scenarios comparison plots saved above
+        all_causes = ['any cause', 'SAM', 'ALRI', 'Diarrhoea']
+        with_sam_causes = ['ALRI', 'Diarrhoea']
+        # the DALYs pages do not include the Neonatal cohort
+        cohorts_excl_neonatal = [c for c in cohorts_to_plot if c != "Neonatal"]
+        name_tail = f"__{scenarios_tocompare_prefix}__{timestamps_scenarios_comparison_suffix}.png"
+        # (cohorts, causes, panel file name stem, page file name stem, causes per page)
+        grid_pages = [
+            # Outcome 2: figures with mean deaths and CI, scenarios comparison
+            (cohorts_to_plot, all_causes,
+             "mean_{cause}_deaths_CI_scenarios_comparison", "mean_deaths_comparison", 2),
+            (cohorts_to_plot, with_sam_causes,
+             "mean_{cause}_deaths_with_SAM_CI_scenarios_comparison", "mean_deaths_with_SAM_comparison", 1),
+            # Outcome 3: figures with sum of deaths and CI, scenarios comparison
+            (cohorts_to_plot, all_causes,
+             "sum_{cause}_deaths_CI_intervention_period_scenarios_comparison", "sum_deaths_comparison", 2),
+            (cohorts_to_plot, with_sam_causes,
+             "sum_{cause}_deaths_with_SAM_CI_intervention_period_scenarios_comparison",
+             "sum_deaths_with_SAM_comparison", 1),
+            # Outcome 4: figures with mean DALYs and CI, scenarios comparison
+            (cohorts_excl_neonatal, all_causes,
+             "mean_{cause}_DALYs_CI_scenarios_comparison", "mean_DALYs_comparison", 2),
+            # Outcome 5: figures with sum of DALYs and CI, scenarios comparison
+            (cohorts_excl_neonatal, all_causes,
+             "sum_{cause}_DALYs_CI_intervention_period_scenarios_comparison", "sum_DALYs_comparison", 2),
+            # Outcome 6: figures with averted sum of deaths and CI, scenarios comparison to SQ
+            (cohorts_to_plot, all_causes,
+             "sum_averted_{cause}_deaths_CI_intervention_period_scenarios_comparison",
+             "averted_sum_deaths_comparison", 2),
+            # Outcome 7: figures with averted sum of DALYs and CI, scenarios comparison;
+            # saved under its own page name so that the Outcome 5 pages are not overwritten
+            (cohorts_excl_neonatal, all_causes,
+             "sum_averted_{cause}_DALYs_CI_intervention_period_scenarios_comparison",
+             "averted_sum_DALYs_comparison", 2),
+        ]
+        for cohorts, causes, panel_stem, page_stem, causes_per_page in grid_pages:
+            _save_png_grid_pages(
+                pdf, outputspath, cohorts, causes, panel_stem, page_stem, cohort_prefix, name_tail, causes_per_page
+            )
+
+        # Outcome 8: cost-effectiveness sensitivity plot
+        cost_effectiveness_png_path = outputspath / (
+            f"{cohort_prefix}_cost_effectiveness_sensitivity_grid__"
+            f"{scenarios_tocompare_prefix}__{timestamps_scenarios_comparison_suffix}.png"
+        )
+        if cost_effectiveness_png_path.exists():
+            # Read image and set figure size to match pixel dimensions so embedding keeps original quality
+            img = plt.imread(cost_effectiveness_png_path)
+            h, w = img.shape[0], img.shape[1]
+            target_dpi = 300
+            figsize = (w / target_dpi, h / target_dpi)
+            fig_ce = plt.figure(figsize=figsize, dpi=target_dpi)
+            ax_ce = fig_ce.add_axes([0, 0, 1, 1])
+            ax_ce.imshow(img, interpolation='nearest', aspect='auto')
+            ax_ce.axis('off')
+            pdf.savefig(fig_ce, dpi=target_dpi, bbox_inches='tight', pad_inches=0)
+            plt.close(fig_ce)
+        plt.close('all')
+
+# --------------------------------------- Behind the scene Analyses Plots --------------------------------------- #
+def run_behind_the_scene_analysis_wasting(
+    outputspath: Path,
plotyears: list, + interventionyears: list, + intervs_ofinterest: list, + scenariosdict +) -> None: + """ + Loads or extracts treatment outcomes for behind-the-scenes analysis. + """ + + datayears = [year-1 for year in plotyears] + + print("\n----------------------------") + print("--- BEHIND-THE-SCENE ANALYSES ---") + iterv_folders_dict = { + interv: get_scenario_outputs( + scenario_filename_prefix, Path(interv_scenarios_folder_path / interv) + )[-1] for interv in intervs_ofinterest + } + interv_timestamps_dict = { + interv: get_scenario_outputs( + scenario_filename_prefix, Path(interv_scenarios_folder_path / interv) + )[-1].name.split(f"{scenario_filename_prefix}_{interv}-")[-1] + for interv in intervs_ofinterest + } + print(f"\n{interv_timestamps_dict=}") + # Define folders for each scenario + scenario_folders = { + interv: { + scen_name: Path(iterv_folders_dict[interv] / str(scen_draw_nmb)) + for scen_name, scen_draw_nmb in scenariosdict[interv].items() + } + for interv in intervs_ofinterest + } + + info_pickles_file_path = outputspath / "outcomes_data/pickles_regenerated.pkl" + regenerate_pickles_bool = False + if info_pickles_file_path.exists(): + print("\nloading pickles_regenerated_df from file ...") + with info_pickles_file_path.open("rb") as f: + pickles_regenerated_df = pickle.load(f) + else: + pickles_regenerated_df = pd.DataFrame(columns=["interv", "timestamp"]) + # check all are already regenerated, if any is not regenerate them all and add the timestamps to the df + for interv, timestamp in interv_timestamps_dict.items(): + if not ( + (pickles_regenerated_df["interv"] == interv) & (pickles_regenerated_df["timestamp"] == timestamp) + ).any(): + regenerate_pickles_bool = True + pickles_regenerated_df = pd.concat([ + pickles_regenerated_df, + pd.DataFrame({"interv": [interv], "timestamp": [timestamp]}) + ], ignore_index=True) + + if regenerate_pickles_bool: + print("saving pickles_regenerated_df to file ...") + with info_pickles_file_path.open("wb") as 
f: + pickle.dump(pickles_regenerated_df, f) + print("\nRegenerating pickles with debug logs ...") + util_fncs.regenerate_pickles_with_debug_logs(iterv_folders_dict) + + pd.set_option('display.max_columns', None) # Show all columns + pd.set_option('display.max_rows', None) # Show all rows + pd.set_option('display.max_colwidth', None) # Show full content of each row + + tx_outcomes_path = \ + (outputspath / + f"outcomes_data/tx_outcomes_{'_'.join(iterv_folders_dict[interv].name for interv in scenario_folders)}.pkl") + + # Extract or load treatment outcomes + if tx_outcomes_path.exists() and not force_calculation[3]: + print("\nloading tx outcomes from file ...") + with tx_outcomes_path.open("rb") as f: + tx_outcomes_dict = pickle.load(f) + else: + print("\ntx outcomes calculation ...") + tx_outcomes_dict = { + interv: util_fncs.extract_tx_data_frames( + iterv_folders_dict[interv], datayears, interventionyears, interv + ) for interv in scenario_folders + } + print("saving tx outcomes to file ...") + with tx_outcomes_path.open("wb") as f: + pickle.dump(tx_outcomes_dict, f) + + # Further analysis and plotting will be added here + # TODO: rm + # print("\nTX OUTCOMES") + # for interv in tx_outcomes_dict.keys(): + # print(f"### {interv=}") + # for outcome in tx_outcomes_dict[interv]: + # print(f"{outcome}:\n{tx_outcomes_dict[interv][outcome]}") + + # print(" plotting mean nmbs of tx...") + # util_fncs.plot_mean_tx_and_CIs__scenarios_comparison() + +# ---------------- # +# RUN THE ANALYSIS # +# ---------------- # +run_interventions_analysis_wasting(outputs_path, plot_years, intervention_years, intervs_of_interest, + scenarios_to_compare, intervs_all) +# run_behind_the_scene_analysis_wasting(outputs_path, plot_years, intervention_years, intervs_of_interest, +# scenarios_dict) + +total_time_end = time.time() +print(f"\ntotal running time (s): {(total_time_end - total_time_start)}") diff --git a/src/scripts/wasting_analyses/scenarios/100K/scenario_wasting_full_model_CS.py 
class WastingAnalysis(BaseScenario):
    """Full-model wasting scenario scaling up care-seeking (CS): 100,000 population, 1 draw, 100 runs."""

    def __init__(self):
        # Collect the run settings first, then hand them to the base scenario in one go.
        run_settings = dict(
            seed=0,
            start_date=Date(year=2010, month=1, day=1),
            end_date=Date(year=2031, month=1, day=2),
            initial_population_size=100_000,
            number_of_draws=1,
            runs_per_draw=100,
        )
        super().__init__(**run_settings)

    def log_configuration(self):
        """Return the log file name, output directory and per-logger levels."""
        # Customise the output of specific loggers
        logger_levels = {
            "tlo.methods.demography": logging.INFO,
            "tlo.methods.healthburden": logging.INFO,
            'tlo.methods.healthsystem.summary': logging.INFO,
            "tlo.methods.population": logging.INFO,
            "tlo.methods.wasting": logging.DEBUG,
            '*': logging.WARNING
        }
        log_config = {
            'filename': 'wasting_analysis__full_model_CS',
            'directory': './outputs/wasting_analysis',
            "custom_levels": logger_levels,
        }
        return log_config

    def modules(self):
        """Return the modules of the full model."""
        return fullmodel()

    # Scaling up care-seeking (CS) scenarios
    def draw_parameters(self, draw_number, rng):
        """Return status quo parameters with the MAM awareness probability of this draw mixed in."""
        ### prob of symptoms awareness in MAM cases leading to care-seeking
        # awareness_prob = [0.1, 0.3, 0.5, 1.0]
        awareness_prob = [1.0, 1.0]

        status_quo_params = get_parameters_for_status_quo()
        wasting_overrides = {'interv_awareness_MAM_prob': awareness_prob[draw_number]}
        return mix_scenarios(status_quo_params, {'Wasting': wasting_overrides})
class WastingAnalysis(BaseScenario):
    """Full-model wasting scenario scaling up food supplements availability (FS): 100,000 population."""

    def __init__(self):
        # Collect the run settings first, then hand them to the base scenario in one go.
        run_settings = dict(
            seed=0,
            start_date=Date(year=2010, month=1, day=1),
            end_date=Date(year=2031, month=1, day=2),
            initial_population_size=100_000,
            number_of_draws=1,
            runs_per_draw=100,
        )
        super().__init__(**run_settings)

    def log_configuration(self):
        """Return the log file name, output directory and per-logger levels."""
        # Customise the output of specific loggers
        logger_levels = {
            "tlo.methods.demography": logging.INFO,
            "tlo.methods.healthburden": logging.INFO,
            'tlo.methods.healthsystem.summary': logging.INFO,
            "tlo.methods.population": logging.INFO,
            "tlo.methods.wasting": logging.DEBUG,
            '*': logging.WARNING
        }
        log_config = {
            'filename': 'wasting_analysis__full_model_FS',
            'directory': './outputs/wasting_analysis',
            "custom_levels": logger_levels,
        }
        return log_config

    def modules(self):
        """Return the modules of the full model."""
        return fullmodel()

    # Scaling up Food Supplements availability (FS) scenarios
    def draw_parameters(self, draw_number, rng):
        """Return status quo parameters with the food-supplement availability of this draw mixed in."""
        avail_prob = [1.0, 1.0]

        status_quo_params = get_parameters_for_status_quo()
        availability = avail_prob[draw_number]
        wasting_overrides = {'interv_food_supplements_avail_bool': True}
        # The same availability is applied to each of the three supplements.
        for supplement_param in ('interv_avail_F75milk', 'interv_avail_RUTF', 'interv_avail_CSB++'):
            wasting_overrides[supplement_param] = availability
        return mix_scenarios(status_quo_params, {'Wasting': wasting_overrides})
class WastingAnalysis(BaseScenario):
    """Full-model wasting scenario scaling up growth monitoring (GM) attendance: 100,000 population."""

    def __init__(self):
        # Collect the run settings first, then hand them to the base scenario in one go.
        run_settings = dict(
            seed=0,
            start_date=Date(year=2010, month=1, day=1),
            end_date=Date(year=2031, month=1, day=2),
            initial_population_size=100_000,
            number_of_draws=1,
            runs_per_draw=100,
        )
        super().__init__(**run_settings)

    def log_configuration(self):
        """Return the log file name, output directory and per-logger levels."""
        # Customise the output of specific loggers
        logger_levels = {
            "tlo.methods.demography": logging.INFO,
            "tlo.methods.healthburden": logging.INFO,
            'tlo.methods.healthsystem.summary': logging.INFO,
            "tlo.methods.population": logging.INFO,
            "tlo.methods.wasting": logging.DEBUG,
            '*': logging.WARNING
        }
        log_config = {
            'filename': 'wasting_analysis__full_model_GM',
            'directory': './outputs/wasting_analysis',
            "custom_levels": logger_levels,
        }
        return log_config

    def modules(self):
        """Return the modules of the full model."""
        return fullmodel()

    # Scaling up growth monitoring (GM) attendance scenarios
    def draw_parameters(self, draw_number, rng):
        """Return status quo parameters with the growth-monitoring attendance of this draw mixed in."""
        ### growth_monitoring_attendance_probs by age categories
        # < 1 year, 1-2 years, and > 2 years
        attendance_probs_by_agecat = [
            # [0.76, 0.20, 0.55],
            # [0.76, 0.25, 0.50],
            [0.76, 1.00, 1.00],
            [0.76, 1.00, 1.00]
        ]

        status_quo_params = get_parameters_for_status_quo()
        attendance_for_draw = attendance_probs_by_agecat[draw_number]
        wasting_overrides = {'interv_growth_monitoring_attendance_prob_agecat': attendance_for_draw}
        return mix_scenarios(status_quo_params, {'Wasting': wasting_overrides})
class WastingAnalysis(BaseScenario):
    """Full-model wasting scenario scaling up growth monitoring attendance and care-seeking (GM & CS)."""

    def __init__(self):
        # Collect the run settings first, then hand them to the base scenario in one go.
        run_settings = dict(
            seed=0,
            start_date=Date(year=2010, month=1, day=1),
            end_date=Date(year=2031, month=1, day=2),
            initial_population_size=100_000,
            number_of_draws=1,
            runs_per_draw=100,
        )
        super().__init__(**run_settings)

    def log_configuration(self):
        """Return the log file name, output directory and per-logger levels."""
        # Customise the output of specific loggers
        logger_levels = {
            "tlo.methods.demography": logging.INFO,
            "tlo.methods.healthburden": logging.INFO,
            'tlo.methods.healthsystem.summary': logging.INFO,
            "tlo.methods.population": logging.INFO,
            "tlo.methods.wasting": logging.DEBUG,
            '*': logging.WARNING
        }
        log_config = {
            'filename': 'wasting_analysis__full_model_GM_CS',
            'directory': './outputs/wasting_analysis',
            "custom_levels": logger_levels,
        }
        return log_config

    def modules(self):
        """Return the modules of the full model."""
        return fullmodel()

    # Scaling up growth monitoring attendance & Care-Seeking (GM & CS) scenarios
    def draw_parameters(self, draw_number, rng):
        """Return status quo parameters with this draw's GM attendance and MAM awareness mixed in."""
        ### growth_monitoring_attendance_probs by age categories
        # < 1 year, 1-2 years, and > 2 years
        attendance_probs_by_agecat = [
            # [0.76, 0.20, 0.55],
            # [0.76, 0.25, 0.50],
            [0.76, 1.00, 1.00],
            [0.76, 1.00, 1.00]
        ]
        awareness_prob = [1.0, 1.0]

        status_quo_params = get_parameters_for_status_quo()
        wasting_overrides = {}
        wasting_overrides['interv_growth_monitoring_attendance_prob_agecat'] = \
            attendance_probs_by_agecat[draw_number]
        wasting_overrides['interv_awareness_MAM_prob'] = awareness_prob[draw_number]
        return mix_scenarios(status_quo_params, {'Wasting': wasting_overrides})
of specific loggers + "tlo.methods.demography": logging.INFO, + "tlo.methods.healthburden": logging.INFO, + 'tlo.methods.healthsystem.summary': logging.INFO, + "tlo.methods.population": logging.INFO, + "tlo.methods.wasting": logging.DEBUG, + '*': logging.WARNING + } + } + + def modules(self): + return fullmodel() + + # Scaling up Growth Monitoring attendance & Care-Seeking & Food Supplements availability (GM & CS & FS) scenarios + def draw_parameters(self, draw_number, rng): + ### growth_monitoring_attendance_probs by age categories + # < 1 year, 1-2 years, and > 2 years + attendance_probs_by_agecat = [ + # [0.76, 0.20, 0.55], + # [0.76, 0.25, 0.50], + [0.76, 1.00, 1.00], + [0.76, 1.00, 1.00] + ] + awareness_prob = [1.0, 1.0] + avail_prob = [1.0, 1.0] + + return mix_scenarios( + get_parameters_for_status_quo(), + { + 'Wasting': { + 'interv_growth_monitoring_attendance_prob_agecat': attendance_probs_by_agecat[draw_number], + 'interv_awareness_MAM_prob': awareness_prob[draw_number], + 'interv_food_supplements_avail_bool': True, + 'interv_avail_F75milk': avail_prob[draw_number], + 'interv_avail_RUTF': avail_prob[draw_number], + 'interv_avail_CSB++': avail_prob[draw_number], + } + } + ) + +if __name__ == '__main__': + from tlo.cli import scenario_run + + scenario_run([__file__]) diff --git a/src/scripts/wasting_analyses/scenarios/100K/scenario_wasting_full_model_GM_FS.py b/src/scripts/wasting_analyses/scenarios/100K/scenario_wasting_full_model_GM_FS.py new file mode 100644 index 0000000000..1368e8021e --- /dev/null +++ b/src/scripts/wasting_analyses/scenarios/100K/scenario_wasting_full_model_GM_FS.py @@ -0,0 +1,81 @@ +""" +This file defines a scenario for wasting analysis. 
class WastingAnalysis(BaseScenario):
    """Full-model wasting scenario scaling up GM attendance and food supplements availability (GM & FS)."""

    def __init__(self):
        # Collect the run settings first, then hand them to the base scenario in one go.
        run_settings = dict(
            seed=0,
            start_date=Date(year=2010, month=1, day=1),
            end_date=Date(year=2031, month=1, day=2),
            initial_population_size=100_000,
            number_of_draws=1,
            runs_per_draw=100,
        )
        super().__init__(**run_settings)

    def log_configuration(self):
        """Return the log file name, output directory and per-logger levels."""
        # Customise the output of specific loggers
        logger_levels = {
            "tlo.methods.demography": logging.INFO,
            "tlo.methods.healthburden": logging.INFO,
            'tlo.methods.healthsystem.summary': logging.INFO,
            "tlo.methods.population": logging.INFO,
            "tlo.methods.wasting": logging.DEBUG,
            '*': logging.WARNING
        }
        log_config = {
            'filename': 'wasting_analysis__full_model_GM_FS',
            'directory': './outputs/wasting_analysis',
            "custom_levels": logger_levels,
        }
        return log_config

    def modules(self):
        """Return the modules of the full model."""
        return fullmodel()

    # Scaling up growth monitoring attendance & Food Supplements availability (GM & FS) scenarios
    def draw_parameters(self, draw_number, rng):
        """Return status quo parameters with this draw's GM attendance and supplement availability mixed in."""
        ### growth_monitoring_attendance_probs by age categories
        # < 1 year, 1-2 years, and > 2 years
        attendance_probs_by_agecat = [
            # [0.76, 0.20, 0.55],
            # [0.76, 0.25, 0.50],
            [0.76, 1.00, 1.00],
            [0.76, 1.00, 1.00]
        ]
        avail_prob = [1.0, 1.0]

        status_quo_params = get_parameters_for_status_quo()
        wasting_overrides = {
            'interv_growth_monitoring_attendance_prob_agecat': attendance_probs_by_agecat[draw_number],
            'interv_food_supplements_avail_bool': True,
        }
        availability = avail_prob[draw_number]
        # The same availability is applied to each of the three supplements.
        for supplement_param in ('interv_avail_F75milk', 'interv_avail_RUTF', 'interv_avail_CSB++'):
            wasting_overrides[supplement_param] = availability
        return mix_scenarios(status_quo_params, {'Wasting': wasting_overrides})
class WastingAnalysis(BaseScenario):
    """Full-model wasting scenario scaling up care-seeking (CS): 250,000 population, 1 draw, 10 runs."""

    def __init__(self):
        # Collect the run settings first, then hand them to the base scenario in one go.
        run_settings = dict(
            seed=0,
            start_date=Date(year=2010, month=1, day=1),
            end_date=Date(year=2031, month=1, day=2),
            initial_population_size=250_000,
            number_of_draws=1,
            runs_per_draw=10,
        )
        super().__init__(**run_settings)

    def log_configuration(self):
        """Return the log file name, output directory and per-logger levels."""
        # Customise the output of specific loggers
        logger_levels = {
            "tlo.methods.demography": logging.INFO,
            "tlo.methods.healthburden": logging.INFO,
            'tlo.methods.healthsystem.summary': logging.INFO,
            "tlo.methods.population": logging.INFO,
            "tlo.methods.wasting": logging.DEBUG,
            '*': logging.WARNING
        }
        log_config = {
            'filename': 'wasting_analysis__full_model_CS',
            'directory': './outputs/wasting_analysis',
            "custom_levels": logger_levels,
        }
        return log_config

    def modules(self):
        """Return the modules of the full model."""
        return fullmodel()

    # Scaling up care-seeking (CS) scenarios
    def draw_parameters(self, draw_number, rng):
        """Return status quo parameters with the MAM awareness probability of this draw mixed in."""
        ### prob of symptoms awareness in MAM cases leading to care-seeking
        # awareness_prob = [0.1, 0.3, 0.5, 1.0]
        awareness_prob = [1.0, 1.0]

        status_quo_params = get_parameters_for_status_quo()
        wasting_overrides = {'interv_awareness_MAM_prob': awareness_prob[draw_number]}
        return mix_scenarios(status_quo_params, {'Wasting': wasting_overrides})
logging.INFO, + "tlo.methods.healthburden": logging.INFO, + 'tlo.methods.healthsystem.summary': logging.INFO, + "tlo.methods.population": logging.INFO, + "tlo.methods.wasting": logging.DEBUG, + '*': logging.WARNING + } + } + + def modules(self): + return fullmodel() + + # Scaling up Food Supplements availability (FS) scenarios + def draw_parameters(self, draw_number, rng): + avail_prob = [1.0, 1.0] + + return mix_scenarios( + get_parameters_for_status_quo(), + { + 'Wasting': { + 'interv_food_supplements_avail_bool': True, + 'interv_avail_F75milk': avail_prob[draw_number], + 'interv_avail_RUTF': avail_prob[draw_number], + 'interv_avail_CSB++': avail_prob[draw_number], + } + } + ) + +if __name__ == '__main__': + from tlo.cli import scenario_run + + scenario_run([__file__]) diff --git a/src/scripts/wasting_analyses/scenarios/250K/scenario_wasting_full_model_GM.py b/src/scripts/wasting_analyses/scenarios/250K/scenario_wasting_full_model_GM.py new file mode 100644 index 0000000000..6eba5e286e --- /dev/null +++ b/src/scripts/wasting_analyses/scenarios/250K/scenario_wasting_full_model_GM.py @@ -0,0 +1,76 @@ +""" +This file defines a scenario for wasting analysis. 
class WastingAnalysis(BaseScenario):
    """Full-model wasting scenario scaling up growth monitoring (GM) attendance: 250,000 population."""

    def __init__(self):
        # Collect the run settings first, then hand them to the base scenario in one go.
        run_settings = dict(
            seed=0,
            start_date=Date(year=2010, month=1, day=1),
            end_date=Date(year=2031, month=1, day=2),
            initial_population_size=250_000,
            number_of_draws=1,
            runs_per_draw=10,
        )
        super().__init__(**run_settings)

    def log_configuration(self):
        """Return the log file name, output directory and per-logger levels."""
        # Customise the output of specific loggers
        logger_levels = {
            "tlo.methods.demography": logging.INFO,
            "tlo.methods.healthburden": logging.INFO,
            'tlo.methods.healthsystem.summary': logging.INFO,
            "tlo.methods.population": logging.INFO,
            "tlo.methods.wasting": logging.DEBUG,
            '*': logging.WARNING
        }
        log_config = {
            'filename': 'wasting_analysis__full_model_GM',
            'directory': './outputs/wasting_analysis',
            "custom_levels": logger_levels,
        }
        return log_config

    def modules(self):
        """Return the modules of the full model."""
        return fullmodel()

    # Scaling up growth monitoring (GM) attendance scenarios
    def draw_parameters(self, draw_number, rng):
        """Return status quo parameters with the growth-monitoring attendance of this draw mixed in."""
        ### growth_monitoring_attendance_probs by age categories
        # < 1 year, 1-2 years, and > 2 years
        attendance_probs_by_agecat = [
            # [0.76, 0.20, 0.55],
            # [0.76, 0.25, 0.50],
            [0.76, 1.00, 1.00],
            [0.76, 1.00, 1.00]
        ]

        status_quo_params = get_parameters_for_status_quo()
        attendance_for_draw = attendance_probs_by_agecat[draw_number]
        wasting_overrides = {'interv_growth_monitoring_attendance_prob_agecat': attendance_for_draw}
        return mix_scenarios(status_quo_params, {'Wasting': wasting_overrides})
class WastingAnalysis(BaseScenario):
    """Full-model wasting scenario keeping Status Quo (SQ) parameters: 250,000 population, 1 draw, 10 runs."""

    def __init__(self):
        # Collect the run settings first, then hand them to the base scenario in one go.
        run_settings = dict(
            seed=0,
            start_date=Date(year=2010, month=1, day=1),
            end_date=Date(year=2031, month=1, day=2),
            initial_population_size=250_000,
            number_of_draws=1,
            runs_per_draw=10,
        )
        super().__init__(**run_settings)

    def log_configuration(self):
        """Return the log file name, output directory and per-logger levels."""
        # Customise the output of specific loggers
        logger_levels = {
            "tlo.methods.demography": logging.INFO,
            "tlo.methods.healthburden": logging.INFO,
            'tlo.methods.healthsystem.summary': logging.INFO,
            "tlo.methods.population": logging.INFO,
            "tlo.methods.wasting": logging.DEBUG,
            '*': logging.WARNING
        }
        log_config = {
            'filename': 'wasting_analysis__full_model_SQ',
            'directory': './outputs/wasting_analysis',
            "custom_levels": logger_levels,
        }
        return log_config

    def modules(self):
        """Return the modules of the full model."""
        return fullmodel()

    # Keeping Status Quo parameters
    def draw_parameters(self, draw_number, rng):
        """Return the status quo parameters unchanged, whatever the draw."""
        status_quo_params = get_parameters_for_status_quo()
        return status_quo_params
a/src/scripts/wasting_analyses/scenarios/2K/scenario_wasting_full_model_CS_totestsubmission.py b/src/scripts/wasting_analyses/scenarios/2K/scenario_wasting_full_model_CS_totestsubmission.py new file mode 100644 index 0000000000..e46c02e091 --- /dev/null +++ b/src/scripts/wasting_analyses/scenarios/2K/scenario_wasting_full_model_CS_totestsubmission.py @@ -0,0 +1,71 @@ +""" +This file defines a scenario for wasting analysis. + +It can be submitted on Azure Batch by running: + + tlo batch-submit src/scripts/wasting_analyses/scenarios/2K/scenario_wasting_full_model_CS_totestsubmission.py + +or locally using: + + tlo scenario-run src/scripts/wasting_analyses/scenarios/2K/scenario_wasting_full_model_CS_totestsubmission.py +""" +# import itertools +import warnings + +from tlo import Date, logging +from tlo.analysis.utils import get_parameters_for_status_quo +from tlo.methods.fullmodel import fullmodel +from tlo.scenario import BaseScenario + +# capture warnings during simulation run +warnings.simplefilter('default', (UserWarning, RuntimeWarning)) + + +class WastingAnalysis(BaseScenario): + + def __init__(self): + super().__init__( + seed=0, + start_date=Date(year=2010, month=1, day=1), + end_date=Date(year=2011, month=1, day=1), + initial_population_size=2_000, + number_of_draws=2, + runs_per_draw=3, + ) + + def log_configuration(self): + return { + 'filename': 'wasting_analysis__full_model_CS', + 'directory': './outputs/wasting_analysis', + "custom_levels": { # Customise the output of specific loggers + "tlo.methods.demography": logging.INFO, + "tlo.methods.population": logging.INFO, + "tlo.methods.wasting": logging.INFO, + '*': logging.WARNING + } + } + + def modules(self): + return fullmodel(resourcefilepath=self.resources, + module_kwargs=get_parameters_for_status_quo()) + + # Scaling up care-seeking (CS) scenarios + def draw_parameters(self, draw_number, rng): + ### prob of care seeking for MAM cases + # care_seek_prob = [ + # 0.1, + # 0.3, + # 0.5 + # ] + awareness_prob = [1.0, 1.0] + + return { +
'Wasting': { + 'interv_awareness_MAM_prob': awareness_prob[draw_number] + } + } + +if __name__ == '__main__': + from tlo.cli import scenario_run + + scenario_run([__file__]) diff --git a/src/scripts/wasting_analyses/scenarios/2K/scenario_wasting_full_model_FS_2ys.py b/src/scripts/wasting_analyses/scenarios/2K/scenario_wasting_full_model_FS_2ys.py new file mode 100644 index 0000000000..0f9d89bd31 --- /dev/null +++ b/src/scripts/wasting_analyses/scenarios/2K/scenario_wasting_full_model_FS_2ys.py @@ -0,0 +1,73 @@ +""" +This file defines a scenario for wasting analysis. + +It can be submitted on Azure Batch by running: + + tlo batch-submit src/scripts/wasting_analyses/scenarios/2K/scenario_wasting_full_model_FS_2ys.py + +or locally using: + + tlo scenario-run src/scripts/wasting_analyses/scenarios/2K/scenario_wasting_full_model_FS_2ys.py +""" +# import itertools +import warnings + +from tlo import Date, logging +from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios +from tlo.methods.fullmodel import fullmodel +from tlo.scenario import BaseScenario + +# capture warnings during simulation run +warnings.simplefilter('default', (UserWarning, RuntimeWarning)) + + +class WastingAnalysis(BaseScenario): + + def __init__(self): + super().__init__( + seed=0, + start_date=Date(year=2010, month=1, day=1), + end_date=Date(year=2012, month=1, day=2), + initial_population_size=2_000, + number_of_draws=1, + runs_per_draw=10, + ) + + def log_configuration(self): + return { + 'filename': 'wasting_analysis__full_model_FS', + 'directory': './outputs/wasting_analysis', + "custom_levels": { # Customise the output of specific loggers + "tlo.methods.demography": logging.INFO, + "tlo.methods.healthburden": logging.INFO, + 'tlo.methods.healthsystem.summary': logging.INFO, + "tlo.methods.population": logging.INFO, + "tlo.methods.wasting": logging.DEBUG, + '*': logging.WARNING + } + } + + def modules(self): + return fullmodel() + + # Scaling up Food Supplements availability 
(FS) scenarios + def draw_parameters(self, draw_number, rng): + avail_prob = [1.0, 1.0] + + return mix_scenarios( + get_parameters_for_status_quo(), + { + 'Wasting': { + 'interv_start_year' : 2011, + 'interv_food_supplements_avail_bool': True, + 'interv_avail_F75milk': avail_prob[draw_number], + 'interv_avail_RUTF': avail_prob[draw_number], + 'interv_avail_CSB++': avail_prob[draw_number], + } + } + ) + +if __name__ == '__main__': + from tlo.cli import scenario_run + + scenario_run([__file__]) diff --git a/src/scripts/wasting_analyses/scenarios/2K/scenario_wasting_full_model_SQ_1y.py b/src/scripts/wasting_analyses/scenarios/2K/scenario_wasting_full_model_SQ_1y.py new file mode 100644 index 0000000000..7ca8d80056 --- /dev/null +++ b/src/scripts/wasting_analyses/scenarios/2K/scenario_wasting_full_model_SQ_1y.py @@ -0,0 +1,60 @@ +""" +This file defines a scenario for wasting analysis. + +It can be submitted on Azure Batch by running: + + tlo batch-submit src/scripts/wasting_analyses/scenarios/2K/scenario_wasting_full_model_SQ_1y.py + +or locally using: + + tlo scenario-run src/scripts/wasting_analyses/scenarios/2K/scenario_wasting_full_model_SQ_1y.py +""" +# import itertools +import warnings + +from tlo import Date, logging +from tlo.analysis.utils import get_parameters_for_status_quo +from tlo.methods.fullmodel import fullmodel +from tlo.scenario import BaseScenario + +# capture warnings during simulation run +warnings.simplefilter('default', (UserWarning, RuntimeWarning)) + + +class WastingAnalysis(BaseScenario): + + def __init__(self): + super().__init__( + seed=0, + start_date=Date(year=2010, month=1, day=1), + end_date=Date(year=2011, month=1, day=2), + initial_population_size=2_000, + number_of_draws=1, + runs_per_draw=4, + ) + + def log_configuration(self): + return { + 'filename': 'wasting_analysis__full_model_SQ', + 'directory': './outputs/wasting_analysis', + "custom_levels": { # Customise the output of specific loggers + "tlo.methods.demography":
logging.INFO, + "tlo.methods.healthburden": logging.INFO, + 'tlo.methods.healthsystem.summary': logging.INFO, + "tlo.methods.population": logging.INFO, + "tlo.methods.wasting": logging.DEBUG, + '*': logging.WARNING + } + } + + def modules(self): + return fullmodel() + + # Keeping Status Quo parameters + def draw_parameters(self, draw_number, rng): + return get_parameters_for_status_quo() + +if __name__ == '__main__': + from tlo.cli import scenario_run + + scenario_run([__file__]) diff --git a/src/scripts/wasting_analyses/scenarios/2K/scenario_wasting_full_model_SQ_2ys.py b/src/scripts/wasting_analyses/scenarios/2K/scenario_wasting_full_model_SQ_2ys.py new file mode 100644 index 0000000000..685b94a278 --- /dev/null +++ b/src/scripts/wasting_analyses/scenarios/2K/scenario_wasting_full_model_SQ_2ys.py @@ -0,0 +1,60 @@ +""" +This file defines a scenario for wasting analysis. + +It can be submitted on Azure Batch by running: + + tlo batch-submit src/scripts/wasting_analyses/scenarios/2K/scenario_wasting_full_model_SQ_2ys.py + +or locally using: + + tlo scenario-run src/scripts/wasting_analyses/scenarios/2K/scenario_wasting_full_model_SQ_2ys.py +""" +# import itertools +import warnings + +from tlo import Date, logging +from tlo.analysis.utils import get_parameters_for_status_quo +from tlo.methods.fullmodel import fullmodel +from tlo.scenario import BaseScenario + +# capture warnings during simulation run +warnings.simplefilter('default', (UserWarning, RuntimeWarning)) + + +class WastingAnalysis(BaseScenario): + + def __init__(self): + super().__init__( + seed=0, + start_date=Date(year=2010, month=1, day=1), + end_date=Date(year=2012, month=1, day=2), + initial_population_size=2_000, + number_of_draws=1, + runs_per_draw=10, + ) + + def log_configuration(self): + return { + 'filename': 'wasting_analysis__full_model_SQ', + 'directory': './outputs/wasting_analysis', + "custom_levels": { # Customise the output of specific loggers + "tlo.methods.demography": logging.INFO, +
"tlo.methods.healthburden": logging.INFO, + 'tlo.methods.healthsystem.summary': logging.INFO, + "tlo.methods.population": logging.INFO, + "tlo.methods.wasting": logging.DEBUG, + '*': logging.WARNING + } + } + + def modules(self): + return fullmodel() + + # Keeping Status Quo parameters + def draw_parameters(self, draw_number, rng): + return get_parameters_for_status_quo() + +if __name__ == '__main__': + from tlo.cli import scenario_run + + scenario_run([__file__]) diff --git a/src/scripts/wasting_analyses/scenarios/30K/calibration_scenario_wasting_minimal_model.py b/src/scripts/wasting_analyses/scenarios/30K/calibration_scenario_wasting_minimal_model.py new file mode 100644 index 0000000000..9387196e11 --- /dev/null +++ b/src/scripts/wasting_analyses/scenarios/30K/calibration_scenario_wasting_minimal_model.py @@ -0,0 +1,122 @@ +""" +This file defines a scenario for wasting analysis. + +It can be submitted on Azure Batch by running: + + tlo batch-submit src/scripts/wasting_analyses/scenarios/30K/calibration_scenario_wasting_minimal_model.py + +or locally using: + + tlo scenario-run src/scripts/wasting_analyses/scenarios/30K/calibration_scenario_wasting_minimal_model.py + + +After several iterations of simulations, a range of values for the calibrated parameters was identified and tested. +From these, the best-calibrated values were selected for the module.
+""" + + + +import itertools +import warnings + +from tlo import Date, logging +from tlo.methods import ( + alri, + demography, + diarrhoea, + enhanced_lifestyle, + epi, + healthburden, + healthseekingbehaviour, + healthsystem, + hiv, + simplified_births, + stunting, + symptommanager, + tb, + wasting, +) +from tlo.scenario import BaseScenario + +# capture warnings during simulation run +warnings.simplefilter('default', (UserWarning, RuntimeWarning)) + + +class WastingAnalysis(BaseScenario): + + def __init__(self): + super().__init__( + seed=0, + start_date=Date(year=2010, month=1, day=1), + end_date=Date(year=2031, month=1, day=1), + initial_population_size=30_000, + number_of_draws=81, + runs_per_draw=1, + ) + + def log_configuration(self): + return { + 'filename': 'wasting_analysis__minimal_model', + 'directory': './outputs/wasting_analysis', + "custom_levels": { # Customise the output of specific loggers + "tlo.methods.demography": logging.INFO, + "tlo.methods.population": logging.INFO, + "tlo.methods.wasting": logging.DEBUG, + '*': logging.WARNING + } + } + + def modules(self): + return [demography.Demography(resourcefilepath=self.resources), + healthsystem.HealthSystem(resourcefilepath=self.resources, + service_availability=['*'], use_funded_or_actual_staffing='actual', + mode_appt_constraints=1, + cons_availability='default', beds_availability='default', + equip_availability='all'), + healthseekingbehaviour.HealthSeekingBehaviour(resourcefilepath=self.resources), + healthburden.HealthBurden(resourcefilepath=self.resources), + symptommanager.SymptomManager(resourcefilepath=self.resources, spurious_symptoms=True), + enhanced_lifestyle.Lifestyle(resourcefilepath=self.resources), + simplified_births.SimplifiedBirths(resourcefilepath=self.resources), + hiv.Hiv(resourcefilepath=self.resources), + tb.Tb(resourcefilepath=self.resources), + epi.Epi(resourcefilepath=self.resources), + alri.Alri(resourcefilepath=self.resources), + 
diarrhoea.Diarrhoea(resourcefilepath=self.resources), + stunting.Stunting(resourcefilepath=self.resources), + wasting.Wasting(resourcefilepath=self.resources)] + + def draw_parameters(self, draw_number, rng): + base_death_rate_untreated_sam__draws = [0.04, 0.03, 0.02] + mod_wast_incidence__coef = [0.5, 0.4, 0.3] + # base mod wast incidence rate calibrated with bathtub model + base_overall_mod_wast_inc_rate_bathtub = 0.019 + # relative risks for age groups of mod wast incidence rates calibrated with bathtub model + # rr_inc_rate_wasting_by_agegp = [1.00, 1.22, 1.71, 0.30, 0.40, 0.26] --- as in RFWast/parameters + progression_to_sev_wast__coef = [0.75, 1.0, 1.25] + # progression rates to severe wast calibrated with bathtub model + progression_severe_wasting_monthly_props_by_agegp = [0.3082, 0.8614, 0.4229, 0.4337, 0.2508, 0.3321] + prob_death_after_SAMcare__as_prop_of_death_rate_untreated_sam = [0.05, 0.1, 0.15] + + pars_combinations = list(itertools.product( + base_death_rate_untreated_sam__draws, + mod_wast_incidence__coef, + progression_to_sev_wast__coef, + prob_death_after_SAMcare__as_prop_of_death_rate_untreated_sam + )) + + return { + 'Wasting': { + 'base_death_rate_untreated_SAM': pars_combinations[draw_number][0], + 'base_overall_inc_rate_wasting': base_overall_mod_wast_inc_rate_bathtub * pars_combinations[draw_number][1] , + 'progression_severe_wasting_monthly_by_agegp': [s * pars_combinations[draw_number][2] for \ + s in progression_severe_wasting_monthly_props_by_agegp], + 'prob_death_after_SAMcare': ((pars_combinations[draw_number][0] * pars_combinations[draw_number][3]) / + (1-0.738)) + } + } + +if __name__ == '__main__': + from tlo.cli import scenario_run + + scenario_run([__file__]) diff --git a/src/scripts/wasting_analyses/scenarios/30K/scenario_wasting_full_model_CS.py b/src/scripts/wasting_analyses/scenarios/30K/scenario_wasting_full_model_CS.py new file mode 100644 index 0000000000..1065c43ca8 --- /dev/null +++ 
b/src/scripts/wasting_analyses/scenarios/30K/scenario_wasting_full_model_CS.py @@ -0,0 +1,71 @@ +""" +This file defines a scenario for wasting analysis. + +It can be submitted on Azure Batch by running: + + tlo batch-submit src/scripts/wasting_analyses/scenarios/30K/scenario_wasting_full_model_CS.py + +or locally using: + + tlo scenario-run src/scripts/wasting_analyses/scenarios/30K/scenario_wasting_full_model_CS.py +""" +# import itertools +import warnings + +from tlo import Date, logging +from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios +from tlo.methods.fullmodel import fullmodel +from tlo.scenario import BaseScenario + +# capture warnings during simulation run +warnings.simplefilter('default', (UserWarning, RuntimeWarning)) + + +class WastingAnalysis(BaseScenario): + + def __init__(self): + super().__init__( + seed=0, + start_date=Date(year=2010, month=1, day=1), + end_date=Date(year=2031, month=1, day=2), + initial_population_size=30_000, + number_of_draws=1, + runs_per_draw=10, + ) + + def log_configuration(self): + return { + 'filename': 'wasting_analysis__full_model_CS', + 'directory': './outputs/wasting_analysis', + "custom_levels": { # Customise the output of specific loggers + "tlo.methods.demography": logging.INFO, + "tlo.methods.healthburden": logging.INFO, + 'tlo.methods.healthsystem.summary': logging.INFO, + "tlo.methods.population": logging.INFO, + "tlo.methods.wasting": logging.DEBUG, + '*': logging.WARNING + } + } + + def modules(self): + return fullmodel() + + # Scaling up care-seeking (CS) scenarios + def draw_parameters(self, draw_number, rng): + ### prob of symptoms awareness in MAM cases leading to care-seeking + # awareness_prob = [0.1, 0.3, 0.5, 1.0] + awareness_prob = [1.0, 1.0] + + return mix_scenarios( + get_parameters_for_status_quo(), + { + 'Wasting': { + 'interv_awareness_MAM_prob': awareness_prob[draw_number] + } + } + ) + +if __name__ == '__main__': + from tlo.cli import scenario_run + + 
scenario_run([__file__]) diff --git a/src/scripts/wasting_analyses/scenarios/30K/scenario_wasting_full_model_CS_FS.py b/src/scripts/wasting_analyses/scenarios/30K/scenario_wasting_full_model_CS_FS.py new file mode 100644 index 0000000000..81b162e0b0 --- /dev/null +++ b/src/scripts/wasting_analyses/scenarios/30K/scenario_wasting_full_model_CS_FS.py @@ -0,0 +1,74 @@ +""" +This file defines a scenario for wasting analysis. + +It can be submitted on Azure Batch by running: + + tlo batch-submit src/scripts/wasting_analyses/scenarios/30K/scenario_wasting_full_model_CS_FS.py + +or locally using: + + tlo scenario-run src/scripts/wasting_analyses/scenarios/30K/scenario_wasting_full_model_CS_FS.py +""" +# import itertools +import warnings + +from tlo import Date, logging +from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios +from tlo.methods.fullmodel import fullmodel +from tlo.scenario import BaseScenario + +# capture warnings during simulation run +warnings.simplefilter('default', (UserWarning, RuntimeWarning)) + + +class WastingAnalysis(BaseScenario): + + def __init__(self): + super().__init__( + seed=0, + start_date=Date(year=2010, month=1, day=1), + end_date=Date(year=2031, month=1, day=2), + initial_population_size=30_000, + number_of_draws=1, + runs_per_draw=10, + ) + + def log_configuration(self): + return { + 'filename': 'wasting_analysis__full_model_CS_FS', + 'directory': './outputs/wasting_analysis', + "custom_levels": { # Customise the output of specific loggers + "tlo.methods.demography": logging.INFO, + "tlo.methods.healthburden": logging.INFO, + 'tlo.methods.healthsystem.summary': logging.INFO, + "tlo.methods.population": logging.INFO, + "tlo.methods.wasting": logging.DEBUG, + '*': logging.WARNING + } + } + + def modules(self): + return fullmodel() + + # Scaling up Care-Seeking & Food Supplements availability (CS & FS) scenarios + def draw_parameters(self, draw_number, rng): + awareness_prob = [1.0, 1.0] + avail_prob = [1.0, 1.0] + + 
return mix_scenarios( + get_parameters_for_status_quo(), + { + 'Wasting': { + 'interv_awareness_MAM_prob': awareness_prob[draw_number], + 'interv_food_supplements_avail_bool': True, + 'interv_avail_F75milk': avail_prob[draw_number], + 'interv_avail_RUTF': avail_prob[draw_number], + 'interv_avail_CSB++': avail_prob[draw_number], + } + } + ) + +if __name__ == '__main__': + from tlo.cli import scenario_run + + scenario_run([__file__]) diff --git a/src/scripts/wasting_analyses/scenarios/30K/scenario_wasting_full_model_FS.py b/src/scripts/wasting_analyses/scenarios/30K/scenario_wasting_full_model_FS.py new file mode 100644 index 0000000000..ddab525378 --- /dev/null +++ b/src/scripts/wasting_analyses/scenarios/30K/scenario_wasting_full_model_FS.py @@ -0,0 +1,72 @@ +""" +This file defines a scenario for wasting analysis. + +It can be submitted on Azure Batch by running: + + tlo batch-submit src/scripts/wasting_analyses/scenarios/30K/scenario_wasting_full_model_FS.py + +or locally using: + + tlo scenario-run src/scripts/wasting_analyses/scenarios/30K/scenario_wasting_full_model_FS.py +""" +# import itertools +import warnings + +from tlo import Date, logging +from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios +from tlo.methods.fullmodel import fullmodel +from tlo.scenario import BaseScenario + +# capture warnings during simulation run +warnings.simplefilter('default', (UserWarning, RuntimeWarning)) + + +class WastingAnalysis(BaseScenario): + + def __init__(self): + super().__init__( + seed=0, + start_date=Date(year=2010, month=1, day=1), + end_date=Date(year=2031, month=1, day=2), + initial_population_size=30_000, + number_of_draws=1, + runs_per_draw=10, + ) + + def log_configuration(self): + return { + 'filename': 'wasting_analysis__full_model_FS', + 'directory': './outputs/wasting_analysis', + "custom_levels": { # Customise the output of specific loggers + "tlo.methods.demography": logging.INFO, + "tlo.methods.healthburden": logging.INFO, + 
'tlo.methods.healthsystem.summary': logging.INFO, + "tlo.methods.population": logging.INFO, + "tlo.methods.wasting": logging.DEBUG, + '*': logging.WARNING + } + } + + def modules(self): + return fullmodel() + + # Scaling up Food Supplements availability (FS) scenarios + def draw_parameters(self, draw_number, rng): + avail_prob = [1.0, 1.0] + + return mix_scenarios( + get_parameters_for_status_quo(), + { + 'Wasting': { + 'interv_food_supplements_avail_bool': True, + 'interv_avail_F75milk': avail_prob[draw_number], + 'interv_avail_RUTF': avail_prob[draw_number], + 'interv_avail_CSB++': avail_prob[draw_number], + } + } + ) + +if __name__ == '__main__': + from tlo.cli import scenario_run + + scenario_run([__file__]) diff --git a/src/scripts/wasting_analyses/scenarios/30K/scenario_wasting_full_model_GM.py b/src/scripts/wasting_analyses/scenarios/30K/scenario_wasting_full_model_GM.py new file mode 100644 index 0000000000..64895c7a90 --- /dev/null +++ b/src/scripts/wasting_analyses/scenarios/30K/scenario_wasting_full_model_GM.py @@ -0,0 +1,76 @@ +""" +This file defines a scenario for wasting analysis. 
+ +It can be submitted on Azure Batch by running: + + tlo batch-submit src/scripts/wasting_analyses/scenarios/30K/scenario_wasting_full_model_GM.py + +or locally using: + + tlo scenario-run src/scripts/wasting_analyses/scenarios/30K/scenario_wasting_full_model_GM.py +""" +# import itertools +import warnings + +from tlo import Date, logging +from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios +from tlo.methods.fullmodel import fullmodel +from tlo.scenario import BaseScenario + +# capture warnings during simulation run +warnings.simplefilter('default', (UserWarning, RuntimeWarning)) + + +class WastingAnalysis(BaseScenario): + + def __init__(self): + super().__init__( + seed=0, + start_date=Date(year=2010, month=1, day=1), + end_date=Date(year=2031, month=1, day=2), + initial_population_size=30_000, + number_of_draws=1, + runs_per_draw=10, + ) + + def log_configuration(self): + return { + 'filename': 'wasting_analysis__full_model_GM', + 'directory': './outputs/wasting_analysis', + "custom_levels": { # Customise the output of specific loggers + "tlo.methods.demography": logging.INFO, + "tlo.methods.healthburden": logging.INFO, + 'tlo.methods.healthsystem.summary': logging.INFO, + "tlo.methods.population": logging.INFO, + "tlo.methods.wasting": logging.DEBUG, + '*': logging.WARNING + } + } + + def modules(self): + return fullmodel() + + # Scaling up growth monitoring (GM) attendance scenarios + def draw_parameters(self, draw_number, rng): + ### growth_monitoring_attendance_probs by age categories + # < 1 year, 1-2 years, and > 2 years + attendance_probs_by_agecat = [ + # [0.76, 0.20, 0.55], + # [0.76, 0.25, 0.50], + [0.76, 1.00, 1.00], + [0.76, 1.00, 1.00] + ] + + return mix_scenarios( + get_parameters_for_status_quo(), + { + 'Wasting': { + 'interv_growth_monitoring_attendance_prob_agecat': attendance_probs_by_agecat[draw_number] + } + } + ) + +if __name__ == '__main__': + from tlo.cli import scenario_run + + scenario_run([__file__]) diff --git 
a/src/scripts/wasting_analyses/scenarios/30K/scenario_wasting_full_model_GM_CS.py b/src/scripts/wasting_analyses/scenarios/30K/scenario_wasting_full_model_GM_CS.py new file mode 100644 index 0000000000..752b5dd55f --- /dev/null +++ b/src/scripts/wasting_analyses/scenarios/30K/scenario_wasting_full_model_GM_CS.py @@ -0,0 +1,78 @@ +""" +This file defines a scenario for wasting analysis. + +It can be submitted on Azure Batch by running: + + tlo batch-submit src/scripts/wasting_analyses/scenarios/30K/scenario_wasting_full_model_GM_CS.py + +or locally using: + + tlo scenario-run src/scripts/wasting_analyses/scenarios/30K/scenario_wasting_full_model_GM_CS.py +""" +# import itertools +import warnings + +from tlo import Date, logging +from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios +from tlo.methods.fullmodel import fullmodel +from tlo.scenario import BaseScenario + +# capture warnings during simulation run +warnings.simplefilter('default', (UserWarning, RuntimeWarning)) + + +class WastingAnalysis(BaseScenario): + + def __init__(self): + super().__init__( + seed=0, + start_date=Date(year=2010, month=1, day=1), + end_date=Date(year=2031, month=1, day=2), + initial_population_size=30_000, + number_of_draws=1, + runs_per_draw=10, + ) + + def log_configuration(self): + return { + 'filename': 'wasting_analysis__full_model_GM_CS', + 'directory': './outputs/wasting_analysis', + "custom_levels": { # Customise the output of specific loggers + "tlo.methods.demography": logging.INFO, + "tlo.methods.healthburden": logging.INFO, + 'tlo.methods.healthsystem.summary': logging.INFO, + "tlo.methods.population": logging.INFO, + "tlo.methods.wasting": logging.DEBUG, + '*': logging.WARNING + } + } + + def modules(self): + return fullmodel() + + # Scaling up growth monitoring attendance & Care-Seeking (GM & CS) scenarios + def draw_parameters(self, draw_number, rng): + ### growth_monitoring_attendance_probs by age categories + # < 1 year, 1-2 years, and > 2 years + 
attendance_probs_by_agecat = [ + # [0.76, 0.20, 0.55], + # [0.76, 0.25, 0.50], + [0.76, 1.00, 1.00], + [0.76, 1.00, 1.00] + ] + awareness_prob = [1.0, 1.0] + + return mix_scenarios( + get_parameters_for_status_quo(), + { + 'Wasting': { + 'interv_growth_monitoring_attendance_prob_agecat': attendance_probs_by_agecat[draw_number], + 'interv_awareness_MAM_prob': awareness_prob[draw_number] + } + } + ) + +if __name__ == '__main__': + from tlo.cli import scenario_run + + scenario_run([__file__]) diff --git a/src/scripts/wasting_analyses/scenarios/30K/scenario_wasting_full_model_GM_CS_FS.py b/src/scripts/wasting_analyses/scenarios/30K/scenario_wasting_full_model_GM_CS_FS.py new file mode 100644 index 0000000000..b4d0400ff7 --- /dev/null +++ b/src/scripts/wasting_analyses/scenarios/30K/scenario_wasting_full_model_GM_CS_FS.py @@ -0,0 +1,83 @@ +""" +This file defines a scenario for wasting analysis. + +It can be submitted on Azure Batch by running: + + tlo batch-submit src/scripts/wasting_analyses/scenarios/30K/scenario_wasting_full_model_GM_CS_FS.py + +or locally using: + + tlo scenario-run src/scripts/wasting_analyses/scenarios/30K/scenario_wasting_full_model_GM_CS_FS.py +""" +# import itertools +import warnings + +from tlo import Date, logging +from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios +from tlo.methods.fullmodel import fullmodel +from tlo.scenario import BaseScenario + +# capture warnings during simulation run +warnings.simplefilter('default', (UserWarning, RuntimeWarning)) + + +class WastingAnalysis(BaseScenario): + + def __init__(self): + super().__init__( + seed=0, + start_date=Date(year=2010, month=1, day=1), + end_date=Date(year=2031, month=1, day=2), + initial_population_size=30_000, + number_of_draws=1, + runs_per_draw=10, + ) + + def log_configuration(self): + return { + 'filename': 'wasting_analysis__full_model_GM_CS_FS', + 'directory': './outputs/wasting_analysis', + "custom_levels": { # Customise the output of specific loggers 
+ "tlo.methods.demography": logging.INFO, + "tlo.methods.healthburden": logging.INFO, + 'tlo.methods.healthsystem.summary': logging.INFO, + "tlo.methods.population": logging.INFO, + "tlo.methods.wasting": logging.DEBUG, + '*': logging.WARNING + } + } + + def modules(self): + return fullmodel() + + # Scaling up Growth Monitoring attendance & Care-Seeking & Food Supplements availability (GM & CS & FS) scenarios + def draw_parameters(self, draw_number, rng): + ### growth_monitoring_attendance_probs by age categories + # < 1 year, 1-2 years, and > 2 years + attendance_probs_by_agecat = [ + # [0.76, 0.20, 0.55], + # [0.76, 0.25, 0.50], + [0.76, 1.00, 1.00], + [0.76, 1.00, 1.00] + ] + awareness_prob = [1.0, 1.0] + avail_prob = [1.0, 1.0] + + return mix_scenarios( + get_parameters_for_status_quo(), + { + 'Wasting': { + 'interv_growth_monitoring_attendance_prob_agecat': attendance_probs_by_agecat[draw_number], + 'interv_awareness_MAM_prob': awareness_prob[draw_number], + 'interv_food_supplements_avail_bool': True, + 'interv_avail_F75milk': avail_prob[draw_number], + 'interv_avail_RUTF': avail_prob[draw_number], + 'interv_avail_CSB++': avail_prob[draw_number], + } + } + ) + +if __name__ == '__main__': + from tlo.cli import scenario_run + + scenario_run([__file__]) diff --git a/src/scripts/wasting_analyses/scenarios/30K/scenario_wasting_full_model_GM_FS.py b/src/scripts/wasting_analyses/scenarios/30K/scenario_wasting_full_model_GM_FS.py new file mode 100644 index 0000000000..c8ccfc01e7 --- /dev/null +++ b/src/scripts/wasting_analyses/scenarios/30K/scenario_wasting_full_model_GM_FS.py @@ -0,0 +1,81 @@ +""" +This file defines a scenario for wasting analysis. 
+ +It can be submitted on Azure Batch by running: + + tlo batch-submit src/scripts/wasting_analyses/scenarios/30K/scenario_wasting_full_model_GM_FS.py + +or locally using: + + tlo scenario-run src/scripts/wasting_analyses/scenarios/30K/scenario_wasting_full_model_GM_FS.py +""" +# import itertools +import warnings + +from tlo import Date, logging +from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios +from tlo.methods.fullmodel import fullmodel +from tlo.scenario import BaseScenario + +# capture warnings during simulation run +warnings.simplefilter('default', (UserWarning, RuntimeWarning)) + + +class WastingAnalysis(BaseScenario): + + def __init__(self): + super().__init__( + seed=0, + start_date=Date(year=2010, month=1, day=1), + end_date=Date(year=2031, month=1, day=2), + initial_population_size=30_000, + number_of_draws=1, + runs_per_draw=10, + ) + + def log_configuration(self): + return { + 'filename': 'wasting_analysis__full_model_GM_FS', + 'directory': './outputs/wasting_analysis', + "custom_levels": { # Customise the output of specific loggers + "tlo.methods.demography": logging.INFO, + "tlo.methods.healthburden": logging.INFO, + 'tlo.methods.healthsystem.summary': logging.INFO, + "tlo.methods.population": logging.INFO, + "tlo.methods.wasting": logging.DEBUG, + '*': logging.WARNING + } + } + + def modules(self): + return fullmodel() + + # Scaling up growth monitoring attendance & Food Supplements availability (GM & FS) scenarios + def draw_parameters(self, draw_number, rng): + ### growth_monitoring_attendance_probs by age categories + # < 1 year, 1-2 years, and > 2 years + attendance_probs_by_agecat = [ + # [0.76, 0.20, 0.55], + # [0.76, 0.25, 0.50], + [0.76, 1.00, 1.00], + [0.76, 1.00, 1.00] + ] + avail_prob = [1.0, 1.0] + + return mix_scenarios( + get_parameters_for_status_quo(), + { + 'Wasting': { + 'interv_growth_monitoring_attendance_prob_agecat': attendance_probs_by_agecat[draw_number], + 'interv_food_supplements_avail_bool': True, 
+ 'interv_avail_F75milk': avail_prob[draw_number], + 'interv_avail_RUTF': avail_prob[draw_number], + 'interv_avail_CSB++': avail_prob[draw_number], + } + } + ) + +if __name__ == '__main__': + from tlo.cli import scenario_run + + scenario_run([__file__]) diff --git a/src/scripts/wasting_analyses/scenarios/30K/scenario_wasting_full_model_SQ.py b/src/scripts/wasting_analyses/scenarios/30K/scenario_wasting_full_model_SQ.py new file mode 100644 index 0000000000..ba43772ac4 --- /dev/null +++ b/src/scripts/wasting_analyses/scenarios/30K/scenario_wasting_full_model_SQ.py @@ -0,0 +1,60 @@ +""" +This file defines a scenario for wasting analysis. + +It can be submitted on Azure Batch by running: + + tlo batch-submit src/scripts/wasting_analyses/scenarios/30K/scenario_wasting_full_model_SQ.py + +or locally using: + + tlo scenario-run src/scripts/wasting_analyses/scenarios/30K/scenario_wasting_full_model_SQ.py +""" +# import itertools +import warnings + +from tlo import Date, logging +from tlo.analysis.utils import get_parameters_for_status_quo +from tlo.methods.fullmodel import fullmodel +from tlo.scenario import BaseScenario + +# capture warnings during simulation run +warnings.simplefilter('default', (UserWarning, RuntimeWarning)) + + +class WastingAnalysis(BaseScenario): + + def __init__(self): + super().__init__( + seed=0, + start_date=Date(year=2010, month=1, day=1), + end_date=Date(year=2031, month=1, day=2), + initial_population_size=30_000, + number_of_draws=1, + runs_per_draw=10, + ) + + def log_configuration(self): + return { + 'filename': 'wasting_analysis__full_model_SQ', + 'directory': './outputs/wasting_analysis', + "custom_levels": { # Customise the output of specific loggers + "tlo.methods.demography": logging.INFO, + "tlo.methods.healthburden": logging.INFO, + 'tlo.methods.healthsystem.summary': logging.INFO, + "tlo.methods.population": logging.INFO, + "tlo.methods.wasting": logging.DEBUG, + '*': logging.WARNING + } + } + + def modules(self): + return 
fullmodel() + + # Keeping Status Quo parameters + def draw_parameters(self, draw_number, rng): + return get_parameters_for_status_quo() + +if __name__ == '__main__': + from tlo.cli import scenario_run + + scenario_run([__file__]) diff --git a/src/scripts/wasting_analyses/scenarios/30K/scenario_wasting_minimal_model.py b/src/scripts/wasting_analyses/scenarios/30K/scenario_wasting_minimal_model.py new file mode 100644 index 0000000000..e3a657b0b6 --- /dev/null +++ b/src/scripts/wasting_analyses/scenarios/30K/scenario_wasting_minimal_model.py @@ -0,0 +1,102 @@ +""" +This file defines a scenario for wasting analysis. + +It can be submitted on Azure Batch by running: + + tlo batch-submit src/scripts/wasting_analyses/scenarios/30K/scenario_wasting_minimal_model.py + +or locally using: + + tlo scenario-run src/scripts/wasting_analyses/scenarios/30K/scenario_wasting_minimal_model.py +""" +# import itertools +import warnings + +from tlo import Date, logging +from tlo.methods import ( + alri, + demography, + diarrhoea, + enhanced_lifestyle, + epi, + healthburden, + healthseekingbehaviour, + healthsystem, + hiv, + simplified_births, + stunting, + symptommanager, + tb, + wasting, +) +from tlo.scenario import BaseScenario + +# capture warnings during simulation run +warnings.simplefilter('default', (UserWarning, RuntimeWarning)) + + +class WastingAnalysis(BaseScenario): + + def __init__(self): + super().__init__( + seed=0, + start_date=Date(year=2010, month=1, day=1), + end_date=Date(year=2031, month=1, day=1), + initial_population_size=30_000, + number_of_draws=4, + runs_per_draw=10, + ) + + def log_configuration(self): + return { + 'filename': 'wasting_analysis__minimal_model', + 'directory': './outputs/wasting_analysis', + "custom_levels": { # Customise the output of specific loggers + "tlo.methods.demography": logging.INFO, + "tlo.methods.population": logging.INFO, + "tlo.methods.wasting": logging.DEBUG, + '*': logging.WARNING + } + } + + def modules(self): + return 
[demography.Demography(resourcefilepath=self.resources), + healthsystem.HealthSystem(resourcefilepath=self.resources, + service_availability=['*'], use_funded_or_actual_staffing='actual', + mode_appt_constraints=1, + cons_availability='default', beds_availability='default', + equip_availability='all'), + healthseekingbehaviour.HealthSeekingBehaviour(resourcefilepath=self.resources), + healthburden.HealthBurden(resourcefilepath=self.resources), + symptommanager.SymptomManager(resourcefilepath=self.resources, spurious_symptoms=True), + enhanced_lifestyle.Lifestyle(resourcefilepath=self.resources), + simplified_births.SimplifiedBirths(resourcefilepath=self.resources), + hiv.Hiv(resourcefilepath=self.resources), + tb.Tb(resourcefilepath=self.resources), + epi.Epi(resourcefilepath=self.resources), + alri.Alri(resourcefilepath=self.resources), + diarrhoea.Diarrhoea(resourcefilepath=self.resources), + stunting.Stunting(resourcefilepath=self.resources), + wasting.Wasting(resourcefilepath=self.resources)] + + # Scaling up growth monitoring attendance scenarios + def draw_parameters(self, draw_number, rng): + ### growth_monitoring_attendance_probs by age categories + # < 1 year + attendance_prob_below1y = 0.76 + # 1-2 years + attendance_prob_1to2y = [0.14, 0.20, 0.25, 1.00] + # > 2 years + attendance_prob_above2y = [0.50, 0.55, 0.50, 1.00] + + return { + 'Wasting': { + 'interv_growth_monitoring_attendance_prob_agecat': + [attendance_prob_below1y, attendance_prob_1to2y[draw_number], attendance_prob_above2y[draw_number]] + } + } + +if __name__ == '__main__': + from tlo.cli import scenario_run + + scenario_run([__file__]) diff --git a/src/scripts/wasting_analyses/scenarios/4K/scenario_wasting_full_model_CS.py b/src/scripts/wasting_analyses/scenarios/4K/scenario_wasting_full_model_CS.py new file mode 100644 index 0000000000..c780faa9a6 --- /dev/null +++ b/src/scripts/wasting_analyses/scenarios/4K/scenario_wasting_full_model_CS.py @@ -0,0 +1,71 @@ +""" +This file defines a 
scenario for wasting analysis. + +It can be submitted on Azure Batch by running: + + tlo batch-submit src/scripts/wasting_analyses/scenarios/4K/scenario_wasting_full_model_CS.py + +or locally using: + + tlo scenario-run src/scripts/wasting_analyses/scenarios/4K/scenario_wasting_full_model_CS.py +""" +# import itertools +import warnings + +from tlo import Date, logging +from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios +from tlo.methods.fullmodel import fullmodel +from tlo.scenario import BaseScenario + +# capture warnings during simulation run +warnings.simplefilter('default', (UserWarning, RuntimeWarning)) + + +class WastingAnalysis(BaseScenario): + + def __init__(self): + super().__init__( + seed=0, + start_date=Date(year=2010, month=1, day=1), + end_date=Date(year=2031, month=1, day=2), + initial_population_size=4_000, + number_of_draws=1, + runs_per_draw=10, + ) + + def log_configuration(self): + return { + 'filename': 'wasting_analysis__full_model_CS', + 'directory': './outputs/wasting_analysis', + "custom_levels": { # Customise the output of specific loggers + "tlo.methods.demography": logging.INFO, + "tlo.methods.healthburden": logging.INFO, + 'tlo.methods.healthsystem.summary': logging.INFO, + "tlo.methods.population": logging.INFO, + "tlo.methods.wasting": logging.DEBUG, + '*': logging.WARNING + } + } + + def modules(self): + return fullmodel() + + # Scaling up care-seeking (CS) scenarios + def draw_parameters(self, draw_number, rng): + ### prob of care seeking for MAM cases + # care_seek_prob = [0.1, 0.3, 0.5, 1.0] + awareness_prob = [1.0, 1.0] + + return mix_scenarios( + get_parameters_for_status_quo(), + { + 'Wasting': { + 'interv_awareness_MAM_prob': awareness_prob[draw_number] + } + } + ) + +if __name__ == '__main__': + from tlo.cli import scenario_run + + scenario_run([__file__]) diff --git a/src/scripts/wasting_analyses/scenarios/4K/scenario_wasting_full_model_FS.py 
b/src/scripts/wasting_analyses/scenarios/4K/scenario_wasting_full_model_FS.py new file mode 100644 index 0000000000..2b8ecbeddf --- /dev/null +++ b/src/scripts/wasting_analyses/scenarios/4K/scenario_wasting_full_model_FS.py @@ -0,0 +1,72 @@ +""" +This file defines a scenario for wasting analysis. + +It can be submitted on Azure Batch by running: + + tlo batch-submit src/scripts/wasting_analyses/scenarios/4K/scenario_wasting_full_model_FS.py + +or locally using: + + tlo scenario-run src/scripts/wasting_analyses/scenarios/4K/scenario_wasting_full_model_FS.py +""" +# import itertools +import warnings + +from tlo import Date, logging +from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios +from tlo.methods.fullmodel import fullmodel +from tlo.scenario import BaseScenario + +# capture warnings during simulation run +warnings.simplefilter('default', (UserWarning, RuntimeWarning)) + + +class WastingAnalysis(BaseScenario): + + def __init__(self): + super().__init__( + seed=0, + start_date=Date(year=2010, month=1, day=1), + end_date=Date(year=2031, month=1, day=2), + initial_population_size=4_000, + number_of_draws=1, + runs_per_draw=10, + ) + + def log_configuration(self): + return { + 'filename': 'wasting_analysis__full_model_FS', + 'directory': './outputs/wasting_analysis', + "custom_levels": { # Customise the output of specific loggers + "tlo.methods.demography": logging.INFO, + "tlo.methods.healthburden": logging.INFO, + 'tlo.methods.healthsystem.summary': logging.INFO, + "tlo.methods.population": logging.INFO, + "tlo.methods.wasting": logging.DEBUG, + '*': logging.WARNING + } + } + + def modules(self): + return fullmodel() + + # Scaling up Food Supplements availability (FS) scenarios + def draw_parameters(self, draw_number, rng): + avail_prob = [1.0, 1.0] + + return mix_scenarios( + get_parameters_for_status_quo(), + { + 'Wasting': { + 'interv_food_supplements_avail_bool': True, + 'interv_avail_F75milk': avail_prob[draw_number], + 
'interv_avail_RUTF': avail_prob[draw_number], + 'interv_avail_CSB++': avail_prob[draw_number], + } + } + ) + +if __name__ == '__main__': + from tlo.cli import scenario_run + + scenario_run([__file__]) diff --git a/src/scripts/wasting_analyses/scenarios/4K/scenario_wasting_full_model_GM.py b/src/scripts/wasting_analyses/scenarios/4K/scenario_wasting_full_model_GM.py new file mode 100644 index 0000000000..693c704f29 --- /dev/null +++ b/src/scripts/wasting_analyses/scenarios/4K/scenario_wasting_full_model_GM.py @@ -0,0 +1,76 @@ +""" +This file defines a scenario for wasting analysis. + +It can be submitted on Azure Batch by running: + + tlo batch-submit src/scripts/wasting_analyses/scenarios/4K/scenario_wasting_full_model_GM.py + +or locally using: + + tlo scenario-run src/scripts/wasting_analyses/scenarios/4K/scenario_wasting_full_model_GM.py +""" +# import itertools +import warnings + +from tlo import Date, logging +from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios +from tlo.methods.fullmodel import fullmodel +from tlo.scenario import BaseScenario + +# capture warnings during simulation run +warnings.simplefilter('default', (UserWarning, RuntimeWarning)) + + +class WastingAnalysis(BaseScenario): + + def __init__(self): + super().__init__( + seed=0, + start_date=Date(year=2010, month=1, day=1), + end_date=Date(year=2031, month=1, day=2), + initial_population_size=4_000, + number_of_draws=1, + runs_per_draw=10, + ) + + def log_configuration(self): + return { + 'filename': 'wasting_analysis__full_model_GM', + 'directory': './outputs/wasting_analysis', + "custom_levels": { # Customise the output of specific loggers + "tlo.methods.demography": logging.INFO, + "tlo.methods.healthburden": logging.INFO, + 'tlo.methods.healthsystem.summary': logging.INFO, + "tlo.methods.population": logging.INFO, + "tlo.methods.wasting": logging.DEBUG, + '*': logging.WARNING + } + } + + def modules(self): + return fullmodel() + + # Scaling up growth monitoring (GM) 
attendance scenarios + def draw_parameters(self, draw_number, rng): + ### growth_monitoring_attendance_probs by age categories + # < 1 year, 1-2 years, and > 2 years + attendance_probs_by_agecat = [ + # [0.76, 0.20, 0.55], + # [0.76, 0.25, 0.50], + [0.76, 1.00, 1.00], + [0.76, 1.00, 1.00] + ] + + return mix_scenarios( + get_parameters_for_status_quo(), + { + 'Wasting': { + 'interv_growth_monitoring_attendance_prob_agecat': attendance_probs_by_agecat[draw_number] + } + } + ) + +if __name__ == '__main__': + from tlo.cli import scenario_run + + scenario_run([__file__]) diff --git a/src/scripts/wasting_analyses/scenarios/4K/scenario_wasting_full_model_SQ.py b/src/scripts/wasting_analyses/scenarios/4K/scenario_wasting_full_model_SQ.py new file mode 100644 index 0000000000..df79c48d0f --- /dev/null +++ b/src/scripts/wasting_analyses/scenarios/4K/scenario_wasting_full_model_SQ.py @@ -0,0 +1,60 @@ +""" +This file defines a scenario for wasting analysis. + +It can be submitted on Azure Batch by running: + + tlo batch-submit src/scripts/wasting_analyses/scenarios/4K/scenario_wasting_full_model_SQ.py + +or locally using: + + tlo scenario-run src/scripts/wasting_analyses/scenarios/4K/scenario_wasting_full_model_SQ.py +""" +# import itertools +import warnings + +from tlo import Date, logging +from tlo.analysis.utils import get_parameters_for_status_quo +from tlo.methods.fullmodel import fullmodel +from tlo.scenario import BaseScenario + +# capture warnings during simulation run +warnings.simplefilter('default', (UserWarning, RuntimeWarning)) + + +class WastingAnalysis(BaseScenario): + + def __init__(self): + super().__init__( + seed=0, + start_date=Date(year=2010, month=1, day=1), + end_date=Date(year=2031, month=1, day=2), + initial_population_size=4_000, + number_of_draws=1, + runs_per_draw=10, + ) + + def log_configuration(self): + return { + 'filename': 'wasting_analysis__full_model_SQ', + 'directory': './outputs/wasting_analysis', + "custom_levels": { # Customise the 
output of specific loggers + "tlo.methods.demography": logging.INFO, + "tlo.methods.healthburden": logging.INFO, + 'tlo.methods.healthsystem.summary': logging.INFO, + "tlo.methods.population": logging.INFO, + "tlo.methods.wasting": logging.DEBUG, + '*': logging.WARNING + } + } + + def modules(self): + return fullmodel() + + # Keeping Status Quo parameters + def draw_parameters(self, draw_number, rng): + return get_parameters_for_status_quo() + +if __name__ == '__main__': + from tlo.cli import scenario_run + + scenario_run([__file__]) diff --git a/src/scripts/wasting_analyses/submission_scripts/download_results.sh b/src/scripts/wasting_analyses/submission_scripts/download_results.sh new file mode 100755 index 0000000000..9cc3eb29a1 --- /dev/null +++ b/src/scripts/wasting_analyses/submission_scripts/download_results.sh @@ -0,0 +1,44 @@ +#!/bin/bash + +# Check for two input arguments +if [ $# -ne 2 ]; then + echo "Usage: $0 " + exit 1 +fi + +INPUT_FILE="$(realpath "$1")" +START_TIME="$2" + +cd ~/Scratch/thanzi/TLOmodel-outputs || exit + +# Output files with timestamp suffix +DOWNLOAD_FILE="download_commands_${START_TIME}.txt" +RUN_NAMES_FILE="run_names_${START_TIME}.csv" + +# Create or clear output files +> "$DOWNLOAD_FILE" +> "$RUN_NAMES_FILE" + +# Process each line of the CSV, skipping the header +tail -n +2 "$INPUT_FILE" | while IFS=, read -r JOBID SCENARIO_TYPE FSTIME; do + SCENARIO_NAME="wasting_analysis__full_model_${SCENARIO_TYPE}" + TIMESTAMP=$(date -d "$FSTIME" +"%Y-%m-%dT%H%M%SZ") + + orig_name=$(find . 
-maxdepth 1 -type d -name "${SCENARIO_NAME}*${JOBID}") + name_w_timestamp="${SCENARIO_NAME}-${TIMESTAMP}-${JOBID}" + + mv "${orig_name}" "${name_w_timestamp}" + + base_name=$(echo "${name_w_timestamp}" | sed "s/-${JOBID}//") + + cp -r "${name_w_timestamp}" "${base_name}" + zip -r "${base_name}.zip" "${base_name}" + rm -r "${base_name}" + + mkdir "${name_w_timestamp}/oe_files_${JOBID}" + mv *.o${JOBID}* "${name_w_timestamp}/oe_files_${JOBID}/" + mv *.e${JOBID}* "${name_w_timestamp}/oe_files_${JOBID}/" + + echo "scp sejjej5@myriad.rc.ucl.ac.uk:~/Scratch/thanzi/TLOmodel-outputs/${SCENARIO_NAME}-${TIMESTAMP}.zip ~/PycharmProjects/TLOmodel/outputs/sejjej5@ucl.ac.uk/wasting/scenarios/${SCENARIO_TYPE}/" >> "$DOWNLOAD_FILE" + echo "${SCENARIO_NAME}-${TIMESTAMP}" >> "$RUN_NAMES_FILE" +done diff --git a/src/scripts/wasting_analyses/submission_scripts/submit_job_with_pars_by_names.sh b/src/scripts/wasting_analyses/submission_scripts/submit_job_with_pars_by_names.sh new file mode 100755 index 0000000000..67fb20bb1c --- /dev/null +++ b/src/scripts/wasting_analyses/submission_scripts/submit_job_with_pars_by_names.sh @@ -0,0 +1,60 @@ + +#!/bin/bash + +set -x + +# Default values (optional, if needed) +SCENARIO="" +TIME="" +MEMORY="" +DRAWS="" +RUNS="" +SHIFT="" +POPSIZE="" + +# Parse named arguments in NAME=value format +for ARG in "$@"; do + case $ARG in + SCENARIO=*) + SCENARIO="${ARG#*=}" + ;; + TIME=*) + TIME="${ARG#*=}" + ;; + MEMORY=*) + MEMORY="${ARG#*=}" + ;; + DRAWS=*) + DRAWS="${ARG#*=}" + ;; + RUNS=*) + RUNS="${ARG#*=}" + ;; + SHIFT=*) + SHIFT="${ARG#*=}" + ;; + POPSIZE=*) + POPSIZE="${ARG#*=}" + ;; + *) + echo "Unknown parameter: $ARG" + exit 1 + ;; + esac +done + +# Validate required arguments +if [[ -z "$SCENARIO" || -z "$TIME" || -z "$MEMORY" || -z "$DRAWS" || -z "$RUNS" || -z "$SHIFT" || -z "$POPSIZE" ]]; then + echo "Usage: $0 SCENARIO= TIME=