diff --git a/resources/ResourceFile_Cervical_Cancer/parameter_values.csv b/resources/ResourceFile_Cervical_Cancer/parameter_values.csv index a277ea3972..0bd9e1e15b 100644 --- a/resources/ResourceFile_Cervical_Cancer/parameter_values.csv +++ b/resources/ResourceFile_Cervical_Cancer/parameter_values.csv @@ -52,4 +52,4 @@ months_to_next_appt_treated_less_than_12_mo_ago,3,local,1,36,vague prior,Missing months_to_next_appt_treated_12_to_36_mo_ago,6,local,1,36,vague prior,Missing months_to_next_appt_treated_36_to_60_mo_ago,12,local,1,36,vague prior,Missing palliative_care_interval_months,1,local,0.5,12,vague prior,Missing -main_polling_frequency_months,1,undetermined,1,12,design decision,N/A \ No newline at end of file +main_polling_frequency_months,1,undetermined,1,12,design decision,N/A diff --git a/resources/ResourceFile_HPV/parameter_values.csv b/resources/ResourceFile_HPV/parameter_values.csv new file mode 100644 index 0000000000..e02e1eaa5d --- /dev/null +++ b/resources/ResourceFile_HPV/parameter_values.csv @@ -0,0 +1,24 @@ +parameter_name,value,param_label,prior_min,prior_max,prior_note,reference +init_prev_hpv_hr1,0.1,undetermined,0,1,calibration,"Cubie HA, Morton D, Kawonga E, Mautanga M, Mwenitete I, Teakle N, Ngwira B, Walker H, Walker G, Kafwafwa S, Kabota B, Ter Haar R, Campbell C. HPV prevalence in women attending cervical screening in rural Malawi using the cartridge-based Xpert® HPV assay. J Clin Virol. 2017 Feb;87:1-4. doi: 10.1016/j.jcv.2016.11.014. " +init_prev_hpv_hr2,0.15,undetermined,0,1,calibration,"Cubie HA, Morton D, Kawonga E, Mautanga M, Mwenitete I, Teakle N, Ngwira B, Walker H, Walker G, Kafwafwa S, Kabota B, Ter Haar R, Campbell C. HPV prevalence in women attending cervical screening in rural Malawi using the cartridge-based Xpert® HPV assay. J Clin Virol. 2017 Feb;87:1-4. doi: 10.1016/j.jcv.2016.11.014. " +init_prev_hpv_hr3,0.15,undetermined,0,1,calibration,"Cubie HA, Morton D, Kawonga E, Mautanga M, Mwenitete I, Teakle N, Ngwira B, Walker H, Walker G, Kafwafwa S, Kabota B, Ter Haar R, Campbell C. HPV prevalence in women attending cervical screening in rural Malawi using the cartridge-based Xpert® HPV assay. J Clin Virol. 2017 Feb;87:1-4. doi: 10.1016/j.jcv.2016.11.014. " +b_hpv,0.8,undetermined,0,1,calibration,N/A +rr_hpv_hiv_no_art,2.6,undetermined,0,1,calibration,"Liu G, Sharma M, Tan N, Barnabas RV. HIV-positive women have higher risk of human papilloma virus infection, precancerous lesions, and cervical cancer. AIDS. 2018 Mar 27;32(6):795-808. doi: 10.1097/QAD.0000000000001765. PMID: 29369827; PMCID: PMC5854529." +rr_hr1_vaccinated,0.1,undetermined,0,1,calibration,"Basu P, et al. Vaccine efficacy against persistent human papillomavirus (HPV) 16/18 infection at 10 years after one, two, and three doses of quadrivalent HPV vaccine in girls in India: a multicentre, prospective, cohort study. Lancet Oncol. 2021 Nov;22(11):1518-1529. doi: 10.1016/S1470-2045(21)00453-8. Epub 2021 Oct 8. Erratum in: Lancet Oncol. 2022 Jan;23(1):e16. doi: 10.1016/S1470-2045(21)00700-2. " +rr_hr2_vaccinated,0.5,undetermined,0,1,calibration,N/A +rr_hr3_vaccinated,0.8,undetermined,0,1,calibration,N/A +rr_hpv_age50plus,0.5,undetermined,0,1,calibration,N/A +median_clear_hr1,16,undetermined,0,1,calibration,"Muñoz N, Méndez F, Posso H, Molano M, van den Brule AJ, Ronderos M, Meijer C, Muñoz A; Instituto Nacional de Cancerologia HPV Study Group. Incidence, duration, and determinants of cervical human papillomavirus infection in a cohort of Colombian women with normal cytological results. J Infect Dis. 2004 Dec 15;190(12):2077-87. doi: 10.1086/425907. Epub 2004 Nov 22. PMID: 15551205." +median_clear_hr2,14,undetermined,0,1,calibration,"Muñoz N, Méndez F, Posso H, Molano M, van den Brule AJ, Ronderos M, Meijer C, Muñoz A; Instituto Nacional de Cancerologia HPV Study Group. Incidence, duration, and determinants of cervical human papillomavirus infection in a cohort of Colombian women with normal cytological results. J Infect Dis. 2004 Dec 15;190(12):2077-87. doi: 10.1086/425907. Epub 2004 Nov 22. PMID: 15551205." +median_clear_hr3,12,undetermined,0,1,calibration,"Muñoz N, Méndez F, Posso H, Molano M, van den Brule AJ, Ronderos M, Meijer C, Muñoz A; Instituto Nacional de Cancerologia HPV Study Group. Incidence, duration, and determinants of cervical human papillomavirus infection in a cohort of Colombian women with normal cytological results. J Infect Dis. 2004 Dec 15;190(12):2077-87. doi: 10.1086/425907. Epub 2004 Nov 22. PMID: 15551205." +clear_shape,0.8,undetermined,0,1,calibration,N/A +rr_clear_hiv_no_art,0.72,undetermined,0,1,calibration,"Liu G, Sharma M, Tan N, Barnabas RV. HIV-positive women have higher risk of human papilloma virus infection, precancerous lesions, and cervical cancer. AIDS. 2018 Mar 27;32(6):795-808. doi: 10.1097/QAD.0000000000001765. +" +age_mixing_within,0.7,undetermined,0,1,calibration,"Reed DM. Age-disparate and intergenerational sex partnerships and HIV: the role of gender norms among adolescent girls and young women in Malawi. BMC Public Health. 2024 Feb 22;24(1):575. doi: 10.1186/s12889-024-17868-5. +" +age_mixing_adjacent,0.23,undetermined,0,1,calibration,"Reed DM. Age-disparate and intergenerational sex partnerships and HIV: the role of gender norms among adolescent girls and young women in Malawi. BMC Public Health. 2024 Feb 22;24(1):575. doi: 10.1186/s12889-024-17868-5. +" +age_mixing_distant,0.07,undetermined,0,1,calibration,"Reed DM. Age-disparate and intergenerational sex partnerships and HIV: the role of gender norms among adolescent girls and young women in Malawi. BMC Public Health. 2024 Feb 22;24(1):575. doi: 10.1186/s12889-024-17868-5. +" +hpv_event_frequency_months,6,undetermined,0,1,calibration,N/A +persistent_threshold_months,12,undetermined,0,1,calibration,N/A diff --git a/src/scripts/HPV_Analyses/Analysis_HPV.py b/src/scripts/HPV_Analyses/Analysis_HPV.py new file mode 100644 index 0000000000..37ea1cb9fd --- /dev/null +++ b/src/scripts/HPV_Analyses/Analysis_HPV.py @@ -0,0 +1,382 @@ +""" +Run the HPV modules + """ + +import datetime +import pickle +import random +import pandas as pd +import matplotlib.pyplot as plt +from pathlib import Path + +from tlo import Date, Simulation, logging +from tlo.analysis.utils import parse_log_file, extract_results +from tlo.methods import ( + demography, + enhanced_lifestyle, + epi, + healthburden, + healthseekingbehaviour, + healthsystem, + measles, + simplified_births, + symptommanager, + hpv, + hiv, + tb +) + +results_folder = Path("./outputs") + +# Where will outputs go +outputpath = Path("./outputs") # folder for convenience of storing outputs + +# date-stamp to label log files and any other outputs +datestamp = datetime.date.today().strftime("__%Y_%m_%d") + +# The resource files +resourcefilepath = './resources' + +# %% Run the simulation +start_date = Date(2010, 1, 1) +end_date = Date(2020, 1, 1) +popsize = 2000 + + +# set up the log config +log_config = { + "filename": "test_runs", + "directory": outputpath, + "custom_levels": { + "*": logging.WARNING, + "tlo.methods.hpv": logging.INFO, + }, +} +# +# # Register the appropriate modules +# # need to call epi before tb to get bcg vax +seed = random.randint(0, 50000) +# # seed = 41728 # set seed for reproducibility + +# HPV model labels +AGE_LABELS = ["15_19", "20_24", "25_34", "35_44", "45_54", "55plus"] +HPV_GROUPS = ["hr1", "hr2", "hr3"] +SEXES = ["M", "F"] + +# 1. Run simulation +sim = Simulation(start_date=start_date, seed=seed, log_config=log_config, + show_progress_bar=True, resourcefilepath=resourcefilepath) +sim.register( + demography.Demography(), + simplified_births.SimplifiedBirths(), + enhanced_lifestyle.Lifestyle(), + healthsystem.HealthSystem(service_availability=["*"], # all treatment allowed + mode_appt_constraints=1, # mode of constraints to do with officer numbers and time + cons_availability="default", # mode for consumable constraints (if ignored, all consumables available) + ignore_priority=False, # do not use the priority information in HSI event to schedule + capabilities_coefficient=1.0, # multiplier for the capabilities of health officers + use_funded_or_actual_staffing="actual", # actual: use numbers/distribution of staff available currently + disable=False, # disables the healthsystem (no constraints and no logging) and every HSI runs + disable_and_reject_all=False, # disable healthsystem and no HSI runs + ), + symptommanager.SymptomManager(), + healthseekingbehaviour.HealthSeekingBehaviour(), + healthburden.HealthBurden(), + epi.Epi(), + hiv.Hiv(), + measles.Measles(), + tb.Tb(), + hpv.HPV(), +) + +# # set the scenario +#sim.modules["HPV"].parameters["r_hpv"] = 0.01 +#sim.modules["HPV"].parameters["r_hpv_clear"] = 0.6 + +# # Run the simulation and flush the logger +sim.make_initial_population(n=popsize) +sim.simulate(end_date=end_date) + + +# 2. Parse and save results +output = parse_log_file(sim.log_filepath) + +# save the results, argument 'wb' means write using binary mode. use 'rb' for reading file +with open(outputpath / "default_run.pickle", "wb") as f: + # Pickle the 'data' dictionary using the highest protocol available. + pickle.dump(dict(output), f, pickle.HIGHEST_PROTOCOL) + +# load the results +with open(outputpath / "default_run.pickle", "rb") as f: + output = pickle.load(f) + +#Show the results +hpv_outputs = output["tlo.methods.hpv"]["summary"] +print(hpv_outputs) +# proportion_infected = extract_results( +# results_folder, +# module="tlo.methods.hpv", +# key="summary", +# column="PropInf", +# do_scaling=False, +# ) +# +# number_infected = extract_results( +# results_folder, +# module="tlo.methods.hpv", +# key="summary", +# column="TotalInf", +# do_scaling=True, +# ) + +hpv_outputs = output["tlo.methods.hpv"]["summary"] +hpv_df = pd.DataFrame(hpv_outputs) + +print(hpv_df) +print(hpv_df.columns) + +hpv_df["Date"] = pd.to_datetime(hpv_df["date"]) +hpv_df = hpv_df.sort_values("Date").reset_index(drop=True) + +# 4. Helper functions +def compute_group_prev_by_sex( + df: pd.DataFrame, + hpv_group: str, + sex: str, + age_labels: list[str], +) -> pd.Series: + + inf_cols = [f"{hpv_group}_{sex}_{age}_Inf" for age in age_labels] + n_cols = [f"Any_{sex}_{age}_N" for age in age_labels] + + missing_inf = [c for c in inf_cols if c not in df.columns] + missing_n = [c for c in n_cols if c not in df.columns] + + if missing_inf or missing_n: + print(f"\nCannot compute {hpv_group}_{sex}_TotalPrev.") + if missing_inf: + print("Missing infection columns:", missing_inf) + if missing_n: + print("Missing denominator columns:", missing_n) + + return pd.Series([float("nan")] * len(df), index=df.index) + + total_inf = df[inf_cols].sum(axis=1) + total_n = df[n_cols].sum(axis=1) + + return total_inf / total_n.replace(0, pd.NA) + +for sex in SEXES: + for group in HPV_GROUPS: + hpv_df[f"{group}_{sex}_TotalPrev"] = compute_group_prev_by_sex( + hpv_df, + hpv_group=group, + sex=sex, + age_labels=AGE_LABELS, + ) + +# Plot 1: Total infection +plt.figure(figsize=(8, 5)) +plt.plot(hpv_df["Date"], hpv_df["TotalPrev"], marker="o") +plt.xlabel("Date") +plt.ylabel("Total HPV prevalence") +plt.title("Total HPV prevalence over time") +plt.grid(True) +plt.tight_layout() +plt.savefig(outputpath / "hpv_total_prevalence.png", dpi=300) +plt.show() + +# 2. HPV prevalence in different gender +plt.figure(figsize=(8, 5)) +plt.plot(hpv_df["Date"], hpv_df["M_Prev"], marker="o", label="Male") +plt.plot(hpv_df["Date"], hpv_df["F_Prev"], marker="o", label="Female") +plt.xlabel("Date") +plt.ylabel("Any HPV prevalence") +plt.title("Any HPV prevalence by sex") +plt.legend() +plt.grid(True) +plt.tight_layout() +plt.savefig(outputpath / "hpv_prevalence_by_sex.png", dpi=300) +plt.show() + +# 3. Prevalence of different HPV groups in female +plt.figure(figsize=(8, 5)) +for group in HPV_GROUPS: + plt.plot( + hpv_df["Date"], + hpv_df[f"{group}_F_TotalPrev"], + marker="o", + label=group, + ) +plt.xlabel("Date") +plt.ylabel("Prevalence") +plt.title("Female HPV prevalence by group") +plt.legend() +plt.grid(True) +plt.tight_layout() +plt.savefig(outputpath / "female_hpv_group_prevalence.png", dpi=300) +plt.show() + +# 4. Prevalence of different HPV groups in male +plt.figure(figsize=(8, 5)) +for group in HPV_GROUPS: + plt.plot( + hpv_df["Date"], + hpv_df[f"{group}_M_TotalPrev"], + marker="o", + label=group, + ) +plt.xlabel("Date") +plt.ylabel("Prevalence") +plt.title("Male HPV prevalence by group") +plt.legend() +plt.grid(True) +plt.tight_layout() +plt.savefig(outputpath / "male_hpv_group_prevalence.png", dpi=300) +plt.show() + +# Plot 5: Multiplicity of infection +plt.figure(figsize=(8, 5)) +plt.plot(hpv_df["Date"], hpv_df["InfGroup1"], marker="o", label="1 HPV group") +plt.plot(hpv_df["Date"], hpv_df["InfGroup2"], marker="o", label="2 HPV groups") +plt.plot(hpv_df["Date"], hpv_df["InfGroup3"], marker="o", label="3 HPV groups") +plt.xlabel("Date") +plt.ylabel("Number of infected individuals") +plt.title("Multiplicity of HPV infection") +plt.legend() +plt.grid(True) +plt.tight_layout() +plt.savefig(outputpath / "hpv_multiplicity_over_time.png", dpi=300) +plt.show() + +# Plot 6: Multiplicity of infection by sex +plt.figure(figsize=(9, 5)) + +plt.plot(hpv_df["Date"], hpv_df["MaleGroup1"], marker="o", label="Male: 1 group") +plt.plot(hpv_df["Date"], hpv_df["MaleGroup2"], marker="o", label="Male: 2 groups") +plt.plot(hpv_df["Date"], hpv_df["MaleGroup3"], marker="o", label="Male: 3 groups") + +plt.plot(hpv_df["Date"], hpv_df["FemaleGroup1"], marker="o", label="Female: 1 group") +plt.plot(hpv_df["Date"], hpv_df["FemaleGroup2"], marker="o", label="Female: 2 groups") +plt.plot(hpv_df["Date"], hpv_df["FemaleGroup3"], marker="o", label="Female: 3 groups") + +plt.xlabel("Date") +plt.ylabel("Number of infected individuals") +plt.title("Multiplicity of HPV infection by sex") +plt.legend() +plt.grid(True) +plt.tight_layout() +plt.savefig(outputpath / "hpv_multiplicity_by_sex.png", dpi=300) +plt.show() + +# Plot 7: Persistent HPV infection prevalence +plt.figure(figsize=(8, 5)) + +for group in HPV_GROUPS: + plt.plot( + hpv_df["Date"], + hpv_df[f"{group}_Persistent12_Prev"], + marker="o", + label=group, + ) + +plt.xlabel("Date") +plt.ylabel("Persistent infection prevalence") +plt.title("Persistent HPV infection prevalence, >=12 months") +plt.legend() +plt.grid(True) +plt.tight_layout() +plt.savefig(outputpath / "hpv_persistent_prevalence.png", dpi=300) +plt.show() + +# Plot 8: Persistent HPV infection by sex +for group in HPV_GROUPS: + male_col = f"{group}_Persistent12_M_Prev" + female_col = f"{group}_Persistent12_F_Prev" + + required_cols = ["Date", male_col, female_col] + + plt.figure(figsize=(8, 5)) + plt.plot(hpv_df["Date"], hpv_df[male_col], marker="o", label="Male") + plt.plot(hpv_df["Date"], hpv_df[female_col], marker="o", label="Female") + + plt.xlabel("Date") + plt.ylabel("Persistent infection prevalence") + plt.title(f"{group} persistent infection prevalence by sex") + plt.legend() + plt.grid(True) + plt.tight_layout() + plt.savefig(outputpath / f"{group}_persistent_by_sex.png", dpi=300) + plt.show() + +# Plot 9: Female any HPV prevalence by age group +female_age_cols = [f"Any_F_{age}_Prev" for age in AGE_LABELS] + +plt.figure(figsize=(9, 5)) + +for age in AGE_LABELS: + plt.plot( + hpv_df["Date"], + hpv_df[f"Any_F_{age}_Prev"], + marker="o", + label=age, + ) + +plt.xlabel("Date") +plt.ylabel("Any HPV prevalence") +plt.title("Female any HPV prevalence by age group") +plt.legend() +plt.grid(True) +plt.tight_layout() +plt.savefig(outputpath / "female_any_hpv_by_age.png", dpi=300) +plt.show() + +# Plot 10: Male any HPV prevalence by age group +male_age_cols = [f"Any_M_{age}_Prev" for age in AGE_LABELS] + +plt.figure(figsize=(9, 5)) + +for age in AGE_LABELS: + plt.plot( + hpv_df["Date"], + hpv_df[f"Any_M_{age}_Prev"], + marker="o", + label=age, + ) + +plt.xlabel("Date") +plt.ylabel("Any HPV prevalence") +plt.title("Male any HPV prevalence by age group") +plt.legend() +plt.grid(True) +plt.tight_layout() +plt.savefig(outputpath / "male_any_hpv_by_age.png", dpi=300) +plt.show() + +# Plot 11: Any HPV prevalence by HIV/ART status +hiv_prev_cols = [ + "Any_HIVneg_Prev", + "Any_HIVpos_unknown_Prev", + "Any_HIVpos_noART_Prev", + "Any_HIVpos_unsupp_Prev", + "Any_HIVpos_supp_Prev", +] + +available_hiv_cols = [c for c in hiv_prev_cols if c in hpv_df.columns] + +if len(available_hiv_cols) > 0: + plt.figure(figsize=(9, 5)) + + for col in available_hiv_cols: + label = col.replace("Any_", "").replace("_Prev", "") + plt.plot(hpv_df["Date"], hpv_df[col], marker="o", label=label) + + plt.xlabel("Date") + plt.ylabel("Any HPV prevalence") + plt.title("Any HPV prevalence by HIV/ART status") + plt.legend() + plt.grid(True) + plt.tight_layout() + plt.savefig(outputpath / "hpv_prevalence_by_hiv_status.png", dpi=300) + plt.show() + diff --git a/src/scripts/analysis_example_Xin/scenario_impact_of_consumables_availability.py b/src/scripts/analysis_example_Xin/scenario_impact_of_consumables_availability.py new file mode 100644 index 0000000000..0d8f0e9a41 --- /dev/null +++ b/src/scripts/analysis_example_Xin/scenario_impact_of_consumables_availability.py @@ -0,0 +1,30 @@ +from tlo import Date, logging +from tlo.methods.fullmodel import fullmodel +from tlo.scenario import BaseScenario + +class ImpactOfConsumablesAvailability(BaseScenario): + def __init__(self): + super().__init__( + seed=0, + start_date=Date(2010, 1, 1), + end_date=Date(2015, 1, 1), + initial_population_size=10_000, + number_of_draws= 2, + runs_per_draw=2, + ) + def log_configuration(self): + return { + 'filename': 'impact_of_consumables_availability', + 'directory': './outputs', + 'custom_levels': { + '*': logging.WARNING, + 'tlo.methods.demography': logging.INFO, + 'tlo.methods.healthburden': logging.INFO, + } + } + def modules(self): + return fullmodel() + def draw_parameters(self, draw_number, rng): + return { + 'HealthSystem': {'cons_availability': ['default', 'all'][draw_number]} + } diff --git a/src/scripts/fullmodel_Xin/fullmodel_test_2DEC.py b/src/scripts/fullmodel_Xin/fullmodel_test_2DEC.py new file mode 100644 index 0000000000..4fedb2c4f0 --- /dev/null +++ b/src/scripts/fullmodel_Xin/fullmodel_test_2DEC.py @@ -0,0 +1,62 @@ +""" +Run the full model with intervention coverage specified at national level + """ +import datetime +import pickle +from pathlib import Path +from tlo import Date, Simulation, logging +from tlo.analysis.utils import parse_log_file +from tlo.methods.fullmodel import fullmodel +# Where will outputs go +outputpath = Path("./outputs") # folder for convenience of storing outputs +# date-stamp to label log files and any other outputs +datestamp = datetime.date.today().strftime("__%Y_%m_%d") +# The resource files +resourcefilepath = Path("./resources") +# %% Run the simulation +start_date = Date(2010, 1, 1) +end_date = Date(2012, 1, 1) +popsize = 500 +# set up the log config +# add deviance measure logger if needed +log_config = { + "filename": "hiv_test_runs", + "directory": outputpath, + "custom_levels": { + "*": logging.WARNING, + "tlo.methods.hiv": logging.INFO, + "tlo.methods.tb": logging.INFO, + "tlo.methods.demography": logging.INFO, + # "tlo.methods.healthsystem.summary": logging.INFO, + "tlo.methods.labour.detail": logging.WARNING, # this logger keeps outputting even when set to warning + }, +} +# Register the appropriate modules +# need to call epi before tb to get bcg vax +# seed = random.randint(0, 50000) +seed = 32 # set seed for reproducibility +sim = Simulation(start_date=start_date, seed=seed, log_config=log_config, + show_progress_bar=True, resourcefilepath=resourcefilepath) +sim.register(*fullmodel(use_simplified_births=False, + module_kwargs={ + "SymptomManager": {"spurious_symptoms": True}, + "HealthSystem": {"disable": False, + "service_availability": ["*"], + "mode_appt_constraints": 1, + "cons_availability": "default", + "beds_availability": "all", + "ignore_priority": False, + "use_funded_or_actual_staffing": "actual"}, + }, +)) +# Run the simulation and flush the logger +sim.make_initial_population(n=popsize) +sim.simulate(end_date=end_date) +# parse the results +output = parse_log_file(sim.log_filepath) +# save the results, argument 'wb' means write using binary mode. use 'rb' for reading file +with open(outputpath / "default_run.pickle", "wb") as f: + # Pickle the 'data' dictionary using the highest protocol available. + pickle.dump(dict(output), f, pickle.HIGHEST_PROTOCOL) +with open(outputpath / "default_run.pickle", "rb") as f: + output = pickle.load(f) diff --git a/src/scripts/hiv_tb_Xin/hiv_tb_test_2DEC.py b/src/scripts/hiv_tb_Xin/hiv_tb_test_2DEC.py new file mode 100644 index 0000000000..1c215b6e75 --- /dev/null +++ b/src/scripts/hiv_tb_Xin/hiv_tb_test_2DEC.py @@ -0,0 +1,88 @@ +""" +Run the HIV/TB modules with intervention coverage specified at national level +save outputs for plotting (file: output_plots_tb.py) + """ +import datetime +import pickle +import random +from pathlib import Path +from tlo import Date, Simulation, logging +from tlo.analysis.utils import parse_log_file +from tlo.methods import ( # deviance_measure, + demography, + enhanced_lifestyle, + epi, + healthburden, + healthseekingbehaviour, + healthsystem, + hiv, + simplified_births, + symptommanager, + tb, +) +# Where will outputs go +outputpath = Path("./outputs") # folder for convenience of storing outputs +# date-stamp to label log files and any other outputs +datestamp = datetime.date.today().strftime("__%Y_%m_%d") +# The resource files +resourcefilepath = './resources' +# %% Run the simulation +start_date = Date(2010, 1, 1) +end_date = Date(2030, 1, 1) +popsize = 5000 +# scenario = 1 +# set up the log config +log_config = { + "filename": "test_runs", + "directory": outputpath, + "custom_levels": { + "*": logging.WARNING, + # "tlo.methods.deviance_measure": logging.INFO, + # "tlo.methods.epi": logging.INFO, + "tlo.methods.hiv": logging.INFO, + "tlo.methods.tb": logging.INFO, + "tlo.methods.demography": logging.INFO, + # "tlo.methods.demography.detail": logging.WARNING, + "tlo.methods.healthsystem.summary": logging.INFO, + # "tlo.methods.healthsystem": logging.INFO, + # "tlo.methods.healthburden": logging.INFO, + }, +} +# Register the appropriate modules +seed = random.randint(0, 50000) +# seed = 41728 # set seed for reproducibility +sim = Simulation(start_date=start_date, seed=seed, log_config=log_config, + show_progress_bar=True, resourcefilepath=resourcefilepath) +sim.register( + demography.Demography(), + simplified_births.SimplifiedBirths(), + enhanced_lifestyle.Lifestyle(), + healthsystem.HealthSystem(service_availability=["*"], # all treatment allowed + mode_appt_constraints=1, # mode of constraints to do with officer numbers and time + cons_availability="default", # mode for consumable constraints (if ignored, all consumables available) + ignore_priority=False, # do not use the priority information in HSI event to schedule + capabilities_coefficient=1.0, # multiplier for the capabilities of health officers + use_funded_or_actual_staffing="actual", # actual: use numbers/distribution of staff available currently + disable=False, # disables the healthsystem (no constraints and no logging) and every HSI runs + disable_and_reject_all=False, # disable healthsystem and no HSI runs + ), + symptommanager.SymptomManager(), + healthseekingbehaviour.HealthSeekingBehaviour(), + healthburden.HealthBurden(), + epi.Epi(), + hiv.Hiv(run_with_checks=False), + tb.Tb(), + # deviance_measure.Deviance(resourcefilepath=resourcefilepath), +) +# Run the simulation and flush the logger +sim.make_initial_population(n=popsize) +sim.simulate(end_date=end_date) +# parse the results +output = parse_log_file(sim.log_filepath) +# save the results, argument 'wb' means write using binary mode. use 'rb' for reading file +with open(outputpath / "default_run.pickle", "wb") as f: + # Pickle the 'data' dictionary using the highest protocol available. + pickle.dump(dict(output), f, pickle.HIGHEST_PROTOCOL) +# load the results +with open(outputpath / "default_run.pickle", "rb") as f: + output = pickle.load(f) diff --git a/src/tlo/methods/hpv.py b/src/tlo/methods/hpv.py new file mode 100644 index 0000000000..7f22e0f4c8 --- /dev/null +++ b/src/tlo/methods/hpv.py @@ -0,0 +1,917 @@ +from __future__ import annotations + +from pathlib import Path +from typing import TYPE_CHECKING, List, Optional + +import pandas as pd +import math +import numpy as np + +# from scripts.diarrhoea_analyses.analysis_diarrhoea_with_and_without_treatment import data +from tlo import DAYS_IN_YEAR, DateOffset, Module, Parameter, Property, Types, logging +from tlo.analysis.utils import get_counts_by_sex_and_age_group +from tlo.events import Event, IndividualScopeEventMixin, PopulationScopeEventMixin, RegularEvent +from tlo.lm import LinearModel, LinearModelType, Predictor +from tlo.methods import Metadata +from tlo.methods.causes import Cause +from tlo.methods.demography import InstantaneousDeath +from tlo.methods.hsi_event import HSI_Event +from tlo.methods.hsi_generic_first_appts import GenericFirstAppointmentsMixin +from tlo.methods.symptommanager import Symptom +from tlo.util import random_date, read_csv_files + +if TYPE_CHECKING: + from tlo.methods.hsi_generic_first_appts import HSIEventScheduler + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + +class HPV(Module, GenericFirstAppointmentsMixin): + """This is an HPV infection Process. + Groups: + g1 = HPV16/18 + g2 = other vaccine-covered high-risk HPV (31/33/45/52/58) + g3 = other high-risk HPV (35/39/51/56/59/68) + + It demonstrates the following behaviours in respect of the healthsystem module: + + - Registration of the disease module with healthsystem + - Reading DALY weights and reporting daly values related to this disease + - Health care seeking + - Usual HSI behaviour + - Restrictive requirements on the facility_level for the HSI_event + - Use of the SymptomManager + """ + + INIT_DEPENDENCIES = {'Demography', 'SymptomManager', 'Hiv'} + + OPTIONAL_INIT_DEPENDENCIES = {'HealthBurden'} + + # Declare Metadata + METADATA = { + Metadata.DISEASE_MODULE, + Metadata.USES_SYMPTOMMANAGER, + Metadata.USES_HEALTHSYSTEM, + Metadata.USES_HEALTHBURDEN, + Metadata.REPORTS_DISEASE_NUMBERS + } + + # Declare Causes of Death + CAUSES_OF_DEATH = {} + + # Declare Causes of Disability + CAUSES_OF_DISABILITY = {} + + HPV_GROUPS = ['hr1', 'hr2', 'hr3'] + AGE_BINS = [15, 20, 25, 35, 45, 55, 200] + AGE_LABELS = ['15_19', '20_24', '25_34', '35_44', '45_54', '55plus'] + + PARAMETERS = { + # ------------------ Initial prevalence ------------------ # + "init_prev_hpv_hr1": Parameter( + Types.REAL, + "Initial prevalence of hpv 16/18 infection", + ), + "init_prev_hpv_hr2": Parameter( + Types.REAL, + "Initial prevalence of HPV 31/33/45/52/58 infection", + ), + "init_prev_hpv_hr3": Parameter( + Types.REAL, + "Initial prevalence of other HR types" + ), + + # ------------------ HPV Transmission ------------------ # + # transmission coefficient for HPV Infection + "b_hpv": Parameter( + Types.REAL, + "Baseline transmission coefficient for HPV Infection", + ), + + # Modifiers + "rr_hpv_hiv_no_art": Parameter( + Types.REAL, + "Relative risk for HPV acquisition among HIV positive people who are not virally suppressed", + ), + "rr_hr1_vaccinated": Parameter( + Types.REAL, + "Relative risk for hr1 infection if vaccinated", + ), + "rr_hr2_vaccinated": Parameter( + Types.REAL, + "Relative risk for hr2 infection if vaccinated", + ), + "rr_hr3_vaccinated": Parameter( + Types.REAL, + "Relative risk for hr3 infection if vaccinated", + ), + + "rr_hpv_age50plus": Parameter( + Types.REAL, + "Relative risk multiplier for age >=50", + ), + + # ------------------ HPV Self-clear ------------------ # + # Weibull baseline + "median_clear_hr1": Parameter( + Types.REAL, + "Median months to self-clear for hr1 infection", + ), + "median_clear_hr2": Parameter( + Types.REAL, + "Median months to self-clear for hr2 infection", + ), + "median_clear_hr3": Parameter( + Types.REAL, + "Median months to self-clear for hr3 infection", + ), + "clear_shape": Parameter( + Types.REAL, + "Weibull shape parameter for HPV clearance duration", + ), + + # Modifiers + "rr_clear_hiv_no_art": Parameter( + Types.REAL, + "Rate ratio for HPV clearance among PLWH not on ART or not virally suppressed", + ), + + # age-mixing + "age_mixing_within": Parameter( + Types.REAL, + "Proportion of sexual mixing occurring within the same age group", + ), + "age_mixing_adjacent": Parameter( + Types.REAL, + "Proportion of sexual mixing occurring with adjacent age groups", + ), + "age_mixing_distant": Parameter( + Types.REAL, + "Proportion of sexual mixing occurring with non-adjacent age groups", + ), + "hpv_event_frequency_months": Parameter( + Types.INT, + "Frequency in months for updating HPV infection and clearance events", + ), + "persistent_threshold_months": Parameter( + Types.REAL, + "Duration threshold in months for defining persistent HPV infection", + ), + } + + PROPERTIES = { + 'hp_date_infected_hr1': Property( + Types.DATE, 'Date of infection of hr1'), + 'hp_date_infected_hr2': Property( + Types.DATE, 'Date of infection of hr2'), + 'hp_date_infected_hr3': Property( + Types.DATE, 'Date of infection of hr3'), + 'hp_duration_hr1': Property( + Types.REAL, 'Duration for current hr1 infection'), + 'hp_duration_hr2': Property( + Types.REAL, 'Duration for current hr2 infection'), + 'hp_duration_hr3': Property( + Types.REAL, 'Duration for current hr3 infection'), + 'hp_duration_all_clear': Property( + Types.REAL, 'Duration for current all HPV infection'), + 'hp_persistent_hr1': Property( + Types.BOOL, 'Persistent hr1 infection, duration >= 12 months'), + 'hp_persistent_hr2': Property( + Types.BOOL, 'Persistent hr2 infection, duration >= 12 months'), + 'hp_persistent_hr3': Property( + Types.BOOL, 'Persistent hr3 infection, duration >= 12 months'), + } + + def __init__(self, name=None): + super().__init__(name) + + def read_parameters(self, resourcefilepath: Optional[Path] = None): + """Read in parameters and do the registration of this module and its symptoms""" + self.load_parameters_from_dataframe( + read_csv_files(Path(resourcefilepath) / "ResourceFile_HPV", + files="parameter_values") + ) + + def _get_group_infected_series(self, group, index=None): + df = self.sim.population.props + date_col = f'hp_date_infected_{group}' + infected = df[date_col].notna() + + if index is not None: + infected = infected.loc[index] + + return infected.fillna(False).astype(bool) + + def _get_hpv_any_infected_series(self, index=None): + df = self.sim.population.props + date_cols = [f'hp_date_infected_{group}' for group in self.HPV_GROUPS] + infected_any = df[date_cols].notna().any(axis=1) + + if index is not None: + infected_any = infected_any.loc[index] + + return infected_any.fillna(False).astype(bool) + + def _hp_is_infected(self, person_id): + df = self.sim.population.props + for group in self.HPV_GROUPS: + if not pd.isna(df.at[person_id, f'hp_date_infected_{group}']): + return True + return False + + def _is_group_infected(self, person_id, group): + df = self.sim.population.props + return not pd.isna(df.at[person_id, f'hp_date_infected_{group}']) + + def _get_first_infection_date(self, person_id): + df = self.sim.population.props + infection_dates = [] + + for group in self.HPV_GROUPS: + date_inf = df.at[person_id, f'hp_date_infected_{group}'] + if not pd.isna(date_inf): + infection_dates.append(date_inf) + + return min(infection_dates) if infection_dates else pd.NaT + + def _get_hpv_group_set(self, person_id): + df = self.sim.population.props + current_groups = set() + + for group in self.HPV_GROUPS: + if self._is_group_infected(person_id, group): + current_groups.add(group) + + return current_groups + + def initialise_population(self, population): + df = population.props # a shortcut to the dataframe storing data for individuals + + # Set default for properties + df.loc[df.is_alive, f'hp_duration_all_clear'] = -1 + for group in self.HPV_GROUPS: + df.loc[df.is_alive, f'hp_date_infected_{group}'] = pd.NaT + df.loc[df.is_alive, f'hp_duration_{group}'] = -1 + df.loc[df.is_alive, f'hp_persistent_{group}'] = False + + eligible = df.index[df.is_alive & (df.age_years >= 15)] + + for group in self.HPV_GROUPS: + p_init = self.parameters[f'init_prev_hpv_{group}'] + u = self.rng.random(size=len(eligible)) + infected_this_group = eligible[u < p_init] + + if len(infected_this_group) == 0: + continue + + previous_infection = self.rng.randint( + low=0, + high=24, + size=len(infected_this_group),) + + infection_dates = [self.sim.date - DateOffset(months=int(months)) + for months in previous_infection + ] + + df.loc[infected_this_group, f'hp_date_infected_{group}'] = infection_dates + df.loc[infected_this_group, f'hp_duration_{group}'] = previous_infection.astype(float) + df.loc[infected_this_group, f'hp_persistent_{group}'] = (previous_infection >= self.parameters['persistent_threshold_months']) + + # Age mixing + def _get_age_group_series(self, ages): + return pd.cut( + ages, + bins=self.AGE_BINS, + labels=self.AGE_LABELS, + right=False # right side not included + ) + def _get_age_group(self, age_years): + for i in range(len(self.AGE_BINS)-1): + if self.AGE_BINS[i] <= age_years < self.AGE_BINS[i + 1]: + return self.AGE_LABELS[i] + return None + + def _build_age_mixing_matrix(self, within, adjacent, distant): + labels = self.AGE_LABELS + total = within + adjacent + distant + if not np.isclose(total, 1.0): + raise ValueError( + f"Age mixing parameters must sum to 1.0, but received {total}." + ) + + M = pd.DataFrame(0.0, index=labels, columns=labels, dtype=float) + + for i, label in enumerate(labels): + row = pd.Series(0.0, index=labels, dtype=float) + + # within-group + row[label] = within + + # adjacent groups + neighbors = [] + if i -1 >= 0: + neighbors.append(labels[i - 1]) + if i + 1 < len(labels): + neighbors.append(labels[i + 1]) + + if len(neighbors) > 0: + share_adj = adjacent / len(neighbors) + for nb in neighbors: + row[nb] = share_adj + + # distant group + distant_groups = [x for x in labels if x != label and x not in neighbors] + if len(distant_groups) > 0: + share_dist = distant / len(distant_groups) + for dg in distant_groups: + row[dg] = share_dist + + # normalize to exactly + row = row / row.sum() + M.loc[label] = row + return M + + # Time and clearance functions + def _months_since(self,start_date,end_date=None): + if pd.isna(start_date): + return None + + if end_date is None or pd.isna(end_date): + end_date = self.sim.date + + return max(0.0, (end_date - start_date).days / 30.5) + + def _get_infection_rr(self, person_id, group): + df = self.sim.population.props + p = self.parameters + + modifier = 1.0 + + age = df.at[person_id, 'age_years'] + if pd.isna(age) or age < 15: + return 0.0 + + if age >= 50: + modifier *= p['rr_hpv_age50plus'] + + if 'va_hpv' in df.columns: + va_hpv = df.at[person_id, 'va_hpv'] + if va_hpv in [1, 2]: + modifier *= p[f'rr_{group}_vaccinated'] + + if 'hv_inf' in df.columns: + hv_inf = df.at[person_id, 'hv_inf'] + if (not pd.isna(hv_inf)) and bool(hv_inf): + if 'hv_art' in df.columns: + hv_art = df.at[person_id, 'hv_art'] + if hv_art in ['not', 'on_not_VL_suppressed']: + modifier *= p['rr_hpv_hiv_no_art'] + else: + modifier *= p['rr_hpv_hiv_no_art'] + + return max(0.0, float(modifier)) + + def _get_clearance_rr(self, person_id): + df = self.sim.population.props + p = self.parameters + + # If HIV module is not registered, assume no HIV-related effect on HPV clearance + if 'Hiv' not in self.sim.modules: + return 1.0 + + if 'hv_inf' not in df.columns: + return 1.0 + + hv_inf = df.at[person_id, 'hv_inf'] + + if pd.isna(hv_inf) or (not bool(hv_inf)): + return 1.0 + + if 'hv_art' not in df.columns: + return p['rr_clear_hiv_no_art'] + + hv_art = df.at[person_id, 'hv_art'] + + if hv_art in ['not', 'on_not_VL_suppressed']: + return p['rr_clear_hiv_no_art'] + + return 1.0 + + def _get_clearance_probability(self, group, person_id, duration_months, interval_months): + p = self.parameters + + median = p[f'median_clear_{group}'] + shape = p['clear_shape'] + + # median = scale * (ln 2)^(1/shape) + scale = median / (math.log(2) ** (1.0 / shape)) + + t1 = max(0.0, float(duration_months)) + t0 = max(0.0, t1 - float(interval_months)) + + # Weibull baseline cumulative hazard increment over [t0, t1] + H0_t0 = (t0 / scale) ** shape + H0_t1 = (t1 / scale) ** shape + delta_H0 = max(0.0, H0_t1 - H0_t0) + + rr = self._get_clearance_rr(person_id) + + # p = 1 - exp(- rr * delta_H0) + p_clear = 1.0 - math.exp(-rr * delta_H0) + + return min(max(p_clear, 0.0), 1.0) + + def _add_new_infection_groups(self, person_id, new_groups): + if len(new_groups) == 0: + return + + df = self.sim.population.props + was_infected_before = self._hp_is_infected(person_id) + + # set infection date for new groups + for group in new_groups: + if self._is_group_infected(person_id, group): + continue + df.at[person_id, f'hp_date_infected_{group}'] = self.sim.date + df.at[person_id,f'hp_duration_{group}'] = 0 + df.at[person_id, f'hp_persistent_{group}'] = False + + # start a new HPV infection process only if the person was uninfected/ self-clear + if not was_infected_before: + df.at[person_id, 'hp_duration_all_clear'] = -1 + + def _clear_single_group(self, person_id, group): + """clear a single HPV group for a person""" + df = self.sim.population.props + first_infection_date = self._get_first_infection_date(person_id) + + df.at[person_id, f'hp_date_infected_{group}'] = pd.NaT + df.at[person_id, f'hp_duration_{group}'] = -1 + df.at[person_id, f'hp_persistent_{group}'] = False + + still_infected = self._hp_is_infected(person_id) + + if not still_infected: + overall_duration = self._months_since(first_infection_date, self.sim.date) + if overall_duration is not None and not pd.isna(overall_duration): + df.at[person_id, f'hp_duration_all_clear'] = float(overall_duration) + else: + df.at[person_id, f'hp_duration_all_clear'] = -1.0 + + def _update_persistence_status(self): + df = self.sim.population.props + threshold_months = self.parameters['persistent_threshold_months'] + eligible = df.is_alive & (df.age_years >= 15) + + for group in self.HPV_GROUPS: + date_col = f'hp_date_infected_{group}' + dur_col = f'hp_duration_{group}' + pers_col = f'hp_persistent_{group}' + + ineligible = ~eligible + df.loc[ineligible, dur_col] = -1.0 + df.loc[ineligible, pers_col] = False + + non_infected = eligible & df[date_col].isna() + df.loc[non_infected, dur_col] = -1 + df.loc[non_infected, pers_col] = False + + infected = eligible & df[date_col].notna() + infected_idx = df.index[infected] + + for person_id in infected_idx: + date_inf = df.at[person_id, date_col] + duration = self._months_since(date_inf, self.sim.date) + + if duration is None or pd.isna(duration): + df.at[person_id, dur_col] = -1.0 + df.at[person_id, pers_col] = False + continue + + duration = float(duration) + df.at[person_id, dur_col] = duration + df.at[person_id, pers_col] = duration >= threshold_months + + def initialise_simulation(self, sim): + """Get ready for simulation start. + + This method is called just before the main simulation loop begins, and after all + modules have read their parameters and the initial population has been created. + It is a good place to add initial events to the event queue. + """ + p = self.parameters + self.lm = {} + self.age_mixing_matrix = self._build_age_mixing_matrix( + within=p['age_mixing_within'], + adjacent=p['age_mixing_adjacent'], + distant=p['age_mixing_distant'] + ) + self._pre_logged_prev = {} + + event = HpvInfectionEvent( + self, + frequency_months=p['hpv_event_frequency_months'] + ) + sim.schedule_event( + event, + sim.date + DateOffset(months=p['hpv_event_frequency_months']) + ) + + sim.schedule_event( + HpvLoggingEvent(self), + sim.date + DateOffset(months=6) + ) + + def on_birth(self, mother_id, child_id): + + df = self.sim.population.props # shortcut to the population props dataframe + for group in self.HPV_GROUPS: + df.at[child_id, f'hp_date_infected_{group}'] = pd.NaT + df.at[child_id, f'hp_duration_{group}'] = -1 + df.at[child_id, 'hp_duration_all_clear'] = -1 + df.at[child_id, f'hp_persistent_{group}'] = False + + def report_daly_values(self): + # This must send back a pd.Series or pd.DataFrame that reports on the average daly-weights that have been + # experienced by persons in the previous month. Only rows for alive-persons must be returned. + # The names of the series of columns is taken to be the label of the cause of this disability. + # It will be recorded by the healthburden module as _. + + logger.debug(key="debug", data="This is hpv reporting my health values") + df = self.sim.population.props # shortcut to population properties dataframe + health_values = pd.Series(index=df.index[df.is_alive], data=0.0) + return health_values # returns the series + + def report_summary_stats(self): + df = self.sim.population.props + self._update_persistence_status() + + df_report = df.copy() + df_report['hp_is_infected'] = self._get_hpv_any_infected_series(index=df_report.index) + + summary = { + 'infected_any': get_counts_by_sex_and_age_group(df_report, 'hp_is_infected')} + + for group in self.HPV_GROUPS: + temp_col = f'hp_infected_{group}' + df_report[temp_col] = self._get_group_infected_series(group, index=df_report.index) + summary[f'infected_{group}'] = get_counts_by_sex_and_age_group(df_report, temp_col) + summary[f'persistent_{group}'] = get_counts_by_sex_and_age_group(df_report, f'hp_persistent_{group}') + + return summary + +class HpvInfectionEvent(RegularEvent, PopulationScopeEventMixin): + """This event is occurring regularly at one 6 months intervals and controls the infection process of HPV.""" + + def __init__(self, module, frequency_months): + self.frequency_months = int(frequency_months) + super().__init__(module, frequency=DateOffset(months=self.frequency_months)) + assert isinstance(module, HPV) + + def apply(self, population): + logger.debug(key='debug', data='This is HpvInfectionEvent, tracking the disease progression of the population.') + df = population.props + module = self.module + now = self.sim.date + + # 1. define eligible population + eligible = df.index[df.is_alive & (df.age_years >= 15)] + if len(eligible) == 0: + return + + interval_months = float(self.frequency_months) + interval_years = self.frequency_months / 12.0 + + # 2. self-clearance + infected_any = module._get_hpv_any_infected_series() + infected_idx = df.index[df.is_alive & (df.age_years >= 15)& infected_any] + + for person_id in infected_idx: + current_groups = module._get_hpv_group_set(person_id) + + for group in list(current_groups): + date_inf = df.at[person_id, f'hp_date_infected_{group}'] + if pd.isna(date_inf): + continue + + duration_months = module._months_since (date_inf, now) + if duration_months is None: + continue + + df.at[person_id, f'hp_duration_{group}'] = float(duration_months) + + p_clear = module._get_clearance_probability( + group=group, + person_id=person_id, + duration_months=duration_months, + interval_months=float(self.frequency_months) + ) + + if module.rng.random() < p_clear: + module._clear_single_group(person_id, group) + + module._update_persistence_status() + + # 3. recalculate prevalence by HPV group after clearance + df_alive = df.loc[df.is_alive & (df.age_years >= 15)].copy() + df_alive['age_group'] = module._get_age_group_series(df_alive['age_years']) + + for group in module.HPV_GROUPS: + df_alive[f'hp_infected_{group}'] = module._get_group_infected_series( + group, + index=df_alive.index + ) + + male_df = df_alive.loc[df_alive.sex == 'M'] + female_df = df_alive.loc[df_alive.sex == 'F'] + + prev_by_age_male = {} + prev_by_age_female = {} + + for group in module.HPV_GROUPS: + prev_by_age_male[group] = ( + male_df.groupby('age_group', observed=True)[f'hp_infected_{group}'] + .mean() + .reindex(module.AGE_LABELS, fill_value=0.0) + ) + prev_by_age_female[group] = ( + female_df.groupby('age_group', observed=True)[f'hp_infected_{group}'] + .mean() + .reindex(module.AGE_LABELS, fill_value=0.0) + ) + + # 4. new infection + for person_id in eligible: + sex = df.at[person_id,'sex'] + current_groups = module._get_hpv_group_set(person_id) + new_group = set() + + if sex == 'F': + source_prev_by_age = prev_by_age_male + elif sex == 'M': + source_prev_by_age = prev_by_age_female + else: + continue + + my_age_group = module._get_age_group(df.at[person_id,'age_years']) + if my_age_group is None: + continue + + mix_row = module.age_mixing_matrix.loc[my_age_group] + + for group in module.HPV_GROUPS: + if group in current_groups: + continue + + weighted_prev = float((mix_row * source_prev_by_age[group]).sum()) + + beta_name = f'b_hpv_{group}' + beta = module.parameters[beta_name] if beta_name in module.parameters else module.parameters['b_hpv'] + + modifier = module._get_infection_rr(person_id, group) + + lambda_inf = beta * weighted_prev * modifier + lambda_inf = max(lambda_inf, 0.0) + + p_inf = 1.0 - math.exp(-lambda_inf * interval_years) + p_inf = min(max(p_inf, 0.0), 1.0) + + if module.rng.random() < p_inf: + new_group.add(group) + + if len(new_group) > 0: + module._add_new_infection_groups(person_id, new_group) + + module._update_persistence_status() + +class HpvLoggingEvent(RegularEvent, PopulationScopeEventMixin): + def __init__(self, module): + """Produce a summmary of the numbers of people with respect to their 'hpv status'""" + self.repeat = 6 + super().__init__(module, frequency=DateOffset(months=self.repeat)) + assert isinstance(module, HPV) + + def apply(self, population): + # get some summary statistics + df = population.props + module = self.module + + module._update_persistence_status() + + eligible = df.index[df.is_alive & (df.age_years >= 15)] + log_data = {'EligibleN':int(len(eligible)),} + + if len(eligible) == 0: + logger.info(key='summary', data=log_data) + return + + sub = df.loc[eligible].copy() + sub['hp_is_infected'] = module._get_hpv_any_infected_series(index=sub.index) + + for hpv_group in module.HPV_GROUPS: + sub[f'hp_infected_{hpv_group}'] = module._get_group_infected_series( + hpv_group, + index=sub.index + ) + + sub['age_group'] = module._get_age_group_series(sub['age_years']) + sub['hiv_group'] = 'HIVneg' + + if 'hv_inf' in sub.columns: + sub.loc[sub['hv_inf'].fillna(False), 'hiv_group'] = 'HIVpos_unknown' + + if 'hv_art' in sub.columns: + sub.loc[sub['hv_inf'].fillna(False) & (sub['hv_art'] == 'not'), 'hiv_group'] = 'HIVpos_noART' + sub.loc[sub['hv_inf'].fillna(False) & ( + sub['hv_art'] == 'on_not_VL_suppressed'), 'hiv_group'] = 'HIVpos_unsupp' + sub.loc[ + sub['hv_inf'].fillna(False) & (sub['hv_art'] == 'on_VL_suppressed'), 'hiv_group'] = 'HIVpos_supp' + + # 1. Overall summary + total_inf = int(sub['hp_is_infected'].sum()) + log_data['TotalInf'] = total_inf + log_data['TotalPrev'] = sub['hp_is_infected'].mean() + + for sex_name, sex_df in [('M', sub.loc[sub.sex == 'M']), + ('F', sub.loc[sub.sex == 'F'])]: + n = len(sex_df) + log_data[f'{sex_name}_N'] = int(n) + log_data[f'{sex_name}_Inf'] = int(sex_df['hp_is_infected'].sum()) if n > 0 else 0 + log_data[f'{sex_name}_Prev'] = sex_df['hp_is_infected'].mean() if n > 0 else math.nan + + # 2. Prevalence by sex and age group + prev_snapshot = {} + + for sex_name, sex_df in [('All', sub), + ('M', sub.loc[sub.sex == 'M']), + ('F', sub.loc[sub.sex == 'F'])]: + + for age_group in module.AGE_LABELS: + age_df = sex_df.loc[sex_df['age_group'] == age_group] + n = len(age_df) + + log_data[f'Any_{sex_name}_{age_group}_N'] = int(n) + + if n == 0: + log_data[f'Any_{sex_name}_{age_group}_Inf'] = 0 + log_data[f'Any_{sex_name}_{age_group}_Prev'] = math.nan + + prev_snapshot[f'Any_{sex_name}_{age_group}_Prev'] = math.nan + + for hpv_group in module.HPV_GROUPS: + log_data[f'{hpv_group}_{sex_name}_{age_group}_Inf'] = 0 + log_data[f'{hpv_group}_{sex_name}_{age_group}_Prev'] = math.nan + + prev_snapshot[f'{hpv_group}_{sex_name}_{age_group}_Prev'] = math.nan + + continue + + any_inf = int(age_df['hp_is_infected'].sum()) + any_prev = age_df['hp_is_infected'].mean() + + log_data[f'Any_{sex_name}_{age_group}_Inf'] = any_inf + log_data[f'Any_{sex_name}_{age_group}_Prev'] = any_prev + prev_snapshot[f'Any_{sex_name}_{age_group}_Prev'] = any_prev + + for hpv_group in module.HPV_GROUPS: + inf_n = int(age_df[f'hp_infected_{hpv_group}'].sum()) + prev = age_df[f'hp_infected_{hpv_group}'].mean() + + log_data[f'{hpv_group}_{sex_name}_{age_group}_Inf'] = inf_n + log_data[f'{hpv_group}_{sex_name}_{age_group}_Prev'] = prev + prev_snapshot[f'{hpv_group}_{sex_name}_{age_group}_Prev'] = prev + + # 3. HIV + hiv_log_groups = [ + 'HIVneg', + 'HIVpos_unknown', + 'HIVpos_noART', + 'HIVpos_unsupp', + 'HIVpos_supp', + ] + + for hiv_group in hiv_log_groups: + log_data[f'Any_{hiv_group}_N'] = 0 + log_data[f'Any_{hiv_group}_Inf'] = 0 + log_data[f'Any_{hiv_group}_Prev'] = math.nan + + for hpv_group in module.HPV_GROUPS: + log_data[f'{hpv_group}_{hiv_group}_Prev'] = math.nan + + for hiv_group in hiv_log_groups: + hiv_df = sub.loc[sub['hiv_group'] == hiv_group] + n = len(hiv_df) + + log_data[f'Any_{hiv_group}_N'] = int(n) + + if n > 0: + log_data[f'Any_{hiv_group}_Inf'] = int(hiv_df['hp_is_infected'].sum()) + log_data[f'Any_{hiv_group}_Prev'] = float(hiv_df['hp_is_infected'].mean()) + + for hpv_group in module.HPV_GROUPS: + log_data[f'{hpv_group}_{hiv_group}_Prev'] = float( + hiv_df[f'hp_infected_{hpv_group}'].mean() + ) + else: + log_data[f'Any_{hiv_group}_Inf'] = 0 + log_data[f'Any_{hiv_group}_Prev'] = math.nan + + for hpv_group in module.HPV_GROUPS: + log_data[f'{hpv_group}_{hiv_group}_Prev'] = math.nan + + # 4. Delta + prev_logged = getattr(module, '_pre_logged_prev', {}) + for key, current_val in prev_snapshot.items(): + previous_val = prev_logged.get(key, math.nan) + if pd.isna(previous_val) or pd.isna(current_val): + log_data[f'{key}_Delta'] = math.nan + else: + log_data[f'{key}_Delta'] = current_val - previous_val + + module._pre_logged_prev = prev_snapshot + + # 5. multiplicity of infection + infection_people = sub.index[sub['hp_is_infected']] + n_group_1 = 0 + n_group_2 = 0 + n_group_3 = 0 + + male_n_group_1 = 0 + male_n_group_2 = 0 + male_n_group_3 = 0 + + female_n_group_1 = 0 + female_n_group_2 = 0 + female_n_group_3 = 0 + + for person_id in infection_people: + n_group = len(module._get_hpv_group_set(person_id)) + sex = df.at[person_id, 'sex'] + + if n_group == 1: + n_group_1 += 1 + if sex == 'M': + male_n_group_1 += 1 + elif sex =='F': + female_n_group_1 += 1 + + elif n_group == 2: + n_group_2 += 1 + if sex == 'M': + male_n_group_2 += 1 + elif sex =='F': + female_n_group_2 += 1 + + elif n_group == 3: + n_group_3 += 1 + if sex == 'M': + male_n_group_3 += 1 + elif sex =='F': + female_n_group_3 += 1 + + log_data['InfGroup1'] = n_group_1 + log_data['InfGroup2'] = n_group_2 + log_data['InfGroup3'] = n_group_3 + + log_data['MaleGroup1'] = male_n_group_1 + log_data['MaleGroup2'] = male_n_group_2 + log_data['MaleGroup3'] = male_n_group_3 + + log_data['FemaleGroup1'] = female_n_group_1 + log_data['FemaleGroup2'] = female_n_group_2 + log_data['FemaleGroup3'] = female_n_group_3 + + # 6. Persistent infection 统计 + for hpv_group in module.HPV_GROUPS: + pers_col = f'hp_persistent_{hpv_group}' + + if pers_col not in sub.columns: + continue + + persistent = sub[pers_col].fillna(False) + + log_data[f'{hpv_group}_Persistent12_N'] = int(persistent.sum()) + log_data[f'{hpv_group}_Persistent12_Prev'] = float(persistent.mean()) + + for sex_name, sex_df in [('M', sub.loc[sub.sex == 'M']), + ('F', sub.loc[sub.sex == 'F'])]: + n = len(sex_df) + if n > 0: + log_data[f'{hpv_group}_Persistent12_{sex_name}_Prev'] = float( + sex_df[pers_col].fillna(False).mean() + ) + else: + log_data[f'{hpv_group}_Persistent12_{sex_name}_Prev'] = math.nan + + for age_group in module.AGE_LABELS: + age_df = sub.loc[sub['age_group'] == age_group] + n = len(age_df) + if n > 0: + log_data[f'{hpv_group}_Persistent12_{age_group}_Prev'] = float( + age_df[pers_col].fillna(False).mean() + ) + else: + log_data[f'{hpv_group}_Persistent12_{age_group}_Prev'] = math.nan + + logger.info(key='summary', data=log_data) diff --git a/tests/test_docs_data/test_HPV.py b/tests/test_docs_data/test_HPV.py new file mode 100644 index 0000000000..2d5435df86 --- /dev/null +++ b/tests/test_docs_data/test_HPV.py @@ -0,0 +1,332 @@ +import os +import warnings +from pathlib import Path + +import pandas as pd +import pytest + +from tlo import Date, Simulation, logging +from tlo.methods import ( + demography, + enhanced_lifestyle, + epi, + healthburden, + healthseekingbehaviour, + healthsystem, + hpv, + hiv, + simplified_births, + symptommanager, +) + +try: + resourcefilepath = Path(os.path.dirname(__file__)) / "../resources" +except NameError: + resourcefilepath = "resources" + +log_config = { + "filename": "hpv_test", # The name of the output file (a timestamp will be appended). + "directory": "./outputs/", # The default output path is `./outputs`. Change it here, if necessary + "custom_levels": { # Customise the output of specific loggers. They are applied in order: + "*": logging.WARNING, # Asterisk matches all loggers - we set the default level to WARNING + "tlo.methods.hpv": logging.INFO, + "tlo.methods.healthsystem": logging.INFO, + "tlo.methods.demography": logging.INFO + } +} + +HPV_GROUPS = hpv.HPV.HPV_GROUPS +DATE_COLS = [f"hp_date_infected_{group}" for group in HPV_GROUPS] + +def make_sim(seed,log_config=None): + start_date = Date(2010, 1, 1) + sim = Simulation(start_date=start_date, seed=seed, log_config=log_config, resourcefilepath=resourcefilepath) + + # Register the appropriate modules + sim.register( + demography.Demography(), + simplified_births.SimplifiedBirths(), + enhanced_lifestyle.Lifestyle(), + symptommanager.SymptomManager(), + healthseekingbehaviour.HealthSeekingBehaviour(), + healthburden.HealthBurden(), + healthsystem.HealthSystem( + disable=True, # disables the health system constraints so all HSI events run + ), + epi.Epi(), + hiv.Hiv(), + hpv.HPV(), + ) + + return sim + +@pytest.fixture +def sim(seed): + return make_sim(seed=seed, log_config=None) + +def check_dtypes(simulation): + """ + Check that population property dtypes remain consistent. + """ + df = simulation.population.props + orig = simulation.population.new_row + assert (df.dtypes == orig.dtypes).all() + +def get_any_hpv_infected(df): + """Return whether each person is infected with any HPV group.""" + return df[DATE_COLS].notna().any(axis=1) + + +def assert_hpv_state_consistent(simulation): + """Check that HPV date-based states, durations, and persistence flags are internally consistent.""" + + module = simulation.modules["HPV"] + df = simulation.population.props + + module._update_persistence_status() + + eligible = df.is_alive & (df.age_years >= 15) + + for group in HPV_GROUPS: + date_col = f"hp_date_infected_{group}" + dur_col = f"hp_duration_{group}" + pers_col = f"hp_persistent_{group}" + + infected = df[date_col].notna() + non_infected = df[date_col].isna() + + # Non-infected people should not be persistent + assert not df.loc[non_infected, pers_col].fillna(False).any() + + # Eligible infected people should have non-negative duration + assert (df.loc[eligible & infected, dur_col] >= 0).all() + + # Eligible non-infected people should have duration -1.0 + assert (df.loc[eligible & non_infected, dur_col] == -1.0).all() + + # Persistent people must also be infected + assert df.loc[df[pers_col].fillna(False), date_col].notna().all() + + # Persistent status should match duration threshold + threshold = module.parameters["persistent_threshold_months"] + expected_persistent = eligible & infected & (df[dur_col] >= threshold) + observed_persistent = df[pers_col].fillna(False) + + assert (observed_persistent == expected_persistent).all() + + +def test_hpv_initial_population_date_based_state(sim): + + module = sim.modules["HPV"] + module.parameters["init_prev_hpv_hr1"] = 1.0 + module.parameters["init_prev_hpv_hr2"] = 0.0 + module.parameters["init_prev_hpv_hr3"] = 0.0 + + sim.make_initial_population(n=500) + df = sim.population.props + + eligible = df.is_alive & (df.age_years >= 15) + under15 = df.is_alive & (df.age_years < 15) + + # All eligible people should be infected with hr1 + assert df.loc[eligible, "hp_date_infected_hr1"].notna().all() + + # Nobody should be infected with hr2/hr3 + assert df.loc[df.is_alive, "hp_date_infected_hr2"].isna().all() + assert df.loc[df.is_alive, "hp_date_infected_hr3"].isna().all() + + # Under-15 people should not be infected + assert df.loc[under15, "hp_date_infected_hr1"].isna().all() + + # Initial infection durations should be between 0 and 23 months + assert (df.loc[eligible, "hp_duration_hr1"] >= 0).all() + assert (df.loc[eligible, "hp_duration_hr1"] < 24).all() + + # Persistent status should match duration threshold + threshold = module.parameters["persistent_threshold_months"] + expected_persistent = df.loc[eligible, "hp_duration_hr1"] >= threshold + observed_persistent = df.loc[eligible, "hp_persistent_hr1"] + + assert (observed_persistent == expected_persistent).all() + + check_dtypes(sim) + +def test_hpv_add_and_clear_single_group(sim): + + module = sim.modules["HPV"] + module.parameters["init_prev_hpv_hr1"] = 0.0 + module.parameters["init_prev_hpv_hr2"] = 0.0 + module.parameters["init_prev_hpv_hr3"] = 0.0 + + sim.make_initial_population(n=500) + df = sim.population.props + + eligible_idx = df.index[df.is_alive & (df.age_years >= 15)] + + if len(eligible_idx) == 0: + pytest.skip("No eligible person aged 15+ in the test population.") + + person_id = eligible_idx[0] + + # Initially not infected + assert not module._hp_is_infected(person_id) + + # Add hr1 infection + module._add_new_infection_groups(person_id, {"hr1"}) + + assert module._is_group_infected(person_id, "hr1") + assert df.at[person_id, "hp_date_infected_hr1"] == sim.date + assert df.at[person_id, "hp_duration_hr1"] == 0 + assert not df.at[person_id, "hp_persistent_hr1"] + + # Clear hr1 infection + module._clear_single_group(person_id, "hr1") + + assert not module._is_group_infected(person_id, "hr1") + assert pd.isna(df.at[person_id, "hp_date_infected_hr1"]) + assert df.at[person_id, "hp_duration_hr1"] == -1 + assert not df.at[person_id, "hp_persistent_hr1"] + + +def test_hpv_clearance_probability_in_valid_range(sim): + + module = sim.modules["HPV"] + + module.parameters["init_prev_hpv_hr1"] = 0.0 + module.parameters["init_prev_hpv_hr2"] = 0.0 + module.parameters["init_prev_hpv_hr3"] = 0.0 + + sim.make_initial_population(n=500) + df = sim.population.props + + eligible_idx = df.index[df.is_alive & (df.age_years >= 15)] + + if len(eligible_idx) == 0: + pytest.skip("No eligible person aged 15+ in the test population.") + + person_id = eligible_idx[0] + + p_clear = module._get_clearance_probability( + group="hr1", + person_id=person_id, + duration_months=12.0, + interval_months=6.0, + ) + + assert 0.0 <= p_clear <= 1.0 + + +def test_hiv_can_modify_hpv_clearance_probability(sim): + + module = sim.modules["HPV"] + + module.parameters["init_prev_hpv_hr1"] = 0.0 + module.parameters["init_prev_hpv_hr2"] = 0.0 + module.parameters["init_prev_hpv_hr3"] = 0.0 + module.parameters["rr_clear_hiv_no_art"] = 0.5 + + sim.make_initial_population(n=500) + df = sim.population.props + + eligible_idx = df.index[df.is_alive & (df.age_years >= 15)] + + if len(eligible_idx) == 0: + pytest.skip("No eligible person aged 15+ in the test population.") + + person_id = eligible_idx[0] + + # Baseline: HIV negative + if "hv_inf" in df.columns: + df.at[person_id, "hv_inf"] = False + + p_clear_baseline = module._get_clearance_probability( + group="hr1", + person_id=person_id, + duration_months=12.0, + interval_months=6.0, + ) + + # HIV positive and not on ART / not virally suppressed + if "hv_inf" not in df.columns: + pytest.skip("HIV infection column hv_inf not available.") + + df.at[person_id, "hv_inf"] = True + + if "hv_art" in df.columns: + df.at[person_id, "hv_art"] = "not" + + p_clear_hiv = module._get_clearance_probability( + group="hr1", + person_id=person_id, + duration_months=12.0, + interval_months=6.0, + ) + + assert 0.0 <= p_clear_hiv <= 1.0 + assert p_clear_hiv <= p_clear_baseline + + +@pytest.mark.slow +def test_hpv_simulation_runs_and_states_remain_consistent(sim): + + module = sim.modules["HPV"] + + module.parameters["init_prev_hpv_hr1"] = 0.2 + module.parameters["init_prev_hpv_hr2"] = 0.2 + module.parameters["init_prev_hpv_hr3"] = 0.2 + + popsize = 1000 + end_date = Date(2011, 1, 1) + + sim.make_initial_population(n=popsize) + check_dtypes(sim) + + sim.simulate(end_date=end_date) + check_dtypes(sim) + + assert_hpv_state_consistent(sim) + + df = sim.population.props + alive_15plus = df.is_alive & (df.age_years >= 15) + + any_hpv = get_any_hpv_infected(df) + total_prev = any_hpv.loc[alive_15plus].mean() + + assert 0.0 <= total_prev <= 1.0 + + +@pytest.mark.slow +def test_hpv_logging_columns_are_consistent(seed, tmp_path): + + test_log_config = { + "filename": "hpv_test", + "directory": tmp_path, + "custom_levels": { + "*": logging.WARNING, + "tlo.methods.hpv": logging.INFO, + }, + } + + sim = make_sim(seed=seed, log_config=test_log_config) + + module = sim.modules["HPV"] + module.parameters["init_prev_hpv_hr1"] = 0.2 + module.parameters["init_prev_hpv_hr2"] = 0.2 + module.parameters["init_prev_hpv_hr3"] = 0.2 + + sim.make_initial_population(n=1000) + + with warnings.catch_warnings(record=True) as caught_warnings: + warnings.simplefilter("always") + sim.simulate(end_date=Date(2012, 1, 1)) + + inconsistent_column_warnings = [ + w for w in caught_warnings + if ( + "InconsistentLoggedColumnsWarning" in w.category.__name__ + or "Inconsistent columns in logged values" in str(w.message) + ) + ] + + assert len(inconsistent_column_warnings) == 0 +