From 6bfabb41917755a839475fa1714981c37798747f Mon Sep 17 00:00:00 2001 From: Asif Tamuri Date: Sun, 1 Mar 2026 22:31:07 +0000 Subject: [PATCH 1/3] Improve performance by reducing unnecessary data access - don't convert mother+newborn info dictionaries to dataframe (we only need one entry) - don't create unnecessary series/dataframes - get only the required properties for the model's predict function - update predict function to operate on single individual --- src/tlo/methods/postnatal_supervisor.py | 33 ++++++++++++---------- src/tlo/methods/postnatal_supervisor_lm.py | 22 ++++++++++++++- 2 files changed, 39 insertions(+), 16 deletions(-) diff --git a/src/tlo/methods/postnatal_supervisor.py b/src/tlo/methods/postnatal_supervisor.py index c29e63f9d7..4e232468d7 100644 --- a/src/tlo/methods/postnatal_supervisor.py +++ b/src/tlo/methods/postnatal_supervisor.py @@ -739,31 +739,34 @@ def apply_risk_of_neonatal_complications_in_week_one(self, child_id, mother_id): :return: """ df = self.sim.population.props - mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info - nci = self.sim.modules['NewbornOutcomes'].newborn_care_info - mni_df = pd.DataFrame.from_dict(mni, orient='index') - nci_df = pd.DataFrame.from_dict(nci, orient='index') + pregnancy_supervisor = self.sim.modules['PregnancySupervisor'] + mother_and_newborn_info = pregnancy_supervisor.mother_and_newborn_info + newborn_care_info = self.sim.modules['NewbornOutcomes'].newborn_care_info - # Set external variables used in the linear model equation - maternal_prom = pd.Series(df.at[mother_id, 'ps_premature_rupture_of_membranes'], index=df.loc[[child_id]].index) - received_abx_for_prom = pd.Series(nci_df.at[child_id, 'abx_for_prom_given'], index=df.loc[[child_id]].index) + maternal_prom = df.at[mother_id, 'ps_premature_rupture_of_membranes'] + received_abx_for_prom = newborn_care_info[child_id]['abx_for_prom_given'] - if mother_id in mni_df.index: - chorio_in_preg = pd.Series(mni_df.at[mother_id, 'chorio_in_preg'], index=df.loc[[child_id]].index) + if mother_id in mother_and_newborn_info: + chorio_in_preg = mother_and_newborn_info[mother_id]['chorio_in_preg'] else: - chorio_in_preg = pd.Series(False, index=df.loc[[child_id]].index) + chorio_in_preg = False # We then apply a risk that this womans newborn will develop sepsis during week one risk_eons = self.pn_linear_models['early_onset_neonatal_sepsis_week_1'].predict( - df.loc[[child_id]], received_abx_for_prom=received_abx_for_prom, + df.loc[ + [child_id], + ['nb_early_preterm', 'nb_late_preterm', 'nb_clean_birth', 'nb_early_init_breastfeeding'] + ], + received_abx_for_prom=received_abx_for_prom, maternal_chorioamnionitis=chorio_in_preg, - maternal_prom=maternal_prom)[child_id] + maternal_prom=maternal_prom + )[child_id] - # Update the df, mni and log the case + # Update the df, mother_and_newborn_info and log the case if self.rng.random_sample() < risk_eons: df.at[child_id, 'pn_sepsis_early_neonatal'] = True - self.sim.modules['NewbornOutcomes'].newborn_care_info[child_id]['sepsis_postnatal'] = True - self.sim.modules['PregnancySupervisor'].mnh_outcome_counter['early_onset_sepsis'] += 1 + newborn_care_info[child_id]['sepsis_postnatal'] = True + pregnancy_supervisor.mnh_outcome_counter['early_onset_sepsis'] += 1 def set_postnatal_complications_neonates(self, upper_and_lower_day_limits): """ diff --git a/src/tlo/methods/postnatal_supervisor_lm.py b/src/tlo/methods/postnatal_supervisor_lm.py index e5abad49eb..ada2d79541 100644 --- a/src/tlo/methods/postnatal_supervisor_lm.py +++ b/src/tlo/methods/postnatal_supervisor_lm.py @@ -112,7 +112,27 @@ def predict_early_onset_neonatal_sepsis_week_1(self, df, rng=None, **externals): antibiotic therapy for PROM, clean birth practices, cord care and early initiation of breastfeeding """ params = self.parameters - result = pd.Series(data=params['prob_early_onset_neonatal_sepsis_week_1'], index=df.index) + series = df.iloc[0] + + result = params['prob_early_onset_neonatal_sepsis_week_1'] + if externals['maternal_chorioamnionitis']: + result *= params['rr_eons_maternal_chorio'] + if externals['maternal_prom']: + result *= params['rr_eons_maternal_prom'] + if series.nb_early_preterm: + result *= params['rr_eons_preterm_neonate'] + if series.nb_late_preterm: + result *= params['rr_eons_preterm_neonate'] + + if externals['received_abx_for_prom']: + result *= params['treatment_effect_abx_prom'] + if series.nb_clean_birth: + result *= params['treatment_effect_clean_birth'] + if series.nb_early_init_breastfeeding: + result *= params['treatment_effect_early_init_bf'] + + return pd.Series(data=result, index=df.index) + result[externals['maternal_chorioamnionitis']] *= params['rr_eons_maternal_chorio'] result[externals['maternal_prom']] *= params['rr_eons_maternal_prom'] From 80bfd986531af304030f8431c3bb076dce6ceb73 Mon Sep 17 00:00:00 2001 From: Asif Tamuri Date: Mon, 2 Mar 2026 22:35:46 +0000 Subject: [PATCH 2/3] Reduce unnecessary data access, and don't repeatedly get new mother dataframes for linear models --- src/tlo/methods/postnatal_supervisor.py | 59 +++++++++++++++++-------- 1 file changed, 41 insertions(+), 18 deletions(-) diff --git a/src/tlo/methods/postnatal_supervisor.py b/src/tlo/methods/postnatal_supervisor.py index 4e232468d7..0c2d610448 100644 --- a/src/tlo/methods/postnatal_supervisor.py +++ b/src/tlo/methods/postnatal_supervisor.py @@ -1030,13 +1030,33 @@ def __init__(self, module, individual_id): def apply(self, individual_id): df = self.sim.population.props + + if not df.at[individual_id, 'is_alive']: + return + params = self.module.current_parameters mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info store_dalys_in_mni = pregnancy_helper_functions.store_dalys_in_mni - mother = df.loc[individual_id] - if not mother.is_alive: - return + properties = ['li_bmi', + 'la_is_postpartum', 'la_date_most_recent_delivery', 'la_iron_folic_acid_postnatal', + 'pn_anaemia_following_pregnancy', 'pn_htn_disorders'] + + if 'Hiv' in self.module.sim.modules: + properties += ['hv_inf', 'hv_art'] + + if 'Malaria' in self.module.sim.modules: + properties += ['ma_is_infected'] + + if 'CardioMetabolicDisorders' in self.module.sim.modules: + properties += ['nc_hypertension', 'nc_diabetes'] + + mother_df = df.loc[ + [individual_id], + properties + ] + + mother = mother_df.iloc[0] # Run some checks on the mothers arriving to this event after delivery if (not mother.la_is_postpartum or @@ -1055,14 +1075,14 @@ def apply(self, individual_id): # -----------------------------------MATERNAL SEPSIS -------------------------------------------------------- # Define external variable for linear model - mode_of_delivery = pd.Series(mni[individual_id]['mode_of_delivery'], index=df.loc[[individual_id]].index) + mode_of_delivery = pd.Series(mni[individual_id]['mode_of_delivery'], index=[individual_id]) # Determine individual risk of sepsis for each possible cause risk_sepsis_endometritis = self.module.pn_linear_models['sepsis_endometritis_late_postpartum'].predict( - df.loc[[individual_id]], mode_of_delivery=mode_of_delivery)[individual_id] + mother_df, mode_of_delivery=mode_of_delivery)[individual_id] risk_sepsis_skin_soft_tissue = self.module.pn_linear_models['sepsis_sst_late_postpartum'].predict( - df.loc[[individual_id]], mode_of_delivery=mode_of_delivery)[individual_id] + mother_df, mode_of_delivery=mode_of_delivery)[individual_id] risk_sepsis_urinary_tract = params['prob_late_sepsis_urinary_tract'] @@ -1083,10 +1103,10 @@ def apply(self, individual_id): # ---------------------------------------- SECONDARY PPH ------------------------------------------------ # Next we apply risk of secondary postpartum bleeding, first define external variables - endometritis = pd.Series(mni[individual_id]['endo_pp'], index=df.loc[[individual_id]].index) + endometritis = pd.Series(mni[individual_id]['endo_pp'], index=[individual_id]) - risk_secondary_pph = self.module.pn_linear_models['secondary_postpartum_haem'].predict(df.loc[[ - individual_id]], endometritis=endometritis)[individual_id] + risk_secondary_pph = self.module.pn_linear_models['secondary_postpartum_haem'].predict( + mother_df, endometritis=endometritis)[individual_id] if risk_secondary_pph > self.module.rng.random_sample(): df.at[individual_id, 'pn_postpartum_haem_secondary'] = True @@ -1097,7 +1117,7 @@ def apply(self, individual_id): # And then risk of developing anaemia... if mother.pn_anaemia_following_pregnancy == 'none': risk_anaemia_after_pregnancy = self.module.pn_linear_models['anaemia_after_pregnancy'].predict( - df.loc[[individual_id]])[individual_id] + mother_df)[individual_id] if risk_anaemia_after_pregnancy > self.module.rng.random_sample(): random_choice_severity = self.module.rng.choice(['mild', 'moderate', 'severe'], @@ -1144,7 +1164,7 @@ def apply(self, individual_id): current_status = df.loc[[individual_id], 'pn_htn_disorders'] new_status = util.transition_states(current_status, prob_matrix, self.module.rng) - df.loc[[individual_id], "pn_htn_disorders"] = new_status + df.at[individual_id, "pn_htn_disorders"] = new_status[individual_id] def log_new_progressed_cases(disease): assess_status_change = (current_status != disease) & (new_status == disease) @@ -1167,8 +1187,8 @@ def log_new_progressed_cases(disease): # ---------------------------- RISK OF POSTPARTUM PRE-ECLAMPSIA/HYPERTENSION ---------------------------- # Women who are normatensive after delivery may develop new hypertension for the first time after birth if df.at[individual_id, 'pn_htn_disorders'] == 'none': - risk_pe_after_pregnancy = self.module.pn_linear_models['pre_eclampsia_pn'].predict(df.loc[[ - individual_id]])[individual_id] + risk_pe_after_pregnancy = self.module.pn_linear_models['pre_eclampsia_pn'].predict( + mother_df)[individual_id] if risk_pe_after_pregnancy > self.module.rng.random_sample(): df.at[individual_id, 'pn_htn_disorders'] = 'mild_pre_eclamp' @@ -1193,12 +1213,15 @@ def log_new_progressed_cases(disease): # anyway she will attend now. If she was not predicted to attend but now develops complications she may # choose to seek care - if (df.at[individual_id, 'pn_sepsis_late_postpartum'] or + if ( + df.at[individual_id, 'pn_sepsis_late_postpartum'] or df.at[individual_id, 'pn_postpartum_haem_secondary'] or - ((df.at[individual_id, 'pn_htn_disorders'] == 'severe_pre_eclamp') and - mni[individual_id]['new_onset_spe']) or - (df.at[individual_id, 'pn_htn_disorders'] == 'eclampsia')): - + ( + (df.at[individual_id, 'pn_htn_disorders'] == 'severe_pre_eclamp') and + mni[individual_id]['new_onset_spe'] + ) or + (df.at[individual_id, 'pn_htn_disorders'] == 'eclampsia') + ): # We assume the probability of care seeking is higher in women with complications if (mni[individual_id]['will_receive_pnc'] == 'late') or (self.module.rng.random_sample() < params['prob_care_seeking_postnatal_emergency']): From f9535dfe25223a1979debd6046dd9f423226e954 Mon Sep 17 00:00:00 2001 From: Asif Tamuri Date: Fri, 6 Mar 2026 09:52:57 +0000 Subject: [PATCH 3/3] Optimise postnatal_supervisor by eliminating DataFrame conversion and reducing data access - Access mother_and_newborn_info dict directly per person instead of DataFrame - Build Series from lists rather than creating intermediate DataFrames - Remove redundant Series wrapper for DataFrame column access --- src/tlo/methods/postnatal_supervisor.py | 28 ++++++++++--------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/src/tlo/methods/postnatal_supervisor.py b/src/tlo/methods/postnatal_supervisor.py index 0c2d610448..499584f94e 100644 --- a/src/tlo/methods/postnatal_supervisor.py +++ b/src/tlo/methods/postnatal_supervisor.py @@ -438,27 +438,21 @@ def apply_linear_model(self, lm, df): :return: Series with same index containing outcomes (bool) """ mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info - mni_df = pd.DataFrame.from_dict(mni, orient='index') - # Here we define the external variables as series to pass to the linear model - mode_of_delivery = pd.Series(False, index=df.index) - received_abx_for_prom = pd.Series(False, index=df.index) - endometritis = pd.Series(False, index=df.index) - chorio_in_preg = pd.Series(False, index=df.index) + keys = ['mode_of_delivery', 'abx_for_prom_given', 'endo_pp', 'chorio_in_preg'] + data = {key: [] for key in keys} - if 'mode_of_delivery' in mni_df.columns: - mode_of_delivery = pd.Series(mni_df['mode_of_delivery'], index=df.index) + for person in df.index: + person_data = mni.get(person, {}) + for key in keys: + data[key].append(person_data.get(key, False)) - if 'abx_for_prom_given' in mni_df.columns: - received_abx_for_prom = pd.Series(mni_df['abx_for_prom_given'], index=df.index) + mode_of_delivery = pd.Series(data['mode_of_delivery'], index=df.index) + received_abx_for_prom = pd.Series(data['abx_for_prom_given'], index=df.index) + endometritis = pd.Series(data['endo_pp'], index=df.index) + chorio_in_preg = pd.Series(data['chorio_in_preg'], index=df.index) - if 'chorio_in_preg' in mni_df.columns: - chorio_in_preg = pd.Series(mni_df['chorio_in_preg'], index=df.index) - - if 'endo_pp' in mni_df.columns: - endometritis = pd.Series(mni_df['endo_pp'], index=df.index) - - maternal_prom = pd.Series(df['ps_premature_rupture_of_membranes'], index=df.index) + maternal_prom = df.ps_premature_rupture_of_membranes return self.rng.random_sample(len(df)) < lm.predict(df, mode_of_delivery=mode_of_delivery,