tompollard · tompollard · Apr 10, 2025 · Apr 10, 2025 · Apr 10, 2025 · Apr 10, 2025
diff --git a/tableone/statistics.py b/tableone/statistics.py
@@ -114,8 +114,8 @@ def _p_test(self, v: str,
                 is_categorical: bool,
                 is_normal: bool,
                 min_observed: int,
-                catlevels: list,
-                h_test: dict):
+                h_test: dict,
+                ttest_equal_var: bool):
         """
         Compute P-Values.
 
@@ -133,8 +133,6 @@ def _p_test(self, v: str,
                 True if the variable is normally distributed.
             min_observed : int
                 Minimum number of values across groups for the variable.
-            catlevels : list
-                Sorted list of levels for categorical variables.
 
         Returns
         ----------
@@ -165,9 +163,9 @@ def _p_test(self, v: str,
         # continuous
         if (is_continuous and is_normal and len(grouped_data) == 2
                 and min_observed >= 2):
-            ptest = 'Two Sample T-test'
+            ptest = 'Welch’s T-test' if not ttest_equal_var else 'Pooled T-test'
             test_stat, pval = stats.ttest_ind(*grouped_data.values(),
-                                              equal_var=False,
+                                              equal_var=ttest_equal_var,
                                               nan_policy="omit")
         elif is_continuous and is_normal:
             # normally distributed

diff --git a/tableone/tableone.py b/tableone/tableone.py
@@ -76,15 +76,15 @@ class TableOne:
         If the argument is set to None (or omitted), we attempt to detect
         continuous variables. Set to an empty list to indicate explicitly
         that there are no variables of this type to be included.
-    groupby : str, optional
-        Optional column for stratifying the final table (default: None).
-    nonnormal : list, optional
-        List of columns that contain non-normal variables (default: None).
+    groupby : str, default: None
+        Optional column for stratifying the final table.
+    nonnormal : list, default: None
+        List of columns that contain non-normal variables.
     min_max: list, optional
         List of variables that should report minimum and maximum, instead of
         standard deviation (for normal) or Q1-Q3 (for non-normal).
-    pval : bool, optional
-        Display computed P-Values (default: False).
+    pval : bool, default: False
+        Display computed P-Values.
     pval_adjust : str, optional
         Method used to adjust P-Values for multiple testing.
         The P-values from the unadjusted table (default when pval=True)
@@ -109,17 +109,21 @@ class TableOne:
         Threshold below which p-values are marked with an asterisk (*).
         For example, if set to 0.05, all p-values less than 0.05 will be
         displayed with a trailing asterisk (e.g., '0.012*').
-    htest_name : bool, optional
-        Display a column with the names of hypothesis tests (default: False).
+    htest_name : bool, default: False
+        Display a column with the names of hypothesis tests.
     htest : dict, optional
         Dictionary of custom hypothesis tests. Keys are variable names and
         values are functions. Functions must take a list of Numpy Arrays as
         the input argument and must return a test result.
         e.g. htest = {'age': myfunc}
-    missing : bool, optional
-        Display a count of null values (default: True).
-    ddof : int, optional
-        Degrees of freedom for standard deviation calculations (default: 1).
+    ttest_equal_var : bool, default: False
+        Whether to assume equal population variances in two-sample t-tests.
+        If False (default), Welch's t-test is used, which is more robust to
+        unequal variances; if True, the pooled-variance t-test is used.
+    missing : bool, default: True
+        Display a count of null values.
+    ddof : int, default: 1
+        Degrees of freedom for standard deviation calculations.
     rename : dict, optional
         Dictionary of alternative names for variables.
         e.g. `rename = {'sex':'gender', 'trt':'treatment'}`
@@ -135,42 +139,38 @@ class TableOne:
     order : dict, optional
         Specify an order for categorical variables. Key is the variable, value
         is a list of values in order.  {e.g. 'sex': ['f', 'm', 'other']}
-    label_suffix : bool, optional
+    label_suffix : bool, default: True
         Append summary type (e.g. "mean (SD); median [Q1,Q3], n (%); ") to the
-        row label (default: True).
+        row label.
     decimals : int or dict, optional
         Number of decimal places to display. An integer applies the rule to all
         variables (default: 1). A dictionary (e.g. `decimals = {'age': 0)`)
         applies the rule per variable, defaulting to 1 place for unspecified
         variables. For continuous variables, applies to all summary statistics
         (e.g. mean and standard deviation). For categorical variables, applies
         to percentage only.
-    overall : bool, optional
+    overall : bool, default: True
         If True, add an "overall" column to the table. Smd and p-value
         calculations are performed only using stratified columns.
     row_percent : bool, optional
         If True, compute "n (%)" percentages for categorical variables across
         "groupby" rows rather than columns.
-    display_all : bool, optional
+    display_all : bool, default: False
         If True, set pd. display_options to display all columns and rows.
-        (default: False)
-    dip_test : bool, optional
+    dip_test : bool, default: False
         Run Hartigan's Dip Test for multimodality. If variables are found to
         have multimodal distributions, a remark will be added below the
         Table 1.
-        (default: False)
-    normal_test : bool, optional
+    normal_test : bool, default: False
         Test the null hypothesis that a sample come from a normal distribution.
         Uses scipy.stats.normaltest. If variables are found to have non-normal
         distributions, a remark will be added below the Table 1.
-        (default: False)
-    tukey_test : bool, optional
+    tukey_test : bool, default: False
         Run Tukey's test for far outliers. If variables are found to
         have far outliers, a remark will be added below the Table 1.
-        (default: False)
-    include_null : bool, optional
+    include_null : bool, default: True
         Include None/Null values for categorical variables by treating them as a
-        category level. (default: True)
+        category level.
 
 
     Attributes
@@ -225,7 +225,8 @@ def __init__(self, data: pd.DataFrame,
                  tukey_test: bool = False,
                  pval_threshold: Optional[float] = None,
                  include_null: Optional[bool] = True,
-                 pval_digits: int = 3) -> None:
+                 pval_digits: int = 3,
+                 ttest_equal_var: bool = False) -> None:
 
         # Warn about deprecated parameters
         handle_deprecated_parameters(labels, isnull, pval_test_name, remarks)
@@ -240,7 +241,7 @@ def __init__(self, data: pd.DataFrame,
                                                htest, missing, ddof, rename, sort, limit, order,
                                                label_suffix, decimals, smd, overall, row_percent,
                                                dip_test, normal_test, tukey_test, pval_threshold,
-                                               include_null, pval_digits)
+                                               include_null, pval_digits, ttest_equal_var)
 
         # Initialize intermediate tables
         self.initialize_intermediate_tables()
@@ -282,7 +283,7 @@ def initialize_core_attributes(self, data, columns, categorical, continuous, gro
                                    htest, missing, ddof, rename, sort, limit, order,
                                    label_suffix, decimals, smd, overall, row_percent, 
                                    dip_test, normal_test, tukey_test, pval_threshold,
-                                   include_null, pval_digits):
+                                   include_null, pval_digits, ttest_equal_var):
         """
         Initialize attributes.
         """
@@ -299,6 +300,7 @@ def initialize_core_attributes(self, data, columns, categorical, continuous, gro
         self._dip_test = dip_test
         self._groupby = groupby
         self._htest = htest
+        self._ttest_equal_var = ttest_equal_var
         self._isnull = missing
         self._label_suffix = label_suffix
         self._limit = limit
@@ -359,7 +361,8 @@ def create_intermediate_tables(self, data):
             self.htest_table = self.tables.create_htest_table(data, self._continuous, self._categorical,
                                                               self._nonnormal, self._groupby,
                                                               self._groupbylvls, self._htest,
-                                                              self._pval, self._pval_adjust)
+                                                              self._pval, self._pval_adjust,
+                                                              self._ttest_equal_var)
 
         # create overall tables if required
         if self._categorical and self._groupby and self._overall:

diff --git a/tableone/tables.py b/tableone/tables.py
@@ -31,7 +31,8 @@ def create_htest_table(self, data: pd.DataFrame,
                            groupbylvls,
                            htest,
                            pval,
-                           pval_adjust) -> pd.DataFrame:
+                           pval_adjust,
+                           ttest_equal_var) -> pd.DataFrame:
         """
         Create a table containing P-Values for significance tests. Add features
         of the distributions and the P-Values to the dataframe.
@@ -57,6 +58,9 @@ def create_htest_table(self, data: pd.DataFrame,
         df['nonnormal'] = np.where(df.index.isin(nonnormal), True, False)
 
         # list values for each variable, grouped by groupby levels
+        min_observed = 0
+        catlevels = None
+
         for v in df.index:
             is_continuous = df.loc[v]['continuous']
             is_categorical = ~df.loc[v]['continuous']
@@ -89,7 +93,7 @@ def create_htest_table(self, data: pd.DataFrame,
             (df.loc[v, 'P-Value'],
              df.loc[v, 'Test'],
              warning_msg) = self.statistics._p_test(v, grouped_data, is_continuous, is_categorical,  # type: ignore
-                                                    is_normal,  min_observed, catlevels, htest)  # type: ignore
+                                                    is_normal,  min_observed, htest, ttest_equal_var)  # type: ignore
 
             # TODO: Improve method for handling these warnings.
             # Write to logfile?

diff --git a/tests/unit/test_tableone.py b/tests/unit/test_tableone.py
@@ -1411,8 +1411,22 @@ def test_pval_digits_custom_formatting():
     pval = t2.tableone['Grouped by group']['P-Value'].iloc[1]
     assert pval == '0.233*'
 
-
     t3 = TableOne(df, columns=['y'], continuous=['y'], groupby='group', pval=True, pval_digits=1,
                   pval_threshold=0.3)
     pval = t3.tableone['Grouped by group']['P-Value'].iloc[1]
     assert pval == '<0.1*'
+
+
+def test_ttest_equal_var_flag():
+    df = pd.DataFrame({
+        'group': ['A', 'A', 'A', 'B', 'B', 'B'],
+        'x': [1.0, 2.0, 3.0, 20.0, 22.0, 24.0]
+    })
+
+    t1 = TableOne(df, columns=['x'], groupby='group', pval=True, ttest_equal_var=False, pval_digits=5)
+    pval_welch = t1.tableone[('Grouped by group', 'P-Value')].iloc[1]
+    assert pval_welch == "0.00065"
+
+    t2 = TableOne(df, columns=['x'], groupby='group', pval=True, ttest_equal_var=True, pval_digits=5)
+    pval = t2.tableone[('Grouped by group', 'P-Value')].iloc[1]
+    assert pval == "0.00010"