PythonPredictions · theunisjohnsopra · May 13, 2026 · May 20, 2026 · May 20, 2026
diff --git a/.DS_Store b/.DS_Store
diff --git a/.github/.DS_Store b/.github/.DS_Store
diff --git a/.github/workflows/development_CI.yaml b/.github/workflows/development_CI.yaml
@@ -14,12 +14,12 @@ jobs:
     runs-on: ubuntu-latest
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v4
 
-    - name: Set up Python 3.8
-      uses: actions/setup-python@v2
+    - name: Set up Python 3.12
+      uses: actions/setup-python@v5
       with:
-        python-version: 3.8
+        python-version: "3.12"
 
     - name: Install dependencies
       run: |

diff --git a/README.md b/README.md
diff --git a/cobra/.DS_Store b/cobra/.DS_Store
diff --git a/cobra/evaluation/evaluator.py b/cobra/evaluation/evaluator.py
@@ -177,7 +177,7 @@ def plot_roc_curve(self, path: str=None, dim: tuple=(12, 8)):
 
         auc = float(self.scalar_metrics.loc["AUC"])
 
-        with plt.style.context("seaborn-whitegrid"):
+        with plt.style.context("seaborn-v0_8-whitegrid"):
 
             fig, ax = plt.subplots(figsize=dim)
 
@@ -255,7 +255,7 @@ def plot_cumulative_response_curve(self, path: str=None, dim: tuple=(12, 8)):
 
         lifts = np.array(lifts)*inc_rate*100
 
-        with plt.style.context("seaborn-ticks"):
+        with plt.style.context("seaborn-v0_8-ticks"):
             fig, ax = plt.subplots(figsize=dim)
 
             plt.bar(x_labels[::-1], lifts, align="center",
@@ -304,7 +304,7 @@ def plot_lift_curve(self, path: str=None, dim: tuple=(12, 8)):
 
         x_labels, lifts, _ = self.lift_curve
 
-        with plt.style.context("seaborn-ticks"):
+        with plt.style.context("seaborn-v0_8-ticks"):
             fig, ax = plt.subplots(figsize=dim)
 
             plt.bar(x_labels[::-1], lifts, align="center",
@@ -345,7 +345,7 @@ def plot_cumulative_gains(self, path: str=None, dim: tuple=(12, 8)):
             Tuple with width and length of the plot.
         """
 
-        with plt.style.context("seaborn-whitegrid"):
+        with plt.style.context("seaborn-v0_8-whitegrid"):
             fig, ax = plt.subplots(figsize=dim)
 
             ax.plot(self.cumulative_gains[0]*100, self.cumulative_gains[1]*100,
@@ -675,7 +675,7 @@ def plot_predictions(self, path: str=None, dim: tuple=(12, 8)):
         y_true = self.y_true
         y_pred = self.y_pred
 
-        with plt.style.context("seaborn-whitegrid"):
+        with plt.style.context("seaborn-v0_8-whitegrid"):
 
             fig, ax = plt.subplots(figsize=dim)
 
@@ -711,7 +711,7 @@ def plot_qq(self, path: str=None, dim: tuple=(12, 8)):
 
             raise NotFittedError(msg.format(self.__class__.__name__))
 
-        with plt.style.context("seaborn-whitegrid"):
+        with plt.style.context("seaborn-v0_8-whitegrid"):
 
             fig, ax = plt.subplots(figsize=dim)
 
@@ -733,4 +733,4 @@ def plot_qq(self, path: str=None, dim: tuple=(12, 8)):
             if path:
                 plt.savefig(path, format="png", dpi=300, bbox_inches="tight")
 
-        plt.show()
+        plt.show()
diff --git a/cobra/evaluation/pigs_tables.py b/cobra/evaluation/pigs_tables.py
@@ -48,6 +48,15 @@ def generate_pig_tables(basetable: pd.DataFrame,
         for column_name in sorted(preprocessed_predictors)
         if column_name not in no_predictor
     ]
+
+    if len(pigs) == 0:
+        raise ValueError(
+            "No preprocessed predictors were provided to generate_pig_tables. "
+            "Make sure you ran preprocessor.transform(...) successfully and "
+            "that preprocessed_predictors contains columns ending in '_bin' "
+            "or '_processed'."
+        )
+
     output = pd.concat(pigs, ignore_index=True)
     return output
 
@@ -145,16 +154,15 @@ def plot_incidence(pig_tables: pd.DataFrame,
                 'the same set of variables.')
 
         df_plot['label'] = df_plot['label'].astype('category')
-        df_plot['label'].cat.reorder_categories(column_order,
-                                                inplace=True)
+        df_plot['label'] = df_plot['label'].cat.reorder_categories(column_order)
 
         df_plot.sort_values(by=['label'], ascending=True, inplace=True)
         df_plot.reset_index(inplace=True)
     else:
         df_plot.sort_values(by=['avg_target'], ascending=False, inplace=True)
         df_plot.reset_index(inplace=True)
 
-    with plt.style.context("seaborn-ticks"):
+    with plt.style.context("seaborn-v0_8-ticks"):
         fig, ax = plt.subplots(figsize=dim)
 
         # --------------------------

diff --git a/cobra/evaluation/plotting_utils.py b/cobra/evaluation/plotting_utils.py
@@ -39,7 +39,7 @@ def plot_univariate_predictor_quality(df_metric: pd.DataFrame,
                  value_name=metric)
 
     # plot data
-    with plt.style.context("seaborn-ticks"):
+    with plt.style.context("seaborn-v0_8-ticks"):
         fig, ax = plt.subplots(figsize=dim)
 
         ax = sns.barplot(x=metric, y="predictor", hue="split", data=df)
@@ -122,7 +122,7 @@ def plot_performance_curves(model_performance: pd.DataFrame,
                               max(model_performance['selection_performance']),
                               max(model_performance['validation_performance'])), 1)
 
-    with plt.style.context("seaborn-whitegrid"):
+    with plt.style.context("seaborn-v0_8-whitegrid"):
 
         fig, ax = plt.subplots(figsize=dim)
 
@@ -178,7 +178,7 @@ def plot_variable_importance(df_variable_importance: pd.DataFrame,
     path : str, optional
         Path to store the figure.
     """
-    with plt.style.context("seaborn-ticks"):
+    with plt.style.context("seaborn-v0_8-ticks"):
         fig, ax = plt.subplots(figsize=dim)
         ax = sns.barplot(x="importance", y="predictor",
                          data=df_variable_importance,

diff --git a/cobra/preprocessing/categorical_data_processor.py b/cobra/preprocessing/categorical_data_processor.py
@@ -420,6 +420,10 @@ def _replace_missings(data: pd.DataFrame,
             temp = data[column_names]
         else:
             temp = data.copy()
+
+        # Cast to object first so mixed/string replacements remain valid
+        # for numeric and boolean categorical columns under newer pandas.
+        temp = temp.astype(object)
         temp = temp.fillna("Missing")
         temp = temp.replace(regex, "")
         temp = temp.replace("", "Missing")
@@ -462,7 +466,7 @@ def _compute_p_value(X: pd.Series, y: pd.Series, category: str,
 
         if model_type == "classification":
             contingency_table = pd.crosstab(index=df["other_categories"], columns=df["y"],
-                                            margins=False)
+                                            margins=False).astype(np.float64)
 
             # if true, we scale the "other" categories
             if scale_contingency_table:
@@ -471,7 +475,8 @@ def _compute_p_value(X: pd.Series, y: pd.Series, category: str,
 
                 contingency_table.iloc[1, 0] = (1-incidence_mean) * size_other_cats
                 contingency_table.iloc[1, 1] = incidence_mean * size_other_cats
-                contingency_table = contingency_table.values.astype(np.int64)
+
+            contingency_table = contingency_table.to_numpy(dtype=np.float64)
 
             pval = stats.chi2_contingency(contingency_table, correction=False)[1]
 

diff --git a/cobra/preprocessing/kbins_discretizer.py b/cobra/preprocessing/kbins_discretizer.py
@@ -314,24 +314,25 @@ def _transform_column(self, data: pd.DataFrame,
 
         column_name_bin = column_name + "_bin"
 
-        # use pd.cut to compute bins
-        data[column_name_bin] = pd.cut(x=data[column_name],
-                                              bins=interval_idx)
+        # Build the categorical Series fully first, then assign it once.
+        # Newer pandas is stricter about overwriting an existing categorical
+        # column with a different set of categories.
+        binned = pd.cut(x=data[column_name], bins=interval_idx)
 
         # Rename bins so that the output has a proper format
         bin_labels = self._create_bin_labels(bins)
+        binned = binned.cat.rename_categories(bin_labels)
 
-        data[column_name_bin] = (data[column_name_bin]
-                                        .cat.rename_categories(bin_labels))
-
-        if data[column_name_bin].isnull().sum() > 0:
+        if binned.isnull().sum() > 0:
 
             # Add an additional bin for missing values
-            data[column_name_bin]=data[column_name_bin].cat.add_categories(["Missing"])
+            binned = binned.cat.add_categories(["Missing"])
 
             # Replace NULL with "Missing"
             # Otherwise these will be ignored in groupby
-            data[column_name_bin].fillna("Missing", inplace=True)
+            binned = binned.fillna("Missing")
+
+        data[column_name_bin] = binned
 
         return data
 

diff --git a/cobra/preprocessing/preprocessor.py b/cobra/preprocessing/preprocessor.py
@@ -249,30 +249,42 @@ def get_continuous_and_discrete_columns(
                 "id_col_name is equal to None. If there is no id column ignore this warning"
             )
 
-        # find continuous_vars and discrete_vars in the dateframe
-        col_dtypes = df.dtypes
+        excluded_columns = {id_col_name, target_column_name}
+
         discrete_vars = [
             col
-            for col in col_dtypes[col_dtypes == object].index.tolist()
-            if col not in [id_col_name, target_column_name]
+            for col in df.columns
+            if col not in excluded_columns
+            and (
+                pd.api.types.is_object_dtype(df[col])
+                or pd.api.types.is_string_dtype(df[col])
+                or isinstance(df[col].dtype, pd.CategoricalDtype)
+                or pd.api.types.is_bool_dtype(df[col])
+            )
         ]
 
         for col in df.columns:
-            if col not in discrete_vars and col not in [
-                id_col_name,
-                target_column_name,
-            ]:  # omit discrete because a string, and target
-                val_counts = df[col].nunique()
-                if (
-                    val_counts > 1 and val_counts <= 10
-                ):  # the column contains less than 10 different values
-                    discrete_vars.append(col)
-
-        continuous_vars = list(
-            set(df.columns)
-            - set(discrete_vars)
-            - set([id_col_name, target_column_name])
-        )
+            if col in discrete_vars or col in excluded_columns:
+                continue
+            if not pd.api.types.is_numeric_dtype(df[col]):
+                continue
+            if pd.api.types.is_bool_dtype(df[col]):
+                continue
+
+            val_counts = df[col].nunique()
+            if (
+                val_counts > 1 and val_counts <= 10
+            ):  # the column has between 2 and 10 distinct values (inclusive)
+                discrete_vars.append(col)
+
+        continuous_vars = [
+            col
+            for col in df.columns
+            if col not in excluded_columns
+            and col not in discrete_vars
+            and pd.api.types.is_numeric_dtype(df[col])
+            and not pd.api.types.is_bool_dtype(df[col])
+        ]
         log.warning(
             f"""Cobra automaticaly assumes that following variables are 
             discrete: {discrete_vars}

diff --git a/cobra/preprocessing/target_encoder.py b/cobra/preprocessing/target_encoder.py
@@ -270,29 +270,25 @@ def _transform_column(self, data: pd.DataFrame,
         _data = data.copy()
         new_column = TargetEncoder._clean_column_name(column_name)
 
-        # Convert dtype to float, because when the original dtype
-        # is of type "category", the resulting dtype would otherwise also be of
-        # type "category":
-        _data[new_column] = (_data[column_name].map(self._mapping[column_name])
-                            .astype("float"))
+        # Convert dtype to float up front so encoded values are written into
+        # a fresh float Series, which avoids dtype collisions on newer pandas.
+        encoded = _data[column_name].map(self._mapping[column_name]).astype("float")
 
         # In case of categorical data, it could be that new categories will
         # emerge which were not present in the train set, so this will result
         # in missing values, which should be replaced according to the
         # configured imputation strategy:
-        if _data[new_column].isnull().sum() > 0:
+        if encoded.isnull().sum() > 0:
             if self.imputation_strategy == "mean":
-                _data[new_column].fillna(self._global_mean,
-                                        inplace=True)
+                encoded = encoded.fillna(self._global_mean)
             elif self.imputation_strategy == "min":
-                _data[new_column].fillna(_data[new_column].min(),
-                                        inplace=True)
+                encoded = encoded.fillna(encoded.min())
             elif self.imputation_strategy == "max":
-                _data[new_column].fillna(_data[new_column].max(),
-                                        inplace=True)
+                encoded = encoded.fillna(encoded.max())
             elif self.imputation_strategy == "median":
-                _data[new_column].fillna(_data[new_column].median(),
-                                        inplace=True)
+                encoded = encoded.fillna(encoded.median())
+
+        _data[new_column] = encoded
 
         return _data
 

diff --git a/cobra/version.py b/cobra/version.py
@@ -1 +1 @@
-__version__ = "1.1.1"
+__version__ = "1.1.2"
diff --git a/main.py b/main.py
@@ -0,0 +1,9 @@
+def main():
+    """Print the project's greeting line to standard output."""
+    greeting = "Hello from cobra!"
+    print(greeting)
+
+
+if __name__ == "__main__":
+    # Only emit the greeting when run as a script, not when imported.
+    main()
diff --git a/pyproject.toml b/pyproject.toml
@@ -0,0 +1,23 @@
+[project]
+name = "cobra"
+version = "1.1.2"
+description = "Preprocessing and evaluation utilities for building predictive models"
+readme = "README.md"
+# NOTE(review): setup.py declares python_requires=">=3.10"; confirm which floor is intended.
+requires-python = ">=3.11"
+dependencies = [
+    # Notebook tooling; not part of install_requires in setup.py.
+    "ipykernel>=7.2.0",
+    "jupyter>=1.1.1",
+    # Library requirements; same pins as install_requires in setup.py.
+    # pythonpredictions-cobra is intentionally absent: it appears to be the
+    # published build of this very repository (TODO confirm), and the 1.1.1
+    # setup.py pinned pandas<2.0.0, which conflicts with pandas>=2.1.0 here.
+    "matplotlib>=3.8.0",
+    "numpy>=1.26.0",
+    "pandas>=2.1.0",
+    "scikit-learn>=1.2.0",
+    "scipy>=1.11.2",
+    "seaborn>=0.13.2",
+    "tqdm>=4.62.2",
+]
diff --git a/requirements copy.txt b/requirements copy.txt
@@ -0,0 +1,7 @@
+numpy>=1.19.4
+pandas>=1.1.5,<2.0.0
+scipy>=1.5.4
+scikit-learn>=1.2.0
+matplotlib>=3.4.3
+seaborn>=0.11.0
+tqdm>=4.62.2
diff --git a/requirements.txt b/requirements.txt
@@ -1,7 +1,7 @@
-numpy>=1.19.4
-pandas>=1.1.5,<2.0.0
-scipy>=1.5.4
+numpy>=1.26.0
+pandas>=2.1.0
+scipy>=1.11.2
 scikit-learn>=1.2.0
-matplotlib>=3.4.3
-seaborn>=0.11.0
-tqdm>=4.62.2
+matplotlib>=3.8.0
+seaborn>=0.13.2
+tqdm>=4.62.2
diff --git a/setup.py b/setup.py
@@ -23,12 +23,13 @@
     license="MIT",
     author="Python Predictions",
     author_email="cobra@pythonpredictions.com",
+    python_requires=">=3.10",
     install_requires=[
-        "numpy>=1.19.4",
-        "pandas>=1.1.5,<2.0.0",
-        "scipy>=1.5.4",
-        "scikit-learn>=0.24.1",
-        "matplotlib>=3.4.3",
-        "seaborn>=0.11.0",
+        "numpy>=1.26.0",
+        "pandas>=2.1.0",
+        "scipy>=1.11.2",
+        "scikit-learn>=1.2.0",
+        "matplotlib>=3.8.0",
+        "seaborn>=0.13.2",
         "tqdm>=4.62.2"]
 )
diff --git a/tests/.DS_Store b/tests/.DS_Store