From d8cdccc56c878ed11cb15ab55364b73d0ca230a1 Mon Sep 17 00:00:00 2001 From: ugbotueferhire Date: Sun, 24 May 2026 23:07:16 +0100 Subject: [PATCH 1/2] Fix DataFrame feature name warnings in sklearn wrapper models (#540) --- pyod/models/gmm.py | 1 + pyod/models/iforest.py | 1 + pyod/models/lof.py | 1 + pyod/models/ocsvm.py | 1 + pyod/test/test_iforest.py | 21 +++++++++++++++++++++ 5 files changed, 25 insertions(+) diff --git a/pyod/models/gmm.py b/pyod/models/gmm.py index 3362abea..72e6ff4a 100644 --- a/pyod/models/gmm.py +++ b/pyod/models/gmm.py @@ -219,6 +219,7 @@ def decision_function(self, X): The anomaly score of the input samples. """ check_is_fitted(self, ["decision_scores_", "threshold_", "labels_"]) + X = check_array(X) # Invert outlier scores. Outliers come with higher outlier scores return invert_order(self.detector_.score_samples(X)) diff --git a/pyod/models/iforest.py b/pyod/models/iforest.py index ffba3d1d..86f49a0b 100644 --- a/pyod/models/iforest.py +++ b/pyod/models/iforest.py @@ -241,6 +241,7 @@ def decision_function(self, X): The anomaly score of the input samples. """ check_is_fitted(self, ['decision_scores_', 'threshold_', 'labels_']) + X = check_array(X) # invert outlier scores. Outliers comes with higher outlier scores return invert_order(self.detector_.decision_function(X)) diff --git a/pyod/models/lof.py b/pyod/models/lof.py index 2d9c8df8..2d80dec1 100644 --- a/pyod/models/lof.py +++ b/pyod/models/lof.py @@ -211,6 +211,7 @@ def decision_function(self, X): check_is_fitted(self, ['decision_scores_', 'threshold_', 'labels_']) + X = check_array(X) # Invert outlier scores. Outliers comes with higher outlier scores # noinspection PyProtectedMember try: diff --git a/pyod/models/ocsvm.py b/pyod/models/ocsvm.py index 04056245..7940e219 100644 --- a/pyod/models/ocsvm.py +++ b/pyod/models/ocsvm.py @@ -188,6 +188,7 @@ def decision_function(self, X): The anomaly score of the input samples. """ check_is_fitted(self, ['decision_scores_', 'threshold_', 'labels_']) + X = check_array(X) # Invert outlier scores. Outliers comes with higher outlier scores return invert_order(self.detector_.decision_function(X)) diff --git a/pyod/test/test_iforest.py b/pyod/test/test_iforest.py index 72dba220..f4c7e341 100644 --- a/pyod/test/test_iforest.py +++ b/pyod/test/test_iforest.py @@ -167,6 +167,27 @@ def test_feature_importances(self): feature_importances = self.clf.feature_importances_ assert (len(feature_importances) == 2) + def test_dataframe_no_feature_name_warning(self): + """Regression test for GitHub issue #540. + + When a pandas DataFrame is passed to fit/predict, no warning about + feature names should be raised by the underlying sklearn estimator. + """ + import pandas as pd + import warnings + + df_train = pd.DataFrame(self.X_train, columns=['f1', 'f2']) + df_test = pd.DataFrame(self.X_test, columns=['f1', 'f2']) + + clf = IForest(contamination=self.contamination, random_state=42) + clf.fit(df_train) + + with warnings.catch_warnings(): + warnings.simplefilter("error", UserWarning) + clf.decision_function(df_test) + clf.predict(df_test) + clf.predict_proba(df_test) + def tearDown(self): pass From 43faf2f7c58164e9081906d783d548cf83b6afff Mon Sep 17 00:00:00 2001 From: ugbotueferhire Date: Sun, 31 May 2026 14:14:15 +0100 Subject: [PATCH 2/2] Fix review feedback for sparse LOF and pandas test --- pyod/models/lof.py | 2 +- pyod/test/test_iforest.py | 4 +++- pyod/test/test_lof.py | 11 +++++++++++ 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/pyod/models/lof.py b/pyod/models/lof.py index 2d80dec1..89064408 100644 --- a/pyod/models/lof.py +++ b/pyod/models/lof.py @@ -211,7 +211,7 @@ def decision_function(self, X): check_is_fitted(self, ['decision_scores_', 'threshold_', 'labels_']) - X = check_array(X) + X = check_array(X, accept_sparse=True) # Invert outlier scores. Outliers comes with higher outlier scores # noinspection PyProtectedMember try: diff --git a/pyod/test/test_iforest.py b/pyod/test/test_iforest.py index f4c7e341..841572cb 100644 --- a/pyod/test/test_iforest.py +++ b/pyod/test/test_iforest.py @@ -173,9 +173,11 @@ def test_dataframe_no_feature_name_warning(self): When a pandas DataFrame is passed to fit/predict, no warning about feature names should be raised by the underlying sklearn estimator. """ - import pandas as pd + import pytest import warnings + pd = pytest.importorskip("pandas") + df_train = pd.DataFrame(self.X_train, columns=['f1', 'f2']) df_test = pd.DataFrame(self.X_test, columns=['f1', 'f2']) diff --git a/pyod/test/test_lof.py b/pyod/test/test_lof.py index 5e3bfed4..e5a88984 100644 --- a/pyod/test/test_lof.py +++ b/pyod/test/test_lof.py @@ -10,6 +10,7 @@ from numpy.testing import assert_array_less from numpy.testing import assert_equal from numpy.testing import assert_raises +from scipy.sparse import csr_matrix from scipy.stats import rankdata from sklearn.base import clone from sklearn.metrics import roc_auc_score @@ -61,6 +62,16 @@ def test_prediction_scores(self): # check performance assert (roc_auc_score(self.y_test, pred_scores) >= self.roc_floor) + def test_sparse_prediction_scores(self): + sparse_train = csr_matrix(self.X_train) + sparse_test = csr_matrix(self.X_test) + clf = LOF(contamination=self.contamination) + clf.fit(sparse_train) + + pred_scores = clf.decision_function(sparse_test) + + assert_equal(pred_scores.shape[0], self.X_test.shape[0]) + def test_prediction_labels(self): pred_labels = self.clf.predict(self.X_test) assert_equal(pred_labels.shape, self.y_test.shape)