Validate X in decision_function before delegating to scikit-learn

Run check_array on X in decision_function of GMM, IForest, LOF and OCSVM
before the input is handed to the wrapped detector. Adds a regression
test for GitHub issue #540 (no feature-name UserWarning when a pandas
DataFrame is passed to IForest) and a sparse-input test for LOF.

NOTE(review): IForest uses accept_sparse=True, as LOF does. A bare
check_array(X) raises on scipy sparse input, which scikit-learn's
IsolationForest.decision_function accepts. IForest.fit is not part of
this patch -- confirm that it takes sparse input as well.
NOTE(review): indentation and blank context lines were reconstructed
from the hunk line counts -- verify against the target files.

diff --git a/pyod/models/gmm.py b/pyod/models/gmm.py
index 3362abea..72e6ff4a 100644
--- a/pyod/models/gmm.py
+++ b/pyod/models/gmm.py
@@ -219,6 +219,7 @@ def decision_function(self, X):
             The anomaly score of the input samples.
         """
         check_is_fitted(self, ["decision_scores_", "threshold_", "labels_"])
+        X = check_array(X)
 
         # Invert outlier scores. Outliers come with higher outlier scores
         return invert_order(self.detector_.score_samples(X))
diff --git a/pyod/models/iforest.py b/pyod/models/iforest.py
index ffba3d1d..86f49a0b 100644
--- a/pyod/models/iforest.py
+++ b/pyod/models/iforest.py
@@ -241,6 +241,7 @@ def decision_function(self, X):
             The anomaly score of the input samples.
         """
         check_is_fitted(self, ['decision_scores_', 'threshold_', 'labels_'])
+        X = check_array(X, accept_sparse=True)
 
         # invert outlier scores. Outliers comes with higher outlier scores
         return invert_order(self.detector_.decision_function(X))
diff --git a/pyod/models/lof.py b/pyod/models/lof.py
index 2d9c8df8..89064408 100644
--- a/pyod/models/lof.py
+++ b/pyod/models/lof.py
@@ -211,6 +211,7 @@ def decision_function(self, X):
 
         check_is_fitted(self, ['decision_scores_', 'threshold_', 'labels_'])
 
+        X = check_array(X, accept_sparse=True)
         # Invert outlier scores. Outliers comes with higher outlier scores
         # noinspection PyProtectedMember
         try:
diff --git a/pyod/models/ocsvm.py b/pyod/models/ocsvm.py
index 04056245..7940e219 100644
--- a/pyod/models/ocsvm.py
+++ b/pyod/models/ocsvm.py
@@ -188,6 +188,7 @@ def decision_function(self, X):
             The anomaly score of the input samples.
         """
         check_is_fitted(self, ['decision_scores_', 'threshold_', 'labels_'])
+        X = check_array(X)
 
         # Invert outlier scores. Outliers comes with higher outlier scores
         return invert_order(self.detector_.decision_function(X))
diff --git a/pyod/test/test_iforest.py b/pyod/test/test_iforest.py
index 72dba220..841572cb 100644
--- a/pyod/test/test_iforest.py
+++ b/pyod/test/test_iforest.py
@@ -167,6 +167,29 @@ def test_feature_importances(self):
         feature_importances = self.clf.feature_importances_
         assert (len(feature_importances) == 2)
 
+    def test_dataframe_no_feature_name_warning(self):
+        """Regression test for GitHub issue #540.
+
+        When a pandas DataFrame is passed to fit/predict, no warning about
+        feature names should be raised by the underlying sklearn estimator.
+        """
+        import pytest
+        import warnings
+
+        pd = pytest.importorskip("pandas")
+
+        df_train = pd.DataFrame(self.X_train, columns=['f1', 'f2'])
+        df_test = pd.DataFrame(self.X_test, columns=['f1', 'f2'])
+
+        clf = IForest(contamination=self.contamination, random_state=42)
+        clf.fit(df_train)
+
+        with warnings.catch_warnings():
+            warnings.simplefilter("error", UserWarning)
+            clf.decision_function(df_test)
+            clf.predict(df_test)
+            clf.predict_proba(df_test)
+
     def tearDown(self):
         pass
 
diff --git a/pyod/test/test_lof.py b/pyod/test/test_lof.py
index 5e3bfed4..e5a88984 100644
--- a/pyod/test/test_lof.py
+++ b/pyod/test/test_lof.py
@@ -10,6 +10,7 @@
 from numpy.testing import assert_array_less
 from numpy.testing import assert_equal
 from numpy.testing import assert_raises
+from scipy.sparse import csr_matrix
 from scipy.stats import rankdata
 from sklearn.base import clone
 from sklearn.metrics import roc_auc_score
@@ -61,6 +62,16 @@ def test_prediction_scores(self):
         # check performance
         assert (roc_auc_score(self.y_test, pred_scores) >= self.roc_floor)
 
+    def test_sparse_prediction_scores(self):
+        sparse_train = csr_matrix(self.X_train)
+        sparse_test = csr_matrix(self.X_test)
+        clf = LOF(contamination=self.contamination)
+        clf.fit(sparse_train)
+
+        pred_scores = clf.decision_function(sparse_test)
+
+        assert_equal(pred_scores.shape[0], self.X_test.shape[0])
+
     def test_prediction_labels(self):
         pred_labels = self.clf.predict(self.X_test)
         assert_equal(pred_labels.shape, self.y_test.shape)