Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions python-package/xgboost/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,11 @@
Contributors: https://github.com/dmlc/xgboost/blob/master/CONTRIBUTORS.md
"""

from . import tracker # noqa
from . import collective
from . import (
collective,
interpret,
tracker, # noqa
)
from ._c_api import _py_version
from .core import (
Booster,
Expand Down Expand Up @@ -62,4 +65,6 @@
"XGBRFRegressor",
# collective
"collective",
# interpretability
"interpret",
]
153 changes: 153 additions & 0 deletions python-package/xgboost/interpret.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
"""Interpretability functions for XGBoost models."""

from typing import Optional, Tuple, Union

import numpy as np

from ._typing import ArrayLike, FeatureNames, IterationRange
from .core import Booster, DMatrix


def _as_booster(model: object) -> Booster:
    """Extract the underlying :class:`Booster` from *model*.

    Accepts either a :class:`Booster` directly, or any sklearn-style
    wrapper that exposes a ``get_booster()`` method returning one.

    Raises
    ------
    TypeError
        If *model* is neither a booster nor a wrapper around one.
    """
    if isinstance(model, Booster):
        return model
    getter = getattr(model, "get_booster", None)
    if getter is None:
        raise TypeError(
            "`model` must be an xgboost.Booster or an object with get_booster()."
        )
    result = getter()
    if isinstance(result, Booster):
        return result
    raise TypeError("`model.get_booster()` must return an xgboost.Booster.")


def _get_iteration_range(
    model: object, iteration_range: Optional[IterationRange]
) -> IterationRange:
    """Resolve the tree-layer range used for prediction.

    When the caller gives no explicit *iteration_range*, defer to the
    model's own ``_get_iteration_range`` helper if it has one (sklearn-style
    wrappers provide it), otherwise fall back to ``(0, 0)``, which means
    "use all layers" in :py:meth:`Booster.predict`.
    """
    if iteration_range is None:
        get_iteration_range = getattr(model, "_get_iteration_range", None)
        if get_iteration_range is not None:
            return get_iteration_range(iteration_range)
        return (0, 0)
    return iteration_range


def _as_prediction_dmatrix(
    model: object, X: Union[DMatrix, ArrayLike], feature_names: Optional[FeatureNames]
) -> DMatrix:
    """Coerce *X* into a :class:`DMatrix` suitable for prediction.

    An existing :class:`DMatrix` is reused as-is (its ``feature_names`` are
    overridden only when the caller supplied some). Otherwise a new
    :class:`DMatrix` is built, propagating any construction settings found
    on *model* (sklearn-style attributes such as ``missing`` and ``n_jobs``).
    """
    if isinstance(X, DMatrix):
        if feature_names is not None:
            X.feature_names = feature_names
        return X
    return DMatrix(
        X,
        missing=getattr(model, "missing", None),
        nthread=getattr(model, "n_jobs", None),
        feature_names=feature_names,
        feature_types=getattr(model, "feature_types", None),
        enable_categorical=getattr(model, "enable_categorical", False),
    )


def _predict_contribs(
    booster: Booster,
    data: DMatrix,
    *,
    device: Optional[str],
    kwargs: dict,
) -> np.ndarray:
    """Run ``booster.predict(data, **kwargs)``, optionally on *device*.

    When a device override is given, the booster's full configuration is
    snapshotted first and restored in a ``finally`` block, so the override
    never leaks into the caller's model — even if prediction raises.
    """
    if device is None:
        return booster.predict(data, **kwargs)

    config = booster.save_config()
    try:
        booster.set_param({"device": device})
        return booster.predict(data, **kwargs)
    finally:
        booster.load_config(config)


def shap_values(  # pylint: disable=too-many-arguments
    model: object,
    X: Union[DMatrix, ArrayLike],
    *,
    X_background: Optional[Union[DMatrix, ArrayLike]] = None,
    device: Optional[str] = None,
    output_margin: bool = False,
    iteration_range: Optional[IterationRange] = None,
    approx: bool = False,
    validate_features: bool = True,
    feature_names: Optional[FeatureNames] = None,
    return_bias: bool = False,
) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]:
    """Return SHAP values for an XGBoost model.

    Works with either a :py:class:`xgboost.Booster` or an sklearn-style
    XGBoost model; internally this wraps :py:meth:`xgboost.Booster.predict`
    with ``pred_contribs=True``. ``predict`` appends a bias column, which is
    stripped from the default return value.

    Parameters
    ----------
    model :
        XGBoost booster or sklearn-style XGBoost model.
    X :
        Input data.
    X_background :
        Background data for interventional SHAP values. Reserved for a
        future implementation; currently unsupported.
    device :
        Optional prediction device override, such as ``"cpu"``, ``"cuda"``,
        or ``"cuda:0"``. The model's original configuration is restored
        after prediction.
    output_margin :
        Accepted for API compatibility. SHAP contributions currently
        correspond to the model margin, matching
        ``Booster.predict(pred_contribs=True)``.
    iteration_range :
        Specifies which layer of trees are used in prediction.
    approx :
        Use approximate SHAP contributions.
    validate_features :
        Validate feature names between the model and input data.
    feature_names :
        Optional feature names used when constructing a DMatrix.
    return_bias :
        When True, return ``(values, bias)``.

    Returns
    -------
    values :
        Feature SHAP values, excluding the bias term.
    values, bias :
        Returned when ``return_bias`` is True.
    """
    if X_background is not None:
        raise NotImplementedError("`X_background` is not yet supported.")
    # SHAP prediction always returns margin contributions; the parameter is
    # kept so callers can already use the proposed signature.
    _ = output_margin

    booster = _as_booster(model)
    dmat = _as_prediction_dmatrix(model, X, feature_names)
    predict_kwargs = {
        "pred_contribs": True,
        "approx_contribs": approx,
        "validate_features": validate_features,
        "iteration_range": _get_iteration_range(model, iteration_range),
    }
    contribs = _predict_contribs(booster, dmat, device=device, kwargs=predict_kwargs)

    # The last entry along the final axis is the bias term.
    if return_bias:
        return contribs[..., :-1], contribs[..., -1]
    return contribs[..., :-1]


__all__ = ["shap_values"]
76 changes: 76 additions & 0 deletions tests/python/test_interpret.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import numpy as np
import pytest
import xgboost as xgb
from xgboost import interpret


def test_shap_values_matches_predict() -> None:
    """shap_values must match Booster.predict(pred_contribs=True) sans bias."""
    rng = np.random.RandomState(1994)
    X = rng.randn(16, 4)
    y = rng.randn(16)
    booster = xgb.train({"tree_method": "hist"}, xgb.DMatrix(X, label=y), 4)

    expected = booster.predict(xgb.DMatrix(X), pred_contribs=True)
    values, bias = interpret.shap_values(booster, X, return_bias=True)

    np.testing.assert_allclose(values, expected[:, :-1])
    np.testing.assert_allclose(bias, expected[:, -1])
    np.testing.assert_allclose(interpret.shap_values(booster, X), expected[:, :-1])


def test_shap_values_accepts_sklearn_model() -> None:
    """An sklearn-style wrapper is unwrapped to its booster transparently."""
    rng = np.random.RandomState(1995)
    X = rng.randn(16, 4)
    y = rng.randn(16)
    reg = xgb.XGBRegressor(n_estimators=4, tree_method="hist")
    reg.fit(X, y)

    expected = reg.get_booster().predict(xgb.DMatrix(X), pred_contribs=True)
    np.testing.assert_allclose(interpret.shap_values(reg, X), expected[:, :-1])


def test_shap_values_rejects_background_data() -> None:
    """Passing background data must raise until it is implemented."""
    rng = np.random.RandomState(1996)
    X = rng.randn(16, 4)
    y = rng.randn(16)
    booster = xgb.train({"tree_method": "hist"}, xgb.DMatrix(X, label=y), 4)

    with pytest.raises(NotImplementedError, match="X_background"):
        interpret.shap_values(booster, X, X_background=X)


def test_shap_values_device_override_restores_config() -> None:
    """A device override must not change the booster's saved config."""
    rng = np.random.RandomState(1997)
    X = rng.randn(16, 4)
    y = rng.randn(16)
    booster = xgb.train({"tree_method": "hist"}, xgb.DMatrix(X, label=y), 4)
    config_before = booster.save_config()

    values = interpret.shap_values(booster, X, device="cpu")

    expected = booster.predict(xgb.DMatrix(X), pred_contribs=True)
    np.testing.assert_allclose(values, expected[:, :-1])
    assert booster.save_config() == config_before


def test_shap_values_device_override_restores_config_on_error() -> None:
    """Config must be restored even when prediction raises mid-override."""
    rng = np.random.RandomState(1998)
    X = rng.randn(16, 4)
    y = rng.randn(16)
    booster = xgb.train(
        {"tree_method": "hist"},
        xgb.DMatrix(X, label=y, feature_names=["a", "b", "c", "d"]),
        4,
    )
    config_before = booster.save_config()

    # Mismatched feature names force predict() to fail after the device
    # parameter has been applied.
    with pytest.raises(ValueError, match="feature_names mismatch"):
        interpret.shap_values(
            booster,
            xgb.DMatrix(X, feature_names=["q", "b", "c", "d"]),
            device="cpu",
        )

    assert booster.save_config() == config_before
Loading