skrub-data · rcap107 · Jun 16, 2026 · Apr 21, 2026 · Apr 21, 2026 · Apr 21, 2026
diff --git a/CHANGES.rst b/CHANGES.rst
@@ -140,6 +140,9 @@ Changes
   <jeromedockes>`.
 - The ``exclude_cols`` of :meth:`DataOp.skb.apply` can now be a DataOp.
   :pr:`2050` by :user:`Jérôme Dockès <jeromedockes>`.
+- Skrub estimators now correctly show links to the documentation in the HTML
+  representation that is generated for notebooks. :pr:`2036` by :user:`Riccardo
+  Cappuzzo <rcap107>`.
 
 Bugfixes
 --------

diff --git a/skrub/_agg_joiner.py b/skrub/_agg_joiner.py
@@ -10,14 +10,15 @@
 
 import numpy as np
 import pandas as pd
-from sklearn.base import BaseEstimator, TransformerMixin
+from sklearn.base import TransformerMixin
 from sklearn.utils.validation import check_is_fitted
 
 from skrub import _dataframe as sbd
 from skrub import _join_utils, _utils
 from skrub import selectors as s
 from skrub._dispatch import dispatch, raise_dispatch_unregistered_type
 
+from ._base import SkrubBaseTransformer
 from ._check_input import CheckInputDataFrame
 
 try:
@@ -168,7 +169,7 @@ def check_other_inputs(operations, suffix):
     return operations, suffix
 
 
-class AggJoiner(TransformerMixin, BaseEstimator):
+class AggJoiner(TransformerMixin, SkrubBaseTransformer):
     """Aggregate an auxiliary dataframe before joining it on a base dataframe.
 
     Apply numerical and categorical aggregation operations on the columns (i.e. `cols`)
@@ -407,7 +408,7 @@ def get_feature_names_out(self):
         return self.all_outputs_
 
 
-class AggTarget(TransformerMixin, BaseEstimator):
+class AggTarget(TransformerMixin, SkrubBaseTransformer):
     """Aggregate a target `y` before joining its aggregation on a base dataframe.
 
     Accepts :obj:`pandas.DataFrame` or :class:`polars.DataFrame` inputs.

diff --git a/skrub/_apply_to_cols.py b/skrub/_apply_to_cols.py
@@ -3,17 +3,19 @@
 based on the type of the transformer passed to it.
 """
 
-from sklearn.base import BaseEstimator, TransformerMixin, check_is_fitted
+from sklearn.base import TransformerMixin, check_is_fitted
 
 from . import selectors
 from ._apply_to_each_col import ApplyToEachCol
 from ._apply_to_sub_frame import ApplyToSubFrame
+from ._base import SkrubBaseTransformer
+from ._sklearn_compat import _VisualBlock
 from ._wrap_transformer import wrap_transformer
 
 _SELECT_ALL_COLUMNS = selectors.all()
 
 
-class ApplyToCols(TransformerMixin, BaseEstimator):
+class ApplyToCols(TransformerMixin, SkrubBaseTransformer):
     """
     Apply a transformer to selected columns in a dataframe.
 
@@ -431,6 +433,20 @@ def get_feature_names_out(self, input_features=None):
 
         return self._wrapped_transformer.get_feature_names_out(input_features)
 
+    def _sk_visual_block_(self):
+        # Override the HTML diagram block for ApplyToCols. Without it, cases
+        # like ApplyToCols(TableVectorizer()) would show the TableVectorizer
+        # as a parallel block, which would not add the documentation link.
+        # The same problem happens for ApplyToCols(ApplyToCols(...)) (unlikely
+        # in practice, but possible). Use a serial block holding the transformer.
+
+        return _VisualBlock(
+            "serial",
+            [self.transformer],
+            names=[self.transformer.__class__.__name__],
+            name_details=[str(self.transformer)],
+        )
+
     def __getattr__(self, name):
         if name == "transformers_" and isinstance(
             getattr(self, "_wrapped_transformer", None), ApplyToSubFrame

diff --git a/skrub/_apply_to_each_col.py b/skrub/_apply_to_each_col.py
@@ -1,11 +1,12 @@
 import itertools
 
 from joblib import Parallel, delayed
-from sklearn.base import BaseEstimator, TransformerMixin, clone
+from sklearn.base import TransformerMixin, clone
 from sklearn.utils.validation import check_is_fitted
 
 from . import _dataframe as sbd
 from . import _utils, selectors
+from ._base import SkrubBaseTransformer
 from ._join_utils import pick_column_names
 from ._single_column_transformer import RejectColumn, is_single_column_transformer
 
@@ -15,7 +16,7 @@
 _SELECT_ALL_COLUMNS = selectors.all()
 
 
-class ApplyToEachCol(BaseEstimator, TransformerMixin):
+class ApplyToEachCol(SkrubBaseTransformer, TransformerMixin):
     """
     Map a transformer to columns in a dataframe.
 

diff --git a/skrub/_apply_to_sub_frame.py b/skrub/_apply_to_sub_frame.py
@@ -1,8 +1,9 @@
-from sklearn.base import BaseEstimator, TransformerMixin, clone
+from sklearn.base import TransformerMixin, clone
 from sklearn.utils.validation import check_is_fitted
 
 from . import _dataframe as sbd
 from . import _utils, selectors
+from ._base import SkrubBaseTransformer
 from ._join_utils import pick_column_names
 
 __all__ = ["ApplyToSubFrame"]
@@ -11,7 +12,7 @@
 _SELECT_ALL_COLUMNS = selectors.all()
 
 
-class ApplyToSubFrame(TransformerMixin, BaseEstimator):
+class ApplyToSubFrame(TransformerMixin, SkrubBaseTransformer):
     """Apply a transformer to part of a dataframe.
 
     A subset of the dataframe is selected and passed to the transformer (as a

diff --git a/skrub/_base.py b/skrub/_base.py
@@ -0,0 +1,30 @@
+from sklearn.base import BaseEstimator
+
+
+class SkrubBaseTransformer(BaseEstimator):
+    """Base class for all skrub transformers.
+
+    All skrub estimators inherit from this class. For the moment it is only
+    used to point the documentation links of the HTML representation to the
+    skrub website, but eventually it will be used for other things as well.
+    """
+
+    _doc_link_module = "skrub"
+
+    # Defined as a property because the class inherits (through BaseEstimator)
+    # from _HTMLDocumentationLinkMixin, which also defines _doc_link_template
+    # as a property. Redefining only the getter would leave the attribute
+    # read-only, so a matching setter is provided below to keep it overridable.
+    @property
+    def _doc_link_template(self):
+        # The name is passed as a string literal, so it is not name-mangled.
+        return getattr(
+            self,
+            "__doc_link_template",
+            "https://skrub-data.org/stable/reference/generated/"
+            "{estimator_module}.{estimator_name}.html",
+        )
+
+    @_doc_link_template.setter
+    def _doc_link_template(self, value):
+        setattr(self, "__doc_link_template", value)
diff --git a/skrub/_check_input.py b/skrub/_check_input.py
@@ -2,11 +2,12 @@
 
 import numpy as np
 import pandas as pd
-from sklearn.base import BaseEstimator, TransformerMixin
+from sklearn.base import TransformerMixin
 from sklearn.utils.validation import check_is_fitted
 
 from . import _dataframe as sbd
 from . import _join_utils, _utils
+from ._base import SkrubBaseTransformer
 from ._dispatch import dispatch
 
 __all__ = ["CheckInputDataFrame", "cast_column_names_to_strings"]
@@ -72,7 +73,7 @@ def _check_is_dataframe(df):
     return df
 
 
-class CheckInputDataFrame(TransformerMixin, BaseEstimator):
+class CheckInputDataFrame(TransformerMixin, SkrubBaseTransformer):
     """Check the dataframe entering a skrub pipeline.
 
     This transformer ensures that:

diff --git a/skrub/_data_ops/_estimator.py b/skrub/_data_ops/_estimator.py
@@ -16,6 +16,7 @@
 
 from .. import _dataframe as sbd
 from .. import _join_utils
+from .._base import SkrubBaseTransformer
 from .._sklearn_compat import _safe_indexing, _VisualBlock
 from .._utils import set_module
 from . import _evaluation
@@ -179,7 +180,7 @@ def _get_params_html(self, deep=True, doc_link=""):
 
 
 @set_module("skrub")
-class SkrubLearner(_DataOpWrapperMixin, BaseEstimator):
+class SkrubLearner(_DataOpWrapperMixin, SkrubBaseTransformer):
     """Learner that evaluates a skrub DataOp.
 
     This class is not meant to be instantiated manually, ``SkrubLearner``

diff --git a/skrub/_data_ops/tests/test_estimators.py b/skrub/_data_ops/tests/test_estimators.py
@@ -1460,3 +1460,14 @@ def load_data():
     pred = X.skb.apply(DummyClassifier(), y=y)
     search = pred.skb.make_grid_search(scoring="roc_auc").fit({})
     assert search.results_.shape[0] == 1
+
+
+def test_learner_docstring():
+    # The documentation link of a learner must point to the skrub website.
+    data_op, data = get_data_op_and_data("simple")
+    split = data_op.skb.train_test_split(data)
+    learner = data_op.skb.make_learner().fit(split["train"])
+    link = learner._get_doc_link()
+    assert link == (
+        "https://skrub-data.org/stable/reference/generated/skrub.SkrubLearner.html"
+    )
diff --git a/skrub/_drop_similar.py b/skrub/_drop_similar.py
@@ -9,11 +9,12 @@
     pass
 import numbers
 
-from sklearn.base import BaseEstimator, TransformerMixin
+from sklearn.base import TransformerMixin
 from sklearn.utils.validation import check_is_fitted
 
 from . import _dataframe as sbd
 from . import selectors as s
+from ._base import SkrubBaseTransformer
 from ._column_associations import column_associations
 from ._dataframe._common import raise_dispatch_unregistered_type
 from ._dispatch import dispatch
@@ -35,7 +36,7 @@ def _filter_associations_polars(obj, threshold):
     return obj.filter(pl.col("cramer_v") >= threshold)
 
 
-class DropSimilar(TransformerMixin, BaseEstimator):
+class DropSimilar(TransformerMixin, SkrubBaseTransformer):
     """Drop columns found too redundant to the rest of the dataframe,
     according to association defined by Cramér's V.
 

diff --git a/skrub/_interpolation_joiner.py b/skrub/_interpolation_joiner.py
@@ -2,7 +2,7 @@
 
 import joblib
 import numpy as np
-from sklearn.base import BaseEstimator, TransformerMixin, clone
+from sklearn.base import TransformerMixin, clone
 from sklearn.ensemble import (
     HistGradientBoostingClassifier,
     HistGradientBoostingRegressor,
@@ -11,6 +11,7 @@
 from . import _dataframe as sbd
 from . import _join_utils, _utils
 from . import selectors as s
+from ._base import SkrubBaseTransformer
 from ._minhash_encoder import MinHashEncoder
 from ._sklearn_compat import get_tags
 from ._table_vectorizer import TableVectorizer
@@ -20,7 +21,7 @@
 DEFAULT_VECTORIZER = TableVectorizer(high_cardinality=MinHashEncoder())
 
 
-class InterpolationJoiner(TransformerMixin, BaseEstimator):
+class InterpolationJoiner(TransformerMixin, SkrubBaseTransformer):
     """Join with a table augmented by machine-learning predictions.
 
     This is similar to a usual equi-join, but instead of looking for actual

diff --git a/skrub/_joiner.py b/skrub/_joiner.py
@@ -5,7 +5,7 @@
 from functools import partial
 
 import numpy as np
-from sklearn.base import BaseEstimator, TransformerMixin, clone
+from sklearn.base import TransformerMixin, clone
 from sklearn.compose import make_column_transformer
 from sklearn.feature_extraction.text import HashingVectorizer, TfidfTransformer
 from sklearn.pipeline import make_pipeline
@@ -15,6 +15,7 @@
 from . import _dataframe as sbd
 from . import _join_utils, _matching, _utils
 from . import selectors as s
+from ._base import SkrubBaseTransformer
 from ._check_input import CheckInputDataFrame
 from ._datetime_encoder import DatetimeEncoder
 from ._table_vectorizer import TableVectorizer
@@ -76,7 +77,7 @@ def _make_vectorizer(table, string_encoder, rescale):
     return make_pipeline(skrubber, make_column_transformer(*transformers))
 
 
-class Joiner(TransformerMixin, BaseEstimator):
+class Joiner(TransformerMixin, SkrubBaseTransformer):
     """Augment features in a main table by fuzzy-joining an auxiliary table to it.
 
     This transformer is initialized with an auxiliary table `aux_table`. It

diff --git a/skrub/_matching.py b/skrub/_matching.py
@@ -1,11 +1,12 @@
 import numpy as np
 from scipy import sparse
-from sklearn.base import BaseEstimator
 from sklearn.neighbors import NearestNeighbors
 from sklearn.utils import check_random_state
 
+from ._base import SkrubBaseTransformer
 
-class Matching(BaseEstimator):
+
+class Matching(SkrubBaseTransformer):
     """Base class for fuzzy-join matching & distance rescaling.
 
     This class is a helper for the ``Joiner`` and ``fuzzy_join``. It is

diff --git a/skrub/_multi_agg_joiner.py b/skrub/_multi_agg_joiner.py
@@ -2,13 +2,15 @@
 The MultiAggJoiner extends AggJoiner to multiple auxiliary tables.
 """
 
-from sklearn.base import BaseEstimator, TransformerMixin
+from sklearn.base import TransformerMixin
 from sklearn.utils.validation import check_is_fitted
 
 from skrub._agg_joiner import AggJoiner
 from skrub._dataframe import _common as sbd
 from skrub._utils import _is_array_like
 
+from ._base import SkrubBaseTransformer
+
 
 def _is_iterable_of_iterable_of_str(x):
     "Return True if x is an iterable of iterable of str and False otherwise."
@@ -17,7 +19,7 @@ def _is_iterable_of_iterable_of_str(x):
     )
 
 
-class MultiAggJoiner(TransformerMixin, BaseEstimator):
+class MultiAggJoiner(TransformerMixin, SkrubBaseTransformer):
     """Extension of the :class:`AggJoiner` to multiple auxiliary tables.
 
     Apply numerical and categorical aggregation operations on the `cols`

diff --git a/skrub/_select_cols.py b/skrub/_select_cols.py
@@ -1,10 +1,11 @@
-from sklearn.base import BaseEstimator, TransformerMixin, check_is_fitted
+from sklearn.base import TransformerMixin, check_is_fitted
 
 from . import selectors as s
+from ._base import SkrubBaseTransformer
 from ._single_column_transformer import SingleColumnTransformer
 
 
-class SelectCols(TransformerMixin, BaseEstimator):
+class SelectCols(TransformerMixin, SkrubBaseTransformer):
     """Select a subset of a DataFrame's columns.
 
     A ``ValueError`` is raised if any of the provided column names are not in the
@@ -99,7 +100,7 @@ def get_feature_names_out(self, input_features=None):
         return self.columns_
 
 
-class DropCols(TransformerMixin, BaseEstimator):
+class DropCols(TransformerMixin, SkrubBaseTransformer):
     """Drop a subset of a DataFrame's columns.
 
     The other columns are kept in their original order. A ``ValueError`` is raised if

diff --git a/skrub/_single_column_transformer.py b/skrub/_single_column_transformer.py
@@ -4,12 +4,12 @@
 import re
 import textwrap
 
-from sklearn.base import BaseEstimator
 from sklearn.pipeline import Pipeline
 from sklearn.utils.validation import check_is_fitted
 
 from . import _dataframe as sbd
 from . import _utils
+from ._base import SkrubBaseTransformer
 
 __all__ = ["SingleColumnTransformer", "RejectColumn"]
 
@@ -120,7 +120,7 @@ class RejectColumn(ValueError):
     pass
 
 
-class SingleColumnTransformer(BaseEstimator):
+class SingleColumnTransformer(SkrubBaseTransformer):
     """Base class for single-column transformers.
 
     Such transformers are applied independently to each column by