From 5fcf6297fbda310440b807be41d91553a04460b4 Mon Sep 17 00:00:00 2001
From: Riccardo Cappuzzo <riccardo.cappuzzo@gmail.com>
Date: Tue, 21 Apr 2026 13:04:18 +0200
Subject: [PATCH 01/20] ENH - adding doc link to html repr of estimators

---
 skrub/_single_column_transformer.py | 17 +++++++++++++
 skrub/_table_vectorizer.py          | 38 +++++++++++++++++++++++++++++
 2 files changed, 55 insertions(+)

diff --git a/skrub/_single_column_transformer.py b/skrub/_single_column_transformer.py
index 9727f7d14..93890909b 100644
--- a/skrub/_single_column_transformer.py
+++ b/skrub/_single_column_transformer.py
@@ -143,6 +143,23 @@ class SingleColumnTransformer(BaseEstimator):
     """
 
     __single_column_transformer__ = True
+    _doc_link_module = "skrub"
+
+    # Defining this as a property because it inherits from _HTMLDocumentationLinkMixin,
+    # which also defines _doc_link_template as a property, and we want to be able
+    # to override it.
+    @property
+    def _doc_link_template(self):
+        return getattr(
+            self,
+            "__doc_link_template",
+            "https://skrub-data.org/stable/reference/generated/"
+            "{estimator_module}.{estimator_name}.html",
+        )
+
+    @_doc_link_template.setter
+    def _doc_link_template(self, value):
+        setattr(self, "__doc_link_template", value)
 
     def set_output(self, *, transform=None):
         """
diff --git a/skrub/_table_vectorizer.py b/skrub/_table_vectorizer.py
index 7b2f12993..cc4e9fe10 100644
--- a/skrub/_table_vectorizer.py
+++ b/skrub/_table_vectorizer.py
@@ -29,6 +29,8 @@
 
 
 class PassThrough(SingleColumnTransformer):
+    _doc_link_module = ""
+
     def fit_transform(self, column, y=None):
         return column
 
@@ -338,6 +340,24 @@ class Cleaner(TransformerMixin, BaseEstimator):
     [DropUninformative()]
     """
 
+    _doc_link_module = "skrub"
+
+    # Defining this as a property because it inherits from _HTMLDocumentationLinkMixin,
+    # which also defines _doc_link_template as a property, and we want to be able
+    # to override it.
+    @property
+    def _doc_link_template(self):
+        return getattr(
+            self,
+            "__doc_link_template",
+            "https://skrub-data.org/stable/reference/generated/"
+            "{estimator_module}.{estimator_name}.html",
+        )
+
+    @_doc_link_template.setter
+    def _doc_link_template(self, value):
+        setattr(self, "__doc_link_template", value)
+
     def __init__(
         self,
         drop_null_fraction=1.0,
@@ -796,6 +816,24 @@ class TableVectorizer(TransformerMixin, BaseEstimator):
     ValueError: Column 'A' used twice in 'specific_transformers', at indices 0 and 1.
     """  # noqa: E501
 
+    _doc_link_module = "skrub"
+
+    # Defining this as a property because it inherits from _HTMLDocumentationLinkMixin,
+    # which also defines _doc_link_template as a property, and we want to be able
+    # to override it.
+    @property
+    def _doc_link_template(self):
+        return getattr(
+            self,
+            "__doc_link_template",
+            "https://skrub-data.org/stable/reference/generated/"
+            "{estimator_module}.{estimator_name}.html",
+        )
+
+    @_doc_link_template.setter
+    def _doc_link_template(self, value):
+        setattr(self, "__doc_link_template", value)
+
     def __init__(
         self,
         *,

From baf63fbd76588811fe996c15e311ac3cfc0d01cd Mon Sep 17 00:00:00 2001
From: Riccardo Cappuzzo <riccardo.cappuzzo@gmail.com>
Date: Tue, 21 Apr 2026 13:14:30 +0200
Subject: [PATCH 02/20] moving the new methods

---
 skrub/_apply_to_cols.py    | 18 ++++++++++++++
 skrub/_select_cols.py      | 36 ++++++++++++++++++++++++++++
 skrub/_table_vectorizer.py | 48 +++++++++++++++++++++++++-------------
 3 files changed, 86 insertions(+), 16 deletions(-)

diff --git a/skrub/_apply_to_cols.py b/skrub/_apply_to_cols.py
index 523e6a341..195c0564c 100644
--- a/skrub/_apply_to_cols.py
+++ b/skrub/_apply_to_cols.py
@@ -276,6 +276,24 @@ class ApplyToCols(TransformerMixin, BaseEstimator):
     1  10.0  100.0       1.0       1.0
     """
 
+    _doc_link_module = "skrub"
+
+    # Defining this as a property because it inherits from _HTMLDocumentationLinkMixin,
+    # which also defines _doc_link_template as a property, and we want to be able
+    # to override it.
+    @property
+    def _doc_link_template(self):
+        return getattr(
+            self,
+            "__doc_link_template",
+            "https://skrub-data.org/stable/reference/generated/"
+            "{estimator_module}.{estimator_name}.html",
+        )
+
+    @_doc_link_template.setter
+    def _doc_link_template(self, value):
+        setattr(self, "__doc_link_template", value)
+
     def __init__(
         self,
         transformer,
diff --git a/skrub/_select_cols.py b/skrub/_select_cols.py
index 34cc97387..da001f455 100644
--- a/skrub/_select_cols.py
+++ b/skrub/_select_cols.py
@@ -43,6 +43,8 @@ class SelectCols(TransformerMixin, BaseEstimator):
     ValueError: The following columns are requested for selection but missing from dataframe: ['X']
     """  # noqa: E501
 
+    _doc_link_module = "skrub"
+
     def __init__(self, cols):
         self.cols = cols
 
@@ -98,6 +100,22 @@ def get_feature_names_out(self, input_features=None):
         check_is_fitted(self, "columns_")
         return self.columns_
 
+    # Defining this as a property because it inherits from _HTMLDocumentationLinkMixin,
+    # which also defines _doc_link_template as a property, and we want to be able
+    # to override it.
+    @property
+    def _doc_link_template(self):
+        return getattr(
+            self,
+            "__doc_link_template",
+            "https://skrub-data.org/stable/reference/generated/"
+            "{estimator_module}.{estimator_name}.html",
+        )
+
+    @_doc_link_template.setter
+    def _doc_link_template(self, value):
+        setattr(self, "__doc_link_template", value)
+
 
 class DropCols(TransformerMixin, BaseEstimator):
     """Drop a subset of a DataFrame's columns.
@@ -138,6 +156,8 @@ class DropCols(TransformerMixin, BaseEstimator):
     ValueError: The following columns are requested for selection but missing from dataframe: ['X']
     """  # noqa: E501
 
+    _doc_link_module = "skrub"
+
     def __init__(self, cols):
         self.cols = cols
 
@@ -195,6 +215,22 @@ def get_feature_names_out(self, input_features=None):
         check_is_fitted(self, "kept_cols_")
         return self.kept_cols_
 
+    # Defining this as a property because it inherits from _HTMLDocumentationLinkMixin,
+    # which also defines _doc_link_template as a property, and we want to be able
+    # to override it.
+    @property
+    def _doc_link_template(self):
+        return getattr(
+            self,
+            "__doc_link_template",
+            "https://skrub-data.org/stable/reference/generated/"
+            "{estimator_module}.{estimator_name}.html",
+        )
+
+    @_doc_link_template.setter
+    def _doc_link_template(self, value):
+        setattr(self, "__doc_link_template", value)
+
 
 class Drop(SingleColumnTransformer):
     def fit_transform(self, column, y=None):
diff --git a/skrub/_table_vectorizer.py b/skrub/_table_vectorizer.py
index cc4e9fe10..697bab3f7 100644
--- a/skrub/_table_vectorizer.py
+++ b/skrub/_table_vectorizer.py
@@ -342,22 +342,6 @@ class Cleaner(TransformerMixin, BaseEstimator):
 
     _doc_link_module = "skrub"
 
-    # Defining this as a property because it inherits from _HTMLDocumentationLinkMixin,
-    # which also defines _doc_link_template as a property, and we want to be able
-    # to override it.
-    @property
-    def _doc_link_template(self):
-        return getattr(
-            self,
-            "__doc_link_template",
-            "https://skrub-data.org/stable/reference/generated/"
-            "{estimator_module}.{estimator_name}.html",
-        )
-
-    @_doc_link_template.setter
-    def _doc_link_template(self, value):
-        setattr(self, "__doc_link_template", value)
-
     def __init__(
         self,
         drop_null_fraction=1.0,
@@ -477,6 +461,22 @@ def get_feature_names_out(self, input_features=None):
         check_is_fitted(self, "all_outputs_")
         return np.asarray(self.all_outputs_)
 
+    # Defining this as a property because it inherits from _HTMLDocumentationLinkMixin,
+    # which also defines _doc_link_template as a property, and we want to be able
+    # to override it.
+    @property
+    def _doc_link_template(self):
+        return getattr(
+            self,
+            "__doc_link_template",
+            "https://skrub-data.org/stable/reference/generated/"
+            "{estimator_module}.{estimator_name}.html",
+        )
+
+    @_doc_link_template.setter
+    def _doc_link_template(self, value):
+        setattr(self, "__doc_link_template", value)
+
 
 class TableVectorizer(TransformerMixin, BaseEstimator):
     """Transform a dataframe to a numeric (vectorized) representation.
@@ -1114,3 +1114,19 @@ def get_feature_names_out(self, input_features=None):
         """
         check_is_fitted(self, "all_outputs_")
         return np.asarray(self.all_outputs_)
+
+    # Defining this as a property because it inherits from _HTMLDocumentationLinkMixin,
+    # which also defines _doc_link_template as a property, and we want to be able
+    # to override it.
+    @property
+    def _doc_link_template(self):
+        return getattr(
+            self,
+            "__doc_link_template",
+            "https://skrub-data.org/stable/reference/generated/"
+            "{estimator_module}.{estimator_name}.html",
+        )
+
+    @_doc_link_template.setter
+    def _doc_link_template(self, value):
+        setattr(self, "__doc_link_template", value)

From 0fde8fd401cedb3c9ad5369e451e806f3d262ca3 Mon Sep 17 00:00:00 2001
From: Riccardo Cappuzzo <riccardo.cappuzzo@gmail.com>
Date: Tue, 21 Apr 2026 13:23:58 +0200
Subject: [PATCH 03/20] adding more

---
 skrub/_data_ops/_estimator.py | 15 +++++++++++++++
 skrub/_squashing_scaler.py    | 18 ++++++++++++++++++
 skrub/_table_vectorizer.py    | 16 ----------------
 3 files changed, 33 insertions(+), 16 deletions(-)

diff --git a/skrub/_data_ops/_estimator.py b/skrub/_data_ops/_estimator.py
index 43988b96b..b2e8d4339 100644
--- a/skrub/_data_ops/_estimator.py
+++ b/skrub/_data_ops/_estimator.py
@@ -560,6 +560,21 @@ def describe_params(self):
         """
         return describe_params(eval_choices(self.data_op), choice_graph(self.data_op))
 
+    _doc_link_module = "skrub"
+
+    @property
+    def _doc_link_template(self):
+        return getattr(
+            self,
+            "__doc_link_template",
+            "https://skrub-data.org/stable/reference/generated/"
+            "{estimator_module}.{estimator_name}.html",
+        )
+
+    @_doc_link_template.setter
+    def _doc_link_template(self, value):
+        setattr(self, "__doc_link_template", value)
+
 
 def _to_Xy_pipeline(learner, environment):
     return learner.__skrub_to_Xy_pipeline__(environment)
diff --git a/skrub/_squashing_scaler.py b/skrub/_squashing_scaler.py
index a2acf24b7..ffc2fa104 100644
--- a/skrub/_squashing_scaler.py
+++ b/skrub/_squashing_scaler.py
@@ -335,3 +335,21 @@ def transform(self, X):
             X_tr = _set_zeros(X_tr, self.zero_cols_)
 
         return _soft_clip(X_tr, self.max_absolute_value, mask_inf)
+
+    _doc_link_module = "skrub"
+
+    # Defining this as a property because it inherits from _HTMLDocumentationLinkMixin,
+    # which also defines _doc_link_template as a property, and we want to be able
+    # to override it.
+    @property
+    def _doc_link_template(self):
+        return getattr(
+            self,
+            "__doc_link_template",
+            "https://skrub-data.org/stable/reference/generated/"
+            "{estimator_module}.{estimator_name}.html",
+        )
+
+    @_doc_link_template.setter
+    def _doc_link_template(self, value):
+        setattr(self, "__doc_link_template", value)
diff --git a/skrub/_table_vectorizer.py b/skrub/_table_vectorizer.py
index 697bab3f7..20cc88249 100644
--- a/skrub/_table_vectorizer.py
+++ b/skrub/_table_vectorizer.py
@@ -818,22 +818,6 @@ class TableVectorizer(TransformerMixin, BaseEstimator):
 
     _doc_link_module = "skrub"
 
-    # Defining this as a property because it inherits from _HTMLDocumentationLinkMixin,
-    # which also defines _doc_link_template as a property, and we want to be able
-    # to override it.
-    @property
-    def _doc_link_template(self):
-        return getattr(
-            self,
-            "__doc_link_template",
-            "https://skrub-data.org/stable/reference/generated/"
-            "{estimator_module}.{estimator_name}.html",
-        )
-
-    @_doc_link_template.setter
-    def _doc_link_template(self, value):
-        setattr(self, "__doc_link_template", value)
-
     def __init__(
         self,
         *,

From e40ecfaf1c6b9467e4cdd23870abab3f352af171 Mon Sep 17 00:00:00 2001
From: Riccardo Cappuzzo <riccardo.cappuzzo@gmail.com>
Date: Tue, 21 Apr 2026 14:31:18 +0200
Subject: [PATCH 04/20] fixing applytocols

---
 skrub/_apply_to_cols.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/skrub/_apply_to_cols.py b/skrub/_apply_to_cols.py
index 195c0564c..55047586c 100644
--- a/skrub/_apply_to_cols.py
+++ b/skrub/_apply_to_cols.py
@@ -8,6 +8,7 @@
 from . import selectors
 from ._apply_to_each_col import ApplyToEachCol
 from ._apply_to_sub_frame import ApplyToSubFrame
+from ._sklearn_compat import _VisualBlock
 from ._wrap_transformer import wrap_transformer
 
 _SELECT_ALL_COLUMNS = selectors.all()
@@ -431,6 +432,18 @@ def get_feature_names_out(self, input_features=None):
 
         return self._wrapped_transformer.get_feature_names_out(input_features)
 
+    def _sk_visual_block_(self):
+        # This is needed because when ApplyToCols is used with a transformer like
+        # TableVectorizser then the estimator is shown as a parallel block, which
+        # would not add the documentation link.
+        # With this override the problem is fixed.
+        return _VisualBlock(
+            "serial",
+            [self.transformer],
+            names=[self.transformer.__class__.__name__],
+            name_details=[str(self.transformer)],
+        )
+
     def __getattr__(self, name):
         if name == "transformers_" and isinstance(
             getattr(self, "_wrapped_transformer", None), ApplyToSubFrame

From 3156828b9568d149ae8640853a6201266b005d7e Mon Sep 17 00:00:00 2001
From: Riccardo Cappuzzo <riccardo.cappuzzo@gmail.com>
Date: Tue, 21 Apr 2026 16:10:18 +0200
Subject: [PATCH 05/20] fixing typo

---
 skrub/_apply_to_cols.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/skrub/_apply_to_cols.py b/skrub/_apply_to_cols.py
index 55047586c..d87f3f0ad 100644
--- a/skrub/_apply_to_cols.py
+++ b/skrub/_apply_to_cols.py
@@ -434,7 +434,7 @@ def get_feature_names_out(self, input_features=None):
 
     def _sk_visual_block_(self):
         # This is needed because when ApplyToCols is used with a transformer like
-        # TableVectorizser then the estimator is shown as a parallel block, which
+        # TableVectorizer then the estimator is shown as a parallel block, which
         # would not add the documentation link.
         # With this override the problem is fixed.
         return _VisualBlock(

From 8213776322fca2976426ec67e4ff031156687583 Mon Sep 17 00:00:00 2001
From: Riccardo Cappuzzo <riccardo.cappuzzo@gmail.com>
Date: Tue, 21 Apr 2026 17:02:34 +0200
Subject: [PATCH 06/20] adding tests

---
 skrub/tests/test_apply_to_cols.py             | 21 ++++++++++++++++-
 skrub/tests/test_single_column_transformer.py | 23 +++++++++++++++++++
 2 files changed, 43 insertions(+), 1 deletion(-)

diff --git a/skrub/tests/test_apply_to_cols.py b/skrub/tests/test_apply_to_cols.py
index ab0fd3b62..31336d136 100644
--- a/skrub/tests/test_apply_to_cols.py
+++ b/skrub/tests/test_apply_to_cols.py
@@ -1,11 +1,13 @@
 import datetime
+import re
 
 import numpy as np
 import pytest
 from sklearn.exceptions import NotFittedError
 from sklearn.preprocessing import OrdinalEncoder
+from sklearn.utils import estimator_html_repr
 
-from skrub import ApplyToCols
+from skrub import ApplyToCols, StringEncoder, TableVectorizer
 from skrub import _dataframe as sbd
 from skrub import selectors as s
 from skrub._to_datetime import ToDatetime
@@ -162,6 +164,23 @@ def test_get_feature_names_out_after_fit(df_module):
     assert feature_names == ["date_col"]
 
 
+def test_doc_link_wrapped_transformer_in_html_repr():
+    """The wrapped transformer's doc link appears in the HTML repr of ApplyToCols."""
+    html = estimator_html_repr(ApplyToCols(StringEncoder()))
+    links = set(re.findall(r'href="(https?://[^#"]+)"', html))
+    assert (
+        "https://skrub-data.org/stable/reference/generated/skrub.StringEncoder.html"
+        in links
+    )
+
+    html = estimator_html_repr(ApplyToCols(TableVectorizer()))
+    links = set(re.findall(r'href="(https?://[^#"]+)"', html))
+    assert (
+        "https://skrub-data.org/stable/reference/generated/skrub.TableVectorizer.html"
+        in links
+    )
+
+
 def test_getattr_raises_for_wrong_attribute(df_module):
     """Test __getattr__ raises proper AttributeError for wrong attributes."""
     # Test that accessing transformers_ on non-single-column transformer raises error
diff --git a/skrub/tests/test_single_column_transformer.py b/skrub/tests/test_single_column_transformer.py
index 12914df3e..6e49bd6fe 100644
--- a/skrub/tests/test_single_column_transformer.py
+++ b/skrub/tests/test_single_column_transformer.py
@@ -3,6 +3,7 @@
 from sklearn.pipeline import Pipeline, make_pipeline
 from sklearn.preprocessing import StandardScaler
 
+from skrub import GapEncoder
 from skrub import _dataframe as sbd
 from skrub._single_column_transformer import (
     SingleColumnTransformer,
@@ -91,6 +92,28 @@ def fit(self, column, y=None):
     assert transformer.get_feature_names_out() == [sbd.name(column)]
 
 
+def test_doc_link_skrub_class():
+    """Public skrub classes get a link to skrub documentation."""
+    link = GapEncoder()._get_doc_link()
+    assert link == (
+        "https://skrub-data.org/stable/reference/generated/skrub.GapEncoder.html"
+    )
+
+
+def test_doc_link_user_defined_subclass():
+    """User-defined subclasses outside skrub.* produce no link."""
+
+    class MyTransformer(SingleColumnTransformer):
+        def fit_transform(self, column, y=None):
+            return column
+
+        def transform(self, column):
+            return column
+
+    MyTransformer.__module__ = "user_package"
+    assert MyTransformer()._get_doc_link() == ""
+
+
 def test_is_single_column_transformer():
     class S:
         __single_column_transformer__ = True

From 4a41b3ff733a43857f37c48024cd9580e131e2d6 Mon Sep 17 00:00:00 2001
From: Riccardo Cappuzzo <riccardo.cappuzzo@gmail.com>
Date: Tue, 21 Apr 2026 17:07:30 +0200
Subject: [PATCH 07/20] adding a comment

---
 skrub/tests/test_single_column_transformer.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/skrub/tests/test_single_column_transformer.py b/skrub/tests/test_single_column_transformer.py
index 6e49bd6fe..003b2b3f0 100644
--- a/skrub/tests/test_single_column_transformer.py
+++ b/skrub/tests/test_single_column_transformer.py
@@ -110,6 +110,10 @@ def fit_transform(self, column, y=None):
         def transform(self, column):
             return column
 
+    # Needed to simulate a user-defined class outside of skrub.*.
+    # Since this test is running in a module named
+    # "skrub.tests.test_single_column_transformer", that is the default module
+    # for MyTransformer, which would cause a doc link to be generated.
     MyTransformer.__module__ = "user_package"
     assert MyTransformer()._get_doc_link() == ""
 

From b3403c060e3f63f2a7ac091f123a33870f043921 Mon Sep 17 00:00:00 2001
From: Riccardo Cappuzzo <riccardo.cappuzzo@gmail.com>
Date: Tue, 21 Apr 2026 17:12:05 +0200
Subject: [PATCH 08/20] changelog

---
 CHANGES.rst | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/CHANGES.rst b/CHANGES.rst
index b5009cd15..339cbb569 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -26,6 +26,9 @@ Changes
 - The row indices of training and testing samples are now also included in the
   dictionaries produced by :meth:`DataOp.skb.iter_cv_splits`. :pr:`2012` by
   :user:`Jérôme Dockès <jeromedockes>`.
+- Skrub estimators now correctly show links to the documentation in the HTML
+  representation that is generated for notebooks. :pr:`2036` by :user:`Riccardo
+  Cappuzzo <rcap107>`.
 
 Bugfixes
 --------

From ade23b8a12ebcfc7d15db669ef0d905814a6c0d5 Mon Sep 17 00:00:00 2001
From: Riccardo Cappuzzo <riccardo.cappuzzo@gmail.com>
Date: Tue, 21 Apr 2026 18:00:49 +0200
Subject: [PATCH 09/20] removing unneeded setter

---
 skrub/_apply_to_cols.py             | 4 ----
 skrub/_data_ops/_estimator.py       | 4 ----
 skrub/_select_cols.py               | 8 --------
 skrub/_single_column_transformer.py | 4 ----
 skrub/_squashing_scaler.py          | 4 ----
 skrub/_table_vectorizer.py          | 8 --------
 6 files changed, 32 deletions(-)

diff --git a/skrub/_apply_to_cols.py b/skrub/_apply_to_cols.py
index d87f3f0ad..f4dd6d7bf 100644
--- a/skrub/_apply_to_cols.py
+++ b/skrub/_apply_to_cols.py
@@ -291,10 +291,6 @@ def _doc_link_template(self):
             "{estimator_module}.{estimator_name}.html",
         )
 
-    @_doc_link_template.setter
-    def _doc_link_template(self, value):
-        setattr(self, "__doc_link_template", value)
-
     def __init__(
         self,
         transformer,
diff --git a/skrub/_data_ops/_estimator.py b/skrub/_data_ops/_estimator.py
index b2e8d4339..9a81c7a19 100644
--- a/skrub/_data_ops/_estimator.py
+++ b/skrub/_data_ops/_estimator.py
@@ -571,10 +571,6 @@ def _doc_link_template(self):
             "{estimator_module}.{estimator_name}.html",
         )
 
-    @_doc_link_template.setter
-    def _doc_link_template(self, value):
-        setattr(self, "__doc_link_template", value)
-
 
 def _to_Xy_pipeline(learner, environment):
     return learner.__skrub_to_Xy_pipeline__(environment)
diff --git a/skrub/_select_cols.py b/skrub/_select_cols.py
index da001f455..d92252c56 100644
--- a/skrub/_select_cols.py
+++ b/skrub/_select_cols.py
@@ -112,10 +112,6 @@ def _doc_link_template(self):
             "{estimator_module}.{estimator_name}.html",
         )
 
-    @_doc_link_template.setter
-    def _doc_link_template(self, value):
-        setattr(self, "__doc_link_template", value)
-
 
 class DropCols(TransformerMixin, BaseEstimator):
     """Drop a subset of a DataFrame's columns.
@@ -227,10 +223,6 @@ def _doc_link_template(self):
             "{estimator_module}.{estimator_name}.html",
         )
 
-    @_doc_link_template.setter
-    def _doc_link_template(self, value):
-        setattr(self, "__doc_link_template", value)
-
 
 class Drop(SingleColumnTransformer):
     def fit_transform(self, column, y=None):
diff --git a/skrub/_single_column_transformer.py b/skrub/_single_column_transformer.py
index 93890909b..d9883ec7d 100644
--- a/skrub/_single_column_transformer.py
+++ b/skrub/_single_column_transformer.py
@@ -157,10 +157,6 @@ def _doc_link_template(self):
             "{estimator_module}.{estimator_name}.html",
         )
 
-    @_doc_link_template.setter
-    def _doc_link_template(self, value):
-        setattr(self, "__doc_link_template", value)
-
     def set_output(self, *, transform=None):
         """
         Default no-op implementation for set_output.
diff --git a/skrub/_squashing_scaler.py b/skrub/_squashing_scaler.py
index ffc2fa104..fc44aa004 100644
--- a/skrub/_squashing_scaler.py
+++ b/skrub/_squashing_scaler.py
@@ -349,7 +349,3 @@ def _doc_link_template(self):
             "https://skrub-data.org/stable/reference/generated/"
             "{estimator_module}.{estimator_name}.html",
         )
-
-    @_doc_link_template.setter
-    def _doc_link_template(self, value):
-        setattr(self, "__doc_link_template", value)
diff --git a/skrub/_table_vectorizer.py b/skrub/_table_vectorizer.py
index 20cc88249..243b0c661 100644
--- a/skrub/_table_vectorizer.py
+++ b/skrub/_table_vectorizer.py
@@ -473,10 +473,6 @@ def _doc_link_template(self):
             "{estimator_module}.{estimator_name}.html",
         )
 
-    @_doc_link_template.setter
-    def _doc_link_template(self, value):
-        setattr(self, "__doc_link_template", value)
-
 
 class TableVectorizer(TransformerMixin, BaseEstimator):
     """Transform a dataframe to a numeric (vectorized) representation.
@@ -1110,7 +1106,3 @@ def _doc_link_template(self):
             "https://skrub-data.org/stable/reference/generated/"
             "{estimator_module}.{estimator_name}.html",
         )
-
-    @_doc_link_template.setter
-    def _doc_link_template(self, value):
-        setattr(self, "__doc_link_template", value)

From fe0fc1355a1da905c0e3631d62cf8c4386d94152 Mon Sep 17 00:00:00 2001
From: Riccardo Cappuzzo <riccardo.cappuzzo@gmail.com>
Date: Wed, 22 Apr 2026 10:00:05 +0200
Subject: [PATCH 10/20] adding more tests for coverage

---
 skrub/tests/test_select_cols.py      | 12 ++++++++++++
 skrub/tests/test_squashing_scaler.py |  8 ++++++++
 skrub/tests/test_table_vectorizer.py | 12 ++++++++++++
 3 files changed, 32 insertions(+)

diff --git a/skrub/tests/test_select_cols.py b/skrub/tests/test_select_cols.py
index 739c86eb7..26cba9d3a 100644
--- a/skrub/tests/test_select_cols.py
+++ b/skrub/tests/test_select_cols.py
@@ -88,3 +88,15 @@ def test_get_feature_names_out(df):
     pipeline = make_pipeline(DropCols(cols=["A", "B"]), DummyClassifier())
     pipeline.fit(df, df["C"])
     assert pipeline[:-1].get_feature_names_out() == ["C"]
+
+
+def test_doc_link_skrub_class():
+    """Public skrub classes get a link to skrub documentation."""
+    link = SelectCols(cols=[])._get_doc_link()
+    assert link == (
+        "https://skrub-data.org/stable/reference/generated/skrub.SelectCols.html"
+    )
+    link = DropCols(cols=[])._get_doc_link()
+    assert link == (
+        "https://skrub-data.org/stable/reference/generated/skrub.DropCols.html"
+    )
diff --git a/skrub/tests/test_squashing_scaler.py b/skrub/tests/test_squashing_scaler.py
index f40ba1731..e74e909e3 100644
--- a/skrub/tests/test_squashing_scaler.py
+++ b/skrub/tests/test_squashing_scaler.py
@@ -144,3 +144,11 @@ def test_squashing_scaler_known_values(df_module):
         -1, 1
     )
     assert_almost_equal(X_target, X_out)
+
+
+def test_doc_link_skrub_class():
+    """Public skrub classes get a link to skrub documentation."""
+    link = SquashingScaler()._get_doc_link()
+    assert link == (
+        "https://skrub-data.org/stable/reference/generated/skrub.SquashingScaler.html"
+    )
diff --git a/skrub/tests/test_table_vectorizer.py b/skrub/tests/test_table_vectorizer.py
index fa5d4f13a..eaf1a109b 100644
--- a/skrub/tests/test_table_vectorizer.py
+++ b/skrub/tests/test_table_vectorizer.py
@@ -1109,3 +1109,15 @@ def test_pipeline_in_table_vectorizer(df_module):
     fit_transform_result = tv.fit_transform(df)
     transform_result = tv.transform(df)
     assert fit_transform_result.shape == transform_result.shape == (2, 4)
+
+
+def test_doc_link_skrub_class():
+    """Public skrub classes get a link to skrub documentation."""
+    link = TableVectorizer()._get_doc_link()
+    assert link == (
+        "https://skrub-data.org/stable/reference/generated/skrub.TableVectorizer.html"
+    )
+    link = Cleaner()._get_doc_link()
+    assert link == (
+        "https://skrub-data.org/stable/reference/generated/skrub.Cleaner.html"
+    )

From 1f1a82854b3a03dc338125dfcbd5d46962a23a38 Mon Sep 17 00:00:00 2001
From: Riccardo Cappuzzo <riccardo.cappuzzo@gmail.com>
Date: Mon, 8 Jun 2026 16:48:16 +0200
Subject: [PATCH 11/20] moving changes to a single file

---
 skrub/_apply_to_cols.py              | 19 ++--------
 skrub/_base.py                       | 33 +++++++++++++++++
 skrub/_select_cols.py                |  7 ++--
 skrub/_squashing_scaler.py           | 17 ++-------
 skrub/_table_vectorizer.py           |  7 ++--
 skrub/tests/test_apply_to_cols.py    | 21 +----------
 skrub/tests/test_base.py             | 53 ++++++++++++++++++++++++++++
 skrub/tests/test_select_cols.py      | 12 -------
 skrub/tests/test_squashing_scaler.py |  8 -----
 skrub/tests/test_table_vectorizer.py | 12 -------
 10 files changed, 100 insertions(+), 89 deletions(-)
 create mode 100644 skrub/_base.py
 create mode 100644 skrub/tests/test_base.py

diff --git a/skrub/_apply_to_cols.py b/skrub/_apply_to_cols.py
index 029982850..77fb167cc 100644
--- a/skrub/_apply_to_cols.py
+++ b/skrub/_apply_to_cols.py
@@ -3,18 +3,19 @@
 based on the type of the transformer passed to it.
 """
 
-from sklearn.base import BaseEstimator, TransformerMixin, check_is_fitted
+from sklearn.base import TransformerMixin, check_is_fitted
 
 from . import selectors
 from ._apply_to_each_col import ApplyToEachCol
 from ._apply_to_sub_frame import ApplyToSubFrame
+from ._base import BaseTransformer
 from ._sklearn_compat import _VisualBlock
 from ._wrap_transformer import wrap_transformer
 
 _SELECT_ALL_COLUMNS = selectors.all()
 
 
-class ApplyToCols(TransformerMixin, BaseEstimator):
+class ApplyToCols(TransformerMixin, BaseTransformer):
     """
     Apply a transformer to selected columns in a dataframe.
 
@@ -292,20 +293,6 @@ class ApplyToCols(TransformerMixin, BaseEstimator):
     1  10.0  100.0       1.0       1.0
     """  # noqa: E501
 
-    _doc_link_module = "skrub"
-
-    # Defining this as a property because it inherits from _HTMLDocumentationLinkMixin,
-    # which also defines _doc_link_template as a property, and we want to be able
-    # to override it.
-    @property
-    def _doc_link_template(self):
-        return getattr(
-            self,
-            "__doc_link_template",
-            "https://skrub-data.org/stable/reference/generated/"
-            "{estimator_module}.{estimator_name}.html",
-        )
-
     def __init__(
         self,
         transformer,
diff --git a/skrub/_base.py b/skrub/_base.py
new file mode 100644
index 000000000..e8a48fc50
--- /dev/null
+++ b/skrub/_base.py
@@ -0,0 +1,33 @@
+from sklearn.base import BaseEstimator
+
+
+class BaseTransformer(BaseEstimator):
+    _doc_link_module = "skrub"
+
+    # Defining this as a property because it inherits from _HTMLDocumentationLinkMixin,
+    # which also defines _doc_link_template as a property, and we want to be able
+    # to override it.
+    @property
+    def _doc_link_template(self):
+        return getattr(
+            self,
+            "__doc_link_template",
+            "https://skrub-data.org/stable/reference/generated/"
+            "{estimator_module}.{estimator_name}.html",
+        )
+
+    def fit(self, X, y=None):
+        return self
+
+    def fit_transform(self, X, y=None):
+        return self.transform(X)
+
+    def transform(self, X):
+        # This method should be overridden by subclasses. We raise an error here to
+        # make it clear to users that they need to implement this method if they are
+        # creating a custom transformer class. We also catch the error in check_output
+        # to provide a more informative error message if the output of transform has the
+        # wrong type.
+        raise NotImplementedError(
+            f"{self.__class__.__name__} does not implement the 'transform' method."
+        )
diff --git a/skrub/_select_cols.py b/skrub/_select_cols.py
index d92252c56..86a33861c 100644
--- a/skrub/_select_cols.py
+++ b/skrub/_select_cols.py
@@ -1,10 +1,11 @@
-from sklearn.base import BaseEstimator, TransformerMixin, check_is_fitted
+from sklearn.base import TransformerMixin, check_is_fitted
 
 from . import selectors as s
+from ._base import BaseTransformer
 from ._single_column_transformer import SingleColumnTransformer
 
 
-class SelectCols(TransformerMixin, BaseEstimator):
+class SelectCols(TransformerMixin, BaseTransformer):
     """Select a subset of a DataFrame's columns.
 
     A ``ValueError`` is raised if any of the provided column names are not in the
@@ -113,7 +114,7 @@ def _doc_link_template(self):
         )
 
 
-class DropCols(TransformerMixin, BaseEstimator):
+class DropCols(TransformerMixin, BaseTransformer):
     """Drop a subset of a DataFrame's columns.
 
     The other columns are kept in their original order. A ``ValueError`` is raised if
diff --git a/skrub/_squashing_scaler.py b/skrub/_squashing_scaler.py
index fc44aa004..fcc055e40 100644
--- a/skrub/_squashing_scaler.py
+++ b/skrub/_squashing_scaler.py
@@ -5,6 +5,7 @@
 from sklearn.preprocessing import RobustScaler
 from sklearn.utils.validation import FLOAT_DTYPES, check_is_fitted
 
+from skrub._base import BaseTransformer
 from skrub._sklearn_compat import validate_data
 
 
@@ -82,7 +83,7 @@ def transform(self, X):
         return self.scale_ * (X - self.median_)
 
 
-class SquashingScaler(OneToOneFeatureMixin, TransformerMixin, BaseEstimator):
+class SquashingScaler(OneToOneFeatureMixin, TransformerMixin, BaseTransformer):
     r"""Perform robust centering and scaling followed by soft clipping.
 
     When features have large outliers, smooth clipping prevents the outliers from
@@ -335,17 +336,3 @@ def transform(self, X):
             X_tr = _set_zeros(X_tr, self.zero_cols_)
 
         return _soft_clip(X_tr, self.max_absolute_value, mask_inf)
-
-    _doc_link_module = "skrub"
-
-    # Defining this as a property because it inherits from _HTMLDocumentationLinkMixin,
-    # which also defines _doc_link_template as a property, and we want to be able
-    # to override it.
-    @property
-    def _doc_link_template(self):
-        return getattr(
-            self,
-            "__doc_link_template",
-            "https://skrub-data.org/stable/reference/generated/"
-            "{estimator_module}.{estimator_name}.html",
-        )
diff --git a/skrub/_table_vectorizer.py b/skrub/_table_vectorizer.py
index 70193a9ba..b3b2e3e27 100644
--- a/skrub/_table_vectorizer.py
+++ b/skrub/_table_vectorizer.py
@@ -4,7 +4,7 @@
 from collections.abc import Iterable
 
 import numpy as np
-from sklearn.base import BaseEstimator, TransformerMixin, clone
+from sklearn.base import TransformerMixin, clone
 from sklearn.pipeline import make_pipeline
 from sklearn.preprocessing import OneHotEncoder
 from sklearn.utils.validation import check_is_fitted
@@ -12,6 +12,7 @@
 from . import _dataframe as sbd
 from . import _utils
 from . import selectors as s
+from ._base import BaseTransformer
 from ._check_input import CheckInputDataFrame
 from ._clean_categories import CleanCategories
 from ._clean_null_strings import CleanNullStrings
@@ -183,7 +184,7 @@ def _get_preprocessors(
     return steps
 
 
-class Cleaner(TransformerMixin, BaseEstimator):
+class Cleaner(TransformerMixin, BaseTransformer):
     """Column-wise consistency checks and sanitization of dtypes, null values and dates.
 
     The ``Cleaner`` performs some consistency checks and basic preprocessing
@@ -555,7 +556,7 @@ def _doc_link_template(self):
         )
 
 
-class TableVectorizer(TransformerMixin, BaseEstimator):
+class TableVectorizer(TransformerMixin, BaseTransformer):
     """Transform a dataframe to a numeric (vectorized) representation.
 
     This transformer preprocesses the given dataframe by first cleaning the data
diff --git a/skrub/tests/test_apply_to_cols.py b/skrub/tests/test_apply_to_cols.py
index 34e3e6a27..ab30926bd 100644
--- a/skrub/tests/test_apply_to_cols.py
+++ b/skrub/tests/test_apply_to_cols.py
@@ -1,14 +1,12 @@
 import datetime
-import re
 import sys
 
 import numpy as np
 import pytest
 from sklearn.exceptions import NotFittedError
 from sklearn.preprocessing import OrdinalEncoder, StandardScaler
-from sklearn.utils import estimator_html_repr
 
-from skrub import ApplyToCols, StringEncoder, TableVectorizer
+from skrub import ApplyToCols
 from skrub import _dataframe as sbd
 from skrub import selectors as s
 from skrub._to_datetime import ToDatetime
@@ -213,23 +211,6 @@ def test_get_feature_names_out_after_fit(df_module):
     assert feature_names == ["date_col"]
 
 
-def test_doc_link_wrapped_transformer_in_html_repr():
-    """The wrapped transformer's doc link appears in the HTML repr of ApplyToCols."""
-    html = estimator_html_repr(ApplyToCols(StringEncoder()))
-    links = set(re.findall(r'href="(https?://[^#"]+)"', html))
-    assert (
-        "https://skrub-data.org/stable/reference/generated/skrub.StringEncoder.html"
-        in links
-    )
-
-    html = estimator_html_repr(ApplyToCols(TableVectorizer()))
-    links = set(re.findall(r'href="(https?://[^#"]+)"', html))
-    assert (
-        "https://skrub-data.org/stable/reference/generated/skrub.TableVectorizer.html"
-        in links
-    )
-
-
 def test_getattr_raises_for_wrong_attribute(df_module):
     """Test __getattr__ raises proper AttributeError for wrong attributes."""
     # Test that accessing transformers_ on non-single-column transformer raises error
diff --git a/skrub/tests/test_base.py b/skrub/tests/test_base.py
new file mode 100644
index 000000000..b26d34ed8
--- /dev/null
+++ b/skrub/tests/test_base.py
@@ -0,0 +1,53 @@
+import re
+
+from sklearn.utils import estimator_html_repr
+
+from skrub import (
+    ApplyToCols,
+    Cleaner,
+    DropCols,
+    SelectCols,
+    StringEncoder,
+    TableVectorizer,
+)
+
+
+def test_doc_link_apply_to_cols():
+    """The wrapped transformer's doc link appears in the HTML repr of ApplyToCols."""
+    html = estimator_html_repr(ApplyToCols(StringEncoder()))
+    links = set(re.findall(r'href="(https?://[^#"]+)"', html))
+    assert (
+        "https://skrub-data.org/stable/reference/generated/skrub.StringEncoder.html"
+        in links
+    )
+
+    html = estimator_html_repr(ApplyToCols(TableVectorizer()))
+    links = set(re.findall(r'href="(https?://[^#"]+)"', html))
+    assert (
+        "https://skrub-data.org/stable/reference/generated/skrub.TableVectorizer.html"
+        in links
+    )
+
+
+def test_doc_link_skrub_class_select_cols():
+    """Public skrub classes get a link to skrub documentation."""
+    link = SelectCols(cols=[])._get_doc_link()
+    assert link == (
+        "https://skrub-data.org/stable/reference/generated/skrub.SelectCols.html"
+    )
+    link = DropCols(cols=[])._get_doc_link()
+    assert link == (
+        "https://skrub-data.org/stable/reference/generated/skrub.DropCols.html"
+    )
+
+
+def test_doc_link_table_vectorizer():
+    """Public skrub classes get a link to skrub documentation."""
+    link = TableVectorizer()._get_doc_link()
+    assert link == (
+        "https://skrub-data.org/stable/reference/generated/skrub.TableVectorizer.html"
+    )
+    link = Cleaner()._get_doc_link()
+    assert link == (
+        "https://skrub-data.org/stable/reference/generated/skrub.Cleaner.html"
+    )
diff --git a/skrub/tests/test_select_cols.py b/skrub/tests/test_select_cols.py
index 26cba9d3a..739c86eb7 100644
--- a/skrub/tests/test_select_cols.py
+++ b/skrub/tests/test_select_cols.py
@@ -88,15 +88,3 @@ def test_get_feature_names_out(df):
     pipeline = make_pipeline(DropCols(cols=["A", "B"]), DummyClassifier())
     pipeline.fit(df, df["C"])
     assert pipeline[:-1].get_feature_names_out() == ["C"]
-
-
-def test_doc_link_skrub_class():
-    """Public skrub classes get a link to skrub documentation."""
-    link = SelectCols(cols=[])._get_doc_link()
-    assert link == (
-        "https://skrub-data.org/stable/reference/generated/skrub.SelectCols.html"
-    )
-    link = DropCols(cols=[])._get_doc_link()
-    assert link == (
-        "https://skrub-data.org/stable/reference/generated/skrub.DropCols.html"
-    )
diff --git a/skrub/tests/test_squashing_scaler.py b/skrub/tests/test_squashing_scaler.py
index e74e909e3..f40ba1731 100644
--- a/skrub/tests/test_squashing_scaler.py
+++ b/skrub/tests/test_squashing_scaler.py
@@ -144,11 +144,3 @@ def test_squashing_scaler_known_values(df_module):
         -1, 1
     )
     assert_almost_equal(X_target, X_out)
-
-
-def test_doc_link_skrub_class():
-    """Public skrub classes get a link to skrub documentation."""
-    link = SquashingScaler()._get_doc_link()
-    assert link == (
-        "https://skrub-data.org/stable/reference/generated/skrub.SquashingScaler.html"
-    )
diff --git a/skrub/tests/test_table_vectorizer.py b/skrub/tests/test_table_vectorizer.py
index cb94fe965..43d438071 100644
--- a/skrub/tests/test_table_vectorizer.py
+++ b/skrub/tests/test_table_vectorizer.py
@@ -1250,18 +1250,6 @@ def test_pipeline_in_table_vectorizer(df_module):
     assert fit_transform_result.shape == transform_result.shape == (2, 4)
 
 
-def test_doc_link_skrub_class():
-    """Public skrub classes get a link to skrub documentation."""
-    link = TableVectorizer()._get_doc_link()
-    assert link == (
-        "https://skrub-data.org/stable/reference/generated/skrub.TableVectorizer.html"
-    )
-    link = Cleaner()._get_doc_link()
-    assert link == (
-        "https://skrub-data.org/stable/reference/generated/skrub.Cleaner.html"
-    )
-
-
 def test_duration_to_float(df_module):
     df = df_module.make_dataframe(
         {

From 79cc5e984966b2761371738a36e376ae9b0f4940 Mon Sep 17 00:00:00 2001
From: Riccardo Cappuzzo <riccardo.cappuzzo@gmail.com>
Date: Mon, 8 Jun 2026 16:50:14 +0200
Subject: [PATCH 12/20] MAINT - removing placeholder fit/fit_transform/transform from BaseTransformer

---
 skrub/_base.py | 16 ----------------
 1 file changed, 16 deletions(-)

diff --git a/skrub/_base.py b/skrub/_base.py
index e8a48fc50..411bb7f49 100644
--- a/skrub/_base.py
+++ b/skrub/_base.py
@@ -15,19 +15,3 @@ def _doc_link_template(self):
             "https://skrub-data.org/stable/reference/generated/"
             "{estimator_module}.{estimator_name}.html",
         )
-
-    def fit(self, X, y=None):
-        return self
-
-    def fit_transform(self, X, y=None):
-        return self.transform(X)
-
-    def transform(self, X):
-        # This method should be overridden by subclasses. We raise an error here to
-        # make it clear to users that they need to implement this method if they are
-        # creating a custom transformer class. We also catch the error in check_output
-        # to provide a more informative error message if the output of transform has the
-        # wrong type.
-        raise NotImplementedError(
-            f"{self.__class__.__name__} does not implement the 'transform' method."
-        )

From 48949d2e137fd7737cf0d67d15604d049294e12c Mon Sep 17 00:00:00 2001
From: Riccardo Cappuzzo <riccardo.cappuzzo@gmail.com>
Date: Mon, 8 Jun 2026 16:59:53 +0200
Subject: [PATCH 13/20] MAINT - removing duplicated doc link code now inherited from BaseTransformer, updating test

---
 skrub/_select_cols.py               | 28 ----------------------------
 skrub/_single_column_transformer.py | 17 ++---------------
 skrub/tests/test_base.py            |  2 +-
 3 files changed, 3 insertions(+), 44 deletions(-)

diff --git a/skrub/_select_cols.py b/skrub/_select_cols.py
index 86a33861c..91fe7c658 100644
--- a/skrub/_select_cols.py
+++ b/skrub/_select_cols.py
@@ -44,8 +44,6 @@ class SelectCols(TransformerMixin, BaseTransformer):
     ValueError: The following columns are requested for selection but missing from dataframe: ['X']
     """  # noqa: E501
 
-    _doc_link_module = "skrub"
-
     def __init__(self, cols):
         self.cols = cols
 
@@ -101,18 +99,6 @@ def get_feature_names_out(self, input_features=None):
         check_is_fitted(self, "columns_")
         return self.columns_
 
-    # Defining this as a property because it inherits from _HTMLDocumentationLinkMixin,
-    # which also defines _doc_link_template as a property, and we want to be able
-    # to override it.
-    @property
-    def _doc_link_template(self):
-        return getattr(
-            self,
-            "__doc_link_template",
-            "https://skrub-data.org/stable/reference/generated/"
-            "{estimator_module}.{estimator_name}.html",
-        )
-
 
 class DropCols(TransformerMixin, BaseTransformer):
     """Drop a subset of a DataFrame's columns.
@@ -153,8 +139,6 @@ class DropCols(TransformerMixin, BaseTransformer):
     ValueError: The following columns are requested for selection but missing from dataframe: ['X']
     """  # noqa: E501
 
-    _doc_link_module = "skrub"
-
     def __init__(self, cols):
         self.cols = cols
 
@@ -212,18 +196,6 @@ def get_feature_names_out(self, input_features=None):
         check_is_fitted(self, "kept_cols_")
         return self.kept_cols_
 
-    # Defining this as a property because it inherits from _HTMLDocumentationLinkMixin,
-    # which also defines _doc_link_template as a property, and we want to be able
-    # to override it.
-    @property
-    def _doc_link_template(self):
-        return getattr(
-            self,
-            "__doc_link_template",
-            "https://skrub-data.org/stable/reference/generated/"
-            "{estimator_module}.{estimator_name}.html",
-        )
-
 
 class Drop(SingleColumnTransformer):
     def fit_transform(self, column, y=None):
diff --git a/skrub/_single_column_transformer.py b/skrub/_single_column_transformer.py
index 6d651c2e3..936b6ae39 100644
--- a/skrub/_single_column_transformer.py
+++ b/skrub/_single_column_transformer.py
@@ -4,12 +4,12 @@
 import re
 import textwrap
 
-from sklearn.base import BaseEstimator
 from sklearn.pipeline import Pipeline
 from sklearn.utils.validation import check_is_fitted
 
 from . import _dataframe as sbd
 from . import _utils
+from ._base import BaseTransformer
 
 __all__ = ["SingleColumnTransformer", "RejectColumn"]
 
@@ -120,7 +120,7 @@ class RejectColumn(ValueError):
     pass
 
 
-class SingleColumnTransformer(BaseEstimator):
+class SingleColumnTransformer(BaseTransformer):
     """Base class for single-column transformers.
 
     Such transformers are applied independently to each column by
@@ -144,19 +144,6 @@ class SingleColumnTransformer(BaseEstimator):
     """
 
     __single_column_transformer__ = True
-    _doc_link_module = "skrub"
-
-    # Defining this as a property because it inherits from _HTMLDocumentationLinkMixin,
-    # which also defines _doc_link_template as a property, and we want to be able
-    # to override it.
-    @property
-    def _doc_link_template(self):
-        return getattr(
-            self,
-            "__doc_link_template",
-            "https://skrub-data.org/stable/reference/generated/"
-            "{estimator_module}.{estimator_name}.html",
-        )
 
     def set_output(self, *, transform=None):
         """
diff --git a/skrub/tests/test_base.py b/skrub/tests/test_base.py
index b26d34ed8..ff002ad9c 100644
--- a/skrub/tests/test_base.py
+++ b/skrub/tests/test_base.py
@@ -17,7 +17,7 @@ def test_doc_link_apply_to_cols():
     html = estimator_html_repr(ApplyToCols(StringEncoder()))
     links = set(re.findall(r'href="(https?://[^#"]+)"', html))
     assert (
-        "https://skrub-data.org/stable/reference/generated/skrub.StringEncoder.html"
+        "https://skrub-data.org/stable/reference/generated/skrub.ApplyToCols.html"
         in links
     )
 

From f1f07d9f24533f7a6273d5402cf9d24b9e7dd7f3 Mon Sep 17 00:00:00 2001
From: Riccardo Cappuzzo <riccardo.cappuzzo@gmail.com>
Date: Mon, 8 Jun 2026 17:07:54 +0200
Subject: [PATCH 14/20] removing unnecessary code

---
 skrub/_table_vectorizer.py | 28 ----------------------------
 1 file changed, 28 deletions(-)

diff --git a/skrub/_table_vectorizer.py b/skrub/_table_vectorizer.py
index b3b2e3e27..823e8b4b6 100644
--- a/skrub/_table_vectorizer.py
+++ b/skrub/_table_vectorizer.py
@@ -395,8 +395,6 @@ class Cleaner(TransformerMixin, BaseTransformer):
     [DropUninformative()]
     """
 
-    _doc_link_module = "skrub"
-
     def __init__(
         self,
         drop_null_fraction=1.0,
@@ -543,18 +541,6 @@ def get_feature_names_out(self, input_features=None):
         check_is_fitted(self, "all_outputs_")
         return np.asarray(self.all_outputs_)
 
-    # Defining this as a property because it inherits from _HTMLDocumentationLinkMixin,
-    # which also defines _doc_link_template as a property, and we want to be able
-    # to override it.
-    @property
-    def _doc_link_template(self):
-        return getattr(
-            self,
-            "__doc_link_template",
-            "https://skrub-data.org/stable/reference/generated/"
-            "{estimator_module}.{estimator_name}.html",
-        )
-
 
 class TableVectorizer(TransformerMixin, BaseTransformer):
     """Transform a dataframe to a numeric (vectorized) representation.
@@ -899,8 +885,6 @@ class TableVectorizer(TransformerMixin, BaseTransformer):
     ValueError: Column 'A' used twice in 'specific_transformers', at indices 0 and 1.
     """  # noqa: E501
 
-    _doc_link_module = "skrub"
-
     def __init__(
         self,
         *,
@@ -1183,15 +1167,3 @@ def get_feature_names_out(self, input_features=None):
         """
         check_is_fitted(self, "all_outputs_")
         return np.asarray(self.all_outputs_)
-
-    # Defining this as a property because it inherits from _HTMLDocumentationLinkMixin,
-    # which also defines _doc_link_template as a property, and we want to be able
-    # to override it.
-    @property
-    def _doc_link_template(self):
-        return getattr(
-            self,
-            "__doc_link_template",
-            "https://skrub-data.org/stable/reference/generated/"
-            "{estimator_module}.{estimator_name}.html",
-        )

From a1f520fbf797af2b1dcc9d6500a0908f2d97f911 Mon Sep 17 00:00:00 2001
From: Riccardo Cappuzzo <riccardo.cappuzzo@gmail.com>
Date: Tue, 16 Jun 2026 11:46:23 +0200
Subject: [PATCH 15/20] addressing comments from review

---
 skrub/_apply_to_cols.py                  | 17 ++---------------
 skrub/_base.py                           |  9 ++++++++-
 skrub/_data_ops/_estimator.py            | 14 ++------------
 skrub/_data_ops/tests/test_estimators.py | 10 ++++++++++
 skrub/_select_cols.py                    |  6 +++---
 skrub/_single_column_transformer.py      |  4 ++--
 skrub/_squashing_scaler.py               |  4 ++--
 skrub/_table_vectorizer.py               |  6 +++---
 8 files changed, 32 insertions(+), 38 deletions(-)

diff --git a/skrub/_apply_to_cols.py b/skrub/_apply_to_cols.py
index bda6f086c..7dcd641ba 100644
--- a/skrub/_apply_to_cols.py
+++ b/skrub/_apply_to_cols.py
@@ -8,14 +8,13 @@
 from . import selectors
 from ._apply_to_each_col import ApplyToEachCol
 from ._apply_to_sub_frame import ApplyToSubFrame
-from ._base import BaseTransformer
-from ._sklearn_compat import _VisualBlock
+from ._base import SkrubBaseTransformer
 from ._wrap_transformer import wrap_transformer
 
 _SELECT_ALL_COLUMNS = selectors.all()
 
 
-class ApplyToCols(TransformerMixin, BaseTransformer):
+class ApplyToCols(TransformerMixin, SkrubBaseTransformer):
     """
     Apply a transformer to selected columns in a dataframe.
 
@@ -433,18 +432,6 @@ def get_feature_names_out(self, input_features=None):
 
         return self._wrapped_transformer.get_feature_names_out(input_features)
 
-    def _sk_visual_block_(self):
-        # This is needed because when ApplyToCols is used with a transformer like
-        # TableVectorizer then the estimator is shown as a parallel block, which
-        # would not add the documentation link.
-        # With this override the problem is fixed.
-        return _VisualBlock(
-            "serial",
-            [self.transformer],
-            names=[self.transformer.__class__.__name__],
-            name_details=[str(self.transformer)],
-        )
-
     def __getattr__(self, name):
         if name == "transformers_" and isinstance(
             getattr(self, "_wrapped_transformer", None), ApplyToSubFrame
diff --git a/skrub/_base.py b/skrub/_base.py
index 411bb7f49..241f4470e 100644
--- a/skrub/_base.py
+++ b/skrub/_base.py
@@ -1,7 +1,14 @@
 from sklearn.base import BaseEstimator
 
 
-class BaseTransformer(BaseEstimator):
+class SkrubBaseTransformer(BaseEstimator):
+    """Base class for all skrub transformers.
+
+    This is a class that all skrub transformers inherit from.
+    For the moment, it's only used for the documentation url, but eventually
+    it will be used for other things as well.
+    """
+
     _doc_link_module = "skrub"
 
     # Defining this as a property because it inherits from _HTMLDocumentationLinkMixin,
diff --git a/skrub/_data_ops/_estimator.py b/skrub/_data_ops/_estimator.py
index 960ab587c..d797f2dc3 100644
--- a/skrub/_data_ops/_estimator.py
+++ b/skrub/_data_ops/_estimator.py
@@ -16,6 +16,7 @@
 
 from .. import _dataframe as sbd
 from .. import _join_utils
+from .._base import SkrubBaseTransformer
 from .._sklearn_compat import _safe_indexing, _VisualBlock
 from .._utils import set_module
 from . import _evaluation
@@ -179,7 +180,7 @@ def _get_params_html(self, deep=True, doc_link=""):
 
 
 @set_module("skrub")
-class SkrubLearner(_DataOpWrapperMixin, BaseEstimator):
+class SkrubLearner(_DataOpWrapperMixin, SkrubBaseTransformer):
     """Learner that evaluates a skrub DataOp.
 
     This class is not meant to be instantiated manually, ``SkrubLearner``
@@ -787,17 +788,6 @@ def describe_params(self):
         """
         return describe_params(eval_choices(self.data_op), choice_graph(self.data_op))
 
-    _doc_link_module = "skrub"
-
-    @property
-    def _doc_link_template(self):
-        return getattr(
-            self,
-            "__doc_link_template",
-            "https://skrub-data.org/stable/reference/generated/"
-            "{estimator_module}.{estimator_name}.html",
-        )
-
 
 def _to_Xy_pipeline(learner, environment):
     return learner.__skrub_to_Xy_pipeline__(environment)
diff --git a/skrub/_data_ops/tests/test_estimators.py b/skrub/_data_ops/tests/test_estimators.py
index 34d84f1b9..007a17a0a 100644
--- a/skrub/_data_ops/tests/test_estimators.py
+++ b/skrub/_data_ops/tests/test_estimators.py
@@ -1460,3 +1460,13 @@ def load_data():
     pred = X.skb.apply(DummyClassifier(), y=y)
     search = pred.skb.make_grid_search(scoring="roc_auc").fit({})
     assert search.results_.shape[0] == 1
+
+
+def test_learner_docstring():
+    data_op, data = get_data_op_and_data("simple")
+    split = data_op.skb.train_test_split(data)
+    learner = data_op.skb.make_learner().fit(split["train"])
+    link = learner._get_doc_link()
+    assert link == (
+        "https://skrub-data.org/stable/reference/generated/skrub.SkrubLearner.html"
+    )
diff --git a/skrub/_select_cols.py b/skrub/_select_cols.py
index e2aaf4509..4dac4d1dc 100644
--- a/skrub/_select_cols.py
+++ b/skrub/_select_cols.py
@@ -1,11 +1,11 @@
 from sklearn.base import TransformerMixin, check_is_fitted
 
 from . import selectors as s
-from ._base import BaseTransformer
+from ._base import SkrubBaseTransformer
 from ._single_column_transformer import SingleColumnTransformer
 
 
-class SelectCols(TransformerMixin, BaseTransformer):
+class SelectCols(TransformerMixin, SkrubBaseTransformer):
     """Select a subset of a DataFrame's columns.
 
     A ``ValueError`` is raised if any of the provided column names are not in the
@@ -100,7 +100,7 @@ def get_feature_names_out(self, input_features=None):
         return self.columns_
 
 
-class DropCols(TransformerMixin, BaseTransformer):
+class DropCols(TransformerMixin, SkrubBaseTransformer):
     """Drop a subset of a DataFrame's columns.
 
     The other columns are kept in their original order. A ``ValueError`` is raised if
diff --git a/skrub/_single_column_transformer.py b/skrub/_single_column_transformer.py
index 936b6ae39..f7f1e913d 100644
--- a/skrub/_single_column_transformer.py
+++ b/skrub/_single_column_transformer.py
@@ -9,7 +9,7 @@
 
 from . import _dataframe as sbd
 from . import _utils
-from ._base import BaseTransformer
+from ._base import SkrubBaseTransformer
 
 __all__ = ["SingleColumnTransformer", "RejectColumn"]
 
@@ -120,7 +120,7 @@ class RejectColumn(ValueError):
     pass
 
 
-class SingleColumnTransformer(BaseTransformer):
+class SingleColumnTransformer(SkrubBaseTransformer):
     """Base class for single-column transformers.
 
     Such transformers are applied independently to each column by
diff --git a/skrub/_squashing_scaler.py b/skrub/_squashing_scaler.py
index fcc055e40..4eae99355 100644
--- a/skrub/_squashing_scaler.py
+++ b/skrub/_squashing_scaler.py
@@ -5,7 +5,7 @@
 from sklearn.preprocessing import RobustScaler
 from sklearn.utils.validation import FLOAT_DTYPES, check_is_fitted
 
-from skrub._base import BaseTransformer
+from skrub._base import SkrubBaseTransformer
 from skrub._sklearn_compat import validate_data
 
 
@@ -83,7 +83,7 @@ def transform(self, X):
         return self.scale_ * (X - self.median_)
 
 
-class SquashingScaler(OneToOneFeatureMixin, TransformerMixin, BaseTransformer):
+class SquashingScaler(OneToOneFeatureMixin, TransformerMixin, SkrubBaseTransformer):
     r"""Perform robust centering and scaling followed by soft clipping.
 
     When features have large outliers, smooth clipping prevents the outliers from
diff --git a/skrub/_table_vectorizer.py b/skrub/_table_vectorizer.py
index 823e8b4b6..59910dafe 100644
--- a/skrub/_table_vectorizer.py
+++ b/skrub/_table_vectorizer.py
@@ -12,7 +12,7 @@
 from . import _dataframe as sbd
 from . import _utils
 from . import selectors as s
-from ._base import BaseTransformer
+from ._base import SkrubBaseTransformer
 from ._check_input import CheckInputDataFrame
 from ._clean_categories import CleanCategories
 from ._clean_null_strings import CleanNullStrings
@@ -184,7 +184,7 @@ def _get_preprocessors(
     return steps
 
 
-class Cleaner(TransformerMixin, BaseTransformer):
+class Cleaner(TransformerMixin, SkrubBaseTransformer):
     """Column-wise consistency checks and sanitization of dtypes, null values and dates.
 
     The ``Cleaner`` performs some consistency checks and basic preprocessing
@@ -542,7 +542,7 @@ def get_feature_names_out(self, input_features=None):
         return np.asarray(self.all_outputs_)
 
 
-class TableVectorizer(TransformerMixin, BaseTransformer):
+class TableVectorizer(TransformerMixin, SkrubBaseTransformer):
     """Transform a dataframe to a numeric (vectorized) representation.
 
     This transformer preprocesses the given dataframe by first cleaning the data

From 20702bd8292f7108da44cb86b2d1b9abec757dd4 Mon Sep 17 00:00:00 2001
From: Riccardo Cappuzzo <riccardo.cappuzzo@gmail.com>
Date: Tue, 16 Jun 2026 12:08:57 +0200
Subject: [PATCH 16/20] bringing back code block and better comment

---
 skrub/_apply_to_cols.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/skrub/_apply_to_cols.py b/skrub/_apply_to_cols.py
index 7dcd641ba..cd4eaf427 100644
--- a/skrub/_apply_to_cols.py
+++ b/skrub/_apply_to_cols.py
@@ -9,6 +9,7 @@
 from ._apply_to_each_col import ApplyToEachCol
 from ._apply_to_sub_frame import ApplyToSubFrame
 from ._base import SkrubBaseTransformer
+from ._sklearn_compat import _VisualBlock
 from ._wrap_transformer import wrap_transformer
 
 _SELECT_ALL_COLUMNS = selectors.all()
@@ -432,6 +433,20 @@ def get_feature_names_out(self, input_features=None):
 
         return self._wrapped_transformer.get_feature_names_out(input_features)
 
+    def _sk_visual_block_(self):
+        # This is needed because cases like ApplyToCols(TableVectorizer())
+        # would show the TableVectorizer as a parallel block, which would not
+        # add the documentation link. With this override the problem is fixed.
+        # The same problem happens for ApplyToCols(ApplyToCols(...)) (not that
+        # someone should do that, but it is possible)
+
+        return _VisualBlock(
+            "serial",
+            [self.transformer],
+            names=[self.transformer.__class__.__name__],
+            name_details=[str(self.transformer)],
+        )
+
     def __getattr__(self, name):
         if name == "transformers_" and isinstance(
             getattr(self, "_wrapped_transformer", None), ApplyToSubFrame

From ce9dc05d62f70b1307c9939962c9e1888c6e3afc Mon Sep 17 00:00:00 2001
From: Riccardo Cappuzzo <riccardo.cappuzzo@gmail.com>
Date: Tue, 16 Jun 2026 12:19:42 +0200
Subject: [PATCH 17/20] addressing all missing files

---
 skrub/_agg_joiner.py           | 7 ++++---
 skrub/_apply_to_each_col.py    | 6 ++++--
 skrub/_apply_to_sub_frame.py   | 6 ++++--
 skrub/_check_input.py          | 6 ++++--
 skrub/_drop_similar.py         | 5 +++--
 skrub/_interpolation_joiner.py | 6 ++++--
 skrub/_joiner.py               | 6 ++++--
 skrub/_matching.py             | 5 +++--
 skrub/_multi_agg_joiner.py     | 5 +++--
 skrub/_squashing_scaler.py     | 4 ++--
 10 files changed, 35 insertions(+), 21 deletions(-)

diff --git a/skrub/_agg_joiner.py b/skrub/_agg_joiner.py
index 46ea4b06c..d03f54ef1 100644
--- a/skrub/_agg_joiner.py
+++ b/skrub/_agg_joiner.py
@@ -10,12 +10,13 @@
 
 import numpy as np
 import pandas as pd
-from sklearn.base import BaseEstimator, TransformerMixin
+from sklearn.base import TransformerMixin
 from sklearn.utils.validation import check_is_fitted
 
 from skrub import _dataframe as sbd
 from skrub import _join_utils, _utils
 from skrub import selectors as s
+from skrub._base import SkrubBaseTransformer
 from skrub._dispatch import dispatch, raise_dispatch_unregistered_type
 
 from ._check_input import CheckInputDataFrame
@@ -168,7 +169,7 @@ def check_other_inputs(operations, suffix):
     return operations, suffix
 
 
-class AggJoiner(TransformerMixin, BaseEstimator):
+class AggJoiner(TransformerMixin, SkrubBaseTransformer):
     """Aggregate an auxiliary dataframe before joining it on a base dataframe.
 
     Apply numerical and categorical aggregation operations on the columns (i.e. `cols`)
@@ -407,7 +408,7 @@ def get_feature_names_out(self):
         return self.all_outputs_
 
 
-class AggTarget(TransformerMixin, BaseEstimator):
+class AggTarget(TransformerMixin, SkrubBaseTransformer):
     """Aggregate a target `y` before joining its aggregation on a base dataframe.
 
     Accepts :obj:`pandas.DataFrame` or :class:`polars.DataFrame` inputs.
diff --git a/skrub/_apply_to_each_col.py b/skrub/_apply_to_each_col.py
index 70e99cfdd..fcd46b66f 100644
--- a/skrub/_apply_to_each_col.py
+++ b/skrub/_apply_to_each_col.py
@@ -1,9 +1,11 @@
 import itertools
 
 from joblib import Parallel, delayed
-from sklearn.base import BaseEstimator, TransformerMixin, clone
+from sklearn.base import TransformerMixin, clone
 from sklearn.utils.validation import check_is_fitted
 
+from skrub._base import SkrubBaseTransformer
+
 from . import _dataframe as sbd
 from . import _utils, selectors
 from ._join_utils import pick_column_names
@@ -15,7 +17,7 @@
 _SELECT_ALL_COLUMNS = selectors.all()
 
 
-class ApplyToEachCol(BaseEstimator, TransformerMixin):
+class ApplyToEachCol(SkrubBaseTransformer, TransformerMixin):
     """
     Map a transformer to columns in a dataframe.
 
diff --git a/skrub/_apply_to_sub_frame.py b/skrub/_apply_to_sub_frame.py
index 793969eea..270465ae7 100644
--- a/skrub/_apply_to_sub_frame.py
+++ b/skrub/_apply_to_sub_frame.py
@@ -1,6 +1,8 @@
-from sklearn.base import BaseEstimator, TransformerMixin, clone
+from sklearn.base import TransformerMixin, clone
 from sklearn.utils.validation import check_is_fitted
 
+from skrub._base import SkrubBaseTransformer
+
 from . import _dataframe as sbd
 from . import _utils, selectors
 from ._join_utils import pick_column_names
@@ -11,7 +13,7 @@
 _SELECT_ALL_COLUMNS = selectors.all()
 
 
-class ApplyToSubFrame(TransformerMixin, BaseEstimator):
+class ApplyToSubFrame(TransformerMixin, SkrubBaseTransformer):
     """Apply a transformer to part of a dataframe.
 
     A subset of the dataframe is selected and passed to the transformer (as a
diff --git a/skrub/_check_input.py b/skrub/_check_input.py
index c1dd86768..92dd206b4 100644
--- a/skrub/_check_input.py
+++ b/skrub/_check_input.py
@@ -2,9 +2,11 @@
 
 import numpy as np
 import pandas as pd
-from sklearn.base import BaseEstimator, TransformerMixin
+from sklearn.base import TransformerMixin
 from sklearn.utils.validation import check_is_fitted
 
+from skrub._base import SkrubBaseTransformer
+
 from . import _dataframe as sbd
 from . import _join_utils, _utils
 from ._dispatch import dispatch
@@ -72,7 +74,7 @@ def _check_is_dataframe(df):
     return df
 
 
-class CheckInputDataFrame(TransformerMixin, BaseEstimator):
+class CheckInputDataFrame(TransformerMixin, SkrubBaseTransformer):
     """Check the dataframe entering a skrub pipeline.
 
     This transformer ensures that:
diff --git a/skrub/_drop_similar.py b/skrub/_drop_similar.py
index 75906807e..207f70675 100644
--- a/skrub/_drop_similar.py
+++ b/skrub/_drop_similar.py
@@ -9,11 +9,12 @@
     pass
 import numbers
 
-from sklearn.base import BaseEstimator, TransformerMixin
+from sklearn.base import TransformerMixin
 from sklearn.utils.validation import check_is_fitted
 
 from . import _dataframe as sbd
 from . import selectors as s
+from ._base import SkrubBaseTransformer
 from ._column_associations import column_associations
 from ._dataframe._common import raise_dispatch_unregistered_type
 from ._dispatch import dispatch
@@ -35,7 +36,7 @@ def _filter_associations_polars(obj, threshold):
     return obj.filter(pl.col("cramer_v") >= threshold)
 
 
-class DropSimilar(TransformerMixin, BaseEstimator):
+class DropSimilar(TransformerMixin, SkrubBaseTransformer):
     """Drop columns found too redundant to the rest of the dataframe,
     according to association defined by Cramér's V.
 
diff --git a/skrub/_interpolation_joiner.py b/skrub/_interpolation_joiner.py
index f08c97eb4..7775e021a 100644
--- a/skrub/_interpolation_joiner.py
+++ b/skrub/_interpolation_joiner.py
@@ -2,12 +2,14 @@
 
 import joblib
 import numpy as np
-from sklearn.base import BaseEstimator, TransformerMixin, clone
+from sklearn.base import TransformerMixin, clone
 from sklearn.ensemble import (
     HistGradientBoostingClassifier,
     HistGradientBoostingRegressor,
 )
 
+from skrub._base import SkrubBaseTransformer
+
 from . import _dataframe as sbd
 from . import _join_utils, _utils
 from . import selectors as s
@@ -20,7 +22,7 @@
 DEFAULT_VECTORIZER = TableVectorizer(high_cardinality=MinHashEncoder())
 
 
-class InterpolationJoiner(TransformerMixin, BaseEstimator):
+class InterpolationJoiner(TransformerMixin, SkrubBaseTransformer):
     """Join with a table augmented by machine-learning predictions.
 
     This is similar to a usual equi-join, but instead of looking for actual
diff --git a/skrub/_joiner.py b/skrub/_joiner.py
index 895f52bc3..90de69fb2 100644
--- a/skrub/_joiner.py
+++ b/skrub/_joiner.py
@@ -5,13 +5,15 @@
 from functools import partial
 
 import numpy as np
-from sklearn.base import BaseEstimator, TransformerMixin, clone
+from sklearn.base import TransformerMixin, clone
 from sklearn.compose import make_column_transformer
 from sklearn.feature_extraction.text import HashingVectorizer, TfidfTransformer
 from sklearn.pipeline import make_pipeline
 from sklearn.preprocessing import FunctionTransformer, StandardScaler
 from sklearn.utils.validation import check_is_fitted
 
+from skrub._base import SkrubBaseTransformer
+
 from . import _dataframe as sbd
 from . import _join_utils, _matching, _utils
 from . import selectors as s
@@ -76,7 +78,7 @@ def _make_vectorizer(table, string_encoder, rescale):
     return make_pipeline(skrubber, make_column_transformer(*transformers))
 
 
-class Joiner(TransformerMixin, BaseEstimator):
+class Joiner(TransformerMixin, SkrubBaseTransformer):
     """Augment features in a main table by fuzzy-joining an auxiliary table to it.
 
     This transformer is initialized with an auxiliary table `aux_table`. It
diff --git a/skrub/_matching.py b/skrub/_matching.py
index 9c9933eff..8f60db25f 100644
--- a/skrub/_matching.py
+++ b/skrub/_matching.py
@@ -1,11 +1,12 @@
 import numpy as np
 from scipy import sparse
-from sklearn.base import BaseEstimator
 from sklearn.neighbors import NearestNeighbors
 from sklearn.utils import check_random_state
 
+from skrub._base import SkrubBaseTransformer
 
-class Matching(BaseEstimator):
+
+class Matching(SkrubBaseTransformer):
     """Base class for fuzzy-join matching & distance rescaling.
 
     This class is a helper for the ``Joiner`` and ``fuzzy_join``. It is
diff --git a/skrub/_multi_agg_joiner.py b/skrub/_multi_agg_joiner.py
index 3c0dff5d8..ccca0720e 100644
--- a/skrub/_multi_agg_joiner.py
+++ b/skrub/_multi_agg_joiner.py
@@ -2,10 +2,11 @@
 The MultiAggJoiner extends AggJoiner to multiple auxiliary tables.
 """
 
-from sklearn.base import BaseEstimator, TransformerMixin
+from sklearn.base import TransformerMixin
 from sklearn.utils.validation import check_is_fitted
 
 from skrub._agg_joiner import AggJoiner
+from skrub._base import SkrubBaseTransformer
 from skrub._dataframe import _common as sbd
 from skrub._utils import _is_array_like
 
@@ -17,7 +18,7 @@ def _is_iterable_of_iterable_of_str(x):
     )
 
 
-class MultiAggJoiner(TransformerMixin, BaseEstimator):
+class MultiAggJoiner(TransformerMixin, SkrubBaseTransformer):
     """Extension of the :class:`AggJoiner` to multiple auxiliary tables.
 
     Apply numerical and categorical aggregation operations on the `cols`
diff --git a/skrub/_squashing_scaler.py b/skrub/_squashing_scaler.py
index 4eae99355..f78f91609 100644
--- a/skrub/_squashing_scaler.py
+++ b/skrub/_squashing_scaler.py
@@ -1,7 +1,7 @@
 import numbers
 
 import numpy as np
-from sklearn.base import BaseEstimator, OneToOneFeatureMixin, TransformerMixin
+from sklearn.base import OneToOneFeatureMixin, TransformerMixin
 from sklearn.preprocessing import RobustScaler
 from sklearn.utils.validation import FLOAT_DTYPES, check_is_fitted
 
@@ -52,7 +52,7 @@ def _soft_clip(X, max_absolute_value, mask_inf):
     return X
 
 
-class _MinMaxScaler(OneToOneFeatureMixin, TransformerMixin, BaseEstimator):
+class _MinMaxScaler(OneToOneFeatureMixin, TransformerMixin, SkrubBaseTransformer):
     """A variation of scikit-learn MinMaxScaler.
 
     A simple min-max scaler that centers the median to zero and scales

From 0fe8479bbc133a462b5cf945dba9daa126bd0a9e Mon Sep 17 00:00:00 2001
From: Riccardo Cappuzzo <riccardo.cappuzzo@gmail.com>
Date: Tue, 16 Jun 2026 13:04:49 +0200
Subject: [PATCH 18/20] fixing relative imports

---
 skrub/_agg_joiner.py           | 2 +-
 skrub/_apply_to_each_col.py    | 3 +--
 skrub/_apply_to_sub_frame.py   | 3 +--
 skrub/_check_input.py          | 3 +--
 skrub/_interpolation_joiner.py | 3 +--
 skrub/_joiner.py               | 3 +--
 skrub/_matching.py             | 2 +-
 skrub/_multi_agg_joiner.py     | 3 ++-
 skrub/_squashing_scaler.py     | 3 ++-
 9 files changed, 11 insertions(+), 14 deletions(-)

diff --git a/skrub/_agg_joiner.py b/skrub/_agg_joiner.py
index d03f54ef1..2826707a2 100644
--- a/skrub/_agg_joiner.py
+++ b/skrub/_agg_joiner.py
@@ -16,9 +16,9 @@
 from skrub import _dataframe as sbd
 from skrub import _join_utils, _utils
 from skrub import selectors as s
-from skrub._base import SkrubBaseTransformer
 from skrub._dispatch import dispatch, raise_dispatch_unregistered_type
 
+from ._base import SkrubBaseTransformer
 from ._check_input import CheckInputDataFrame
 
 try:
diff --git a/skrub/_apply_to_each_col.py b/skrub/_apply_to_each_col.py
index fcd46b66f..36aa5b1ee 100644
--- a/skrub/_apply_to_each_col.py
+++ b/skrub/_apply_to_each_col.py
@@ -4,10 +4,9 @@
 from sklearn.base import TransformerMixin, clone
 from sklearn.utils.validation import check_is_fitted
 
-from skrub._base import SkrubBaseTransformer
-
 from . import _dataframe as sbd
 from . import _utils, selectors
+from ._base import SkrubBaseTransformer
 from ._join_utils import pick_column_names
 from ._single_column_transformer import RejectColumn, is_single_column_transformer
 
diff --git a/skrub/_apply_to_sub_frame.py b/skrub/_apply_to_sub_frame.py
index 270465ae7..1a0458fed 100644
--- a/skrub/_apply_to_sub_frame.py
+++ b/skrub/_apply_to_sub_frame.py
@@ -1,10 +1,9 @@
 from sklearn.base import TransformerMixin, clone
 from sklearn.utils.validation import check_is_fitted
 
-from skrub._base import SkrubBaseTransformer
-
 from . import _dataframe as sbd
 from . import _utils, selectors
+from ._base import SkrubBaseTransformer
 from ._join_utils import pick_column_names
 
 __all__ = ["ApplyToSubFrame"]
diff --git a/skrub/_check_input.py b/skrub/_check_input.py
index 92dd206b4..deb6d99ef 100644
--- a/skrub/_check_input.py
+++ b/skrub/_check_input.py
@@ -5,10 +5,9 @@
 from sklearn.base import TransformerMixin
 from sklearn.utils.validation import check_is_fitted
 
-from skrub._base import SkrubBaseTransformer
-
 from . import _dataframe as sbd
 from . import _join_utils, _utils
+from ._base import SkrubBaseTransformer
 from ._dispatch import dispatch
 
 __all__ = ["CheckInputDataFrame", "cast_column_names_to_strings"]
diff --git a/skrub/_interpolation_joiner.py b/skrub/_interpolation_joiner.py
index 7775e021a..4cc1e0ba9 100644
--- a/skrub/_interpolation_joiner.py
+++ b/skrub/_interpolation_joiner.py
@@ -8,11 +8,10 @@
     HistGradientBoostingRegressor,
 )
 
-from skrub._base import SkrubBaseTransformer
-
 from . import _dataframe as sbd
 from . import _join_utils, _utils
 from . import selectors as s
+from ._base import SkrubBaseTransformer
 from ._minhash_encoder import MinHashEncoder
 from ._sklearn_compat import get_tags
 from ._table_vectorizer import TableVectorizer
diff --git a/skrub/_joiner.py b/skrub/_joiner.py
index 90de69fb2..de27526ca 100644
--- a/skrub/_joiner.py
+++ b/skrub/_joiner.py
@@ -12,11 +12,10 @@
 from sklearn.preprocessing import FunctionTransformer, StandardScaler
 from sklearn.utils.validation import check_is_fitted
 
-from skrub._base import SkrubBaseTransformer
-
 from . import _dataframe as sbd
 from . import _join_utils, _matching, _utils
 from . import selectors as s
+from ._base import SkrubBaseTransformer
 from ._check_input import CheckInputDataFrame
 from ._datetime_encoder import DatetimeEncoder
 from ._table_vectorizer import TableVectorizer
diff --git a/skrub/_matching.py b/skrub/_matching.py
index 8f60db25f..6b3d78772 100644
--- a/skrub/_matching.py
+++ b/skrub/_matching.py
@@ -3,7 +3,7 @@
 from sklearn.neighbors import NearestNeighbors
 from sklearn.utils import check_random_state
 
-from skrub._base import SkrubBaseTransformer
+from ._base import SkrubBaseTransformer
 
 
 class Matching(SkrubBaseTransformer):
diff --git a/skrub/_multi_agg_joiner.py b/skrub/_multi_agg_joiner.py
index ccca0720e..6c78de957 100644
--- a/skrub/_multi_agg_joiner.py
+++ b/skrub/_multi_agg_joiner.py
@@ -6,10 +6,11 @@
 from sklearn.utils.validation import check_is_fitted
 
 from skrub._agg_joiner import AggJoiner
-from skrub._base import SkrubBaseTransformer
 from skrub._dataframe import _common as sbd
 from skrub._utils import _is_array_like
 
+from ._base import SkrubBaseTransformer
+
 
 def _is_iterable_of_iterable_of_str(x):
     "Return True if x is an iterable of iterable of str and False otherwise."
diff --git a/skrub/_squashing_scaler.py b/skrub/_squashing_scaler.py
index f78f91609..606eb96e6 100644
--- a/skrub/_squashing_scaler.py
+++ b/skrub/_squashing_scaler.py
@@ -5,9 +5,10 @@
 from sklearn.preprocessing import RobustScaler
 from sklearn.utils.validation import FLOAT_DTYPES, check_is_fitted
 
-from skrub._base import SkrubBaseTransformer
 from skrub._sklearn_compat import validate_data
 
+from ._base import SkrubBaseTransformer
+
 
 def _mask_inf(X):
     """Replace infinite values with NaN and return their sign."""

From 1752ffab0d740655571ecc2bbca92523ec61a055 Mon Sep 17 00:00:00 2001
From: Riccardo Cappuzzo <riccardo.cappuzzo@gmail.com>
Date: Tue, 16 Jun 2026 13:21:22 +0200
Subject: [PATCH 19/20] renaming to estimator, fixing order, adding to
 paramsearch

---
 skrub/_agg_joiner.py                | 6 +++---
 skrub/_apply_to_cols.py             | 4 ++--
 skrub/_apply_to_each_col.py         | 4 ++--
 skrub/_apply_to_sub_frame.py        | 4 ++--
 skrub/_base.py                      | 2 +-
 skrub/_check_input.py               | 4 ++--
 skrub/_data_ops/_estimator.py       | 6 +++---
 skrub/_drop_similar.py              | 4 ++--
 skrub/_interpolation_joiner.py      | 4 ++--
 skrub/_joiner.py                    | 4 ++--
 skrub/_matching.py                  | 4 ++--
 skrub/_multi_agg_joiner.py          | 4 ++--
 skrub/_select_cols.py               | 6 +++---
 skrub/_single_column_transformer.py | 4 ++--
 skrub/_squashing_scaler.py          | 6 +++---
 skrub/_table_vectorizer.py          | 6 +++---
 16 files changed, 36 insertions(+), 36 deletions(-)

diff --git a/skrub/_agg_joiner.py b/skrub/_agg_joiner.py
index 2826707a2..43e8d4db6 100644
--- a/skrub/_agg_joiner.py
+++ b/skrub/_agg_joiner.py
@@ -18,7 +18,7 @@
 from skrub import selectors as s
 from skrub._dispatch import dispatch, raise_dispatch_unregistered_type
 
-from ._base import SkrubBaseTransformer
+from ._base import SkrubBaseEstimator
 from ._check_input import CheckInputDataFrame
 
 try:
@@ -169,7 +169,7 @@ def check_other_inputs(operations, suffix):
     return operations, suffix
 
 
-class AggJoiner(TransformerMixin, SkrubBaseTransformer):
+class AggJoiner(TransformerMixin, SkrubBaseEstimator):
     """Aggregate an auxiliary dataframe before joining it on a base dataframe.
 
     Apply numerical and categorical aggregation operations on the columns (i.e. `cols`)
@@ -408,7 +408,7 @@ def get_feature_names_out(self):
         return self.all_outputs_
 
 
-class AggTarget(TransformerMixin, SkrubBaseTransformer):
+class AggTarget(TransformerMixin, SkrubBaseEstimator):
     """Aggregate a target `y` before joining its aggregation on a base dataframe.
 
     Accepts :obj:`pandas.DataFrame` or :class:`polars.DataFrame` inputs.
diff --git a/skrub/_apply_to_cols.py b/skrub/_apply_to_cols.py
index cd4eaf427..7168cf70b 100644
--- a/skrub/_apply_to_cols.py
+++ b/skrub/_apply_to_cols.py
@@ -8,14 +8,14 @@
 from . import selectors
 from ._apply_to_each_col import ApplyToEachCol
 from ._apply_to_sub_frame import ApplyToSubFrame
-from ._base import SkrubBaseTransformer
+from ._base import SkrubBaseEstimator
 from ._sklearn_compat import _VisualBlock
 from ._wrap_transformer import wrap_transformer
 
 _SELECT_ALL_COLUMNS = selectors.all()
 
 
-class ApplyToCols(TransformerMixin, SkrubBaseTransformer):
+class ApplyToCols(TransformerMixin, SkrubBaseEstimator):
     """
     Apply a transformer to selected columns in a dataframe.
 
diff --git a/skrub/_apply_to_each_col.py b/skrub/_apply_to_each_col.py
index 36aa5b1ee..dfb054291 100644
--- a/skrub/_apply_to_each_col.py
+++ b/skrub/_apply_to_each_col.py
@@ -6,7 +6,7 @@
 
 from . import _dataframe as sbd
 from . import _utils, selectors
-from ._base import SkrubBaseTransformer
+from ._base import SkrubBaseEstimator
 from ._join_utils import pick_column_names
 from ._single_column_transformer import RejectColumn, is_single_column_transformer
 
@@ -16,7 +16,7 @@
 _SELECT_ALL_COLUMNS = selectors.all()
 
 
-class ApplyToEachCol(SkrubBaseTransformer, TransformerMixin):
+class ApplyToEachCol(TransformerMixin, SkrubBaseEstimator):
     """
     Map a transformer to columns in a dataframe.
 
diff --git a/skrub/_apply_to_sub_frame.py b/skrub/_apply_to_sub_frame.py
index 1a0458fed..f8e595590 100644
--- a/skrub/_apply_to_sub_frame.py
+++ b/skrub/_apply_to_sub_frame.py
@@ -3,7 +3,7 @@
 
 from . import _dataframe as sbd
 from . import _utils, selectors
-from ._base import SkrubBaseTransformer
+from ._base import SkrubBaseEstimator
 from ._join_utils import pick_column_names
 
 __all__ = ["ApplyToSubFrame"]
@@ -12,7 +12,7 @@
 _SELECT_ALL_COLUMNS = selectors.all()
 
 
-class ApplyToSubFrame(TransformerMixin, SkrubBaseTransformer):
+class ApplyToSubFrame(TransformerMixin, SkrubBaseEstimator):
     """Apply a transformer to part of a dataframe.
 
     A subset of the dataframe is selected and passed to the transformer (as a
diff --git a/skrub/_base.py b/skrub/_base.py
index 241f4470e..901fb46d9 100644
--- a/skrub/_base.py
+++ b/skrub/_base.py
@@ -1,7 +1,7 @@
 from sklearn.base import BaseEstimator
 
 
-class SkrubBaseTransformer(BaseEstimator):
+class SkrubBaseEstimator(BaseEstimator):
     """Base class for all skrub transformers.
 
     This is a class that all skrub transformers inherit from.
diff --git a/skrub/_check_input.py b/skrub/_check_input.py
index deb6d99ef..972f9e3a7 100644
--- a/skrub/_check_input.py
+++ b/skrub/_check_input.py
@@ -7,7 +7,7 @@
 
 from . import _dataframe as sbd
 from . import _join_utils, _utils
-from ._base import SkrubBaseTransformer
+from ._base import SkrubBaseEstimator
 from ._dispatch import dispatch
 
 __all__ = ["CheckInputDataFrame", "cast_column_names_to_strings"]
@@ -73,7 +73,7 @@ def _check_is_dataframe(df):
     return df
 
 
-class CheckInputDataFrame(TransformerMixin, SkrubBaseTransformer):
+class CheckInputDataFrame(TransformerMixin, SkrubBaseEstimator):
     """Check the dataframe entering a skrub pipeline.
 
     This transformer ensures that:
diff --git a/skrub/_data_ops/_estimator.py b/skrub/_data_ops/_estimator.py
index d797f2dc3..b7bb598c1 100644
--- a/skrub/_data_ops/_estimator.py
+++ b/skrub/_data_ops/_estimator.py
@@ -16,7 +16,7 @@
 
 from .. import _dataframe as sbd
 from .. import _join_utils
-from .._base import SkrubBaseTransformer
+from .._base import SkrubBaseEstimator
 from .._sklearn_compat import _safe_indexing, _VisualBlock
 from .._utils import set_module
 from . import _evaluation
@@ -180,7 +180,7 @@ def _get_params_html(self, deep=True, doc_link=""):
 
 
 @set_module("skrub")
-class SkrubLearner(_DataOpWrapperMixin, SkrubBaseTransformer):
+class SkrubLearner(_DataOpWrapperMixin, SkrubBaseEstimator):
     """Learner that evaluates a skrub DataOp.
 
     This class is not meant to be instantiated manually, ``SkrubLearner``
@@ -1197,7 +1197,7 @@ def iter_cv_splits(data_op, environment, *, keep_subsampling=False, cv=None):
         yield split_info
 
 
-class _BaseParamSearch(_DataOpWrapperMixin, BaseEstimator):
+class _BaseParamSearch(_DataOpWrapperMixin, SkrubBaseEstimator):
     """Base class for hyperparameter search objects.
 
     It defines some default implementations for getting results, plotting, and
diff --git a/skrub/_drop_similar.py b/skrub/_drop_similar.py
index 207f70675..cd7f3d6ac 100644
--- a/skrub/_drop_similar.py
+++ b/skrub/_drop_similar.py
@@ -14,7 +14,7 @@
 
 from . import _dataframe as sbd
 from . import selectors as s
-from ._base import SkrubBaseTransformer
+from ._base import SkrubBaseEstimator
 from ._column_associations import column_associations
 from ._dataframe._common import raise_dispatch_unregistered_type
 from ._dispatch import dispatch
@@ -36,7 +36,7 @@ def _filter_associations_polars(obj, threshold):
     return obj.filter(pl.col("cramer_v") >= threshold)
 
 
-class DropSimilar(TransformerMixin, SkrubBaseTransformer):
+class DropSimilar(TransformerMixin, SkrubBaseEstimator):
     """Drop columns found too redundant to the rest of the dataframe,
     according to association defined by Cramér's V.
 
diff --git a/skrub/_interpolation_joiner.py b/skrub/_interpolation_joiner.py
index 4cc1e0ba9..ea2f4e5d9 100644
--- a/skrub/_interpolation_joiner.py
+++ b/skrub/_interpolation_joiner.py
@@ -11,7 +11,7 @@
 from . import _dataframe as sbd
 from . import _join_utils, _utils
 from . import selectors as s
-from ._base import SkrubBaseTransformer
+from ._base import SkrubBaseEstimator
 from ._minhash_encoder import MinHashEncoder
 from ._sklearn_compat import get_tags
 from ._table_vectorizer import TableVectorizer
@@ -21,7 +21,7 @@
 DEFAULT_VECTORIZER = TableVectorizer(high_cardinality=MinHashEncoder())
 
 
-class InterpolationJoiner(TransformerMixin, SkrubBaseTransformer):
+class InterpolationJoiner(TransformerMixin, SkrubBaseEstimator):
     """Join with a table augmented by machine-learning predictions.
 
     This is similar to a usual equi-join, but instead of looking for actual
diff --git a/skrub/_joiner.py b/skrub/_joiner.py
index de27526ca..3b9642d7a 100644
--- a/skrub/_joiner.py
+++ b/skrub/_joiner.py
@@ -15,7 +15,7 @@
 from . import _dataframe as sbd
 from . import _join_utils, _matching, _utils
 from . import selectors as s
-from ._base import SkrubBaseTransformer
+from ._base import SkrubBaseEstimator
 from ._check_input import CheckInputDataFrame
 from ._datetime_encoder import DatetimeEncoder
 from ._table_vectorizer import TableVectorizer
@@ -77,7 +77,7 @@ def _make_vectorizer(table, string_encoder, rescale):
     return make_pipeline(skrubber, make_column_transformer(*transformers))
 
 
-class Joiner(TransformerMixin, SkrubBaseTransformer):
+class Joiner(TransformerMixin, SkrubBaseEstimator):
     """Augment features in a main table by fuzzy-joining an auxiliary table to it.
 
     This transformer is initialized with an auxiliary table `aux_table`. It
diff --git a/skrub/_matching.py b/skrub/_matching.py
index 6b3d78772..1a1a7d991 100644
--- a/skrub/_matching.py
+++ b/skrub/_matching.py
@@ -3,10 +3,10 @@
 from sklearn.neighbors import NearestNeighbors
 from sklearn.utils import check_random_state
 
-from ._base import SkrubBaseTransformer
+from ._base import SkrubBaseEstimator
 
 
-class Matching(SkrubBaseTransformer):
+class Matching(SkrubBaseEstimator):
     """Base class for fuzzy-join matching & distance rescaling.
 
     This class is a helper for the ``Joiner`` and ``fuzzy_join``. It is
diff --git a/skrub/_multi_agg_joiner.py b/skrub/_multi_agg_joiner.py
index 6c78de957..7b99c5e38 100644
--- a/skrub/_multi_agg_joiner.py
+++ b/skrub/_multi_agg_joiner.py
@@ -9,7 +9,7 @@
 from skrub._dataframe import _common as sbd
 from skrub._utils import _is_array_like
 
-from ._base import SkrubBaseTransformer
+from ._base import SkrubBaseEstimator
 
 
 def _is_iterable_of_iterable_of_str(x):
@@ -19,7 +19,7 @@ def _is_iterable_of_iterable_of_str(x):
     )
 
 
-class MultiAggJoiner(TransformerMixin, SkrubBaseTransformer):
+class MultiAggJoiner(TransformerMixin, SkrubBaseEstimator):
     """Extension of the :class:`AggJoiner` to multiple auxiliary tables.
 
     Apply numerical and categorical aggregation operations on the `cols`
diff --git a/skrub/_select_cols.py b/skrub/_select_cols.py
index 4dac4d1dc..e940d551b 100644
--- a/skrub/_select_cols.py
+++ b/skrub/_select_cols.py
@@ -1,11 +1,11 @@
 from sklearn.base import TransformerMixin, check_is_fitted
 
 from . import selectors as s
-from ._base import SkrubBaseTransformer
+from ._base import SkrubBaseEstimator
 from ._single_column_transformer import SingleColumnTransformer
 
 
-class SelectCols(TransformerMixin, SkrubBaseTransformer):
+class SelectCols(TransformerMixin, SkrubBaseEstimator):
     """Select a subset of a DataFrame's columns.
 
     A ``ValueError`` is raised if any of the provided column names are not in the
@@ -100,7 +100,7 @@ def get_feature_names_out(self, input_features=None):
         return self.columns_
 
 
-class DropCols(TransformerMixin, SkrubBaseTransformer):
+class DropCols(TransformerMixin, SkrubBaseEstimator):
     """Drop a subset of a DataFrame's columns.
 
     The other columns are kept in their original order. A ``ValueError`` is raised if
diff --git a/skrub/_single_column_transformer.py b/skrub/_single_column_transformer.py
index f7f1e913d..ea6f52712 100644
--- a/skrub/_single_column_transformer.py
+++ b/skrub/_single_column_transformer.py
@@ -9,7 +9,7 @@
 
 from . import _dataframe as sbd
 from . import _utils
-from ._base import SkrubBaseTransformer
+from ._base import SkrubBaseEstimator
 
 __all__ = ["SingleColumnTransformer", "RejectColumn"]
 
@@ -120,7 +120,7 @@ class RejectColumn(ValueError):
     pass
 
 
-class SingleColumnTransformer(SkrubBaseTransformer):
+class SingleColumnTransformer(SkrubBaseEstimator):
     """Base class for single-column transformers.
 
     Such transformers are applied independently to each column by
diff --git a/skrub/_squashing_scaler.py b/skrub/_squashing_scaler.py
index 606eb96e6..c823918cf 100644
--- a/skrub/_squashing_scaler.py
+++ b/skrub/_squashing_scaler.py
@@ -7,7 +7,7 @@
 
 from skrub._sklearn_compat import validate_data
 
-from ._base import SkrubBaseTransformer
+from ._base import SkrubBaseEstimator
 
 
 def _mask_inf(X):
@@ -53,7 +53,7 @@ def _soft_clip(X, max_absolute_value, mask_inf):
     return X
 
 
-class _MinMaxScaler(OneToOneFeatureMixin, TransformerMixin, SkrubBaseTransformer):
+class _MinMaxScaler(OneToOneFeatureMixin, TransformerMixin, SkrubBaseEstimator):
     """A variation of scikit-learn MinMaxScaler.
 
     A simple min-max scaler that centers the median to zero and scales
@@ -84,7 +84,7 @@ def transform(self, X):
         return self.scale_ * (X - self.median_)
 
 
-class SquashingScaler(OneToOneFeatureMixin, TransformerMixin, SkrubBaseTransformer):
+class SquashingScaler(OneToOneFeatureMixin, TransformerMixin, SkrubBaseEstimator):
     r"""Perform robust centering and scaling followed by soft clipping.
 
     When features have large outliers, smooth clipping prevents the outliers from
diff --git a/skrub/_table_vectorizer.py b/skrub/_table_vectorizer.py
index 59910dafe..9af7bdee0 100644
--- a/skrub/_table_vectorizer.py
+++ b/skrub/_table_vectorizer.py
@@ -12,7 +12,7 @@
 from . import _dataframe as sbd
 from . import _utils
 from . import selectors as s
-from ._base import SkrubBaseTransformer
+from ._base import SkrubBaseEstimator
 from ._check_input import CheckInputDataFrame
 from ._clean_categories import CleanCategories
 from ._clean_null_strings import CleanNullStrings
@@ -184,7 +184,7 @@ def _get_preprocessors(
     return steps
 
 
-class Cleaner(TransformerMixin, SkrubBaseTransformer):
+class Cleaner(TransformerMixin, SkrubBaseEstimator):
     """Column-wise consistency checks and sanitization of dtypes, null values and dates.
 
     The ``Cleaner`` performs some consistency checks and basic preprocessing
@@ -542,7 +542,7 @@ def get_feature_names_out(self, input_features=None):
         return np.asarray(self.all_outputs_)
 
 
-class TableVectorizer(TransformerMixin, SkrubBaseTransformer):
+class TableVectorizer(TransformerMixin, SkrubBaseEstimator):
     """Transform a dataframe to a numeric (vectorized) representation.
 
     This transformer preprocesses the given dataframe by first cleaning the data

From e4458b43457b3eb643cde63fefe7035c69ecd67b Mon Sep 17 00:00:00 2001
From: Riccardo Cappuzzo <7548232+rcap107@users.noreply.github.com>
Date: Tue, 16 Jun 2026 13:21:45 +0200
Subject: [PATCH 20/20] Apply suggestion from @jeromedockes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Jérôme Dockès <jerome@dockes.org>
---
 skrub/_base.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/skrub/_base.py b/skrub/_base.py
index 241f4470e..3d140a79d 100644
--- a/skrub/_base.py
+++ b/skrub/_base.py
@@ -2,11 +2,12 @@
 
 
 class SkrubBaseTransformer(BaseEstimator):
-    """Base class for all skrub transformers.
+    """Base class for all skrub estimators.
 
     This is a class that all skrub transformers inherit from.
-    For the moment, it's only used for the documentation url, but eventually
-    it will be used for other things as well.
+    For the moment, it's only used to set the documentation URL for estimator diagrams.
+
+    Think twice before adding anything to this class: it is a base class of *all* skrub estimators, including meta-estimators like ApplyToCols, the SingleColumnTransformer base class, and the SkrubLearners created by DataOps.
     """
 
     _doc_link_module = "skrub"