From 050acdd2977c09cb305a2a4d3db40dff9bd75e2c Mon Sep 17 00:00:00 2001 From: Myles Scolnick Date: Wed, 20 May 2026 14:38:44 -0400 Subject: [PATCH 1/5] perf(autoreload): skip stdlib/site-packages on per-cell check Every cell run with auto_reload enabled was stat-ing every entry in sys.modules (often 1000+), adding 16-80ms of overhead per cell. Add an opt-in skip_non_user_modules flag on ModuleReloader.check that caches stdlib/site-packages module names in a persistent skip set. AutoreloadManager.cell_scope opts in; the background ModuleWatcher keeps the default full scan so edits inside installed packages remain detectable at watcher latency. Fixes #9628 --- marimo/_runtime/reload/autoreload.py | 47 ++++++++- marimo/_runtime/reload/manager.py | 8 +- tests/_runtime/reload/test_autoreload.py | 127 +++++++++++++++++++++++ 3 files changed, 180 insertions(+), 2 deletions(-) diff --git a/marimo/_runtime/reload/autoreload.py b/marimo/_runtime/reload/autoreload.py index ada9c1f819c..6cb048ef47d 100644 --- a/marimo/_runtime/reload/autoreload.py +++ b/marimo/_runtime/reload/autoreload.py @@ -14,6 +14,7 @@ import modulefinder import os import sys +import sysconfig import threading import traceback import types @@ -73,6 +74,19 @@ def safe_hasattr(obj: M, attr: str) -> bool: return False +def _non_user_module_roots() -> tuple[str, ...]: + """Filesystem prefixes that hold stdlib + site-packages modules.""" + roots: set[str] = set() + for key in ("stdlib", "platstdlib", "purelib", "platlib"): + p = sysconfig.get_path(key) + if p: + roots.add(p) + # Covers stdlib for non-venv interpreters, where sysconfig's stdlib path + # may not match the runtime location. + roots.add(sys.base_prefix) + return tuple(roots) + + def modules_imported_by_cell( cell: CellImpl, sys_modules: dict[str, types.ModuleType] ) -> set[str]: @@ -160,10 +174,26 @@ def __init__(self) -> None: # for thread-safety self.lock = threading.Lock() self._module_dependency_finder = ModuleDependencyFinder() + # Names known to live in stdlib/site-packages; populated lazily on the + # hot per-cell path. Entries are never evicted: a module whose + # __file__ moves between roots at runtime would not be re-evaluated. + self._skip: set[str] = set() + self._non_user_roots = _non_user_module_roots() # Timestamp existing modules self.check(modules=sys.modules, reload=False) + def _is_user_module(self, module: types.ModuleType) -> bool: + """True for modules whose source lives outside stdlib/site-packages. + + Editable installs (e.g. ``pip install -e .``) point ``__file__`` at the + source tree, so they are correctly classified as user code. + """ + f = safe_getattr(module, "__file__", None) + if not f: + return False + return not f.startswith(self._non_user_roots) + def filename_and_mtime( self, module: types.ModuleType ) -> ModuleMTime | None: @@ -206,12 +236,21 @@ def cell_uses_stale_modules(self, cell: CellImpl) -> bool: ) def check( - self, modules: dict[str, types.ModuleType], reload: bool + self, + modules: dict[str, types.ModuleType], + reload: bool, + *, + skip_non_user_modules: bool = False, ) -> set[types.ModuleType]: """Check timestamps of modules, optionally reload them. Also patches existing objects with hot-reloaded ones. + When ``skip_non_user_modules`` is True, stdlib and site-packages + modules are skipped via a persistent cache — intended for the per-cell + hot path. The background ``ModuleWatcher`` keeps the default so that + edits to installed packages remain detectable at watcher latency. + Returns a set of modules that were found to have been modified. """ @@ -222,12 +261,18 @@ def check( # and also iterates over it with self.lock: modified_modules: set[types.ModuleType] = set() + skip = self._skip if skip_non_user_modules else None # materialize the module keys, since we'll be reloading while # iterating for modname in list(modules.keys()): + if skip is not None and modname in skip: + continue m = modules.get(modname, None) if m is None: continue + if skip is not None and not self._is_user_module(m): + skip.add(modname) + continue module_mtime = self.filename_and_mtime(m) if module_mtime is None: diff --git a/marimo/_runtime/reload/manager.py b/marimo/_runtime/reload/manager.py index 7d99638608b..b3dada99438 100644 --- a/marimo/_runtime/reload/manager.py +++ b/marimo/_runtime/reload/manager.py @@ -85,7 +85,12 @@ def cell_scope(self) -> Iterator[None]: yield return snapshot = set(sys.modules) - self._reloader.check(modules=sys.modules, reload=True) + # Hot path: skip stdlib/site-packages. Edits inside an installed + # package are still picked up by the background ModuleWatcher's + # full scan, just at watcher latency rather than per-cell. + self._reloader.check( + modules=sys.modules, reload=True, skip_non_user_modules=True + ) try: yield finally: @@ -93,6 +98,7 @@ def cell_scope(self) -> Iterator[None]: self._reloader.check( modules={m: sys.modules[m] for m in new_modules}, reload=False, + skip_non_user_modules=True, ) def _on_finish_hook(self, ctx: OnFinishHookContext) -> None: diff --git a/tests/_runtime/reload/test_autoreload.py b/tests/_runtime/reload/test_autoreload.py index eabb9425131..f7e46ff36c4 100644 --- a/tests/_runtime/reload/test_autoreload.py +++ b/tests/_runtime/reload/test_autoreload.py @@ -425,6 +425,133 @@ def test_check_reload_clears_stale_modules( assert len(reloader.stale_modules) == 0 +class TestSkipCache: + def test_is_user_module_stdlib(self): + reloader = ModuleReloader() + assert reloader._is_user_module(sys.modules["os"]) is False + assert reloader._is_user_module(sys.modules["pathlib"]) is False + + def test_is_user_module_builtin_has_no_file(self): + reloader = ModuleReloader() + assert reloader._is_user_module(sys.modules["sys"]) is False + assert reloader._is_user_module(sys.modules["builtins"]) is False + + def test_is_user_module_user_code( + self, tmp_path: pathlib.Path, py_modname: str + ): + sys.path.append(str(tmp_path)) + py_file = tmp_path / pathlib.Path(py_modname + ".py") + py_file.write_text("x = 1") + mod = importlib.import_module(py_modname) + reloader = ModuleReloader() + assert reloader._is_user_module(mod) is True + + def test_default_check_does_not_populate_skip(self): + # The watcher path must keep scanning everything so that edits inside + # installed packages remain detectable. + reloader = ModuleReloader() + assert reloader._skip == set() + + def test_hot_path_populates_skip_with_stdlib(self): + reloader = ModuleReloader() + reloader.check(sys.modules, reload=False, skip_non_user_modules=True) + assert "os" in reloader._skip + assert "pathlib" in reloader._skip + + def test_user_module_not_skipped_on_hot_path( + self, tmp_path: pathlib.Path, py_modname: str + ): + sys.path.append(str(tmp_path)) + py_file = tmp_path / pathlib.Path(py_modname + ".py") + py_file.write_text("x = 1") + importlib.import_module(py_modname) + reloader = ModuleReloader() + reloader.check(sys.modules, reload=False, skip_non_user_modules=True) + assert py_modname not in reloader._skip + + def test_skipped_modules_are_not_restated(self, monkeypatch): + reloader = ModuleReloader() + reloader.check(sys.modules, reload=False, skip_non_user_modules=True) + assert "os" in reloader._skip + + calls: list[str] = [] + orig = reloader.filename_and_mtime + + def spy(module): + calls.append(getattr(module, "__name__", "?")) + return orig(module) + + monkeypatch.setattr(reloader, "filename_and_mtime", spy) + reloader.check(sys.modules, reload=False, skip_non_user_modules=True) + assert "os" not in calls + assert "pathlib" not in calls + + def test_watcher_path_still_sees_installed_packages( + self, tmp_path: pathlib.Path, py_modname: str, monkeypatch + ): + # Regression guard for the most surprising user-visible behavior: + # editing a file inside site-packages must still be detected, just + # via the background watcher's full scan instead of the hot path. + # We simulate "this module is in site-packages" by treating tmp_path + # as a non-user root for the duration of the test. + sys.path.append(str(tmp_path)) + py_file = tmp_path / pathlib.Path(py_modname + ".py") + py_file.write_text("x = 1") + mod = importlib.import_module(py_modname) + + reloader = ModuleReloader() + monkeypatch.setattr( + reloader, + "_non_user_roots", + (str(tmp_path),) + reloader._non_user_roots, + ) + assert reloader._is_user_module(mod) is False + + # Watcher path (default) sees it. + reloader.check(sys.modules, reload=False) + assert py_modname in reloader.modules_mtimes + update_file(py_file, "x = 2") + assert any(m is mod for m in reloader.check(sys.modules, reload=False)) + + # Hot path skips it. + reloader2 = ModuleReloader() + monkeypatch.setattr( + reloader2, + "_non_user_roots", + (str(tmp_path),) + reloader2._non_user_roots, + ) + reloader2.check(sys.modules, reload=False, skip_non_user_modules=True) + assert py_modname in reloader2._skip + + def test_user_module_reload_still_works( + self, tmp_path: pathlib.Path, py_modname: str + ): + sys.path.append(str(tmp_path)) + py_file = tmp_path / pathlib.Path(py_modname + ".py") + py_file.write_text( + textwrap.dedent( + """ + def foo(): + return 1 + """ + ) + ) + mod = importlib.import_module(py_modname) + reloader = ModuleReloader() + reloader.check(sys.modules, reload=False) + assert mod.foo() == 1 + + update_file( + py_file, + """ + def foo(): + return 2 + """, + ) + reloader.check(sys.modules, reload=True) + assert mod.foo() == 2 + + class TestUpdateFunctions: """Tests for update_* functions""" From 3b4237b9c712e58a3221f06cf10830e57959eb6a Mon Sep 17 00:00:00 2001 From: Myles Scolnick Date: Wed, 20 May 2026 15:52:03 -0400 Subject: [PATCH 2/5] comments --- marimo/_runtime/reload/autoreload.py | 52 +++++++++++++++-------- marimo/_runtime/reload/manager.py | 10 +++-- tests/_runtime/reload/test_autoreload.py | 53 +++++++++++++++--------- 3 files changed, 73 insertions(+), 42 deletions(-) diff --git a/marimo/_runtime/reload/autoreload.py b/marimo/_runtime/reload/autoreload.py index 6cb048ef47d..153a9abbbc2 100644 --- a/marimo/_runtime/reload/autoreload.py +++ b/marimo/_runtime/reload/autoreload.py @@ -75,16 +75,28 @@ def safe_hasattr(obj: M, attr: str) -> bool: def _non_user_module_roots() -> tuple[str, ...]: - """Filesystem prefixes that hold stdlib + site-packages modules.""" + """Filesystem prefixes that hold stdlib + site-packages modules. + + Each entry is normalized and terminated with a separator so that a raw + prefix check on a normalized path matches whole directory boundaries + (e.g. `/usr/lib/python3.13/` does not match `/usr/lib/python3.13-mine/`). + """ roots: set[str] = set() for key in ("stdlib", "platstdlib", "purelib", "platlib"): p = sysconfig.get_path(key) if p: roots.add(p) - # Covers stdlib for non-venv interpreters, where sysconfig's stdlib path - # may not match the runtime location. - roots.add(sys.base_prefix) - return tuple(roots) + # Fallback for builds where sysconfig's stdlib path is missing or + # differs from the runtime location of the stdlib. + roots.add(os.path.dirname(os.__file__)) + + normalized: set[str] = set() + for r in roots: + n = os.path.normcase(os.path.realpath(r)) + if not n.endswith(os.sep): + n += os.sep + normalized.add(n) + return tuple(normalized) def modules_imported_by_cell( @@ -174,9 +186,11 @@ def __init__(self) -> None: # for thread-safety self.lock = threading.Lock() self._module_dependency_finder = ModuleDependencyFinder() - # Names known to live in stdlib/site-packages; populated lazily on the - # hot per-cell path. Entries are never evicted: a module whose - # __file__ moves between roots at runtime would not be re-evaluated. + # Names known to live in stdlib/site-packages. Populated lazily by + # callers that pass `skip_non_user_modules=True` (the hot per-cell + # path), and then consulted by every `check()` call. Entries are + # never evicted: a module whose `__file__` moves between roots at + # runtime would not be re-evaluated. self._skip: set[str] = set() self._non_user_roots = _non_user_module_roots() @@ -186,13 +200,14 @@ def __init__(self) -> None: def _is_user_module(self, module: types.ModuleType) -> bool: """True for modules whose source lives outside stdlib/site-packages. - Editable installs (e.g. ``pip install -e .``) point ``__file__`` at the + Editable installs (e.g. `pip install -e .`) point `__file__` at the source tree, so they are correctly classified as user code. """ f = safe_getattr(module, "__file__", None) if not f: return False - return not f.startswith(self._non_user_roots) + path = os.path.normcase(os.path.realpath(f)) + return not path.startswith(self._non_user_roots) def filename_and_mtime( self, module: types.ModuleType @@ -246,10 +261,12 @@ def check( Also patches existing objects with hot-reloaded ones. - When ``skip_non_user_modules`` is True, stdlib and site-packages - modules are skipped via a persistent cache — intended for the per-cell - hot path. The background ``ModuleWatcher`` keeps the default so that - edits to installed packages remain detectable at watcher latency. + When `skip_non_user_modules` is True, modules whose `__file__` is + under stdlib/site-packages are added to a persistent skip set and + won't be stat-ed on this or future calls. Intended for the per-cell + hot path. Once populated, the skip set short-circuits every caller + — including the background `ModuleWatcher` — so edits inside + installed packages are not hot-reloaded. Returns a set of modules that were found to have been modified. """ @@ -261,17 +278,16 @@ def check( # and also iterates over it with self.lock: modified_modules: set[types.ModuleType] = set() - skip = self._skip if skip_non_user_modules else None # materialize the module keys, since we'll be reloading while # iterating for modname in list(modules.keys()): - if skip is not None and modname in skip: + if modname in self._skip: continue m = modules.get(modname, None) if m is None: continue - if skip is not None and not self._is_user_module(m): - skip.add(modname) + if skip_non_user_modules and not self._is_user_module(m): + self._skip.add(modname) continue module_mtime = self.filename_and_mtime(m) diff --git a/marimo/_runtime/reload/manager.py b/marimo/_runtime/reload/manager.py index b3dada99438..ebb72e0b901 100644 --- a/marimo/_runtime/reload/manager.py +++ b/marimo/_runtime/reload/manager.py @@ -85,20 +85,22 @@ def cell_scope(self) -> Iterator[None]: yield return snapshot = set(sys.modules) - # Hot path: skip stdlib/site-packages. Edits inside an installed - # package are still picked up by the background ModuleWatcher's - # full scan, just at watcher latency rather than per-cell. + # Entry: skip stdlib/site-packages so cells don't pay for stat-ing + # them. This is the perf-critical call. self._reloader.check( modules=sys.modules, reload=True, skip_non_user_modules=True ) try: yield finally: + # Exit: record mtimes for modules the cell just imported. Don't + # skip here — `new_modules` is small (typically 0-3) and we need + # an mtime baseline for newly-imported installed packages so the + # next edit isn't silently treated as the initial state. new_modules = set(sys.modules) - snapshot self._reloader.check( modules={m: sys.modules[m] for m in new_modules}, reload=False, - skip_non_user_modules=True, ) def _on_finish_hook(self, ctx: OnFinishHookContext) -> None: diff --git a/tests/_runtime/reload/test_autoreload.py b/tests/_runtime/reload/test_autoreload.py index f7e46ff36c4..8ed9d174579 100644 --- a/tests/_runtime/reload/test_autoreload.py +++ b/tests/_runtime/reload/test_autoreload.py @@ -2,6 +2,7 @@ import gc import importlib +import os import pathlib import sys import textwrap @@ -486,42 +487,54 @@ def spy(module): assert "os" not in calls assert "pathlib" not in calls - def test_watcher_path_still_sees_installed_packages( + def test_default_check_does_not_populate_skip_on_its_own( self, tmp_path: pathlib.Path, py_modname: str, monkeypatch ): - # Regression guard for the most surprising user-visible behavior: - # editing a file inside site-packages must still be detected, just - # via the background watcher's full scan instead of the hot path. - # We simulate "this module is in site-packages" by treating tmp_path - # as a non-user root for the duration of the test. + # The watcher uses `skip_non_user_modules=False`. It must not add + # anything to `_skip` on its own — populating the cache is the hot + # path's responsibility. + sys.path.append(str(tmp_path)) + (tmp_path / pathlib.Path(py_modname + ".py")).write_text("x = 1") + importlib.import_module(py_modname) + + reloader = ModuleReloader() + tmp_root = os.path.normcase(os.path.realpath(str(tmp_path))) + os.sep + monkeypatch.setattr( + reloader, + "_non_user_roots", + (tmp_root,) + reloader._non_user_roots, + ) + reloader.check(sys.modules, reload=False) + assert py_modname not in reloader._skip + + def test_hot_path_population_short_circuits_subsequent_calls( + self, tmp_path: pathlib.Path, py_modname: str, monkeypatch + ): + # Once the hot path has classified a module as non-user, every later + # `check()` skips it — including the default (watcher) path. This is + # a documented tradeoff: it means edits inside an installed package + # are not hot-reloaded once a cell has run. sys.path.append(str(tmp_path)) py_file = tmp_path / pathlib.Path(py_modname + ".py") py_file.write_text("x = 1") mod = importlib.import_module(py_modname) reloader = ModuleReloader() + tmp_root = os.path.normcase(os.path.realpath(str(tmp_path))) + os.sep monkeypatch.setattr( reloader, "_non_user_roots", - (str(tmp_path),) + reloader._non_user_roots, + (tmp_root,) + reloader._non_user_roots, ) assert reloader._is_user_module(mod) is False - # Watcher path (default) sees it. - reloader.check(sys.modules, reload=False) - assert py_modname in reloader.modules_mtimes - update_file(py_file, "x = 2") - assert any(m is mod for m in reloader.check(sys.modules, reload=False)) + reloader.check(sys.modules, reload=False, skip_non_user_modules=True) + assert py_modname in reloader._skip - # Hot path skips it. - reloader2 = ModuleReloader() - monkeypatch.setattr( - reloader2, - "_non_user_roots", - (str(tmp_path),) + reloader2._non_user_roots, + update_file(py_file, "x = 2") + assert not any( + m is mod for m in reloader.check(sys.modules, reload=False) ) - reloader2.check(sys.modules, reload=False, skip_non_user_modules=True) - assert py_modname in reloader2._skip def test_user_module_reload_still_works( self, tmp_path: pathlib.Path, py_modname: str From 3155f3539eacc730c1f0ee56fa45bf3dac12dda6 Mon Sep 17 00:00:00 2001 From: Myles Scolnick Date: Wed, 20 May 2026 16:22:37 -0400 Subject: [PATCH 3/5] comments --- marimo/_runtime/reload/autoreload.py | 37 ++++++++------ tests/_runtime/reload/test_autoreload.py | 64 +++++++++--------------- 2 files changed, 46 insertions(+), 55 deletions(-) diff --git a/marimo/_runtime/reload/autoreload.py b/marimo/_runtime/reload/autoreload.py index 153a9abbbc2..6a2fc6c53ac 100644 --- a/marimo/_runtime/reload/autoreload.py +++ b/marimo/_runtime/reload/autoreload.py @@ -9,6 +9,7 @@ from __future__ import annotations +import functools import gc import io import modulefinder @@ -74,6 +75,7 @@ def safe_hasattr(obj: M, attr: str) -> bool: return False +@functools.cache def _non_user_module_roots() -> tuple[str, ...]: """Filesystem prefixes that hold stdlib + site-packages modules. @@ -186,13 +188,12 @@ def __init__(self) -> None: # for thread-safety self.lock = threading.Lock() self._module_dependency_finder = ModuleDependencyFinder() - # Names known to live in stdlib/site-packages. Populated lazily by - # callers that pass `skip_non_user_modules=True` (the hot per-cell - # path), and then consulted by every `check()` call. Entries are - # never evicted: a module whose `__file__` moves between roots at - # runtime would not be re-evaluated. + # Names known to live in stdlib/site-packages. Populated by every + # `check()` call (memoizing the `_is_user_module` classification); + # consumed only when `skip_non_user_modules=True`. Entries are never + # evicted: a module whose `__file__` moves between roots at runtime + # would not be re-evaluated. self._skip: set[str] = set() - self._non_user_roots = _non_user_module_roots() # Timestamp existing modules self.check(modules=sys.modules, reload=False) @@ -207,7 +208,7 @@ def _is_user_module(self, module: types.ModuleType) -> bool: if not f: return False path = os.path.normcase(os.path.realpath(f)) - return not path.startswith(self._non_user_roots) + return not path.startswith(_non_user_module_roots()) def filename_and_mtime( self, module: types.ModuleType @@ -262,11 +263,12 @@ def check( Also patches existing objects with hot-reloaded ones. When `skip_non_user_modules` is True, modules whose `__file__` is - under stdlib/site-packages are added to a persistent skip set and - won't be stat-ed on this or future calls. Intended for the per-cell - hot path. Once populated, the skip set short-circuits every caller - — including the background `ModuleWatcher` — so edits inside - installed packages are not hot-reloaded. + under stdlib/site-packages are skipped — intended for the per-cell + hot path. The background `ModuleWatcher` leaves it False so it still + stats every module on its 1s loop, which is what keeps edits inside + installed packages detectable. Both paths populate the same skip + cache, so the hot path benefits from classifications the watcher + has already done. Returns a set of modules that were found to have been modified. """ @@ -281,13 +283,18 @@ def check( # materialize the module keys, since we'll be reloading while # iterating for modname in list(modules.keys()): - if modname in self._skip: - continue m = modules.get(modname, None) if m is None: continue - if skip_non_user_modules and not self._is_user_module(m): + # Classify (memoized via `_skip`). The hot path uses the + # cache to short-circuit; the watcher always falls through + # to the stat check so that edits inside installed packages + # are still picked up. + is_non_user = modname in self._skip + if not is_non_user and not self._is_user_module(m): self._skip.add(modname) + is_non_user = True + if is_non_user and skip_non_user_modules: continue module_mtime = self.filename_and_mtime(m) diff --git a/tests/_runtime/reload/test_autoreload.py b/tests/_runtime/reload/test_autoreload.py index 8ed9d174579..06b6c44f0b1 100644 --- a/tests/_runtime/reload/test_autoreload.py +++ b/tests/_runtime/reload/test_autoreload.py @@ -447,17 +447,16 @@ def test_is_user_module_user_code( reloader = ModuleReloader() assert reloader._is_user_module(mod) is True - def test_default_check_does_not_populate_skip(self): - # The watcher path must keep scanning everything so that edits inside - # installed packages remain detectable. + def test_both_paths_populate_skip(self): + # The cache is shared memoization for the classification step; + # whichever path sees a module first records the verdict. reloader = ModuleReloader() - assert reloader._skip == set() - - def test_hot_path_populates_skip_with_stdlib(self): - reloader = ModuleReloader() - reloader.check(sys.modules, reload=False, skip_non_user_modules=True) + reloader.check(sys.modules, reload=False) assert "os" in reloader._skip - assert "pathlib" in reloader._skip + + reloader2 = ModuleReloader() + reloader2.check(sys.modules, reload=False, skip_non_user_modules=True) + assert "os" in reloader2._skip def test_user_module_not_skipped_on_hot_path( self, tmp_path: pathlib.Path, py_modname: str @@ -487,54 +486,39 @@ def spy(module): assert "os" not in calls assert "pathlib" not in calls - def test_default_check_does_not_populate_skip_on_its_own( + def test_watcher_path_still_sees_installed_packages( self, tmp_path: pathlib.Path, py_modname: str, monkeypatch ): - # The watcher uses `skip_non_user_modules=False`. It must not add - # anything to `_skip` on its own — populating the cache is the hot - # path's responsibility. - sys.path.append(str(tmp_path)) - (tmp_path / pathlib.Path(py_modname + ".py")).write_text("x = 1") - importlib.import_module(py_modname) - - reloader = ModuleReloader() - tmp_root = os.path.normcase(os.path.realpath(str(tmp_path))) + os.sep - monkeypatch.setattr( - reloader, - "_non_user_roots", - (tmp_root,) + reloader._non_user_roots, - ) - reloader.check(sys.modules, reload=False) - assert py_modname not in reloader._skip + # Regression guard: even after the hot path has classified a module + # as non-user (and cached it in `_skip`), the watcher's + # `skip_non_user_modules=False` call must still stat it and detect + # edits. Without this, `auto_reload` users editing files inside an + # installed package would silently stop getting hot reloads. + import marimo._runtime.reload.autoreload as autoreload_mod - def test_hot_path_population_short_circuits_subsequent_calls( - self, tmp_path: pathlib.Path, py_modname: str, monkeypatch - ): - # Once the hot path has classified a module as non-user, every later - # `check()` skips it — including the default (watcher) path. This is - # a documented tradeoff: it means edits inside an installed package - # are not hot-reloaded once a cell has run. sys.path.append(str(tmp_path)) py_file = tmp_path / pathlib.Path(py_modname + ".py") py_file.write_text("x = 1") mod = importlib.import_module(py_modname) - reloader = ModuleReloader() + real_roots = autoreload_mod._non_user_module_roots() tmp_root = os.path.normcase(os.path.realpath(str(tmp_path))) + os.sep monkeypatch.setattr( - reloader, - "_non_user_roots", - (tmp_root,) + reloader._non_user_roots, + autoreload_mod, + "_non_user_module_roots", + lambda: (tmp_root,) + real_roots, ) + + reloader = ModuleReloader() assert reloader._is_user_module(mod) is False + # Hot path classifies and caches. reloader.check(sys.modules, reload=False, skip_non_user_modules=True) assert py_modname in reloader._skip + # Watcher path falls through to the stat check despite the cache. update_file(py_file, "x = 2") - assert not any( - m is mod for m in reloader.check(sys.modules, reload=False) - ) + assert any(m is mod for m in reloader.check(sys.modules, reload=False)) def test_user_module_reload_still_works( self, tmp_path: pathlib.Path, py_modname: str From d1ff1a7142383723df75a257a207f3ff3c582e2b Mon Sep 17 00:00:00 2001 From: Myles Scolnick Date: Wed, 20 May 2026 17:12:46 -0400 Subject: [PATCH 4/5] comments --- marimo/_runtime/reload/autoreload.py | 28 ++++++++++++++++-------- tests/_runtime/reload/test_autoreload.py | 27 +++++++++++++++++++++++ 2 files changed, 46 insertions(+), 9 deletions(-) diff --git a/marimo/_runtime/reload/autoreload.py b/marimo/_runtime/reload/autoreload.py index 6a2fc6c53ac..2d7fbdcebbe 100644 --- a/marimo/_runtime/reload/autoreload.py +++ b/marimo/_runtime/reload/autoreload.py @@ -188,12 +188,13 @@ def __init__(self) -> None: # for thread-safety self.lock = threading.Lock() self._module_dependency_finder = ModuleDependencyFinder() - # Names known to live in stdlib/site-packages. Populated by every - # `check()` call (memoizing the `_is_user_module` classification); - # consumed only when `skip_non_user_modules=True`. Entries are never - # evicted: a module whose `__file__` moves between roots at runtime - # would not be re-evaluated. - self._skip: set[str] = set() + # modname -> cached `__file__` for modules classified as non-user. + # Populated by every `check()` call (memoizing `_is_user_module`); + # consumed only when `skip_non_user_modules=True`. Stored value is + # used to invalidate the entry if `sys.modules[modname]` is later + # rebound to a module with a different `__file__` (e.g. a user + # module shadowing an installed package). + self._skip: dict[str, str | None] = {} # Timestamp existing modules self.check(modules=sys.modules, reload=False) @@ -289,10 +290,19 @@ def check( # Classify (memoized via `_skip`). The hot path uses the # cache to short-circuit; the watcher always falls through # to the stat check so that edits inside installed packages - # are still picked up. - is_non_user = modname in self._skip + # are still picked up. The cached entry stores `__file__`, + # so a module rebound to a new location gets reclassified. + current_file = safe_getattr(m, "__file__", None) + if modname in self._skip: + if self._skip[modname] == current_file: + is_non_user = True + else: + del self._skip[modname] + is_non_user = False + else: + is_non_user = False if not is_non_user and not self._is_user_module(m): - self._skip.add(modname) + self._skip[modname] = current_file is_non_user = True if is_non_user and skip_non_user_modules: continue diff --git a/tests/_runtime/reload/test_autoreload.py b/tests/_runtime/reload/test_autoreload.py index 06b6c44f0b1..3be51e7ce16 100644 --- a/tests/_runtime/reload/test_autoreload.py +++ b/tests/_runtime/reload/test_autoreload.py @@ -520,6 +520,33 @@ def test_watcher_path_still_sees_installed_packages( update_file(py_file, "x = 2") assert any(m is mod for m in reloader.check(sys.modules, reload=False)) + def test_skip_cache_invalidates_when_module_rebound( + self, tmp_path: pathlib.Path, py_modname: str + ): + # If `sys.modules[modname]` is rebound to a module with a different + # `__file__` (e.g. a user file shadows an installed package), the + # cached non-user verdict must not stick. + sys.path.append(str(tmp_path)) + user_file = tmp_path / pathlib.Path(py_modname + ".py") + user_file.write_text("x = 1") + user_mod = importlib.import_module(py_modname) + + # Plant a fake "installed" version under the same name first. + fake_installed = types.ModuleType(py_modname) + fake_installed.__file__ = os.path.join( + os.path.dirname(os.__file__), py_modname + ".py" + ) + sys.modules[py_modname] = fake_installed + + reloader = ModuleReloader() + reloader.check(sys.modules, reload=False, skip_non_user_modules=True) + assert py_modname in reloader._skip + + # Rebind to the real user module. + sys.modules[py_modname] = user_mod + reloader.check(sys.modules, reload=False, skip_non_user_modules=True) + assert py_modname not in reloader._skip + def test_user_module_reload_still_works( self, tmp_path: pathlib.Path, py_modname: str ): From fb1ae74f16b11c3b1f8c428b969a2acb1a8cf670 Mon Sep 17 00:00:00 2001 From: Myles Scolnick Date: Wed, 20 May 2026 18:13:12 -0400 Subject: [PATCH 5/5] comments --- marimo/_runtime/reload/autoreload.py | 5 +++++ tests/_runtime/reload/test_autoreload.py | 11 +++++++++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/marimo/_runtime/reload/autoreload.py b/marimo/_runtime/reload/autoreload.py index 2d7fbdcebbe..5292a86a60d 100644 --- a/marimo/_runtime/reload/autoreload.py +++ b/marimo/_runtime/reload/autoreload.py @@ -297,7 +297,12 @@ def check( if self._skip[modname] == current_file: is_non_user = True else: + # Rebound to a different file — drop all cached + # state for this name so the new module starts + # from a clean mtime baseline. del self._skip[modname] + self.modules_mtimes.pop(modname, None) + self.stale_modules.discard(modname) is_non_user = False else: is_non_user = False diff --git a/tests/_runtime/reload/test_autoreload.py b/tests/_runtime/reload/test_autoreload.py index 3be51e7ce16..d30c43cec56 100644 --- a/tests/_runtime/reload/test_autoreload.py +++ b/tests/_runtime/reload/test_autoreload.py @@ -525,7 +525,9 @@ def test_skip_cache_invalidates_when_module_rebound( ): # If `sys.modules[modname]` is rebound to a module with a different # `__file__` (e.g. a user file shadows an installed package), the - # cached non-user verdict must not stick. + # cached non-user verdict must not stick — and any cached mtime + # from the old module must be cleared too, otherwise an older user + # file would be silently treated as unchanged. sys.path.append(str(tmp_path)) user_file = tmp_path / pathlib.Path(py_modname + ".py") user_file.write_text("x = 1") @@ -539,13 +541,18 @@ def test_skip_cache_invalidates_when_module_rebound( sys.modules[py_modname] = fake_installed reloader = ModuleReloader() - reloader.check(sys.modules, reload=False, skip_non_user_modules=True) + # Watcher-style call: classifies as non-user AND records a + # (synthetic) far-future mtime so we can detect stale-cache leakage. + reloader.check(sys.modules, reload=False) assert py_modname in reloader._skip + reloader.modules_mtimes[py_modname] = 1e12 # Rebind to the real user module. sys.modules[py_modname] = user_mod reloader.check(sys.modules, reload=False, skip_non_user_modules=True) assert py_modname not in reloader._skip + # Stale mtime is cleared so the next edit isn't masked by it. + assert reloader.modules_mtimes.get(py_modname, 0) < 1e12 def test_user_module_reload_still_works( self, tmp_path: pathlib.Path, py_modname: str