From faf81ee0d1e814d448a02844f5a15c43448e1966 Mon Sep 17 00:00:00 2001
From: Paul Natsuo Kishimoto <mail@paul.kishimoto.name>
Date: Thu, 15 Jan 2026 23:28:03 +0100
Subject: [PATCH 01/17] Work around ckan/ckanapi#218

---
 transport_data/__init__.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/transport_data/__init__.py b/transport_data/__init__.py
index ffad20d..34853c1 100644
--- a/transport_data/__init__.py
+++ b/transport_data/__init__.py
@@ -1,10 +1,13 @@
 import logging
 import sys
+from warnings import filterwarnings
 
 from .config import Config
 from .store import UnionStore
 from .util.pluggy import register_internal
 
+filterwarnings("ignore", "pkg_resources is deprecated", UserWarning, "ckanapi.version")
+
 log = logging.getLogger(__name__)
 log.setLevel(logging.INFO)
 log.addHandler(logging.StreamHandler(sys.stdout))

From 4dddc60ceb41bd0232661f5566269b6c97ce693b Mon Sep 17 00:00:00 2001
From: Paul Natsuo Kishimoto <mail@paul.kishimoto.name>
Date: Thu, 15 Jan 2026 23:29:42 +0100
Subject: [PATCH 02/17] Improve .util.ckan.Package

- Convert "resources" collection to instances of Resource.
  - Adjust ckan_package_to_mdr() to match.
- Add portal_url() method.
- Add type hints for known members/attributes.
---
 transport_data/org/ckan.py  | 17 +++++++++++++----
 transport_data/util/ckan.py | 25 ++++++++++++++++++++++++-
 2 files changed, 37 insertions(+), 5 deletions(-)

diff --git a/transport_data/org/ckan.py b/transport_data/org/ckan.py
index a636171..8b06fc8 100644
--- a/transport_data/org/ckan.py
+++ b/transport_data/org/ckan.py
@@ -121,7 +121,7 @@ def get_msd() -> "v21.MetadataStructureDefinition":
     return msd
 
 
-def ckan_package_to_mdr(p) -> "v21.MetadataReport":
+def ckan_package_to_mdr(package: Package) -> "v21.MetadataReport":
     """Convert a :class:`.Package` instance to a MetadataReport."""
     from sdmx.model import v21
 
@@ -134,10 +134,19 @@ def ckan_package_to_mdr(p) -> "v21.MetadataReport":
     for mda in msd.report_structure["ALL"]:
         av = ONEAV(value_for=mda)
         if mda.id == "JSON":
-            av.value = repr(p.asdict())
+            # All JSON data
+            av.value = repr(package.asdict())
         else:
-            value = getattr(p, mda.id)
-            av.value = value if isinstance(value, str) else repr(value)
+            value = getattr(package, mda.id)
+            match value:
+                case str():
+                    av.value = value
+                case list() if len(value) and isinstance(value[0], ModelProxy):
+                    # Restore ModelProxy contents to JSON-like instead of short __repr__
+                    av.value = repr([obj.asdict() for obj in value])
+                case _:
+                    av.value = repr(value)
+
         mdr.metadata.append(av)
 
     return mdr
diff --git a/transport_data/util/ckan.py b/transport_data/util/ckan.py
index bb919f5..86697f4 100644
--- a/transport_data/util/ckan.py
+++ b/transport_data/util/ckan.py
@@ -94,7 +94,7 @@ def get_item(self, name: str, index: int | None = None):
         data = self.__dict__[name][index]
         cls = get_class(name)
         assert cls
-        return cls(data)
+        return data if isinstance(data, cls) else cls(data)
 
     def _process_collections(self) -> None:
         """Convert the :attr:`_collections` to the designated types."""
@@ -174,6 +174,29 @@ class Package(ModelProxy):
     <https://github.com/ckan/ckan/blob/master/ckan/model/package.py>`_.
     """
 
+    _collections = {
+        "resources": (list, "Resource"),
+    }
+
+    # Type hints
+    name: str
+    organization: dict[str, str]
+    resources: list["Resource"]
+    tdc_category: str
+
+    def portal_url(self) -> str:
+        """Infer the TDC Portal URL for the package.
+
+        The URL is not provided by the API, so we construct it with similar logic to
+        the portal.
+        """
+        return (
+            "https://portal.transport-data.org/@"
+            + self.organization["title"].lower()
+            + "/"
+            + self.name
+        )
+
 
 class Resource(ModelProxy):
     """Proxy for `ckan.model.Resource

From cc145349990e9ec7add9818c931922db4dd0bf91 Mon Sep 17 00:00:00 2001
From: Paul Natsuo Kishimoto <mail@paul.kishimoto.name>
Date: Thu, 15 Jan 2026 23:35:51 +0100
Subject: [PATCH 03/17] Improve .util.ckan.Resource

- Add .fetch() method.
- Add type hints for known attributes.
---
 transport_data/util/ckan.py | 62 +++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)

diff --git a/transport_data/util/ckan.py b/transport_data/util/ckan.py
index 86697f4..50ab61a 100644
--- a/transport_data/util/ckan.py
+++ b/transport_data/util/ckan.py
@@ -14,6 +14,7 @@ class that provides conveniences used by other code in :mod:`transport_data`.
 from functools import partialmethod
 from importlib.metadata import version
 from itertools import count
+from pathlib import Path
 from typing import TYPE_CHECKING, ClassVar, TypeVar
 from warnings import filterwarnings
 
@@ -203,6 +204,67 @@ class Resource(ModelProxy):
     <https://github.com/ckan/ckan/blob/master/ckan/model/resource.py>`_.
     """
 
+    # Type hints
+    hash: str
+    name: str
+    size: int
+    url: str
+
+    def fetch(self, max_size: int = 10_000_000) -> Path:
+        """Fetch the resource file and cache it locally.
+
+        Parameters
+        ----------
+        max_size
+            Maximum size of file to download, in bytes.
+
+        Raises
+        ------
+        AssertionError
+            if the size of the file is greater than `max_size`.
+        """
+        from hashlib import file_digest
+
+        import requests
+
+        from transport_data import CONFIG
+
+        assert (self.size or 0) <= max_size, (
+            f"File size {self.size} >= maxiumum {max_size} B"
+        )
+
+        # Identify the target local path. Use directory hierarchy to avoid directories
+        # with many files.
+        assert self.id is not None
+        target = CONFIG.cache_path.joinpath(
+            "resource", self.id[0], self.id[:2], self.id, self.name
+        )
+
+        # Ensure the target directory exists
+        target.parent.mkdir(parents=True, exist_ok=True)
+
+        file_hash = ""
+        try:
+            # Check existence and hash of local file
+            with open(target, "rb") as fd:
+                file_hash = file_digest(fd, "md5").hexdigest()
+
+            # Allow that self.hash is empty; don't force download in this case
+            assert self.hash in ("", file_hash)
+        except (AssertionError, FileNotFoundError) as e:
+            # Hash does not match or file does not exist
+            if isinstance(e, AssertionError):
+                print(
+                    f"Hash {file_hash} of {target} does not match expected {self.hash};"
+                    " will re-download"
+                )
+            response = requests.get(self.url, stream=True)
+            with open(target, "wb") as fd:
+                for chunk in response.iter_content():
+                    fd.write(chunk)
+
+        return target
+
 
 class Tag(ModelProxy):
     """Proxy for the CKAN 'Tag' model.

From e4d394ce9bbb6720b64de8cba1c551dcd490382c Mon Sep 17 00:00:00 2001
From: Paul Natsuo Kishimoto <mail@paul.kishimoto.name>
Date: Thu, 15 Jan 2026 23:36:40 +0100
Subject: [PATCH 04/17] Add `tdc check-record` CLI command

---
 transport_data/cli/__init__.py | 85 ++++++++++++++++++++++++++++++++++
 1 file changed, 85 insertions(+)

diff --git a/transport_data/cli/__init__.py b/transport_data/cli/__init__.py
index f2e6dad..c0898ec 100644
--- a/transport_data/cli/__init__.py
+++ b/transport_data/cli/__init__.py
@@ -1,12 +1,19 @@
 """Command-line interface."""
 
+from collections import defaultdict
+from collections.abc import MutableMapping
+from dataclasses import dataclass
 from importlib import import_module
 from pathlib import Path
+from typing import TYPE_CHECKING, Any
 
 import click
 
 from transport_data import CONFIG  # noqa: F401
 
+if TYPE_CHECKING:
+    from transport_data.util.ckan import Package
+
 
 @click.group("tdc")
 def main():
@@ -171,3 +178,81 @@ def check(structure_urn: str, path: Path, sheets, verbose, **options):  # noqa:
                 print(sdmx.to_pandas(ds))
             else:
                 print(sdmx.to_pandas(ds).to_string())
+
+
+@main.command()
+@click.argument("id")
+def check_record(id: str) -> None:
+    """Check record NAME on the TDC."""
+    # TODO Use .org.ckan.instance_option
+    from transport_data.org.ckan import PROD
+
+    # Retrieve the record, converted to an instance of Package
+    package = PROD.package_show(id)
+
+    # Print general package information
+    print(
+        f"{package!r}",
+        package.portal_url(),
+        f"Title: {package.title!r}",
+        f"Category: {package.tdc_category}",
+        sep="\n- ",
+    )
+
+    check_package0(package)
+
+
+SUFFIXES = {
+    "data": {".xlsx", ".csv"},
+}
+
+
+def check_package0(package: "Package") -> None:
+    """Print some checks about a `package`."""
+    # Convert resource file names to Path instances; count suffixes
+    files = []
+    suffix_count: MutableMapping[str, int] = defaultdict(lambda: 0)
+    for resource in package.resources:
+        path = Path(resource.name)
+        files.append(path)
+        suffix_count[path.suffix.lower()] += 1
+
+    @dataclass
+    class Check:
+        label: str
+        value: Any
+
+        def __str__(self) -> str:
+            return f"{self.label}: {self.value}"
+
+    c0 = Check(
+        "Number of files by extension",
+        ", ".join(f"{c} {s}" for s, c in sorted(suffix_count.items())),
+    )
+    c1 = Check(
+        "Number of data files",
+        sum(c for s, c in suffix_count.items() if s in SUFFIXES["data"]),
+    )
+    c2 = Check("Number of possible SDMX-CSV files", suffix_count[".csv"])
+    checks = [c0, c1, c2]
+
+    lines = [""] + [f"- {check}" for check in checks] + [""]
+
+    lines.append("Criteria for a TDC Formatted record:")
+    c3 = Check("At least one file in CSV format", c2.value >= 1)
+    c4 = Check(
+        "Correct category assigned",
+        package.tdc_category in {"tdc_formatted", "tdc_harmonized"},
+    )
+    c5 = Check("CSV file(s) are in SDMX-CSV format (not implemented yet)", True)
+    c6 = Check("Overall", "YES" if (c3.value and c4.value and c5.value) else "NO")
+
+    lines.extend(f"- {check}" for check in (c3, c4, c5, c6))
+
+    lines.extend(["", "Criteria for a TDC Harmonized record—all of the above, plus:"])
+    c7 = Check("Correct category assigned", package.tdc_category == "tdc_harmonized")
+    c8 = Check("Overall", "YES" if c7.value else "NO")
+
+    lines.extend(f"- {check}" for check in (c7, c8))
+
+    print(*lines, sep="\n")

From cce0e2240535861c3a95964cd44f9224de54b628 Mon Sep 17 00:00:00 2001
From: Paul Natsuo Kishimoto <mail@paul.kishimoto.name>
Date: Fri, 16 Jan 2026 11:42:06 +0100
Subject: [PATCH 05/17] Add cli_modules() plugin hook

- Use in existing modules.
---
 transport_data/ipcc/__init__.py  |  6 +++---
 transport_data/other/__init__.py |  4 ++--
 transport_data/util/hooks.py     | 12 +++++++++++-
 3 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/transport_data/ipcc/__init__.py b/transport_data/ipcc/__init__.py
index 2183019..b88ce6c 100644
--- a/transport_data/ipcc/__init__.py
+++ b/transport_data/ipcc/__init__.py
@@ -1,9 +1,9 @@
 """Intergovernmental Panel on Climate Change metadata provider."""
 
-from transport_data.util.pluggy import hookimpl
+from transport_data import hook
 
 
-@hookimpl
+@hook
 def get_agencies():
     """Return the IPCC :class:`.Agency`."""
     from sdmx.model import common
@@ -16,7 +16,7 @@ def get_agencies():
     return (a,)
 
 
-@hookimpl
+@hook
 def provides():
     return (
         "Codelist=TDCI:CL_IPCC_2006_V2_T3.1.1",
diff --git a/transport_data/other/__init__.py b/transport_data/other/__init__.py
index ba634ea..b3a8f46 100644
--- a/transport_data/other/__init__.py
+++ b/transport_data/other/__init__.py
@@ -1,9 +1,9 @@
 """Other data providers."""
 
-from transport_data.util.pluggy import hookimpl
+from transport_data import hook
 
 
-@hookimpl
+@hook
 def get_agencies():
     from sdmx.model.common import Agency, Contact
 
diff --git a/transport_data/util/hooks.py b/transport_data/util/hooks.py
index e23aed6..a9a883f 100644
--- a/transport_data/util/hooks.py
+++ b/transport_data/util/hooks.py
@@ -7,9 +7,19 @@
 if TYPE_CHECKING:
     import sdmx.model.v21
 
+
 hookspec = pluggy.HookspecMarker("transport_data")
 
 
+@hookspec
+def cli_modules() -> str | Iterable[str]:
+    """Return the fully-qualified name(s) of (a) module(s) with :mod:`click` commands.
+
+    The module(s) **must** contain a :class:`click.Group` or command named :py:`main`.
+    """
+    raise NotImplementedError
+
+
 @hookspec
 def get_agencies() -> Iterable["sdmx.model.v21.Agency"]:
     """Return :class:`sdmx.model.common.Agency` identifying (meta)data provider(s).
@@ -21,5 +31,5 @@ def get_agencies() -> Iterable["sdmx.model.v21.Agency"]:
 
 @hookspec
 def provides() -> Iterable[str]:
-    """Return the URNs of SDMX artefacts available from a module."""
+    """Return 0 or more URNs of SDMX artefacts available from a module."""
     raise NotImplementedError

From 6e241fcdfe16491d9c4f3f0b52cb1531b2d6cd0b Mon Sep 17 00:00:00 2001
From: Paul Natsuo Kishimoto <mail@paul.kishimoto.name>
Date: Fri, 16 Jan 2026 11:42:31 +0100
Subject: [PATCH 06/17] Add transport_data.hook as a top-level item

---
 transport_data/__init__.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/transport_data/__init__.py b/transport_data/__init__.py
index 34853c1..47300b9 100644
--- a/transport_data/__init__.py
+++ b/transport_data/__init__.py
@@ -4,8 +4,15 @@
 
 from .config import Config
 from .store import UnionStore
+from .util.pluggy import hookimpl as hook
 from .util.pluggy import register_internal
 
+__all__ = [
+    "CONFIG",
+    "STORE",
+    "hook",
+]
+
 filterwarnings("ignore", "pkg_resources is deprecated", UserWarning, "ckanapi.version")
 
 log = logging.getLogger(__name__)

From 02b63808c03ee473241ad241d7efef02ad822c03 Mon Sep 17 00:00:00 2001
From: Paul Natsuo Kishimoto <mail@paul.kishimoto.name>
Date: Fri, 16 Jan 2026 11:46:10 +0100
Subject: [PATCH 07/17] Add cli_modules() hooks to existing modules

- Adjust test.
---
 transport_data/ato/__init__.py           | 11 ++++++++---
 transport_data/estat/__init__.py         |  8 +++++++-
 transport_data/iamc/__init__.py          | 11 ++++++++---
 transport_data/iso/__init__.py           |  9 +++++++--
 transport_data/itdp/__init__.py          |  9 +++++++--
 transport_data/jrc/__init__.py           | 12 ++++++++----
 transport_data/oica/__init__.py          | 11 ++++++++---
 transport_data/org/__init__.py           | 12 +++++++++---
 transport_data/tests/util/test_pluggy.py |  4 ++--
 9 files changed, 64 insertions(+), 23 deletions(-)

diff --git a/transport_data/ato/__init__.py b/transport_data/ato/__init__.py
index 2f0cb07..620778c 100644
--- a/transport_data/ato/__init__.py
+++ b/transport_data/ato/__init__.py
@@ -12,7 +12,7 @@
 import sdmx.model.v21 as m
 from sdmx.model import common, v21
 
-from transport_data.util.pluggy import hookimpl
+from transport_data import hook
 from transport_data.util.pooch import Pooch
 from transport_data.util.sdmx import anno_generated
 
@@ -373,7 +373,12 @@ def format_data_provider(value: str) -> str:
         return value + "—republished by ATO"
 
 
-@hookimpl
+@hook
+def cli_modules():
+    return f"{__name__}.cli"
+
+
+@hook
 def get_agencies():
     a = m.Agency(
         id="ATO",
@@ -392,7 +397,7 @@ def get_agencies():
     return (a,)
 
 
-@hookimpl
+@hook
 def provides():
     return (
         "Codelist=TDCI:CL_ATO_ECONOMY",
diff --git a/transport_data/estat/__init__.py b/transport_data/estat/__init__.py
index 0b87e0a..10747c2 100644
--- a/transport_data/estat/__init__.py
+++ b/transport_data/estat/__init__.py
@@ -12,7 +12,13 @@
 import click
 import sdmx
 
-from transport_data import STORE
+from transport_data import STORE, hook
+
+
+@hook
+def cli_modules():
+    return __name__
+
 
 # General functions
 
diff --git a/transport_data/iamc/__init__.py b/transport_data/iamc/__init__.py
index b0776b3..eb4708a 100644
--- a/transport_data/iamc/__init__.py
+++ b/transport_data/iamc/__init__.py
@@ -10,12 +10,17 @@
 import sdmx.model.v21 as m
 from sdmx.message import StructureMessage
 
-from transport_data.util.pluggy import hookimpl
+from transport_data import hook
 
 log = logging.getLogger(__name__)
 
 
-@hookimpl
+@hook
+def cli_modules():
+    return f"{__name__}.cli"
+
+
+@hook
 def get_agencies():
     a = m.Agency(
         id="IAMC",
@@ -25,7 +30,7 @@ def get_agencies():
     return (a,)
 
 
-@hookimpl
+@hook
 def provides():
     return ("ConceptScheme=TDCI:CS_IAMC",)
 
diff --git a/transport_data/iso/__init__.py b/transport_data/iso/__init__.py
index f2ab05c..3ea0bda 100644
--- a/transport_data/iso/__init__.py
+++ b/transport_data/iso/__init__.py
@@ -6,7 +6,7 @@
 
 from sdmx.model import common, v21
 
-from transport_data.util.pluggy import hookimpl
+from transport_data import hook
 from transport_data.util.pycountry import LOCALIZABLE, get_database, load_translations
 
 if TYPE_CHECKING:
@@ -16,7 +16,12 @@
 log = logging.getLogger(__name__)
 
 
-@hookimpl
+@hook
+def cli_modules():
+    return f"{__name__}.cli"
+
+
+@hook
 def get_agencies():
     """Return the ``ISO`` :class:`~.sdmx.model.common.Agency`."""
     a = common.Agency(
diff --git a/transport_data/itdp/__init__.py b/transport_data/itdp/__init__.py
index 6c9f0f5..a7258e5 100644
--- a/transport_data/itdp/__init__.py
+++ b/transport_data/itdp/__init__.py
@@ -1,9 +1,14 @@
 """Institute for Transport & Development Policy (ITDP) provider."""
 
-from transport_data.util.pluggy import hookimpl
+from transport_data import hook
 
 
-@hookimpl
+@hook
+def cli_modules():
+    return f"{__name__}.cli"
+
+
+@hook
 def get_agencies():
     from sdmx.model import common
 
diff --git a/transport_data/jrc/__init__.py b/transport_data/jrc/__init__.py
index defb394..8375ee9 100644
--- a/transport_data/jrc/__init__.py
+++ b/transport_data/jrc/__init__.py
@@ -22,13 +22,17 @@
 import pandas as pd
 import sdmx.model.v21 as m
 
-from transport_data import STORE
-from transport_data.util.pluggy import hookimpl
+from transport_data import STORE, hook
 from transport_data.util.pooch import Pooch
 from transport_data.util.sdmx import anno_generated
 
 
-@hookimpl
+@hook
+def cli_modules():
+    return f"{__name__}.cli"
+
+
+@hook
 def get_agencies():
     """Return information about the agency providing the data set.
 
@@ -50,7 +54,7 @@ def get_agencies():
     return (a,)
 
 
-@hookimpl
+@hook
 def provides():
     return ("ConceptScheme=TDCI:CS_JRC_MEASURE",)
 
diff --git a/transport_data/oica/__init__.py b/transport_data/oica/__init__.py
index 498cf87..6d95ed6 100644
--- a/transport_data/oica/__init__.py
+++ b/transport_data/oica/__init__.py
@@ -20,7 +20,7 @@
 
 import pandas as pd
 
-from transport_data.util.pluggy import hookimpl
+from transport_data import hook
 from transport_data.util.pooch import Pooch
 
 if TYPE_CHECKING:
@@ -329,7 +329,12 @@ def _make_code(value: str):
     return id_for_name
 
 
-@hookimpl
+@hook
+def cli_modules():
+    return f"{__name__}.cli"
+
+
+@hook
 def get_agencies():
     """Return the OICA Agency."""
     from sdmx.model import v21
@@ -342,7 +347,7 @@ def get_agencies():
     return (a,)
 
 
-@hookimpl
+@hook
 def provides():
     return (
         "Codelist=TDCI:CL_OICA_GEO",
diff --git a/transport_data/org/__init__.py b/transport_data/org/__init__.py
index 4a5266e..94af42b 100644
--- a/transport_data/org/__init__.py
+++ b/transport_data/org/__init__.py
@@ -6,13 +6,19 @@
 
 import sdmx.model.v21 as m
 
-from transport_data.util.pluggy import hookimpl, pm
+from transport_data import hook
+from transport_data.util.pluggy import pm
 
 if TYPE_CHECKING:
     import sdmx.model.v21
 
 
-@hookimpl
+@hook
+def cli_modules():
+    return f"{__name__}.cli"
+
+
+@hook
 def get_agencies() -> "sdmx.model.v21.Agency":
     """Return agencies and organizations including and subsidiary to TDCI itself."""
     # Agency
@@ -48,7 +54,7 @@ def get_agencies() -> "sdmx.model.v21.Agency":
     return a1, a2
 
 
-@hookimpl
+@hook
 def provides():
     return ("AgencyScheme=TDCI:TDCI",)
 
diff --git a/transport_data/tests/util/test_pluggy.py b/transport_data/tests/util/test_pluggy.py
index b332d0b..6820ee7 100644
--- a/transport_data/tests/util/test_pluggy.py
+++ b/transport_data/tests/util/test_pluggy.py
@@ -1,4 +1,4 @@
-def test_plugin_manager():
+def test_plugin_manager() -> None:
     from transport_data.util.pluggy import pm
 
-    assert 9 == len(pm.list_name_plugin())
+    assert 10 == len(pm.list_name_plugin())

From 666980ea242f40136e99cef26ff5c3eab689c68e Mon Sep 17 00:00:00 2001
From: Paul Natsuo Kishimoto <mail@paul.kishimoto.name>
Date: Fri, 16 Jan 2026 11:47:37 +0100
Subject: [PATCH 08/17] Add CLI commands from cli_modules() hooks

- Reduce MODULES_WITH_CLI to internal/non-provider modules.
---
 transport_data/__init__.py     |  1 +
 transport_data/cli/__init__.py | 26 +++++++++-----------------
 2 files changed, 10 insertions(+), 17 deletions(-)

diff --git a/transport_data/__init__.py b/transport_data/__init__.py
index 47300b9..1dd32f4 100644
--- a/transport_data/__init__.py
+++ b/transport_data/__init__.py
@@ -28,6 +28,7 @@
 # Register plugin hooks
 register_internal(
     "ato",
+    "estat",
     "iamc",
     "ipcc",
     "iso",
diff --git a/transport_data/cli/__init__.py b/transport_data/cli/__init__.py
index c0898ec..b40804d 100644
--- a/transport_data/cli/__init__.py
+++ b/transport_data/cli/__init__.py
@@ -4,12 +4,14 @@
 from collections.abc import MutableMapping
 from dataclasses import dataclass
 from importlib import import_module
+from itertools import chain
 from pathlib import Path
 from typing import TYPE_CHECKING, Any
 
 import click
 
 from transport_data import CONFIG  # noqa: F401
+from transport_data.util.pluggy import pm
 
 if TYPE_CHECKING:
     from transport_data.util.ckan import Package
@@ -23,27 +25,17 @@ def main():
 #: List of (sub)modules that define CLI (sub)commands. Each should contain a
 #: @click.command() named "main".
 MODULES_WITH_CLI = [
-    "ato.cli",
-    "config",
-    "cli.interactive",
-    "estat",
-    "iamc.cli",
-    "iso.cli",
-    "itdp.cli",
-    "jrc.cli",
-    "oica.cli",
-    "org.cli",
-    "org.ckan",
-    "proto.cli",
-    "store",
-    "testing.cli",
+    "transport_data.config",
+    "transport_data.cli.interactive",
+    "transport_data.org.ckan",
+    "transport_data.proto.cli",
+    "transport_data.store",
+    "transport_data.testing.cli",
 ]
 
-
 # Add commands from each module that defines them
-for name in MODULES_WITH_CLI:
+for full_name in chain(MODULES_WITH_CLI, pm.hook.cli_modules()):
     try:
-        full_name = f"transport_data.{name}"
         module = import_module(full_name)
     except ImportError as e:
         print(f"{full_name} commands not available: {e.args[0]}")

From 83d996199e49a1535fde51ffcf7d320a8de1fa98 Mon Sep 17 00:00:00 2001
From: Paul Natsuo Kishimoto <mail@paul.kishimoto.name>
Date: Fri, 16 Jan 2026 14:13:18 +0100
Subject: [PATCH 09/17] Separate check- CLI commands into distinct modules

- Adjust tests.
- Type hint tests.
---
 transport_data/cli/__init__.py         | 217 +------------------------
 transport_data/cli/check_file.py       | 134 +++++++++++++++
 transport_data/cli/check_record.py     |  88 ++++++++++
 transport_data/testing/__init__.py     |   6 +-
 transport_data/tests/test_cli.py       |  36 ++--
 transport_data/tests/util/test_ckan.py |   2 +-
 6 files changed, 252 insertions(+), 231 deletions(-)
 create mode 100644 transport_data/cli/check_file.py
 create mode 100644 transport_data/cli/check_record.py

diff --git a/transport_data/cli/__init__.py b/transport_data/cli/__init__.py
index b40804d..4caf8a6 100644
--- a/transport_data/cli/__init__.py
+++ b/transport_data/cli/__init__.py
@@ -1,21 +1,13 @@
 """Command-line interface."""
 
-from collections import defaultdict
-from collections.abc import MutableMapping
-from dataclasses import dataclass
 from importlib import import_module
 from itertools import chain
-from pathlib import Path
-from typing import TYPE_CHECKING, Any
 
 import click
 
 from transport_data import CONFIG  # noqa: F401
 from transport_data.util.pluggy import pm
 
-if TYPE_CHECKING:
-    from transport_data.util.ckan import Package
-
 
 @click.group("tdc")
 def main():
@@ -27,6 +19,8 @@ def main():
 MODULES_WITH_CLI = [
     "transport_data.config",
     "transport_data.cli.interactive",
+    "transport_data.cli.check_file",
+    "transport_data.cli.check_record",
     "transport_data.org.ckan",
     "transport_data.proto.cli",
     "transport_data.store",
@@ -41,210 +35,3 @@ def main():
         print(f"{full_name} commands not available: {e.args[0]}")
     else:
         main.add_command(getattr(module, "main"))
-
-
-@main.command()
-@click.argument("structure_urn", metavar="URN")
-@click.argument(
-    "path", metavar="FILE", type=click.Path(exists=True, dir_okay=False, path_type=Path)
-)
-@click.option("--sheets", help="Sheet(s) in .xlsx FILE to check.")
-@click.option("-v", "--verbose", count=True, help="Increase verbosity.")
-@click.option("--structure", help="Value for STRUCTURE field.")
-@click.option("--structure-id", "structure_id", help="Value for STRUCTURE_ID field.")
-@click.option("--action", default="I", help="Value for ACTION field.")
-def check(structure_urn: str, path: Path, sheets, verbose, **options):  # noqa: C901
-    """Check that FILE can be read as SDMX-CSV.
-
-    URN is the shortened SDMX URN of a data flow or data structure definition that
-    describes the data in FILE, for example "Dataflow=PROVIDER:EXAMPLE(1.2.3)" (the
-    version is not required). This artefact must already be present in the local store.
-
-    FILE may have a ".csv" or ".xlsx" suffix. In the latter case, it is converted to a
-    temporary set of CSV files. If --sheets are given, only these worksheets are
-    converted and checked.
-
-    If not given, --structure and --structure-id are inferred from URN.
-    """
-    from traceback import format_exception
-
-    import sdmx
-    import sdmx.urn
-    from sdmx.model import common
-
-    from transport_data import STORE
-    from transport_data.util.sdmx import read_csv
-
-    # Pieces of any error message
-    message = []
-
-    # Handle `structure_urn`: retrieve a data structure that describes the data
-    try:
-        structure = STORE.get(structure_urn)
-    except Exception:
-        message.append(f"Structure {structure_urn!r} could not be loaded")
-        structure = structure_cls = structure_id = None
-    else:
-        structure_cls = type(structure).__name__.lower().replace("definition", "")
-        structure_id = sdmx.urn.shorten(structure.urn).split("=")[-1]
-
-    if isinstance(structure, common.BaseDataflow):
-        # Also retrieve the data structure definition
-        STORE.resolve(structure, "structure")
-        assert len(structure.structure.dimensions)
-
-    # Construct keyword arguments for CSVAdapter
-    # TODO Check if this works for full SDMX-CSV
-    adapt = {
-        "structure": options.pop("structure") or structure_cls,
-        "structure_id": options.pop("structure_id") or structure_id,
-        "action": options.pop("action"),
-    }
-
-    # Handle `path`; construct a sequence of (label, path) of CSV files to be processed
-    label_path = []
-
-    if path.suffix == ".csv":
-        label_path.append((f"File: {path}", path))
-    elif path.suffix == ".xlsx":
-        # Explode an Excel file into one or more CSV files in a temporary directory
-        import pandas as pd
-        from platformdirs import user_cache_path
-
-        # Create a cache directory
-        cache_dir = user_cache_path("transport-data").joinpath("check")
-        cache_dir.mkdir(parents=True, exist_ok=True)
-
-        # Explode Excel file into one CSV file per sheet
-        ef = pd.ExcelFile(path)
-        _sheets = set(sheets.split(",")) if sheets else set(ef.sheet_names)
-        for sheet_name in filter(_sheets.__contains__, ef.sheet_names):
-            # Construct a temporary path
-            label_path.append(
-                (
-                    f"File: {path}\nSheet: {sheet_name}",
-                    cache_dir.joinpath(f"{path.stem}_xlsx_{sheet_name}.csv"),
-                )
-            )
-            # Read the sheet from the ExcelFile and write to a CSV file
-            pd.read_excel(ef, sheet_name).to_csv(label_path[-1][1], index=False)
-        ef.close()
-    else:
-        raise click.UsageError(f"Unsupported file extension: {path.suffix!r}")
-
-    # Process `label_path`
-    for label, p in label_path:
-        print(f"\n{label}")
-
-        # Read the file into an SDMX data message
-        try:
-            dm = read_csv(p, structure, adapt)
-        except Exception as e:
-            message.append(f"read failed with\n{type(e).__name__}: {' '.join(e.args)}")
-
-            if len(e.args) and "line 1" in e.args[0]:
-                message.append(
-                    "Hint: try giving --structure= or --structure-id argument(s) to "
-                    "adapt to SDMX-CSV."
-                )
-            elif structure is None:
-                pass
-            else:  # pragma: no cover
-                message.append("\n".join(format_exception(e)))
-
-            print("")
-            raise click.ClickException("\n\n".join(message))
-
-        # Show the contents of the data message
-        dfd_urn = sdmx.urn.shorten(sdmx.urn.make(dm.dataflow))
-        print(f"\n{len(dm.data)} data set(s) in: {dfd_urn!s}")
-
-        # Show information about each data set
-        for i, ds in enumerate(dm.data):
-            print(f"\nData set {i}: action={ds.action}")
-
-            # Show the data set contents or summary, according to verbosity
-            if verbose == 0:
-                print(f"{len(ds)} observations")
-            elif verbose == 1:
-                print(sdmx.to_pandas(ds))
-            else:
-                print(sdmx.to_pandas(ds).to_string())
-
-
-@main.command()
-@click.argument("id")
-def check_record(id: str) -> None:
-    """Check record NAME on the TDC."""
-    # TODO Use .org.ckan.instance_option
-    from transport_data.org.ckan import PROD
-
-    # Retrieve the record, converted to an instance of Package
-    package = PROD.package_show(id)
-
-    # Print general package information
-    print(
-        f"{package!r}",
-        package.portal_url(),
-        f"Title: {package.title!r}",
-        f"Category: {package.tdc_category}",
-        sep="\n- ",
-    )
-
-    check_package0(package)
-
-
-SUFFIXES = {
-    "data": {".xlsx", ".csv"},
-}
-
-
-def check_package0(package: "Package") -> None:
-    """Print some checks about a `package`."""
-    # Convert resource file names to Path instances; count suffixes
-    files = []
-    suffix_count: MutableMapping[str, int] = defaultdict(lambda: 0)
-    for resource in package.resources:
-        path = Path(resource.name)
-        files.append(path)
-        suffix_count[path.suffix.lower()] += 1
-
-    @dataclass
-    class Check:
-        label: str
-        value: Any
-
-        def __str__(self) -> str:
-            return f"{self.label}: {self.value}"
-
-    c0 = Check(
-        "Number of files by extension",
-        ", ".join(f"{c} {s}" for s, c in sorted(suffix_count.items())),
-    )
-    c1 = Check(
-        "Number of data files",
-        sum(c for s, c in suffix_count.items() if s in SUFFIXES["data"]),
-    )
-    c2 = Check("Number of possible SDMX-CSV files", suffix_count[".csv"])
-    checks = [c0, c1, c2]
-
-    lines = [""] + [f"- {check}" for check in checks] + [""]
-
-    lines.append("Criteria for a TDC Formatted record:")
-    c3 = Check("At least one file in CSV format", c2.value >= 1)
-    c4 = Check(
-        "Correct category assigned",
-        package.tdc_category in {"tdc_formatted", "tdc_harmonized"},
-    )
-    c5 = Check("CSV file(s) are in SDMX-CSV format (not implemented yet)", True)
-    c6 = Check("Overall", "YES" if (c3.value and c4.value and c5.value) else "NO")
-
-    lines.extend(f"- {check}" for check in (c3, c4, c5, c6))
-
-    lines.extend(["", "Criteria for a TDC Harmonized record—all of the above, plus:"])
-    c7 = Check("Correct category assigned", package.tdc_category == "tdc_harmonized")
-    c8 = Check("Overall", "YES" if c7.value else "NO")
-
-    lines.extend(f"- {check}" for check in (c7, c8))
-
-    print(*lines, sep="\n")
diff --git a/transport_data/cli/check_file.py b/transport_data/cli/check_file.py
new file mode 100644
index 0000000..5ebd89d
--- /dev/null
+++ b/transport_data/cli/check_file.py
@@ -0,0 +1,134 @@
+from pathlib import Path
+
+import click
+
+from transport_data import CONFIG  # noqa: F401
+
+
+@click.command("check-file")
+@click.argument("structure_urn", metavar="URN")
+@click.argument(
+    "path", metavar="FILE", type=click.Path(exists=True, dir_okay=False, path_type=Path)
+)
+@click.option("--sheets", help="Sheet(s) in .xlsx FILE to check.")
+@click.option("-v", "--verbose", count=True, help="Increase verbosity.")
+@click.option("--structure", help="Value for STRUCTURE field.")
+@click.option("--structure-id", "structure_id", help="Value for STRUCTURE_ID field.")
+@click.option("--action", default="I", help="Value for ACTION field.")
+def main(structure_urn: str, path: Path, sheets, verbose, **options):  # noqa: C901
+    """Check that FILE can be read as SDMX-CSV.
+
+    URN is the shortened SDMX URN of a data flow or data structure definition that
+    describes the data in FILE, for example "Dataflow=PROVIDER:EXAMPLE(1.2.3)" (the
+    version is not required). This artefact must already be present in the local store.
+
+    FILE may have a ".csv" or ".xlsx" suffix. In the latter case, it is converted to a
+    temporary set of CSV files. If --sheets are given, only these worksheets are
+    converted and checked.
+
+    If not given, --structure and --structure-id are inferred from URN.
+    """
+    from traceback import format_exception
+
+    import sdmx
+    import sdmx.urn
+    from sdmx.model import common
+
+    from transport_data import STORE
+    from transport_data.util.sdmx import read_csv
+
+    # Pieces of any error message
+    message = []
+
+    # Handle `structure_urn`: retrieve a data structure that describes the data
+    try:
+        structure = STORE.get(structure_urn)
+    except Exception:
+        message.append(f"Structure {structure_urn!r} could not be loaded")
+        structure = structure_cls = structure_id = None
+    else:
+        structure_cls = type(structure).__name__.lower().replace("definition", "")
+        structure_id = sdmx.urn.shorten(structure.urn).split("=")[-1]
+
+    if isinstance(structure, common.BaseDataflow):
+        # Also retrieve the data structure definition
+        STORE.resolve(structure, "structure")
+        assert len(structure.structure.dimensions)
+
+    # Construct keyword arguments for CSVAdapter
+    # TODO Check if this works for full SDMX-CSV
+    adapt = {
+        "structure": options.pop("structure") or structure_cls,
+        "structure_id": options.pop("structure_id") or structure_id,
+        "action": options.pop("action"),
+    }
+
+    # Handle `path`; construct a sequence of (label, path) of CSV files to be processed
+    label_path = []
+
+    if path.suffix.lower() == ".csv":  # Case-insensitive, as in check-record
+        label_path.append((f"File: {path}", path))
+    elif path.suffix.lower() == ".xlsx":
+        # Explode an Excel file into one or more CSV files in a temporary directory
+        import pandas as pd
+        from platformdirs import user_cache_path
+
+        # Create a cache directory
+        cache_dir = user_cache_path("transport-data").joinpath("check")
+        cache_dir.mkdir(parents=True, exist_ok=True)
+
+        # Explode Excel file into one CSV file per sheet. The context manager closes
+        # the workbook even if reading or writing one of the sheets fails.
+        with pd.ExcelFile(path) as ef:
+            _sheets = set(sheets.split(",")) if sheets else set(ef.sheet_names)
+            for sheet_name in filter(_sheets.__contains__, ef.sheet_names):
+                # Construct a temporary path
+                label_path.append(
+                    (
+                        f"File: {path}\nSheet: {sheet_name}",
+                        cache_dir.joinpath(f"{path.stem}_xlsx_{sheet_name}.csv"),
+                    )
+                )
+                # Read the sheet from the ExcelFile and write to a CSV file
+                pd.read_excel(ef, sheet_name).to_csv(label_path[-1][1], index=False)
+    else:
+        raise click.UsageError(f"Unsupported file extension: {path.suffix!r}")
+
+    # Process `label_path`
+    for label, p in label_path:
+        print(f"\n{label}")
+
+        # Read the file into an SDMX data message
+        try:
+            dm = read_csv(p, structure, adapt)
+        except Exception as e:
+            detail = " ".join(map(str, e.args))  # args are not always strings
+            message.append(f"read failed with\n{type(e).__name__}: {detail}")
+            if e.args and "line 1" in str(e.args[0]):
+                message.append(
+                    "Hint: try giving --structure= or --structure-id argument(s) to "
+                    "adapt to SDMX-CSV."
+                )
+            elif structure is None:
+                pass
+            else:  # pragma: no cover
+                message.append("\n".join(format_exception(e)))
+
+            print("")
+            raise click.ClickException("\n\n".join(message))
+
+        # Show the contents of the data message
+        dfd_urn = sdmx.urn.shorten(sdmx.urn.make(dm.dataflow))
+        print(f"\n{len(dm.data)} data set(s) in: {dfd_urn!s}")
+
+        # Show information about each data set
+        for i, ds in enumerate(dm.data):
+            print(f"\nData set {i}: action={ds.action}")
+
+            # Show the data set contents or summary, according to verbosity
+            if verbose == 0:
+                print(f"{len(ds)} observations")
+            elif verbose == 1:
+                print(sdmx.to_pandas(ds))
+            else:
+                print(sdmx.to_pandas(ds).to_string())
diff --git a/transport_data/cli/check_record.py b/transport_data/cli/check_record.py
new file mode 100644
index 0000000..25771e7
--- /dev/null
+++ b/transport_data/cli/check_record.py
@@ -0,0 +1,88 @@
+from collections import defaultdict
+from collections.abc import MutableMapping
+from dataclasses import dataclass
+from pathlib import Path
+from typing import TYPE_CHECKING, Any
+
+import click
+
+if TYPE_CHECKING:
+    from transport_data.util.ckan import Package
+
+
+@click.command("check-record")
+@click.argument("id")
+def main(id: str) -> None:
+    """Check the record identified by ID on the TDC."""
+    # TODO Use .org.ckan.instance_option
+    from transport_data.org.ckan import PROD
+
+    # Retrieve the record, converted to an instance of Package
+    package = PROD.package_show(id)
+
+    # Print general package information
+    print(
+        f"{package!r}",
+        package.portal_url(),
+        f"Title: {package.title!r}",
+        f"Category: {package.tdc_category}",
+        sep="\n- ",
+    )
+
+    check_package0(package)
+
+
+SUFFIXES = {
+    "data": {".xlsx", ".csv"},
+}
+
+
+def check_package0(package: "Package") -> None:
+    """Print some checks about a `package`."""
+    # Convert resource file names to Path instances; count suffixes
+    files = []
+    suffix_count: MutableMapping[str, int] = defaultdict(lambda: 0)
+    for resource in package.resources:
+        path = Path(resource.name)
+        files.append(path)
+        suffix_count[path.suffix.lower()] += 1
+
+    @dataclass
+    class Check:
+        label: str
+        value: Any
+
+        def __str__(self) -> str:
+            return f"{self.label}: {self.value}"
+
+    c0 = Check(
+        "Number of files by extension",
+        ", ".join(f"{c} {s}" for s, c in sorted(suffix_count.items())),
+    )
+    c1 = Check(
+        "Number of data files",
+        sum(c for s, c in suffix_count.items() if s in SUFFIXES["data"]),
+    )
+    c2 = Check("Number of possible SDMX-CSV files", suffix_count[".csv"])
+    checks = [c0, c1, c2]
+
+    lines = [""] + [f"- {check}" for check in checks] + [""]
+
+    lines.append("Criteria for a TDC Formatted record:")
+    c3 = Check("At least one file in CSV format", c2.value >= 1)
+    c4 = Check(
+        "Correct category assigned",
+        package.tdc_category in {"tdc_formatted", "tdc_harmonized"},
+    )
+    c5 = Check("CSV file(s) are in SDMX-CSV format (not implemented yet)", True)
+    c6 = Check("Overall", "YES" if (c3.value and c4.value and c5.value) else "NO")
+
+    lines.extend(f"- {check}" for check in (c3, c4, c5, c6))
+
+    lines.extend(["", "Criteria for a TDC Harmonized record—all of the above, plus:"])
+    c7 = Check("Correct category assigned", package.tdc_category == "tdc_harmonized")
+    c8 = Check("Overall", "YES" if c7.value else "NO")
+
+    lines.extend(f"- {check}" for check in (c7, c8))
+
+    print(*lines, sep="\n")
diff --git a/transport_data/testing/__init__.py b/transport_data/testing/__init__.py
index e97b5a1..eab6181 100644
--- a/transport_data/testing/__init__.py
+++ b/transport_data/testing/__init__.py
@@ -3,7 +3,7 @@
 import platform
 import re
 import zipfile
-from collections.abc import Generator, Iterator
+from collections.abc import Iterator
 from typing import TYPE_CHECKING, cast
 
 import click.testing
@@ -170,7 +170,7 @@ def test_data_path() -> Iterator["Traversable"]:
 
 
 @pytest.fixture(scope="session")
-def tmp_config(tmp_path_factory) -> Generator[Config, None, None]:
+def tmp_config(tmp_path_factory) -> Iterator[Config]:
     """A :class:`.Config` instance pointing to a temporary directory."""
     from platformdirs import user_data_path
 
@@ -189,7 +189,7 @@ def tmp_config(tmp_path_factory) -> Generator[Config, None, None]:
 
 
 @pytest.fixture(scope="session")
-def tmp_store(tmp_config) -> Generator[UnionStore, None, None]:
+def tmp_store(tmp_config) -> Iterator[UnionStore]:
     """A :class`.UnionStore` in a temporary directory per :func:`.tmp_config`."""
     result = UnionStore(tmp_config)
 
diff --git a/transport_data/tests/test_cli.py b/transport_data/tests/test_cli.py
index 2d681bd..282cb98 100644
--- a/transport_data/tests/test_cli.py
+++ b/transport_data/tests/test_cli.py
@@ -1,11 +1,13 @@
 import re
+from pathlib import Path
 
 import pytest
 from prompt_toolkit.input.ansi_escape_sequences import REVERSE_ANSI_SEQUENCES
 from prompt_toolkit.keys import Keys
 
 from transport_data.cli.interactive import Editor
-from transport_data.testing import ember_dfd
+from transport_data.store import UnionStore
+from transport_data.testing import CliRunner, ember_dfd
 
 
 @pytest.mark.parametrize(
@@ -14,7 +16,8 @@
         ("--help",),
         ("ato", "--help"),
         ("ato", "fetch", "--all"),
-        ("check", "--help"),
+        ("check-file", "--help"),
+        ("check-record", "--help"),
         ("config", "--help"),
         ("estat", "--help"),
         ("estat", "fetch", "--help"),
@@ -28,7 +31,7 @@
         ("store", "--help"),
     ),
 )
-def test_cli(tdc_cli, command):
+def test_cli(tdc_cli: CliRunner, command: tuple[str, ...]) -> None:
     tdc_cli.invoke(command)
 
 
@@ -41,12 +44,14 @@ def test_cli(tdc_cli, command):
 ]
 
 
-def test_check0(tdc_cli, test_data_path, tmp_store):
+def test_check_file0(
+    tdc_cli: CliRunner, test_data_path: Path, tmp_store: UnionStore
+) -> None:
     """Check a successful read of a .xlsx file."""
     ember_dfd(tmp_store)
 
     path = test_data_path.joinpath("read-csv-2.xlsx")
-    result = tdc_cli.invoke(["check"] + CHECK_ARGS + [str(path)])
+    result = tdc_cli.invoke(["check-file"] + CHECK_ARGS + [str(path)])
 
     # Command runs without error
     assert 0 == result.exit_code, result.output
@@ -76,12 +81,19 @@ def test_check0(tdc_cli, test_data_path, tmp_store):
         (CHECK_ARGS + ["-vv"], 0, ""),  # Show pd.DataFrame full string repr
     ),
 )
-def test_check1(tdc_cli, test_data_path, tmp_store, args, exit_code, text):
+def test_check_file1(
+    tdc_cli: CliRunner,
+    test_data_path: Path,
+    tmp_store: UnionStore,
+    args: list[str],
+    exit_code: int,
+    text: str,
+) -> None:
     """Check various other argument combinations."""
     ember_dfd(tmp_store)
 
     path = test_data_path.joinpath("read-csv-1.csv")
-    result = tdc_cli.invoke(["check"] + args + [str(path)])
+    result = tdc_cli.invoke(["check-file"] + args + [str(path)])
 
     # Command gives the expected exit code
     assert exit_code == result.exit_code, result.output
@@ -91,10 +103,10 @@ def test_check1(tdc_cli, test_data_path, tmp_store, args, exit_code, text):
         assert text in result.output
 
 
-def test_check2(tdc_cli, tmp_path):
+def test_check_file2(tdc_cli: CliRunner, tmp_path: Path) -> None:
     path = tmp_path.joinpath("foo.txt")
     path.touch()
-    result = tdc_cli.invoke(["check", "X", str(path)])
+    result = tdc_cli.invoke(["check-file", "X", str(path)])
 
     assert 2 == result.exit_code, result.output
     assert "Unsupported file extension" in result.output
@@ -149,7 +161,7 @@ def _backspace(text: str) -> str:
 
 
 @pytest.mark.timeout(1)
-def test_edit2(tmp_store) -> None:
+def test_edit2(tmp_store: UnionStore) -> None:
     # CLI runs and accepts the input without error
     run_script(SCRIPT_2)
 
@@ -176,7 +188,7 @@ def test_edit2(tmp_store) -> None:
 
 @pytest.mark.timeout(1)
 @pytest.mark.usefixtures("sdmx_structures")
-def test_edit4(tmp_store) -> None:
+def test_edit4(tmp_store: UnionStore) -> None:
     # CLI runs and accepts the input without error
     run_script(SCRIPT_4)
 
@@ -205,7 +217,7 @@ def test_edit4(tmp_store) -> None:
 
 
 @pytest.mark.timeout(1)
-def test_edit5(tmp_store) -> None:
+def test_edit5(tmp_store: UnionStore) -> None:
     # CLI runs and accepts the input without error
     run_script([REVERSE_ANSI_SEQUENCES[Keys.ControlC]])
     # Nothing was saved because ControlC was given
diff --git a/transport_data/tests/util/test_ckan.py b/transport_data/tests/util/test_ckan.py
index 399c7c3..0cefa0c 100644
--- a/transport_data/tests/util/test_ckan.py
+++ b/transport_data/tests/util/test_ckan.py
@@ -48,7 +48,7 @@ def obj(self, test_data_path) -> Package:
     def test_asdict(self, obj) -> None:
         obj.asdict()
 
-    def test_get_item(self, obj) -> None:
+    def test_get_item(self, obj: Package) -> None:
         g = obj.get_item("groups", 0)
         assert isinstance(g, Group)
 

From 241cad8ff1ced38a6c061d36f8a4ade1110cd17c Mon Sep 17 00:00:00 2001
From: Paul Natsuo Kishimoto <mail@paul.kishimoto.name>
Date: Mon, 26 Jan 2026 15:53:23 +0100
Subject: [PATCH 10/17] Add .util.sdmx.structure_from_csv()

---
 transport_data/util/sdmx.py | 74 +++++++++++++++++++++++++++++++++++++
 1 file changed, 74 insertions(+)

diff --git a/transport_data/util/sdmx.py b/transport_data/util/sdmx.py
index 97b89e8..bcd07a6 100644
--- a/transport_data/util/sdmx.py
+++ b/transport_data/util/sdmx.py
@@ -1,6 +1,7 @@
 """Utilities for :mod:`sdmx`."""
 
 import io
+import logging
 from dataclasses import fields
 from datetime import datetime
 from importlib.metadata import version
@@ -33,6 +34,9 @@ class MAKeywords(VAKeywords):
         maintainer: sdmx.model.common.Agency | None
 
 
+log = logging.getLogger(__name__)
+
+
 class CSVAdapter(io.RawIOBase):
     """Adapt CSV content from `path` into SDMX-CSV.
 
@@ -195,6 +199,76 @@ def read_csv(
     )
 
 
+def structure_from_csv(
+    path: "pathlib.Path",
+) -> tuple["sdmx.model.v30.Dataflow", dict]:
+    """Infer a data flow and arguments for :func:`.read_csv` from `path`.
+
+    Returns
+    -------
+    tuple
+        with 2 elements:
+
+        1. a :class:`sdmx.model.v30.Dataflow`.
+        2. :class:`dict`, a value for the :py:`adapt` argument of :func:`read_csv`.
+    """
+
+    import csv
+
+    from sdmx.model import v30
+
+    from transport_data.org import get_agencyscheme
+
+    # Parse the first line of the file as CSV. newline="" is required by the csv
+    # module; an empty file yields an empty header instead of leaking StopIteration
+    with open(path, "r", newline="") as f:
+        row = next(csv.reader(f), [])
+
+    dsd = v30.DataStructureDefinition(
+        id="DS_INFERRED",
+        description=f"Inferred from the contents of {path}",
+        maintainer=get_agencyscheme()["TDCI"],
+    )
+    adapt: dict[str, str] = {}
+
+    for column, default in (
+        ("STRUCTURE", "datastructure"),
+        ("STRUCTURE_ID", dsd.id),
+        ("ACTION", "I"),
+    ):
+        try:
+            row.remove(column)
+        except ValueError:
+            adapt[column.lower()] = default
+
+    # Assume the measure ID "OBS_VALUE"; ValueError if the header lacks this column
+    index_obs_value = row.index("OBS_VALUE")
+    dsd.measures.getdefault(id="OBS_VALUE")
+
+    # Preceding columns are dimensions
+    for dim_id in row[:index_obs_value]:
+        dsd.dimensions.getdefault(id=dim_id)
+
+    # Following columns are attributes
+    for attr_id in row[index_obs_value + 1 :]:
+        dsd.attributes.getdefault(id=attr_id)
+
+    log.info(
+        f"Inferred structure {dsd} with {len(dsd.dimensions)} dimension(s): "
+        + " ".join(d.id for d in dsd.dimensions)
+    )
+
+    # Construct a dataflow definition matching `dsd`
+    dfd = v30.Dataflow(
+        id="DF_INFERRED",
+        description=dsd.description,
+        maintainer=dsd.maintainer,
+        structure=dsd,
+    )
+
+    return dfd, adapt
+
+
 def fields_to_mda(
     cls: type,
     rs: "sdmx.model.v21.ReportStructure",

From 781a450745a1c89695b014017d4127972426df4b Mon Sep 17 00:00:00 2001
From: Paul Natsuo Kishimoto <mail@paul.kishimoto.name>
Date: Sun, 1 Feb 2026 15:26:01 +0100
Subject: [PATCH 11/17] Adjust oica.update_registry() per CI failures

- Use sub-paths from registry in .is_available() call.
- Handle ConnectionError/HTTPSConnectionPool max retries exceeded.
  This may be caused by repeated queries to incorrect URLs.
---
 transport_data/oica/__init__.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/transport_data/oica/__init__.py b/transport_data/oica/__init__.py
index 6d95ed6..6912b73 100644
--- a/transport_data/oica/__init__.py
+++ b/transport_data/oica/__init__.py
@@ -19,6 +19,7 @@
 from typing import TYPE_CHECKING
 
 import pandas as pd
+from requests.exceptions import ConnectionError
 
 from transport_data import hook
 from transport_data.util.pooch import Pooch
@@ -513,11 +514,13 @@ def update_registry() -> None:
 
     for dfd, _ in map(get_structures, ["PROD", "SALES", "STOCK"]):
         for file in filenames_for_dfd(dfd, fetch=False):
-            filename = file.name
+            filename = str(file)
             existing_hash = POOCH.registry.setdefault(filename, None)
 
-            if not POOCH.is_available(filename):
-                # File doesn't exist on the remote
+            try:
+                assert POOCH.is_available(filename)
+            except (AssertionError, ConnectionError):
+                # File doesn't exist on the remote, or request times out
                 POOCH.registry.pop(filename)
                 continue
 

From 1ac323de83fc3855cf56daffdde526adcfdca6ba Mon Sep 17 00:00:00 2001
From: Paul Natsuo Kishimoto <mail@paul.kishimoto.name>
Date: Tue, 28 Apr 2026 19:35:21 +0200
Subject: [PATCH 12/17] Fetch local copy of data files in check-record

---
 transport_data/cli/check_record.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/transport_data/cli/check_record.py b/transport_data/cli/check_record.py
index 25771e7..6a92f44 100644
--- a/transport_data/cli/check_record.py
+++ b/transport_data/cli/check_record.py
@@ -1,7 +1,6 @@
 from collections import defaultdict
 from collections.abc import MutableMapping
 from dataclasses import dataclass
-from pathlib import Path
 from typing import TYPE_CHECKING, Any
 
 import click
@@ -43,7 +42,8 @@ def check_package0(package: "Package") -> None:
     files = []
     suffix_count: MutableMapping[str, int] = defaultdict(lambda: 0)
     for resource in package.resources:
-        path = Path(resource.name)
+        # Fetch a local copy of the resource; return its path
+        path = resource.fetch()
         files.append(path)
         suffix_count[path.suffix.lower()] += 1
 

From 026723b66548b6781ac160e4b023a1b6e83474db Mon Sep 17 00:00:00 2001
From: Paul Natsuo Kishimoto <mail@paul.kishimoto.name>
Date: Tue, 28 Apr 2026 19:47:38 +0200
Subject: [PATCH 13/17] Order .util.sdmx alphabetically

---
 transport_data/util/sdmx.py | 120 ++++++++++++++++++------------------
 1 file changed, 60 insertions(+), 60 deletions(-)

diff --git a/transport_data/util/sdmx.py b/transport_data/util/sdmx.py
index bcd07a6..d625b44 100644
--- a/transport_data/util/sdmx.py
+++ b/transport_data/util/sdmx.py
@@ -144,6 +144,66 @@ def anno_generated(obj: "sdmx.model.common.AnnotableArtefact") -> None:
     )
 
 
+def fields_to_mda(
+    cls: type,
+    rs: "sdmx.model.v21.ReportStructure",
+    cs: "sdmx.model.common.ConceptScheme | None" = None,
+) -> None:
+    """Populate `rs` with MetadataAttributes corresponding to dataclass fields of `cls`.
+
+    Examples
+    --------
+    >>> @dataclass
+    ... class MDSExample:
+    ...     #: Foo
+    ...     #:
+    ...     #: Description of Foo.
+    ...     foo: str
+    ...
+    ...     bar: int
+    ...
+    ... fields_to_mda(MDSExample)
+
+    In this example, two metadata attributes will be added to `rs`:
+
+    1. With id="foo" and an annotation with id="data-type" and text="<class 'str'>".
+       The concept identity for the metadata attribute will also have id="foo",
+       name="Foo", and description="Description of Foo."
+    2. With id="bar" and an annotation with id="data-type" and text="<class 'int'>".
+    """
+    from sdmx.model import common, v21
+
+    # Assemble info about the dataclass fields of `cls`
+    field_info = {f.name: (f, "") for f in fields(cls)}
+
+    # Tokenize the source code of `cls` and update `field_info` with the Sphinx-style
+    # comments that precede each of the fields
+    #
+    # Thanks to https://davidism.com/attribute-docstrings/ and
+    # https://stackoverflow.com/a/7457047
+    comments = []  # Accumulate comment tokens
+    for tok in generate_tokens(StringIO(dedent(getsource(cls))).readline):
+        if tok.type == COMMENT:
+            comments.append(tok.string.lstrip("#: "))  # Store
+        elif tok.type == NAME and tok.string in field_info and len(comments):
+            # Reached the definition of a field, and there are accumulated comments
+            field_info[tok.string] = (field_info[tok.string][0], "\n".join(comments))
+            comments = []  # Reset
+
+    cs = cs or common.ConceptScheme()
+
+    for id_, (f, docstring) in field_info.items():
+        # Split the docstring, if any, to a name and optional description
+        name, _, desc = docstring.partition("\n\n")
+
+        # Construct the ConceptIdentity and add to `cs`
+        ci = cs.setdefault(id=id_, name=name or None, description=desc or None)
+        # Construct the data type annotation
+        type_anno = v21.Annotation(id="data-type", text={"zxx": repr(f.type)})
+        # Add the metadata attribute to the report structure
+        rs.getdefault(id=id_, concept_identity=ci, annotations=[type_anno])
+
+
 def make_obs(
     row: "pd.Series", dsd: "sdmx.model.v21.DataStructureDefinition"
 ) -> "sdmx.model.v21.Observation":
@@ -267,63 +327,3 @@ def structure_from_csv(
     )
 
     return dfd, adapt
-
-
-def fields_to_mda(
-    cls: type,
-    rs: "sdmx.model.v21.ReportStructure",
-    cs: "sdmx.model.common.ConceptScheme | None" = None,
-) -> None:
-    """Populate `rs` with MetadataAttributes corresponding to dataclass fields of `cls`.
-
-    Examples
-    --------
-    >>> @dataclass
-    ... class MDSExample:
-    ...     #: Foo
-    ...     #:
-    ...     #: Description of Foo.
-    ...     foo: str
-    ...
-    ...     bar: int
-    ...
-    ... fields_to_mda(MDSExample)
-
-    In this example, two metadata attributes will be added to `rs`:
-
-    1. With id="foo" and an annotation with id="data-type" and text="<class 'str'>".
-       The concept identity for the metadata attribute will also have id="foo",
-       name="Foo", and description="Description of Foo."
-    2. With id="bar" and an annotation with id="data-type" and text="<class 'int'>".
-    """
-    from sdmx.model import common, v21
-
-    # Assemble info about the dataclass fields of `cls`
-    field_info = {f.name: (f, "") for f in fields(cls)}
-
-    # Tokenize the source code of `cls` and update `field_info` with the Sphinx-style
-    # comments that precede each of the fields
-    #
-    # Thanks to https://davidism.com/attribute-docstrings/ and
-    # https://stackoverflow.com/a/7457047
-    comments = []  # Accumulate comment tokens
-    for tok in generate_tokens(StringIO(dedent(getsource(cls))).readline):
-        if tok.type == COMMENT:
-            comments.append(tok.string.lstrip("#: "))  # Store
-        elif tok.type == NAME and tok.string in field_info and len(comments):
-            # Reached the definition of a field, and there are accumulated comments
-            field_info[tok.string] = (field_info[tok.string][0], "\n".join(comments))
-            comments = []  # Reset
-
-    cs = cs or common.ConceptScheme()
-
-    for id_, (f, docstring) in field_info.items():
-        # Split the docstring, if any, to a name and optional description
-        name, _, desc = docstring.partition("\n\n")
-
-        # Construct the ConceptIdentity and add to `cs`
-        ci = cs.setdefault(id=id_, name=name or None, description=desc or None)
-        # Construct the data type annotation
-        type_anno = v21.Annotation(id="data-type", text={"zxx": repr(f.type)})
-        # Add the metadata attribute to the report structure
-        rs.getdefault(id=id_, concept_identity=ci, annotations=[type_anno])

From 755618f61ad54ce86b26f6003b8fcaa61921a2ba Mon Sep 17 00:00:00 2001
From: Paul Natsuo Kishimoto <mail@paul.kishimoto.name>
Date: Tue, 28 Apr 2026 20:44:43 +0200
Subject: [PATCH 14/17] Check readable SDMX-CSV in .check_record

---
 transport_data/cli/check_record.py | 38 +++++++++++++++++++++++-------
 1 file changed, 30 insertions(+), 8 deletions(-)

diff --git a/transport_data/cli/check_record.py b/transport_data/cli/check_record.py
index 6a92f44..d14a2c3 100644
--- a/transport_data/cli/check_record.py
+++ b/transport_data/cli/check_record.py
@@ -6,6 +6,10 @@
 import click
 
 if TYPE_CHECKING:
+    from pathlib import Path
+
+    from sdmx.model.v30 import Dataflow
+
     from transport_data.util.ckan import Package
 
 
@@ -38,14 +42,28 @@ def main(id: str) -> None:
 
 def check_package0(package: "Package") -> None:
     """Print some checks about a `package`."""
-    # Convert resource file names to Path instances; count suffixes
-    files = []
+    from transport_data.util.sdmx import structure_from_csv
+
+    # Mapping from Path instances to args for read_csv() or None
+    file_args: dict["Path", tuple["Dataflow", dict] | None] = {}
+
+    # Counts of suffixes
     suffix_count: MutableMapping[str, int] = defaultdict(lambda: 0)
     for resource in package.resources:
         # Fetch a local copy of the resource; return its path
         path = resource.fetch()
-        files.append(path)
-        suffix_count[path.suffix.lower()] += 1
+
+        # Count the file suffix
+        suffix_lower = path.suffix.lower()
+        suffix_count[suffix_lower] += 1
+
+        try:
+            # Infer a data structure definition from the file
+            file_args[path] = structure_from_csv(path)
+        except Exception:
+            # Not a CSV file or cannot infer a DSD
+            file_args[path] = None
+            continue
 
     @dataclass
     class Check:
@@ -74,15 +92,19 @@ def __str__(self) -> str:
         "Correct category assigned",
         package.tdc_category in {"tdc_formatted", "tdc_harmonized"},
     )
-    c5 = Check("CSV file(s) are in SDMX-CSV format (not implemented yet)", True)
+    c5 = Check("≥1 CSV file is in SDMX-CSV format", any(map(bool, file_args.values())))
     c6 = Check("Overall", "YES" if (c3.value and c4.value and c5.value) else "NO")
 
     lines.extend(f"- {check}" for check in (c3, c4, c5, c6))
 
     lines.extend(["", "Criteria for a TDC Harmonized record—all of the above, plus:"])
-    c7 = Check("Correct category assigned", package.tdc_category == "tdc_harmonized")
-    c8 = Check("Overall", "YES" if c7.value else "NO")
+    c7 = Check(
+        "Data structure dimension IDs are all in TDCI:CS_CONCEPTS (not implemented yet)",
+        True,
+    )
+    c8 = Check("Correct category assigned", package.tdc_category == "tdc_harmonized")
+    c9 = Check("Overall", "YES" if (c7.value and c8.value) else "NO")
 
-    lines.extend(f"- {check}" for check in (c7, c8))
+    lines.extend(f"- {check}" for check in (c7, c8, c9))
 
     print(*lines, sep="\n")

From 1ba4de8aebf2da57e219103951d5c47f061d32cc Mon Sep 17 00:00:00 2001
From: Paul Natsuo Kishimoto <mail@paul.kishimoto.name>
Date: Tue, 28 Apr 2026 23:02:59 +0200
Subject: [PATCH 15/17] Test check_package0

---
 transport_data/tests/test_cli.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/transport_data/tests/test_cli.py b/transport_data/tests/test_cli.py
index 282cb98..50c5ee0 100644
--- a/transport_data/tests/test_cli.py
+++ b/transport_data/tests/test_cli.py
@@ -5,9 +5,17 @@
 from prompt_toolkit.input.ansi_escape_sequences import REVERSE_ANSI_SEQUENCES
 from prompt_toolkit.keys import Keys
 
+from transport_data.cli.check_record import check_package0
 from transport_data.cli.interactive import Editor
 from transport_data.store import UnionStore
 from transport_data.testing import CliRunner, ember_dfd
+from transport_data.util.ckan import Package
+
+
+@pytest.fixture
+def package(test_data_path: Path) -> Package:
+    """A :class:`.Package` from a test specimen."""
+    return Package.from_file(test_data_path.joinpath("ckan", "package.json"))
 
 
 @pytest.mark.parametrize(
@@ -112,6 +120,12 @@ def test_check_file2(tdc_cli: CliRunner, tmp_path: Path) -> None:
     assert "Unsupported file extension" in result.output
 
 
+def test_check_package0(package: Package) -> None:
+    # Function runs
+    check_package0(package)
+    # TODO extend with further assertions about stdout
+
+
 def run_script(lines: list[str]) -> None:
     """Create a contained instance of :class:`.Editor` and feed it `lines`."""
     from prompt_toolkit.application import create_app_session

From fc9945adbc7e94e5d50e1afb6852afe4685f9dcb Mon Sep 17 00:00:00 2001
From: Paul Natsuo Kishimoto <mail@paul.kishimoto.name>
Date: Tue, 28 Apr 2026 23:16:49 +0200
Subject: [PATCH 16/17] Handle hashlib.file_digest() not in Python 3.10

---
 transport_data/util/ckan.py | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/transport_data/util/ckan.py b/transport_data/util/ckan.py
index 50ab61a..84a6ac6 100644
--- a/transport_data/util/ckan.py
+++ b/transport_data/util/ckan.py
@@ -223,7 +223,6 @@ def fetch(self, max_size: int = 10_000_000) -> Path:
         AssertionError
             if the size of the file is equal to or greater than `max_size`.
         """
-        from hashlib import file_digest
 
         import requests
 
@@ -245,19 +244,24 @@ def fetch(self, max_size: int = 10_000_000) -> Path:
 
         file_hash = ""
         try:
+            from hashlib import file_digest
+
             # Check existence and hash of local file
             with open(target, "rb") as fd:
                 file_hash = file_digest(fd, "md5").hexdigest()
 
             # Allow that self.hash is empty; don't force download in this case
             assert self.hash in ("", file_hash)
-        except (AssertionError, FileNotFoundError) as e:
+        except (AssertionError, FileNotFoundError, ImportError) as e:
             # Hash does not match or file does not exist
-            if isinstance(e, AssertionError):
-                print(
-                    f"Hash {file_hash} of {target} does not match expected {self.hash};"
-                    " will re-download"
-                )
+            match e:
+                case AssertionError():
+                    print(
+                        f"Hash {file_hash} of {target} does not match expected "
+                        f"{self.hash}; will re-download"
+                    )
+                case ImportError():
+                    print("hashlib.file_digest() not available in Python 3.10")
             response = requests.get(self.url, stream=True)
             with open(target, "wb") as fd:
                 for chunk in response.iter_content():

From 438982fbf2803782172e060db83d2264288e6f60 Mon Sep 17 00:00:00 2001
From: Paul Natsuo Kishimoto <mail@paul.kishimoto.name>
Date: Tue, 28 Apr 2026 19:46:07 +0200
Subject: [PATCH 17/17] Add #58 to doc/whatsnew

---
 doc/whatsnew.rst | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/doc/whatsnew.rst b/doc/whatsnew.rst
index a7462a8..62f66cc 100644
--- a/doc/whatsnew.rst
+++ b/doc/whatsnew.rst
@@ -6,12 +6,28 @@ Next release
 
 - :mod:`transport_data` supports and is tested with
   `Pandas 3.0.0 <https://pandas.pydata.org/pandas-docs/stable/whatsnew/v3.0.0.html>`_,
-   released 2026-01-21 (:pull:`59`).
+  released 2026-01-21 (:pull:`59`).
 - Update for pycountry 26.2.16, released 2026-02-17 (:pull:`61`).
-- :program:`tdc` command-line interface warns but does not error
-  if some modules/commands are not available (:pull:`63`).
-- New CLI command :program:`tdc org qr` (:pull:`63`).
 - New HOWTO :doc:`Get involved <howto/get-involved>` (:pull:`62`).
+- Improvements to the :program:`tdc` command-line interface (CLI):
+
+  - :program:`tdc` warns but does not error
+    if some modules/commands are not available (:pull:`63`).
+  - New command :program:`tdc org qr` (:pull:`63`).
+  - New command :program:`tdc check-record` (:pull:`58`).
+  - Rename command :program:`tdc check` to :program:`tdc check-file` (:pull:`58`).
+
+- Improve :mod:`.util.ckan` (:pull:`58`):
+
+  - New method :meth:`.Package.portal_url`.
+  - New method :meth:`.Resource.fetch` to fetch and cache files.
+  - Add type hints for commonly-used attributes of :class:`.Package`, :class:`.Resource`.
+
+- Improve utility code (:pull:`58`):
+
+  - :any:`transport_data.hook` is available as a top-level import for marking hook implementations.
+  - New hook :func:`~.util.hooks.cli_modules`.
+  - New function :func:`.sdmx.structure_from_csv`.
 
 v26.1.13
 ========