Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 20 additions & 4 deletions doc/whatsnew.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,28 @@ Next release

- :mod:`transport_data` supports and is tested with
`Pandas 3.0.0 <https://pandas.pydata.org/pandas-docs/stable/whatsnew/v3.0.0.html>`_,
released 2026-01-21 (:pull:`59`).
released 2026-01-21 (:pull:`59`).
- Update for pycountry 26.2.16, released 2026-02-17 (:pull:`61`).
- :program:`tdc` command-line interface warns but does not error
if some modules/commands are not available (:pull:`63`).
- New CLI command :program:`tdc org qr` (:pull:`63`).
- New HOWTO :doc:`Get involved <howto/get-involved>` (:pull:`62`).
- Improvements to the :program:`tdc` command-line interface (CLI):

- :program:`tdc` warns but does not error
if some modules/commands are not available (:pull:`63`).
- New command :program:`tdc org qr` (:pull:`63`).
- New command :program:`tdc check-record` (:pull:`58`).
- Rename command :program:`tdc check` to :program:`tdc check-file` (:pull:`58`).

- Improve :mod:`.util.ckan` (:pull:`58`):

- New method :meth:`.Package.portal_url`.
- New method :meth:`.Resource.fetch` to fetch and cache files.
- Add type hints for commonly-used attributes of :class:`.Package`, :class:`.Resource`.

- Improve utility code (:pull:`58`):

- :any:`transport_data.hook` is available as a top-level import for marking hook implementations.
- New hook :func:`~.util.hooks.cli_modules`.
- New function :func:`.sdmx.structure_from_csv`.

v26.1.13
========
Expand Down
11 changes: 11 additions & 0 deletions transport_data/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,20 @@
import logging
import sys
from warnings import filterwarnings

from .config import Config
from .store import UnionStore
from .util.pluggy import hookimpl as hook
from .util.pluggy import register_internal

# Names re-exported as the package's public API. `hook` is the pluggy
# `hookimpl` marker imported above under a shorter alias.
__all__ = ["CONFIG", "STORE", "hook"]

# Silence the UserWarning whose message begins with "pkg_resources is deprecated"
# when it is issued from the module "ckanapi.version".
filterwarnings(
    action="ignore",
    message="pkg_resources is deprecated",
    category=UserWarning,
    module="ckanapi.version",
)

# Package-level logger: INFO and above, written to standard output.
log = logging.getLogger(__name__)
log.setLevel(logging.INFO)
log.addHandler(logging.StreamHandler(stream=sys.stdout))
Expand All @@ -18,6 +28,7 @@
# Register plugin hooks
register_internal(
"ato",
"estat",
"iamc",
"ipcc",
"iso",
Expand Down
11 changes: 8 additions & 3 deletions transport_data/ato/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import sdmx.model.v21 as m
from sdmx.model import common, v21

from transport_data.util.pluggy import hookimpl
from transport_data import hook
from transport_data.util.pooch import Pooch
from transport_data.util.sdmx import anno_generated

Expand Down Expand Up @@ -373,7 +373,12 @@ def format_data_provider(value: str) -> str:
return value + "—republished by ATO"


@hookimpl
@hook
def cli_modules():
    """Return the dotted name of the ``cli`` submodule of this package."""
    module_name = __name__ + ".cli"
    return module_name


@hook
def get_agencies():
a = m.Agency(
id="ATO",
Expand All @@ -392,7 +397,7 @@ def get_agencies():
return (a,)


@hookimpl
@hook
def provides():
return (
"Codelist=TDCI:CL_ATO_ECONOMY",
Expand Down
158 changes: 11 additions & 147 deletions transport_data/cli/__init__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
"""Command-line interface."""

from importlib import import_module
from pathlib import Path
from itertools import chain

import click

from transport_data import CONFIG # noqa: F401
from transport_data.util.pluggy import pm


@click.group("tdc")
Expand All @@ -16,158 +17,21 @@ def main():
#: List of (sub)modules that define CLI (sub)commands. Each should contain a
#: @click.command() named "main".
MODULES_WITH_CLI = [
"ato.cli",
"config",
"cli.interactive",
"estat",
"iamc.cli",
"iso.cli",
"itdp.cli",
"jrc.cli",
"oica.cli",
"org.cli",
"org.ckan",
"proto.cli",
"store",
"testing.cli",
"transport_data.config",
"transport_data.cli.interactive",
"transport_data.cli.check_file",
"transport_data.cli.check_record",
"transport_data.org.ckan",
"transport_data.proto.cli",
"transport_data.store",
"transport_data.testing.cli",
]


# Add commands from each module that defines them
for name in MODULES_WITH_CLI:
for full_name in chain(MODULES_WITH_CLI, pm.hook.cli_modules()):
try:
full_name = f"transport_data.{name}"
module = import_module(full_name)
except ImportError as e:
print(f"{full_name} commands not available: {e.args[0]}")
else:
main.add_command(getattr(module, "main"))


@main.command()
@click.argument("structure_urn", metavar="URN")
@click.argument(
    "path", metavar="FILE", type=click.Path(exists=True, dir_okay=False, path_type=Path)
)
@click.option("--sheets", help="Sheet(s) in .xlsx FILE to check.")
@click.option("-v", "--verbose", count=True, help="Increase verbosity.")
@click.option("--structure", help="Value for STRUCTURE field.")
@click.option("--structure-id", "structure_id", help="Value for STRUCTURE_ID field.")
@click.option("--action", default="I", help="Value for ACTION field.")
def check(structure_urn: str, path: Path, sheets, verbose, **options):  # noqa: C901
    """Check that FILE can be read as SDMX-CSV.

    URN is the shortened SDMX URN of a data flow or data structure definition that
    describes the data in FILE, for example "Dataflow=PROVIDER:EXAMPLE(1.2.3)" (the
    version is not required). This artefact must already be present in the local store.

    FILE may have a ".csv" or ".xlsx" suffix. In the latter case, it is converted to a
    temporary set of CSV files. If --sheets are given, only these worksheets are
    converted and checked.

    If not given, --structure and --structure-id are inferred from URN.
    """
    # NB the docstring above is shown by click as the command's --help text.
    # Imports are local to the command rather than at module level.
    from traceback import format_exception

    import sdmx
    import sdmx.urn
    from sdmx.model import common

    from transport_data import STORE
    from transport_data.util.sdmx import read_csv

    # Pieces of any error message
    message = []

    # Handle `structure_urn`: retrieve a data structure that describes the data
    try:
        structure = STORE.get(structure_urn)
    except Exception:
        # Do not fail yet: the problem is reported together with any read failure
        message.append(f"Structure {structure_urn!r} could not be loaded")
        structure = structure_cls = structure_id = None
    else:
        structure_cls = type(structure).__name__.lower().replace("definition", "")
        structure_id = sdmx.urn.shorten(structure.urn).split("=")[-1]

    if isinstance(structure, common.BaseDataflow):
        # Also retrieve the data structure definition
        STORE.resolve(structure, "structure")
        assert len(structure.structure.dimensions)

    # Construct keyword arguments for CSVAdapter
    # TODO Check if this works for full SDMX-CSV
    adapt = {
        "structure": options.pop("structure") or structure_cls,
        "structure_id": options.pop("structure_id") or structure_id,
        "action": options.pop("action"),
    }

    # Handle `path`; construct a sequence of (label, path) of CSV files to be processed
    label_path = []

    if path.suffix == ".csv":
        label_path.append((f"File: {path}", path))
    elif path.suffix == ".xlsx":
        # Explode an Excel file into one or more CSV files in a temporary directory
        import pandas as pd
        from platformdirs import user_cache_path

        # Create a cache directory
        cache_dir = user_cache_path("transport-data").joinpath("check")
        cache_dir.mkdir(parents=True, exist_ok=True)

        # Explode Excel file into one CSV file per sheet. The context manager closes
        # the workbook even if reading a sheet or writing a CSV file fails.
        with pd.ExcelFile(path) as ef:
            _sheets = set(sheets.split(",")) if sheets else set(ef.sheet_names)
            # Iterate over the workbook's own names to keep the sheet order
            for sheet_name in filter(_sheets.__contains__, ef.sheet_names):
                # Construct a temporary path
                csv_path = cache_dir.joinpath(f"{path.stem}_xlsx_{sheet_name}.csv")
                label_path.append((f"File: {path}\nSheet: {sheet_name}", csv_path))
                # Read the sheet from the ExcelFile and write to a CSV file
                pd.read_excel(ef, sheet_name).to_csv(csv_path, index=False)
    else:
        raise click.UsageError(f"Unsupported file extension: {path.suffix!r}")

    # Process `label_path`
    for label, p in label_path:
        print(f"\n{label}")

        # Read the file into an SDMX data message
        try:
            dm = read_csv(p, structure, adapt)
        except Exception as e:
            # Exception args are not guaranteed to be strings; convert them so that
            # building the report cannot itself raise TypeError
            message.append(
                f"read failed with\n{type(e).__name__}: {' '.join(map(str, e.args))}"
            )

            if e.args and "line 1" in str(e.args[0]):
                message.append(
                    "Hint: try giving --structure= or --structure-id argument(s) to "
                    "adapt to SDMX-CSV."
                )
            elif structure is None:
                # The missing structure was already reported above
                pass
            else:  # pragma: no cover
                message.append("\n".join(format_exception(e)))

            print("")
            raise click.ClickException("\n\n".join(message)) from e

        # Show the contents of the data message
        dfd_urn = sdmx.urn.shorten(sdmx.urn.make(dm.dataflow))
        print(f"\n{len(dm.data)} data set(s) in: {dfd_urn!s}")

        # Show information about each data set
        for i, ds in enumerate(dm.data):
            print(f"\nData set {i}: action={ds.action}")

            # Show the data set contents or summary, according to verbosity
            if verbose == 0:
                print(f"{len(ds)} observations")
            elif verbose == 1:
                print(sdmx.to_pandas(ds))
            else:
                print(sdmx.to_pandas(ds).to_string())
Loading
Loading