# Daily CENACE benchmark refresh: download the newest publication, score the
# forecasts made for it, then forecast the following day.
name: CENACE Data

on:
  schedule:
    - cron: "30 6 * * *"
  workflow_dispatch:
    inputs:
      execution_date:
        description: "Execution date YYYY-MM-DD. Target downloaded date is execution_date + 1 day."
        required: false
        type: string

jobs:
  update-cenace-data:
    runs-on: ubuntu-latest

    env:
      MODAL_TOKEN_ID: ${{ secrets.MODAL_TOKEN_ID }}
      MODAL_TOKEN_SECRET: ${{ secrets.MODAL_TOKEN_SECRET }}
      MODAL_ENVIRONMENT: ${{ secrets.MODAL_ENVIRONMENT }}

    steps:
      - uses: actions/checkout@v4

      - name: Install uv
        uses: astral-sh/setup-uv@v6

      - name: Set benchmark dates
        id: dates
        env:
          # Pass the free-form input through the environment instead of
          # expanding it inside the script text, so it is never parsed as shell.
          INPUT_EXECUTION_DATE: ${{ inputs.execution_date }}
        run: |
          set -euo pipefail

          if [ -n "$INPUT_EXECUTION_DATE" ]; then
            execution_date="$INPUT_EXECUTION_DATE"
          else
            execution_date="$(date -u -d 'yesterday' +%F)"
          fi

          # The value is reused unquoted on the make command lines below, so
          # reject anything that is not a plain YYYY-MM-DD date.
          date_pattern='^[0-9]{4}-[0-9]{2}-[0-9]{2}$'
          if ! [[ "$execution_date" =~ $date_pattern ]]; then
            echo "::error::execution_date must be YYYY-MM-DD, got '$execution_date'"
            exit 1
          fi

          target_date="$(date -u -d "$execution_date + 1 day" +%F)"

          echo "execution_date=$execution_date" >> "$GITHUB_OUTPUT"
          echo "target_date=$target_date" >> "$GITHUB_OUTPUT"
          echo "evaluation_cutoff=${execution_date}T23:00:00" >> "$GITHUB_OUTPUT"
          echo "forecast_cutoff=${target_date}T23:00:00" >> "$GITHUB_OUTPUT"

      - name: Update CENACE data
        run: make update-cenace-data EXECUTION_DATE=${{ steps.dates.outputs.execution_date }}

      - name: Evaluate previous CENACE forecasts
        # Evaluation failures must not block the next forecast.
        continue-on-error: true
        run: make update-cenace-evaluate CUTOFF=${{ steps.dates.outputs.evaluation_cutoff }}

      - name: Forecast next CENACE day
        run: make update-cenace-forecast CUTOFF=${{ steps.dates.outputs.forecast_cutoff }}
src.evaluation.gh_archive.modal_app::build_leaderboard + +## CENACE Data + +.PHONY: update-cenace-data +update-cenace-data: + $(MODAL) src.data.cenace.modal_app --execution-date $(EXECUTION_DATE) + +## CENACE Forecast/Evaluation + +.PHONY: update-cenace-forecast +update-cenace-forecast: + $(MODAL) src.forecast.cenace.modal_app::forecast --cutoff $(CUTOFF) + +.PHONY: update-cenace-evaluate +update-cenace-evaluate: + $(MODAL) src.forecast.cenace.modal_app::evaluate --cutoff $(CUTOFF) diff --git a/pyproject.toml b/pyproject.toml index 79b05ca..3c251a6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,6 +11,7 @@ dev = [ [project] dependencies = [ + "beautifulsoup4>=4.15.0", "boto3>=1.42.24", "duckdb>=1.4.3", "freezegun>=1.5.5", diff --git a/src/data/cenace/__init__.py b/src/data/cenace/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/data/cenace/aggregate/core.py b/src/data/cenace/aggregate/core.py new file mode 100644 index 0000000..cf10f55 --- /dev/null +++ b/src/data/cenace/aggregate/core.py @@ -0,0 +1,71 @@ +from __future__ import annotations + +import argparse +import shutil +import tempfile +from pathlib import Path + +import pandas as pd + +from src.data.cenace.config import PROCESSED_CSV, PROCESSED_EVENTS_HOURLY_DIR + +INPUT_CSV = PROCESSED_CSV +OUTPUT_ROOT = PROCESSED_EVENTS_HOURLY_DIR + + +def write_hourly_partitions( + df: pd.DataFrame, + output_root: Path = OUTPUT_ROOT, +) -> int: + df = df.copy() + + df["ds"] = pd.to_datetime(df["ds"], errors="coerce") + df["y"] = pd.to_numeric(df["y"], errors="coerce") + + df = df.dropna(subset=["unique_id", "ds", "y"]).copy() + df = df.sort_values(["unique_id", "ds"]).drop_duplicates(["unique_id", "ds"]) + + df["year"] = df["ds"].dt.year + df["month"] = df["ds"].dt.month + df["day"] = df["ds"].dt.day + + output_root.mkdir(parents=True, exist_ok=True) + + n_written = 0 + for (year, month, day), part in df.groupby(["year", "month", "day"], sort=True): + part_dir = ( + output_root / 
f"year={year:04d}" / f"month={month:02d}" / f"day={day:02d}" + ) + part_dir.mkdir(parents=True, exist_ok=True) + + out_path = part_dir / "series.parquet" + with tempfile.NamedTemporaryFile(suffix=".parquet") as tmp: + part[["unique_id", "ds", "y"]].to_parquet(tmp.name, index=False) + shutil.copyfile(tmp.name, out_path) + + print(f"Saved: {out_path}") + n_written += 1 + + return n_written + + +def build_hourly_partitions( + input_csv: Path = INPUT_CSV, + output_root: Path = OUTPUT_ROOT, +) -> int: + df = pd.read_csv(input_csv) + return write_hourly_partitions(df, output_root) + + +def main() -> None: + parser = argparse.ArgumentParser() + parser.add_argument("--input-csv", type=Path, default=INPUT_CSV) + parser.add_argument("--output-root", type=Path, default=OUTPUT_ROOT) + args = parser.parse_args() + + n_written = build_hourly_partitions(args.input_csv, args.output_root) + print(f"\nDone. Wrote {n_written} daily partitions.") + + +if __name__ == "__main__": + main() diff --git a/src/data/cenace/config.py b/src/data/cenace/config.py new file mode 100644 index 0000000..510c4bb --- /dev/null +++ b/src/data/cenace/config.py @@ -0,0 +1,18 @@ +from __future__ import annotations + +import os +from pathlib import Path + +ROOT = Path(__file__).resolve().parents[3] + +DATA_ROOT = Path(os.environ.get("CENACE_DATA_ROOT", ROOT / "data" / "cenace")) + +RAW_DIR = DATA_ROOT / "raw" +TMP_DIR = DATA_ROOT / "tmp" + +PROCESSED_DIR = DATA_ROOT / "processed" +PROCESSED_CSV = PROCESSED_DIR / "cenace.csv" + +PROCESSED_EVENTS_HOURLY_DIR = DATA_ROOT / "processed-events" / "hourly" +FORECASTS_HOURLY_DIR = DATA_ROOT / "forecasts" / "hourly" +EVALUATIONS_HOURLY_DIR = DATA_ROOT / "evaluations" / "hourly" diff --git a/src/data/cenace/extract/core.py b/src/data/cenace/extract/core.py new file mode 100644 index 0000000..273dcdf --- /dev/null +++ b/src/data/cenace/extract/core.py @@ -0,0 +1,169 @@ +from __future__ import annotations + +import argparse +import zipfile +from datetime import 
def download_and_extract(date: datetime, raw_dir: Path, tmp_dir: Path) -> bool:
    """Download the CENACE ZIP report for ``date`` and unpack it into ``tmp_dir``.

    A fresh copy of the form's hidden ASP.NET fields is fetched first and sent
    back together with the single-day period selection.

    Args:
        date: Day to request; used for both the start and the end of the period.
        raw_dir: Directory that receives the archive as ``YYYYMMDD.zip``.
        tmp_dir: Directory the archive members are extracted into.

    Returns:
        ``True`` when the archive was extracted and stored, ``False`` when the
        response is smaller than 10 000 bytes (treated as "not published").

    Raises:
        requests.HTTPError: If the POST returns an error status.
        zipfile.BadZipFile: If the response is large enough but is not a ZIP
            archive. Nothing is written to ``raw_dir`` in that case.
    """
    import io  # local import: only needed to inspect the payload in memory

    date_str = date.strftime("%d/%m/%Y")
    period_str = f"{date_str} - {date_str}"

    state = get_form_state()

    payload = {
        "ctl00$ContentPlaceHolder1$ddlReporte": "362,325",
        "ctl00$ContentPlaceHolder1$ddlPeriodicidad": "D",
        "ctl00$ContentPlaceHolder1$ddlSistema": "SIN",
        "ctl00$ContentPlaceHolder1$txtPeriodo": period_str,
        "ctl00$ContentPlaceHolder1$hdfStartDateSelected": date_str,
        "ctl00$ContentPlaceHolder1$hdfEndDateSelected": date_str,
        "ctl00$ContentPlaceHolder1$btnDescargarZIP": "Descargar ZIP",
        "__VIEWSTATE": state["__VIEWSTATE"],
        "__VIEWSTATEGENERATOR": state["__VIEWSTATEGENERATOR"],
        "__VIEWSTATEENCRYPTED": state["__VIEWSTATEENCRYPTED"],
        "__EVENTVALIDATION": state["__EVENTVALIDATION"],
        "__EVENTTARGET": "",
        "__EVENTARGUMENT": "",
    }

    # (connect, read) timeout so a stalled server cannot hang the job forever.
    r = session.post(URL, data=payload, headers=HEADERS, timeout=(10, 120))
    r.raise_for_status()

    content = r.content
    size = len(content)
    print(f"{date_str} | {size} bytes")

    if size < 10000:
        print(f"Skipping {date_str}: file not published or response too small")
        return False

    # Validate before touching raw_dir: callers treat an existing
    # ``YYYYMMDD.zip`` as "already downloaded", so a non-ZIP payload saved
    # there would be skipped on every later run.
    if not zipfile.is_zipfile(io.BytesIO(content)):
        raise zipfile.BadZipFile(
            f"CENACE response for {date_str} is not a ZIP archive ({size} bytes)"
        )

    raw_dir.mkdir(parents=True, exist_ok=True)
    tmp_dir.mkdir(parents=True, exist_ok=True)

    with zipfile.ZipFile(io.BytesIO(content)) as z:
        z.extractall(tmp_dir)

    # Persist the archive only once extraction has succeeded.
    raw_zip_path(date, raw_dir).write_bytes(content)

    return True
main() -> None: + args = parse_args() + base_dir = Path(args.out).resolve() + + if args.start_date: + start_date = datetime.strptime(args.start_date, "%Y-%m-%d") + if args.end_date: + end_date = datetime.strptime(args.end_date, "%Y-%m-%d") + elif args.execution_date: + end_date = target_date_for_execution( + datetime.strptime(args.execution_date, "%Y-%m-%d") + ) + else: + end_date = datetime.today() + backfill_missing(start_date=start_date, end_date=end_date, base_dir=base_dir) + return + + if args.execution_date: + execution_date = datetime.strptime(args.execution_date, "%Y-%m-%d") + ok = run_execution_date(execution_date=execution_date, base_dir=base_dir) + if not ok: + print("No new CENACE publication detected; stopping cleanly") + return + + raise ValueError("Provide either --start-date or --execution-date") + + +if __name__ == "__main__": + main() diff --git a/src/data/cenace/modal_app.py b/src/data/cenace/modal_app.py new file mode 100644 index 0000000..cfbee41 --- /dev/null +++ b/src/data/cenace/modal_app.py @@ -0,0 +1,50 @@ +from __future__ import annotations + +import modal + +CENACE_DATA_ROOT = "/s3-bucket/v0.1.0/cenace" + +app = modal.App(name="timecopilot-cenace-data") +image = ( + modal.Image.debian_slim(python_version="3.11") + .pip_install("uv") + .add_local_file("pyproject.toml", "/root/pyproject.toml", copy=True) + .add_local_file(".python-version", "/root/.python-version", copy=True) + .add_local_file("uv.lock", "/root/uv.lock", copy=True) + .workdir("/root") + .run_commands("uv pip install . 
--system --compile-bytecode") +) + +secret = modal.Secret.from_name( + "aws-secret", + required_keys=["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"], +) + +volume = { + "/s3-bucket": modal.CloudBucketMount( + bucket_name="impermanent-benchmark", + secret=secret, + ) +} + + +@app.function( + image=image, + volumes=volume, + timeout=60 * 15, +) +def update_cenace_execution_date(execution_date: str) -> int: + import os + from datetime import datetime + + os.environ["CENACE_DATA_ROOT"] = CENACE_DATA_ROOT + + from src.data.cenace.pipeline import update_execution_date + + return update_execution_date(datetime.fromisoformat(execution_date)) + + +@app.local_entrypoint() +def update(execution_date: str): + n_written = update_cenace_execution_date.remote(execution_date) + print(f"Done. Wrote {n_written} CENACE daily partitions.") diff --git a/src/data/cenace/pipeline.py b/src/data/cenace/pipeline.py new file mode 100644 index 0000000..b8c00bc --- /dev/null +++ b/src/data/cenace/pipeline.py @@ -0,0 +1,55 @@ +from __future__ import annotations + +import argparse +import shutil +import tempfile +from datetime import datetime +from pathlib import Path + +from src.data.cenace.aggregate.core import write_hourly_partitions +from src.data.cenace.config import DATA_ROOT, PROCESSED_EVENTS_HOURLY_DIR, RAW_DIR +from src.data.cenace.extract.core import run_execution_date, target_date_for_execution +from src.data.cenace.transform.core import transform_raw_files + + +def update_execution_date( + execution_date: datetime, + data_root: Path = DATA_ROOT, + raw_dir: Path = RAW_DIR, + output_root: Path = PROCESSED_EVENTS_HOURLY_DIR, +) -> int: + target_date = target_date_for_execution(execution_date) + + with tempfile.TemporaryDirectory() as tmp: + tmp_root = Path(tmp) + tmp_raw_dir = tmp_root / "raw" + tmp_extract_dir = tmp_root / "tmp" + + ok = run_execution_date( + execution_date=execution_date, + base_dir=tmp_root, + ) + if not ok: + target = f"{target_date:%Y-%m-%d}" + raise 
def read_clean_file(path: Path) -> pd.DataFrame:
    """Load a CENACE CSV, skipping the free-text preamble above the header.

    The header is the first line containing both ``Hora`` and
    ``Clave del nodo``; everything before it is skipped.

    Args:
        path: CSV file encoded as latin-1.

    Returns:
        The table parsed by ``pandas.read_csv`` with the header row as columns.

    Raises:
        ValueError: If no header line is found in the file.
    """
    header_idx = None

    # Stream the file and stop at the first match instead of loading and
    # splitting the whole text. Text-mode iteration breaks lines only on
    # \n, \r and \r\n, whereas str.splitlines() also splits on characters
    # such as \x85, which could shift the index passed to skiprows.
    with path.open(encoding="latin-1") as handle:
        for idx, line in enumerate(handle):
            if "Hora" in line and "Clave del nodo" in line:
                header_idx = idx
                break

    if header_idx is None:
        # Previously surfaced as a bare StopIteration with no file context.
        raise ValueError(
            f"Header row with 'Hora' and 'Clave del nodo' not found in {path.name}"
        )

    return pd.read_csv(
        path,
        encoding="latin-1",
        skiprows=header_idx,
        sep=",",
        quotechar='"',
    )
"nodo" in col: + col_map[col] = "unique_id" + elif "precio marginal" in col: + col_map[col] = "y" + + return df.rename(columns=col_map) + + +def build_timestamp(df: pd.DataFrame, path: Path) -> pd.DataFrame: + match = re.search(r"\d{4}-\d{2}-\d{2}", path.name) + if not match: + raise ValueError(f"Date not found in filename: {path.name}") + + result = df.copy() + base_date = pd.to_datetime(match.group(0)) + + result["hour"] = pd.to_numeric(result["hour"], errors="coerce") + result = result.dropna(subset=["hour"]).copy() + result["hour"] = result["hour"].astype(int) + result["ds"] = base_date + pd.to_timedelta(result["hour"] - 1, unit="h") + + return result + + +def transform_raw_files(tmp_dir: Path = TMP_DIR) -> pd.DataFrame: + frames: list[pd.DataFrame] = [] + + for path in sorted(tmp_dir.glob("*.csv")): + if path.name.startswith(".~lock"): + continue + + frame = read_clean_file(path) + frame = clean_columns(frame) + frame = build_timestamp(frame, path) + frame = frame[["unique_id", "ds", "y"]] + frame = frame[frame["unique_id"].isin(SELECTED_NODES)] + frames.append(frame) + + if not frames: + raise RuntimeError(f"No valid CENACE CSV files parsed from {tmp_dir}") + + result = pd.concat(frames, ignore_index=True) + result["y"] = pd.to_numeric(result["y"], errors="coerce") + + return ( + result.dropna(subset=["unique_id", "ds", "y"]) + .drop_duplicates(["unique_id", "ds"]) + .sort_values(["unique_id", "ds"]) + .reset_index(drop=True) + ) + + +def transform_cenace( + tmp_dir: Path = TMP_DIR, + output_path: Path = PROCESSED_CSV, +) -> Path: + result = transform_raw_files(tmp_dir) + + output_path.parent.mkdir(parents=True, exist_ok=True) + result.to_csv(output_path, index=False) + + print(result.groupby("unique_id").size()) + print(f"Saved {len(result)} rows to {output_path}") + + return output_path + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--tmp-dir", type=Path, default=TMP_DIR) + parser.add_argument("--output", 
type=Path, default=PROCESSED_CSV) + args = parser.parse_args() + + transform_cenace(args.tmp_dir, args.output) diff --git a/src/data/cenace/utils/cenace_data.py b/src/data/cenace/utils/cenace_data.py new file mode 100644 index 0000000..5142720 --- /dev/null +++ b/src/data/cenace/utils/cenace_data.py @@ -0,0 +1,88 @@ +from __future__ import annotations + +from dataclasses import dataclass +from pathlib import Path + +import duckdb +import pandas as pd + + +@dataclass +class CENACEData: + base_path: Path + freq: str = "hourly" + h: int = 24 + max_window_size: int = 24 * 90 + + def __post_init__(self) -> None: + self.base_path = Path(self.base_path) + + def _date_to_partition(self, d: pd.Timestamp) -> Path: + return ( + self.base_path + / f"year={d.year:04d}" + / f"month={d.month:02d}" + / f"day={d.day:02d}" + / "series.parquet" + ) + + def _paths_for_range(self, start: pd.Timestamp, end: pd.Timestamp) -> list[str]: + days = pd.date_range(start.normalize(), end.normalize(), freq="D") + paths = [self._date_to_partition(d) for d in days] + existing = [str(p) for p in paths if p.exists()] + if not existing: + raise FileNotFoundError( + f"No parquet files found between {start} and \ + {end} under {self.base_path}" + ) + return existing + + def get_df( + self, + cutoff: str | pd.Timestamp, + max_window_size: int | None = None, + sort: bool = True, + ) -> pd.DataFrame: + cutoff = pd.Timestamp(cutoff) + window = max_window_size or self.max_window_size + start = cutoff - pd.Timedelta(hours=window - 1) + + paths = self._paths_for_range(start, cutoff) + + query = f""" + SELECT unique_id, ds, y + FROM read_parquet({paths}) + WHERE ds >= TIMESTAMP '{start}' + AND ds <= TIMESTAMP '{cutoff}' + """ + + df = duckdb.sql(query).df() + df["ds"] = pd.to_datetime(df["ds"]) + + if sort: + df = df.sort_values(["unique_id", "ds"]).reset_index(drop=True) + + return df + + def get_actuals( + self, cutoff: str | pd.Timestamp, h: int | None = None + ) -> pd.DataFrame: + cutoff = 
pd.Timestamp(cutoff) + horizon = h or self.h + + start = cutoff + pd.Timedelta(hours=1) + end = cutoff + pd.Timedelta(hours=horizon) + + paths = self._paths_for_range(start, end) + + query = f""" + SELECT unique_id, ds, y + FROM read_parquet({paths}) + WHERE ds >= TIMESTAMP '{start}' + AND ds <= TIMESTAMP '{end}' + """ + + df = duckdb.sql(query).df() + df["ds"] = pd.to_datetime(df["ds"]) + df = df.sort_values(["unique_id", "ds"]).reset_index(drop=True) + return df diff --git a/src/evaluation/cenace/__init__.py b/src/evaluation/cenace/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/evaluation/cenace/core.py b/src/evaluation/cenace/core.py new file mode 100644 index 0000000..423784a --- /dev/null +++ b/src/evaluation/cenace/core.py @@ -0,0 +1,98 @@ +from __future__ import annotations + +import argparse +import shutil +import tempfile +from pathlib import Path + +import pandas as pd + +from src.data.cenace.config import ( + EVALUATIONS_HOURLY_DIR, + FORECASTS_HOURLY_DIR, + PROCESSED_EVENTS_HOURLY_DIR, +) +from src.data.cenace.utils.cenace_data import CENACEData +from src.evaluation.evaluate import evaluate_forecast + + +def cutoff_partition(root: Path, cutoff: pd.Timestamp) -> Path: + return ( + root + / f"year={cutoff.year:04d}" + / f"month={cutoff.month:02d}" + / f"day={cutoff.day:02d}" + ) + + +def run_evaluation( + cutoff: str | pd.Timestamp, + model: str, + h: int = 24, + max_window_size: int = 48, +) -> Path: + cutoff = pd.Timestamp(cutoff) + + data = CENACEData( + base_path=PROCESSED_EVENTS_HOURLY_DIR, + freq="hourly", + h=h, + max_window_size=max_window_size, + ) + + forecast_path = ( + FORECASTS_HOURLY_DIR + / model + / f"year={cutoff.year:04d}" + / f"month={cutoff.month:02d}" + / f"day={cutoff.day:02d}" + / "forecasts.parquet" + ) + + train = data.get_df(cutoff, max_window_size=max_window_size) + actuals = data.get_actuals(cutoff, h=h) + forecasts = pd.read_parquet(forecast_path) + + metrics, _ = evaluate_forecast( + 
forecast_df=forecasts, + actuals_df=actuals, + train_df=train, + seasonality=24, + ) + + eval_root = EVALUATIONS_HOURLY_DIR / model + out_dir = cutoff_partition(eval_root, cutoff) + out_dir.mkdir(parents=True, exist_ok=True) + out_path = out_dir / "metrics.parquet" + + with tempfile.NamedTemporaryFile(suffix=".parquet") as tmp: + metrics.to_parquet(tmp.name, index=False) + shutil.copyfile(tmp.name, out_path) + return out_path + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser() + parser.add_argument("--cutoff", required=True) + parser.add_argument("--model", required=True) + parser.add_argument("--h", type=int, default=24) + parser.add_argument("--max-window-size", type=int, default=48) + return parser.parse_args() + + +def main() -> None: + args = parse_args() + out_path = run_evaluation( + cutoff=args.cutoff, + model=args.model, + h=args.h, + max_window_size=args.max_window_size, + ) + metrics = pd.read_parquet(out_path) + print(f"Saved metrics: {out_path}") + print(metrics.head()) + print(metrics.shape) + + +if __name__ == "__main__": + main() diff --git a/src/forecast/cenace/__init__.py b/src/forecast/cenace/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/forecast/cenace/core.py b/src/forecast/cenace/core.py new file mode 100644 index 0000000..442f6fc --- /dev/null +++ b/src/forecast/cenace/core.py @@ -0,0 +1,85 @@ +from __future__ import annotations + +import argparse +import shutil +import tempfile +from pathlib import Path + +import pandas as pd + +from src.data.cenace.config import FORECASTS_HOURLY_DIR, PROCESSED_EVENTS_HOURLY_DIR +from src.data.cenace.utils.cenace_data import CENACEData +from src.forecast.forecast import generate_forecast + + +def cutoff_partition(root: Path, cutoff: pd.Timestamp) -> Path: + return ( + root + / f"year={cutoff.year:04d}" + / f"month={cutoff.month:02d}" + / f"day={cutoff.day:02d}" + ) + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser() + 
parser.add_argument("--cutoff", required=True) + parser.add_argument("--model", required=True) + parser.add_argument("--h", type=int, default=24) + parser.add_argument("--max-window-size", type=int, default=48) + return parser.parse_args() + + +def run_forecast( + cutoff: str | pd.Timestamp, + model: str, + h: int = 24, + max_window_size: int = 48, +) -> Path: + cutoff = pd.Timestamp(cutoff) + + data = CENACEData( + base_path=PROCESSED_EVENTS_HOURLY_DIR, + freq="hourly", + h=h, + max_window_size=max_window_size, + ) + + train = data.get_df(cutoff, max_window_size=max_window_size) + + model_name = "seasonal_naive" if model == "seasonal_naive_24" else model + + forecasts = generate_forecast( + model_name=model_name, + df=train, + h=h, + freq="h", + ) + + if "y_hat" in forecasts.columns: + forecasts = forecasts.rename(columns={"y_hat": model}) + + forecast_root = FORECASTS_HOURLY_DIR / model + out_dir = cutoff_partition(forecast_root, cutoff) + out_dir.mkdir(parents=True, exist_ok=True) + out_path = out_dir / "forecasts.parquet" + + with tempfile.NamedTemporaryFile(suffix=".parquet") as tmp: + forecasts.to_parquet(tmp.name, index=False) + shutil.copyfile(tmp.name, out_path) + return out_path + + +def main() -> None: + args = parse_args() + out_path = run_forecast( + cutoff=args.cutoff, + model=args.model, + h=args.h, + max_window_size=args.max_window_size, + ) + print(f"Forecasts saved to: {out_path}") + + +if __name__ == "__main__": + main() diff --git a/src/forecast/cenace/modal_app.py b/src/forecast/cenace/modal_app.py new file mode 100644 index 0000000..8ec808b --- /dev/null +++ b/src/forecast/cenace/modal_app.py @@ -0,0 +1,113 @@ +from __future__ import annotations + +import modal + +CENACE_DATA_ROOT = "/s3-bucket/v0.1.0/cenace" + +CPU_MODELS = ( + "seasonal_naive", + "historic_average", + "auto_ets", + "auto_ces", + "dynamic_optimized_theta", +) + +app = modal.App(name="timecopilot-cenace-forecast") +image = ( + modal.Image.debian_slim(python_version="3.11") + 
.pip_install("uv") + .add_local_file("pyproject.toml", "/root/pyproject.toml", copy=True) + .add_local_file(".python-version", "/root/.python-version", copy=True) + .add_local_file("uv.lock", "/root/uv.lock", copy=True) + .workdir("/root") + .run_commands("uv pip install . --system --compile-bytecode") +) + +secret = modal.Secret.from_name( + "aws-secret", + required_keys=["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"], +) + +volume = { + "/s3-bucket": modal.CloudBucketMount( + bucket_name="impermanent-benchmark", + secret=secret, + ) +} + + +@app.function( + image=image, + volumes=volume, + timeout=60 * 30, +) +def run_forecast_model(cutoff: str, model: str) -> tuple[str, str, str | None]: + import os + + os.environ["CENACE_DATA_ROOT"] = CENACE_DATA_ROOT + + from src.forecast.cenace.core import run_forecast + + try: + forecast_path = run_forecast( + cutoff=cutoff, + model=model, + h=24, + max_window_size=24 * 30, + ) + return model, str(forecast_path), None + except Exception as exc: + return model, "", repr(exc) + + +@app.function( + image=image, + volumes=volume, + timeout=60 * 30, +) +def run_evaluation_model(cutoff: str, model: str) -> tuple[str, str, str | None]: + import os + + os.environ["CENACE_DATA_ROOT"] = CENACE_DATA_ROOT + + from src.evaluation.cenace.core import run_evaluation + + try: + metrics_path = run_evaluation( + cutoff=cutoff, + model=model, + h=24, + max_window_size=24 * 30, + ) + return model, str(metrics_path), None + except Exception as exc: + return model, "", repr(exc) + + +def _print_results(results: list[tuple[str, str, str | None]]) -> None: + failures = [result for result in results if result[2] is not None] + + for model, path, error in results: + if error: + print(f"FAILED {model}: {error}") + else: + print(f"OK {model}: {path}") + + if failures: + raise RuntimeError(f"{len(failures)} CENACE model runs failed") + + +@app.local_entrypoint() +def forecast(cutoff: str): + results = list( + run_forecast_model.starmap([(cutoff, model) 
for model in CPU_MODELS]) + ) + _print_results(results) + + +@app.local_entrypoint() +def evaluate(cutoff: str): + results = list( + run_evaluation_model.starmap([(cutoff, model) for model in CPU_MODELS]) + ) + _print_results(results) diff --git a/uv.lock b/uv.lock index af3fa47..895fd6c 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = ">=3.10" resolution-markers = [ "python_full_version >= '3.13' and sys_platform == 'linux'", @@ -448,6 +448,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/71/cc/18245721fa7747065ab478316c7fea7c74777d07f37ae60db2e84f8172e8/beartype-0.22.9-py3-none-any.whl", hash = "sha256:d16c9bbc61ea14637596c5f6fbff2ee99cbe3573e46a716401734ef50c3060c2", size = 1333658, upload-time = "2025-12-13T06:50:28.266Z" }, ] +[[package]] +name = "beautifulsoup4" +version = "4.15.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "soupsieve" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/43/65/318323f98dbee45d42dff61d8f047181bc6f2268a9068cfad035a46be5af/beautifulsoup4-4.15.0.tar.gz", hash = "sha256:288e3ca7d54b06f2ac191970bc275c1939cb46d450b255bf6718b04aa37ab4f7", size = 632571, upload-time = "2026-06-07T16:44:20.453Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/88/c6/92fcd42f1ba33e1184263f25bfabf3d27c383410470f169e4b8163bf9c17/beautifulsoup4-4.15.0-py3-none-any.whl", hash = "sha256:d6f88de62e1d4e38ecb1077eb9724cd0eff29d2a08ca16a401e9b9e93f117cf9", size = 109924, upload-time = "2026-06-07T16:44:21.566Z" }, +] + [[package]] name = "black" version = "26.1.0" @@ -2041,7 +2054,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fe/65/5b235b40581ad75ab97dcd8b4218022ae8e3ab77c13c919f1a1dfe9171fd/greenlet-3.3.1-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:04bee4775f40ecefcdaa9d115ab44736cd4b9c5fba733575bfe9379419582e13", size = 273723, upload-time = 
"2026-01-23T15:30:37.521Z" }, { url = "https://files.pythonhosted.org/packages/ce/ad/eb4729b85cba2d29499e0a04ca6fbdd8f540afd7be142fd571eea43d712f/greenlet-3.3.1-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:50e1457f4fed12a50e427988a07f0f9df53cf0ee8da23fab16e6732c2ec909d4", size = 574874, upload-time = "2026-01-23T16:00:54.551Z" }, { url = "https://files.pythonhosted.org/packages/87/32/57cad7fe4c8b82fdaa098c89498ef85ad92dfbb09d5eb713adedfc2ae1f5/greenlet-3.3.1-cp310-cp310-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:070472cd156f0656f86f92e954591644e158fd65aa415ffbe2d44ca77656a8f5", size = 586309, upload-time = "2026-01-23T16:05:25.18Z" }, - { url = "https://files.pythonhosted.org/packages/66/66/f041005cb87055e62b0d68680e88ec1a57f4688523d5e2fb305841bc8307/greenlet-3.3.1-cp310-cp310-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:1108b61b06b5224656121c3c8ee8876161c491cbe74e5c519e0634c837cf93d5", size = 597461, upload-time = "2026-01-23T16:15:51.943Z" }, { url = "https://files.pythonhosted.org/packages/87/eb/8a1ec2da4d55824f160594a75a9d8354a5fe0a300fb1c48e7944265217e1/greenlet-3.3.1-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3a300354f27dd86bae5fbf7002e6dd2b3255cd372e9242c933faf5e859b703fe", size = 586985, upload-time = "2026-01-23T15:32:47.968Z" }, { url = "https://files.pythonhosted.org/packages/15/1c/0621dd4321dd8c351372ee8f9308136acb628600658a49be1b7504208738/greenlet-3.3.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:e84b51cbebf9ae573b5fbd15df88887815e3253fc000a7d0ff95170e8f7e9729", size = 1547271, upload-time = "2026-01-23T16:04:18.977Z" }, { url = "https://files.pythonhosted.org/packages/9d/53/24047f8924c83bea7a59c8678d9571209c6bfe5f4c17c94a78c06024e9f2/greenlet-3.3.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:e0093bd1a06d899892427217f0ff2a3c8f306182b8c754336d32e2d587c131b4", size = 1613427, upload-time = "2026-01-23T15:33:44.428Z" }, @@ 
-2049,7 +2061,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ec/e8/2e1462c8fdbe0f210feb5ac7ad2d9029af8be3bf45bd9fa39765f821642f/greenlet-3.3.1-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:5fd23b9bc6d37b563211c6abbb1b3cab27db385a4449af5c32e932f93017080c", size = 274974, upload-time = "2026-01-23T15:31:02.891Z" }, { url = "https://files.pythonhosted.org/packages/7e/a8/530a401419a6b302af59f67aaf0b9ba1015855ea7e56c036b5928793c5bd/greenlet-3.3.1-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:09f51496a0bfbaa9d74d36a52d2580d1ef5ed4fdfcff0a73730abfbbbe1403dd", size = 577175, upload-time = "2026-01-23T16:00:56.213Z" }, { url = "https://files.pythonhosted.org/packages/8e/89/7e812bb9c05e1aaef9b597ac1d0962b9021d2c6269354966451e885c4e6b/greenlet-3.3.1-cp311-cp311-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:cb0feb07fe6e6a74615ee62a880007d976cf739b6669cce95daa7373d4fc69c5", size = 590401, upload-time = "2026-01-23T16:05:26.365Z" }, - { url = "https://files.pythonhosted.org/packages/70/ae/e2d5f0e59b94a2269b68a629173263fa40b63da32f5c231307c349315871/greenlet-3.3.1-cp311-cp311-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:67ea3fc73c8cd92f42467a72b75e8f05ed51a0e9b1d15398c913416f2dafd49f", size = 601161, upload-time = "2026-01-23T16:15:53.456Z" }, { url = "https://files.pythonhosted.org/packages/5c/ae/8d472e1f5ac5efe55c563f3eabb38c98a44b832602e12910750a7c025802/greenlet-3.3.1-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:39eda9ba259cc9801da05351eaa8576e9aa83eb9411e8f0c299e05d712a210f2", size = 590272, upload-time = "2026-01-23T15:32:49.411Z" }, { url = "https://files.pythonhosted.org/packages/a8/51/0fde34bebfcadc833550717eade64e35ec8738e6b097d5d248274a01258b/greenlet-3.3.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:e2e7e882f83149f0a71ac822ebf156d902e7a5d22c9045e3e0d1daf59cee2cc9", size = 1550729, upload-time = "2026-01-23T16:04:20.867Z" }, { 
url = "https://files.pythonhosted.org/packages/16/c9/2fb47bee83b25b119d5a35d580807bb8b92480a54b68fef009a02945629f/greenlet-3.3.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:80aa4d79eb5564f2e0a6144fcc744b5a37c56c4a92d60920720e99210d88db0f", size = 1615552, upload-time = "2026-01-23T15:33:45.743Z" }, @@ -2058,7 +2069,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f9/c8/9d76a66421d1ae24340dfae7e79c313957f6e3195c144d2c73333b5bfe34/greenlet-3.3.1-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:7e806ca53acf6d15a888405880766ec84721aa4181261cd11a457dfe9a7a4975", size = 276443, upload-time = "2026-01-23T15:30:10.066Z" }, { url = "https://files.pythonhosted.org/packages/81/99/401ff34bb3c032d1f10477d199724f5e5f6fbfb59816ad1455c79c1eb8e7/greenlet-3.3.1-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d842c94b9155f1c9b3058036c24ffb8ff78b428414a19792b2380be9cecf4f36", size = 597359, upload-time = "2026-01-23T16:00:57.394Z" }, { url = "https://files.pythonhosted.org/packages/2b/bc/4dcc0871ed557792d304f50be0f7487a14e017952ec689effe2180a6ff35/greenlet-3.3.1-cp312-cp312-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:20fedaadd422fa02695f82093f9a98bad3dab5fcda793c658b945fcde2ab27ba", size = 607805, upload-time = "2026-01-23T16:05:28.068Z" }, - { url = "https://files.pythonhosted.org/packages/3b/cd/7a7ca57588dac3389e97f7c9521cb6641fd8b6602faf1eaa4188384757df/greenlet-3.3.1-cp312-cp312-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:c620051669fd04ac6b60ebc70478210119c56e2d5d5df848baec4312e260e4ca", size = 622363, upload-time = "2026-01-23T16:15:54.754Z" }, { url = "https://files.pythonhosted.org/packages/cf/05/821587cf19e2ce1f2b24945d890b164401e5085f9d09cbd969b0c193cd20/greenlet-3.3.1-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:14194f5f4305800ff329cbf02c5fcc88f01886cadd29941b807668a45f0d2336", size = 609947, upload-time = "2026-01-23T15:32:51.004Z" }, { 
url = "https://files.pythonhosted.org/packages/a4/52/ee8c46ed9f8babaa93a19e577f26e3d28a519feac6350ed6f25f1afee7e9/greenlet-3.3.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:7b2fe4150a0cf59f847a67db8c155ac36aed89080a6a639e9f16df5d6c6096f1", size = 1567487, upload-time = "2026-01-23T16:04:22.125Z" }, { url = "https://files.pythonhosted.org/packages/8f/7c/456a74f07029597626f3a6db71b273a3632aecb9afafeeca452cfa633197/greenlet-3.3.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:49f4ad195d45f4a66a0eb9c1ba4832bb380570d361912fa3554746830d332149", size = 1636087, upload-time = "2026-01-23T15:33:47.486Z" }, @@ -2067,7 +2077,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ec/ab/d26750f2b7242c2b90ea2ad71de70cfcd73a948a49513188a0fc0d6fc15a/greenlet-3.3.1-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:7ab327905cabb0622adca5971e488064e35115430cec2c35a50fd36e72a315b3", size = 275205, upload-time = "2026-01-23T15:30:24.556Z" }, { url = "https://files.pythonhosted.org/packages/10/d3/be7d19e8fad7c5a78eeefb2d896a08cd4643e1e90c605c4be3b46264998f/greenlet-3.3.1-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:65be2f026ca6a176f88fb935ee23c18333ccea97048076aef4db1ef5bc0713ac", size = 599284, upload-time = "2026-01-23T16:00:58.584Z" }, { url = "https://files.pythonhosted.org/packages/ae/21/fe703aaa056fdb0f17e5afd4b5c80195bbdab701208918938bd15b00d39b/greenlet-3.3.1-cp313-cp313-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7a3ae05b3d225b4155bda56b072ceb09d05e974bc74be6c3fc15463cf69f33fd", size = 610274, upload-time = "2026-01-23T16:05:29.312Z" }, - { url = "https://files.pythonhosted.org/packages/06/00/95df0b6a935103c0452dad2203f5be8377e551b8466a29650c4c5a5af6cc/greenlet-3.3.1-cp313-cp313-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:12184c61e5d64268a160226fb4818af4df02cfead8379d7f8b99a56c3a54ff3e", size = 624375, upload-time = "2026-01-23T16:15:55.915Z" }, { url = 
"https://files.pythonhosted.org/packages/cb/86/5c6ab23bb3c28c21ed6bebad006515cfe08b04613eb105ca0041fecca852/greenlet-3.3.1-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6423481193bbbe871313de5fd06a082f2649e7ce6e08015d2a76c1e9186ca5b3", size = 612904, upload-time = "2026-01-23T15:32:52.317Z" }, { url = "https://files.pythonhosted.org/packages/c2/f3/7949994264e22639e40718c2daf6f6df5169bf48fb038c008a489ec53a50/greenlet-3.3.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:33a956fe78bbbda82bfc95e128d61129b32d66bcf0a20a1f0c08aa4839ffa951", size = 1567316, upload-time = "2026-01-23T16:04:23.316Z" }, { url = "https://files.pythonhosted.org/packages/8d/6e/d73c94d13b6465e9f7cd6231c68abde838bb22408596c05d9059830b7872/greenlet-3.3.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4b065d3284be43728dd280f6f9a13990b56470b81be20375a207cdc814a983f2", size = 1636549, upload-time = "2026-01-23T15:33:48.643Z" }, @@ -2076,7 +2085,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ae/fb/011c7c717213182caf78084a9bea51c8590b0afda98001f69d9f853a495b/greenlet-3.3.1-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:bd59acd8529b372775cd0fcbc5f420ae20681c5b045ce25bd453ed8455ab99b5", size = 275737, upload-time = "2026-01-23T15:32:16.889Z" }, { url = "https://files.pythonhosted.org/packages/41/2e/a3a417d620363fdbb08a48b1dd582956a46a61bf8fd27ee8164f9dfe87c2/greenlet-3.3.1-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b31c05dd84ef6871dd47120386aed35323c944d86c3d91a17c4b8d23df62f15b", size = 646422, upload-time = "2026-01-23T16:01:00.354Z" }, { url = "https://files.pythonhosted.org/packages/b4/09/c6c4a0db47defafd2d6bab8ddfe47ad19963b4e30f5bed84d75328059f8c/greenlet-3.3.1-cp314-cp314-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:02925a0bfffc41e542c70aa14c7eda3593e4d7e274bfcccca1827e6c0875902e", size = 658219, upload-time = "2026-01-23T16:05:30.956Z" }, - { url = 
"https://files.pythonhosted.org/packages/e2/89/b95f2ddcc5f3c2bc09c8ee8d77be312df7f9e7175703ab780f2014a0e781/greenlet-3.3.1-cp314-cp314-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:3e0f3878ca3a3ff63ab4ea478585942b53df66ddde327b59ecb191b19dbbd62d", size = 671455, upload-time = "2026-01-23T16:15:57.232Z" }, { url = "https://files.pythonhosted.org/packages/80/38/9d42d60dffb04b45f03dbab9430898352dba277758640751dc5cc316c521/greenlet-3.3.1-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:34a729e2e4e4ffe9ae2408d5ecaf12f944853f40ad724929b7585bca808a9d6f", size = 660237, upload-time = "2026-01-23T15:32:53.967Z" }, { url = "https://files.pythonhosted.org/packages/96/61/373c30b7197f9e756e4c81ae90a8d55dc3598c17673f91f4d31c3c689c3f/greenlet-3.3.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:aec9ab04e82918e623415947921dea15851b152b822661cce3f8e4393c3df683", size = 1615261, upload-time = "2026-01-23T16:04:25.066Z" }, { url = "https://files.pythonhosted.org/packages/fd/d3/ca534310343f5945316f9451e953dcd89b36fe7a19de652a1dc5a0eeef3f/greenlet-3.3.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:71c767cf281a80d02b6c1bdc41c9468e1f5a494fb11bc8688c360524e273d7b1", size = 1683719, upload-time = "2026-01-23T15:33:50.61Z" }, @@ -2085,7 +2093,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/28/24/cbbec49bacdcc9ec652a81d3efef7b59f326697e7edf6ed775a5e08e54c2/greenlet-3.3.1-cp314-cp314t-macosx_11_0_universal2.whl", hash = "sha256:3e63252943c921b90abb035ebe9de832c436401d9c45f262d80e2d06cc659242", size = 282706, upload-time = "2026-01-23T15:33:05.525Z" }, { url = "https://files.pythonhosted.org/packages/86/2e/4f2b9323c144c4fe8842a4e0d92121465485c3c2c5b9e9b30a52e80f523f/greenlet-3.3.1-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:76e39058e68eb125de10c92524573924e827927df5d3891fbc97bd55764a8774", size = 651209, upload-time = "2026-01-23T16:01:01.517Z" }, { url = 
"https://files.pythonhosted.org/packages/d9/87/50ca60e515f5bb55a2fbc5f0c9b5b156de7d2fc51a0a69abc9d23914a237/greenlet-3.3.1-cp314-cp314t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c9f9d5e7a9310b7a2f416dd13d2e3fd8b42d803968ea580b7c0f322ccb389b97", size = 654300, upload-time = "2026-01-23T16:05:32.199Z" }, - { url = "https://files.pythonhosted.org/packages/7c/25/c51a63f3f463171e09cb586eb64db0861eb06667ab01a7968371a24c4f3b/greenlet-3.3.1-cp314-cp314t-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:4b9721549a95db96689458a1e0ae32412ca18776ed004463df3a9299c1b257ab", size = 662574, upload-time = "2026-01-23T16:15:58.364Z" }, { url = "https://files.pythonhosted.org/packages/1d/94/74310866dfa2b73dd08659a3d18762f83985ad3281901ba0ee9a815194fb/greenlet-3.3.1-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:92497c78adf3ac703b57f1e3813c2d874f27f71a178f9ea5887855da413cd6d2", size = 653842, upload-time = "2026-01-23T15:32:55.671Z" }, { url = "https://files.pythonhosted.org/packages/97/43/8bf0ffa3d498eeee4c58c212a3905dd6146c01c8dc0b0a046481ca29b18c/greenlet-3.3.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:ed6b402bc74d6557a705e197d47f9063733091ed6357b3de33619d8a8d93ac53", size = 1614917, upload-time = "2026-01-23T16:04:26.276Z" }, { url = "https://files.pythonhosted.org/packages/89/90/a3be7a5f378fc6e84abe4dcfb2ba32b07786861172e502388b4c90000d1b/greenlet-3.3.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:59913f1e5ada20fde795ba906916aea25d442abcc0593fba7e26c92b7ad76249", size = 1676092, upload-time = "2026-01-23T15:33:52.176Z" }, @@ -2386,6 +2393,7 @@ name = "impermanent" version = "0.0.1" source = { editable = "." 
} dependencies = [ + { name = "beautifulsoup4" }, { name = "boto3" }, { name = "duckdb" }, { name = "freezegun" }, @@ -2408,6 +2416,7 @@ dev = [ [package.metadata] requires-dist = [ + { name = "beautifulsoup4", specifier = ">=4.15.0" }, { name = "boto3", specifier = ">=1.42.24" }, { name = "duckdb", specifier = ">=1.4.3" }, { name = "freezegun", specifier = ">=1.5.5" }, @@ -7010,6 +7019,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/32/46/9cb0e58b2deb7f82b84065f37f3bffeb12413f947f9388e4cac22c4621ce/sortedcontainers-2.4.0-py2.py3-none-any.whl", hash = "sha256:a163dcaede0f1c021485e957a39245190e74249897e2ae4b2aa38595db237ee0", size = 29575, upload-time = "2021-05-16T22:03:41.177Z" }, ] +[[package]] +name = "soupsieve" +version = "2.8.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/47/2c/0a5f6f8ee0d5589e48c7640213ed5175d52cf540a06725b628cc1a45d6ce/soupsieve-2.8.4.tar.gz", hash = "sha256:e121fd02e975c695e4e9e8774a5ee35d74714b59307868dcc5319ad2d9e3328e", size = 121110, upload-time = "2026-05-24T13:55:57.154Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5e/f5/0c41cb68dcae6b7de4fac4188a3a9589e21fb31df21ea3a2e888db95e6c9/soupsieve-2.8.4-py3-none-any.whl", hash = "sha256:e7e6b0769c8f51ed59acab6e994b00621096cfb1c640a7509295987388fbaf65", size = 37304, upload-time = "2026-05-24T13:55:55.406Z" }, +] + [[package]] name = "sqlalchemy" version = "2.0.46" @@ -7898,6 +7916,13 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0f/8b/4b61d6e13f7108f36910df9ab4b58fd389cc2520d54d81b88660804aad99/torch-2.10.0-2-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:418997cb02d0a0f1497cf6a09f63166f9f5df9f3e16c8a716ab76a72127c714f", size = 79423467, upload-time = "2026-02-10T21:44:48.711Z" }, { url = "https://files.pythonhosted.org/packages/d3/54/a2ba279afcca44bbd320d4e73675b282fcee3d81400ea1b53934efca6462/torch-2.10.0-2-cp312-none-macosx_11_0_arm64.whl", hash = 
"sha256:13ec4add8c3faaed8d13e0574f5cd4a323c11655546f91fbe6afa77b57423574", size = 79498202, upload-time = "2026-02-10T21:44:52.603Z" }, { url = "https://files.pythonhosted.org/packages/ec/23/2c9fe0c9c27f7f6cb865abcea8a4568f29f00acaeadfc6a37f6801f84cb4/torch-2.10.0-2-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:e521c9f030a3774ed770a9c011751fb47c4d12029a3d6522116e48431f2ff89e", size = 79498254, upload-time = "2026-02-10T21:44:44.095Z" }, + { url = "https://files.pythonhosted.org/packages/16/ee/efbd56687be60ef9af0c9c0ebe106964c07400eade5b0af8902a1d8cd58c/torch-2.10.0-3-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:a1ff626b884f8c4e897c4c33782bdacdff842a165fee79817b1dd549fdda1321", size = 915510070, upload-time = "2026-03-11T14:16:39.386Z" }, + { url = "https://files.pythonhosted.org/packages/36/ab/7b562f1808d3f65414cd80a4f7d4bb00979d9355616c034c171249e1a303/torch-2.10.0-3-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:ac5bdcbb074384c66fa160c15b1ead77839e3fe7ed117d667249afce0acabfac", size = 915518691, upload-time = "2026-03-11T14:15:43.147Z" }, + { url = "https://files.pythonhosted.org/packages/b3/7a/abada41517ce0011775f0f4eacc79659bc9bc6c361e6bfe6f7052a6b9363/torch-2.10.0-3-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:98c01b8bb5e3240426dcde1446eed6f40c778091c8544767ef1168fc663a05a6", size = 915622781, upload-time = "2026-03-11T14:17:11.354Z" }, + { url = "https://files.pythonhosted.org/packages/ab/c6/4dfe238342ffdcec5aef1c96c457548762d33c40b45a1ab7033bb26d2ff2/torch-2.10.0-3-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:80b1b5bfe38eb0e9f5ff09f206dcac0a87aadd084230d4a36eea5ec5232c115b", size = 915627275, upload-time = "2026-03-11T14:16:11.325Z" }, + { url = "https://files.pythonhosted.org/packages/d8/f0/72bf18847f58f877a6a8acf60614b14935e2f156d942483af1ffc081aea0/torch-2.10.0-3-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:46b3574d93a2a8134b3f5475cfb98e2eb46771794c57015f6ad1fb795ec25e49", size = 915523474, upload-time 
= "2026-03-11T14:17:44.422Z" }, + { url = "https://files.pythonhosted.org/packages/f4/39/590742415c3030551944edc2ddc273ea1fdfe8ffb2780992e824f1ebee98/torch-2.10.0-3-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:b1d5e2aba4eb7f8e87fbe04f86442887f9167a35f092afe4c237dfcaaef6e328", size = 915632474, upload-time = "2026-03-11T14:15:13.666Z" }, + { url = "https://files.pythonhosted.org/packages/b6/8e/34949484f764dde5b222b7fe3fede43e4a6f0da9d7f8c370bb617d629ee2/torch-2.10.0-3-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:0228d20b06701c05a8f978357f657817a4a63984b0c90745def81c18aedfa591", size = 915523882, upload-time = "2026-03-11T14:14:46.311Z" }, { url = "https://files.pythonhosted.org/packages/0c/1a/c61f36cfd446170ec27b3a4984f072fd06dab6b5d7ce27e11adb35d6c838/torch-2.10.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:5276fa790a666ee8becaffff8acb711922252521b28fbce5db7db5cf9cb2026d", size = 145992962, upload-time = "2026-01-21T16:24:14.04Z" }, { url = "https://files.pythonhosted.org/packages/b5/60/6662535354191e2d1555296045b63e4279e5a9dbad49acf55a5d38655a39/torch-2.10.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:aaf663927bcd490ae971469a624c322202a2a1e68936eb952535ca4cd3b90444", size = 915599237, upload-time = "2026-01-21T16:23:25.497Z" }, { url = "https://files.pythonhosted.org/packages/40/b8/66bbe96f0d79be2b5c697b2e0b187ed792a15c6c4b8904613454651db848/torch-2.10.0-cp310-cp310-win_amd64.whl", hash = "sha256:a4be6a2a190b32ff5c8002a0977a25ea60e64f7ba46b1be37093c141d9c49aeb", size = 113720931, upload-time = "2026-01-21T16:24:23.743Z" },