diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5c27e52..db90747 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -28,6 +28,9 @@ jobs: python -m pip install --upgrade pip python -m pip install . build + - name: Verify console script is installed + run: python -c "import shutil, sys; sys.exit(0 if shutil.which('kalshi-research-mcp') else 1)" + - name: Run tests run: python -m unittest discover -s tests -v diff --git a/README.md b/README.md index c9cc974..a142435 100644 --- a/README.md +++ b/README.md @@ -94,13 +94,19 @@ For a direct GitHub install: pip install "git+https://github.com/JleviEderer/KalshiMarketMaker.git@main" ``` -### 4. Install MCP runtime dependencies manually if you are not using package install +### 4. Install MCP runtime dependencies manually only if you plan to run from source ```bash pip install -r requirements.txt ``` -That installs the MCP server runtime and research/backtest dependencies. +That installs the MCP server runtime and research/backtest dependencies, but it does not install the `kalshi-research-mcp` console script. + +If you choose this source-only path, run the server with: + +```bash +python server.py +``` If you also want the older plotting, notebook, or legacy trading scripts: @@ -132,7 +138,8 @@ That starts the MCP server over `stdio`, which is the normal local setup for Cla ## Dependency Notes -- `requirements.txt` is the publishable MCP-first install path. +- `pip install .` is the canonical public install path. +- `requirements.txt` is a source-run fallback if you want to execute `python server.py` directly. - `requirements-legacy.txt` adds optional dependencies used by old plotting or live/demo scripts. - The original pinned `pandas==2.2.2` install path is not reliable on Windows Python 3.13 because it can fall back to a failing source build. The current version range in `requirements.txt` is chosen to allow binary wheels on modern Python versions. @@ -219,6 +226,9 @@ Shows: Downloads the public daily Kalshi market archive and writes a CSV locally. +If you call it with no dates, it defaults to the last 7 completed days. +For the full archive, pass an explicit `start_date`, for example `2021-06-30`. + Example parameters: - `start_date` @@ -285,7 +295,7 @@ That wording is better because: If another person wants to use it, they need to: 1. clone the repo -2. run `pip install -r requirements.txt` +2. run `pip install .` 3. add `server.py` to their MCP client config 4. start using the tools through Claude Code diff --git a/backtest_engine.py b/backtest_engine.py index 4ab9da4..21fb871 100644 --- a/backtest_engine.py +++ b/backtest_engine.py @@ -12,6 +12,8 @@ from http_utils import build_retry_session from mm import AbstractTradingAPI, AvellanedaMarketMaker +REQUIRED_ARCHIVE_COLUMNS = {"ticker_name", "status", "date"} + class KalshiMarketDataClient: """Public read-only client for market metadata and candlesticks.""" @@ -261,8 +263,13 @@ def find_settled_markets(self, file_path: str, search_term: str | None = None) - self.logger.info("Searching for '%s' in %s", search_term, file_path) market_info: dict[str, dict[str, Any]] = {} - try: - for chunk in pd.read_csv(file_path, chunksize=10_000, low_memory=False): + with pd.read_csv(file_path, chunksize=10_000, low_memory=False) as reader: + for chunk in reader: + missing_columns = REQUIRED_ARCHIVE_COLUMNS.difference(chunk.columns) + if missing_columns: + missing = ", ".join(sorted(missing_columns)) + raise ValueError(f"Archive file is missing required columns: {missing}") + settled_chunk = chunk[chunk["status"].isin(["settled", "closed", "finalized"])].copy() if search_term: settled_chunk = settled_chunk[ @@ -271,20 +278,21 @@ def find_settled_markets(self, file_path: str, search_term: str | None = None) - for _, row in settled_chunk.iterrows(): ticker = row["ticker_name"] - if ticker in market_info: - continue - market_info[ticker] = { + candidate = { "ticker": ticker, "title": row["ticker_name"], "series_ticker": row.get("series_ticker") or row.get("report_ticker"), "report_ticker": row.get("report_ticker"), "close_time": row.get("date"), } - - return list(market_info.values()) - except Exception as exc: - self.logger.error("Failed to read or parse %s: %s", file_path, exc) - return [] + existing = market_info.get(ticker) + if existing is None or (candidate["close_time"] or "") >= (existing.get("close_time") or ""): + if existing: + candidate["series_ticker"] = candidate["series_ticker"] or existing.get("series_ticker") + candidate["report_ticker"] = candidate["report_ticker"] or existing.get("report_ticker") + market_info[ticker] = candidate + + return list(market_info.values()) def fetch_historical_data( self, diff --git a/download_market_archive.py b/download_market_archive.py index 94a3d07..e6e5c63 100644 --- a/download_market_archive.py +++ b/download_market_archive.py @@ -10,16 +10,25 @@ from http_utils import build_retry_session -DEFAULT_START_DATE = "2021-06-30" +EARLIEST_ARCHIVE_DATE = "2021-06-30" +DEFAULT_LOOKBACK_DAYS = 7 DEFAULT_OUTPUT_PATH = "kalshi_all_markets_archive.csv" PUBLIC_ARCHIVE_URL = "https://kalshi-public-docs.s3.amazonaws.com/reporting/market_data_{day}.json" -def build_date_range(start_date: str = DEFAULT_START_DATE, end_date: str | None = None) -> list[str]: - start = pd.to_datetime(start_date).date() +def resolve_date_window(start_date: str | None = None, end_date: str | None = None) -> tuple[date, date]: end = pd.to_datetime(end_date).date() if end_date else date.today() - timedelta(days=1) + if start_date: + start = pd.to_datetime(start_date).date() + else: + start = end - timedelta(days=DEFAULT_LOOKBACK_DAYS - 1) if end < start: raise ValueError("end_date must be on or after start_date") + return start, end + + +def build_date_range(start_date: str | None = None, end_date: str | None = None) -> list[str]: + start, end = resolve_date_window(start_date, end_date) return pd.date_range(start, end).strftime("%Y-%m-%d").tolist() @@ -34,11 +43,12 @@ def fetch_market_file(day_str: str, timeout: int = 30, session: requests.Session def download_market_archive( - start_date: str = DEFAULT_START_DATE, + start_date: str | None = None, end_date: str | None = None, output_path: str = DEFAULT_OUTPUT_PATH, ) -> dict[str, Any]: - date_range = build_date_range(start_date, end_date) + resolved_start, resolved_end = resolve_date_window(start_date, end_date) + date_range = pd.date_range(resolved_start, resolved_end).strftime("%Y-%m-%d").tolist() frames: list[pd.DataFrame] = [] downloaded_days = 0 session = build_retry_session() @@ -68,14 +78,14 @@ def download_market_archive( "rows": int(len(archive)), "days_requested": len(date_range), "days_downloaded": downloaded_days, - "start_date": start_date, - "end_date": end_date or date_range[-1], + "start_date": resolved_start.isoformat(), + "end_date": resolved_end.isoformat(), } def main() -> None: parser = argparse.ArgumentParser(description="Download Kalshi public market archive data") - parser.add_argument("--start-date", default=DEFAULT_START_DATE, help="Inclusive start date in YYYY-MM-DD format") + parser.add_argument("--start-date", default=None, help="Inclusive start date in YYYY-MM-DD format") parser.add_argument("--end-date", default=None, help="Inclusive end date in YYYY-MM-DD format") parser.add_argument("--output-path", default=DEFAULT_OUTPUT_PATH, help="Where to write the consolidated CSV") args = parser.parse_args() diff --git a/server.py b/server.py index 73cb568..cd4f678 100644 --- a/server.py +++ b/server.py @@ -2,7 +2,7 @@ import logging import os -from datetime import datetime, timezone +from datetime import date, datetime, timedelta, timezone from importlib.metadata import PackageNotFoundError, version from pathlib import Path from typing import Any @@ -16,6 +16,7 @@ logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") DEFAULT_ARCHIVE_PATH = Path(os.getenv("KALSHI_ARCHIVE_PATH", DEFAULT_OUTPUT_PATH)) +DEFAULT_ARCHIVE_LOOKBACK_DAYS = 7 mcp = FastMCP("Kalshi Research", json_response=True) @@ -42,6 +43,12 @@ def _server_version() -> str: return "0.1.0" +def _default_archive_window() -> tuple[str, str]: + resolved_end = date.today() - timedelta(days=1) + resolved_start = resolved_end - timedelta(days=DEFAULT_ARCHIVE_LOOKBACK_DAYS - 1) + return resolved_start.isoformat(), resolved_end.isoformat() + + def _to_utc_iso8601(value: datetime) -> str: return value.astimezone(timezone.utc).isoformat().replace("+00:00", "Z") @@ -65,6 +72,7 @@ def server_info() -> dict[str, Any]: "version": _server_version(), "focus": "historical market discovery, archive download, and backtesting", "default_archive_path": str(DEFAULT_ARCHIVE_PATH), + "download_archive_default_window_days": DEFAULT_ARCHIVE_LOOKBACK_DAYS, "tools": [ "server_info", "download_archive", @@ -76,14 +84,19 @@ def server_info() -> dict[str, Any]: @mcp.tool() def download_archive( - start_date: str = "2021-06-30", + start_date: str = "", end_date: str = "", output_path: str = "", ) -> dict[str, Any]: """Download the public Kalshi archive CSV used for market discovery.""" + resolved_start_date = start_date.strip() or None + resolved_end_date = end_date.strip() or None + if resolved_start_date is None and resolved_end_date is None: + resolved_start_date, resolved_end_date = _default_archive_window() + summary = download_market_archive( - start_date=start_date, - end_date=end_date or None, + start_date=resolved_start_date, + end_date=resolved_end_date, output_path=output_path or str(DEFAULT_ARCHIVE_PATH), ) return summary diff --git a/tests/test_server_tools.py b/tests/test_server_tools.py index 3b60ac7..2ec4f08 100644 --- a/tests/test_server_tools.py +++ b/tests/test_server_tools.py @@ -1,4 +1,5 @@ import unittest +from datetime import date, timedelta from pathlib import Path from unittest.mock import patch from uuid import uuid4 @@ -10,6 +11,19 @@ class ServerToolTests(unittest.TestCase): + def test_download_archive_defaults_to_recent_safe_window(self): + expected_end = date.today() - timedelta(days=1) + expected_start = expected_end - timedelta(days=server.DEFAULT_ARCHIVE_LOOKBACK_DAYS - 1) + + with patch.object(server, "download_market_archive", return_value={"ok": True}) as mock_download: + server.download_archive() + + mock_download.assert_called_once_with( + start_date=expected_start.isoformat(), + end_date=expected_end.isoformat(), + output_path=str(server.DEFAULT_ARCHIVE_PATH), + ) + def test_run_backtest_rejects_inverted_time_window(self): with self.assertRaisesRegex(ValueError, "end_date must be later than start_date"): server.run_backtest( @@ -43,6 +57,41 @@ def test_download_market_archive_creates_parent_directory(self): output_path.unlink(missing_ok=True) output_path.parent.rmdir() + def test_search_settled_markets_prefers_latest_close_time_per_ticker(self): + temp_root = Path(__file__).resolve().parents[1] / ".tmp-tests" + temp_root.mkdir(exist_ok=True) + csv_path = temp_root / f"archive-{uuid4().hex}.csv" + csv_path.write_text( + "ticker_name,status,report_ticker,date\n" + "GDPW-2023-A2,finalized,GDPW,2025-03-07\n" + "GDPW-2023-A2,finalized,GDPW,2025-03-08\n", + encoding="utf-8", + ) + + try: + matches = server.search_settled_markets(search_term="GDPW", archive_path=str(csv_path)) + finally: + csv_path.unlink(missing_ok=True) + + self.assertEqual(1, len(matches)) + self.assertEqual("2025-03-08", matches[0]["close_time"]) + + def test_search_settled_markets_raises_for_invalid_archive_schema(self): + temp_root = Path(__file__).resolve().parents[1] / ".tmp-tests" + temp_root.mkdir(exist_ok=True) + csv_path = temp_root / f"archive-{uuid4().hex}.csv" + csv_path.write_text( + "ticker_name,report_ticker,date\n" + "GDPW-2023-A2,GDPW,2025-03-08\n", + encoding="utf-8", + ) + + try: + with self.assertRaisesRegex(ValueError, "missing required columns: status"): + server.search_settled_markets(search_term="GDPW", archive_path=str(csv_path)) + finally: + csv_path.unlink(missing_ok=True) + if __name__ == "__main__": unittest.main()