Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ jobs:
python -m pip install --upgrade pip
python -m pip install . build

- name: Verify console script is installed
run: python -c "import shutil, sys; sys.exit(0 if shutil.which('kalshi-research-mcp') else 1)"

- name: Run tests
run: python -m unittest discover -s tests -v

Expand Down
18 changes: 14 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -94,13 +94,19 @@ For a direct GitHub install:
pip install "git+https://github.com/JleviEderer/KalshiMarketMaker.git@main"
```

### 4. Install MCP runtime dependencies manually if you are not using package install
### 4. Install MCP runtime dependencies manually only if you plan to run from source

```bash
pip install -r requirements.txt
```

That installs the MCP server runtime and research/backtest dependencies.
That installs the MCP server runtime and research/backtest dependencies, but it does not install the `kalshi-research-mcp` console script.

If you choose this source-only path, run the server with:

```bash
python server.py
```

If you also want the older plotting, notebook, or legacy trading scripts:

Expand Down Expand Up @@ -132,7 +138,8 @@ That starts the MCP server over `stdio`, which is the normal local setup for Cla

## Dependency Notes

- `requirements.txt` is the publishable MCP-first install path.
- `pip install .` is the canonical public install path.
- `requirements.txt` is a source-run fallback if you want to execute `python server.py` directly.
- `requirements-legacy.txt` adds optional dependencies used by old plotting or live/demo scripts.
- The original pinned `pandas==2.2.2` install path is not reliable on Windows Python 3.13 because it can fall back to a failing source build. The current version range in `requirements.txt` is chosen to allow binary wheels on modern Python versions.

Expand Down Expand Up @@ -219,6 +226,9 @@ Shows:

Downloads the public daily Kalshi market archive and writes a CSV locally.

If you call it with no dates, it defaults to the last 7 completed days (a window that ends yesterday).
To download the full archive, pass an explicit `start_date`; the earliest available day is `2021-06-30`.

Example parameters:

- `start_date`
Expand Down Expand Up @@ -285,7 +295,7 @@ That wording is better because:
If another person wants to use it, they need to:

1. clone the repo
2. run `pip install -r requirements.txt`
2. run `pip install .`
3. add `server.py` to their MCP client config
4. start using the tools through Claude Code

Expand Down
28 changes: 18 additions & 10 deletions backtest_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
from http_utils import build_retry_session
from mm import AbstractTradingAPI, AvellanedaMarketMaker

REQUIRED_ARCHIVE_COLUMNS = {"ticker_name", "status", "date"}


class KalshiMarketDataClient:
"""Public read-only client for market metadata and candlesticks."""
Expand Down Expand Up @@ -261,8 +263,13 @@ def find_settled_markets(self, file_path: str, search_term: str | None = None) -
self.logger.info("Searching for '%s' in %s", search_term, file_path)
market_info: dict[str, dict[str, Any]] = {}

try:
for chunk in pd.read_csv(file_path, chunksize=10_000, low_memory=False):
with pd.read_csv(file_path, chunksize=10_000, low_memory=False) as reader:
for chunk in reader:
missing_columns = REQUIRED_ARCHIVE_COLUMNS.difference(chunk.columns)
if missing_columns:
missing = ", ".join(sorted(missing_columns))
raise ValueError(f"Archive file is missing required columns: {missing}")

settled_chunk = chunk[chunk["status"].isin(["settled", "closed", "finalized"])].copy()
if search_term:
settled_chunk = settled_chunk[
Expand All @@ -271,20 +278,21 @@ def find_settled_markets(self, file_path: str, search_term: str | None = None) -

for _, row in settled_chunk.iterrows():
ticker = row["ticker_name"]
if ticker in market_info:
continue
market_info[ticker] = {
candidate = {
"ticker": ticker,
"title": row["ticker_name"],
"series_ticker": row.get("series_ticker") or row.get("report_ticker"),
"report_ticker": row.get("report_ticker"),
"close_time": row.get("date"),
}

return list(market_info.values())
except Exception as exc:
self.logger.error("Failed to read or parse %s: %s", file_path, exc)
return []
existing = market_info.get(ticker)
if existing is None or (candidate["close_time"] or "") >= (existing.get("close_time") or ""):
if existing:
candidate["series_ticker"] = candidate["series_ticker"] or existing.get("series_ticker")
candidate["report_ticker"] = candidate["report_ticker"] or existing.get("report_ticker")
market_info[ticker] = candidate

return list(market_info.values())

def fetch_historical_data(
self,
Expand Down
26 changes: 18 additions & 8 deletions download_market_archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,25 @@

from http_utils import build_retry_session

DEFAULT_START_DATE = "2021-06-30"
EARLIEST_ARCHIVE_DATE = "2021-06-30"
DEFAULT_LOOKBACK_DAYS = 7
DEFAULT_OUTPUT_PATH = "kalshi_all_markets_archive.csv"
PUBLIC_ARCHIVE_URL = "https://kalshi-public-docs.s3.amazonaws.com/reporting/market_data_{day}.json"


def build_date_range(start_date: str = DEFAULT_START_DATE, end_date: str | None = None) -> list[str]:
start = pd.to_datetime(start_date).date()
def resolve_date_window(start_date: str | None = None, end_date: str | None = None) -> tuple[date, date]:
end = pd.to_datetime(end_date).date() if end_date else date.today() - timedelta(days=1)
if start_date:
start = pd.to_datetime(start_date).date()
else:
start = end - timedelta(days=DEFAULT_LOOKBACK_DAYS - 1)
if end < start:
raise ValueError("end_date must be on or after start_date")
return start, end


def build_date_range(start_date: str | None = None, end_date: str | None = None) -> list[str]:
    """Return every day in the resolved window as ``YYYY-MM-DD`` strings."""
    first_day, last_day = resolve_date_window(start_date, end_date)
    days = pd.date_range(first_day, last_day)
    return days.strftime("%Y-%m-%d").tolist()


Expand All @@ -34,11 +43,12 @@ def fetch_market_file(day_str: str, timeout: int = 30, session: requests.Session


def download_market_archive(
start_date: str = DEFAULT_START_DATE,
start_date: str | None = None,
end_date: str | None = None,
output_path: str = DEFAULT_OUTPUT_PATH,
) -> dict[str, Any]:
date_range = build_date_range(start_date, end_date)
resolved_start, resolved_end = resolve_date_window(start_date, end_date)
date_range = pd.date_range(resolved_start, resolved_end).strftime("%Y-%m-%d").tolist()
frames: list[pd.DataFrame] = []
downloaded_days = 0
session = build_retry_session()
Expand Down Expand Up @@ -68,14 +78,14 @@ def download_market_archive(
"rows": int(len(archive)),
"days_requested": len(date_range),
"days_downloaded": downloaded_days,
"start_date": start_date,
"end_date": end_date or date_range[-1],
"start_date": resolved_start.isoformat(),
"end_date": resolved_end.isoformat(),
}


def main() -> None:
parser = argparse.ArgumentParser(description="Download Kalshi public market archive data")
parser.add_argument("--start-date", default=DEFAULT_START_DATE, help="Inclusive start date in YYYY-MM-DD format")
parser.add_argument("--start-date", default=None, help="Inclusive start date in YYYY-MM-DD format")
parser.add_argument("--end-date", default=None, help="Inclusive end date in YYYY-MM-DD format")
parser.add_argument("--output-path", default=DEFAULT_OUTPUT_PATH, help="Where to write the consolidated CSV")
args = parser.parse_args()
Expand Down
21 changes: 17 additions & 4 deletions server.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import logging
import os
from datetime import datetime, timezone
from datetime import date, datetime, timedelta, timezone
from importlib.metadata import PackageNotFoundError, version
from pathlib import Path
from typing import Any
Expand All @@ -16,6 +16,7 @@
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")

DEFAULT_ARCHIVE_PATH = Path(os.getenv("KALSHI_ARCHIVE_PATH", DEFAULT_OUTPUT_PATH))
DEFAULT_ARCHIVE_LOOKBACK_DAYS = 7
mcp = FastMCP("Kalshi Research", json_response=True)


Expand All @@ -42,6 +43,12 @@ def _server_version() -> str:
return "0.1.0"


def _default_archive_window() -> tuple[str, str]:
    """Return ISO-formatted ``(start, end)`` dates for the default download window.

    The window ends yesterday and spans ``DEFAULT_ARCHIVE_LOOKBACK_DAYS`` days,
    both endpoints included.
    """
    yesterday = date.today() - timedelta(days=1)
    # An inclusive N-day window reaches back N - 1 days from its last day.
    span = timedelta(days=DEFAULT_ARCHIVE_LOOKBACK_DAYS - 1)
    first_day = yesterday - span
    return first_day.isoformat(), yesterday.isoformat()


def _to_utc_iso8601(value: datetime) -> str:
    """Convert ``value`` to UTC and format it as ISO 8601 with a ``Z`` suffix."""
    utc_text = value.astimezone(timezone.utc).isoformat()
    return utc_text.replace("+00:00", "Z")

Expand All @@ -65,6 +72,7 @@ def server_info() -> dict[str, Any]:
"version": _server_version(),
"focus": "historical market discovery, archive download, and backtesting",
"default_archive_path": str(DEFAULT_ARCHIVE_PATH),
"download_archive_default_window_days": DEFAULT_ARCHIVE_LOOKBACK_DAYS,
"tools": [
"server_info",
"download_archive",
Expand All @@ -76,14 +84,19 @@ def server_info() -> dict[str, Any]:

@mcp.tool()
def download_archive(
start_date: str = "2021-06-30",
start_date: str = "",
end_date: str = "",
output_path: str = "",
) -> dict[str, Any]:
"""Download the public Kalshi archive CSV used for market discovery."""
resolved_start_date = start_date.strip() or None
resolved_end_date = end_date.strip() or None
if resolved_start_date is None and resolved_end_date is None:
resolved_start_date, resolved_end_date = _default_archive_window()

summary = download_market_archive(
start_date=start_date,
end_date=end_date or None,
start_date=resolved_start_date,
end_date=resolved_end_date,
output_path=output_path or str(DEFAULT_ARCHIVE_PATH),
)
return summary
Expand Down
49 changes: 49 additions & 0 deletions tests/test_server_tools.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import unittest
from datetime import date, timedelta
from pathlib import Path
from unittest.mock import patch
from uuid import uuid4
Expand All @@ -10,6 +11,19 @@


class ServerToolTests(unittest.TestCase):
def test_download_archive_defaults_to_recent_safe_window(self):
    """download_archive() with no dates forwards the default lookback window."""
    yesterday = date.today() - timedelta(days=1)
    lookback = timedelta(days=server.DEFAULT_ARCHIVE_LOOKBACK_DAYS - 1)
    expected_kwargs = {
        "start_date": (yesterday - lookback).isoformat(),
        "end_date": yesterday.isoformat(),
        "output_path": str(server.DEFAULT_ARCHIVE_PATH),
    }

    # Stub the downloader so the test never touches the network or disk.
    stub = patch.object(server, "download_market_archive", return_value={"ok": True})
    with stub as mock_download:
        server.download_archive()

    mock_download.assert_called_once_with(**expected_kwargs)

def test_run_backtest_rejects_inverted_time_window(self):
with self.assertRaisesRegex(ValueError, "end_date must be later than start_date"):
server.run_backtest(
Expand Down Expand Up @@ -43,6 +57,41 @@ def test_download_market_archive_creates_parent_directory(self):
output_path.unlink(missing_ok=True)
output_path.parent.rmdir()

def test_search_settled_markets_prefers_latest_close_time_per_ticker(self):
    """Duplicate ticker rows collapse to one match carrying the latest date."""
    scratch_dir = Path(__file__).resolve().parents[1] / ".tmp-tests"
    scratch_dir.mkdir(exist_ok=True)
    archive_file = scratch_dir / f"archive-{uuid4().hex}.csv"
    # Same ticker on two consecutive days; only the later day should survive.
    archive_rows = (
        "ticker_name,status,report_ticker,date\n"
        "GDPW-2023-A2,finalized,GDPW,2025-03-07\n"
        "GDPW-2023-A2,finalized,GDPW,2025-03-08\n"
    )
    archive_file.write_text(archive_rows, encoding="utf-8")

    try:
        results = server.search_settled_markets(search_term="GDPW", archive_path=str(archive_file))
    finally:
        archive_file.unlink(missing_ok=True)

    self.assertEqual(1, len(results))
    self.assertEqual("2025-03-08", results[0]["close_time"])

def test_search_settled_markets_raises_for_invalid_archive_schema(self):
    """An archive CSV without a ``status`` column is rejected with ValueError."""
    scratch_dir = Path(__file__).resolve().parents[1] / ".tmp-tests"
    scratch_dir.mkdir(exist_ok=True)
    archive_file = scratch_dir / f"archive-{uuid4().hex}.csv"
    # Header deliberately omits the required "status" column.
    archive_rows = (
        "ticker_name,report_ticker,date\n"
        "GDPW-2023-A2,GDPW,2025-03-08\n"
    )
    archive_file.write_text(archive_rows, encoding="utf-8")

    try:
        expect_schema_error = self.assertRaisesRegex(ValueError, "missing required columns: status")
        with expect_schema_error:
            server.search_settled_markets(search_term="GDPW", archive_path=str(archive_file))
    finally:
        archive_file.unlink(missing_ok=True)


if __name__ == "__main__":
unittest.main()
Loading