JleviEderer · JleviEderer · Mar 11, 2026 · Mar 11, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -28,6 +28,9 @@ jobs:
           python -m pip install --upgrade pip
           python -m pip install . build
 
+      - name: Verify console script is installed
+        run: python -c "import shutil, sys; sys.exit(0 if shutil.which('kalshi-research-mcp') else 1)"
+
       - name: Run tests
         run: python -m unittest discover -s tests -v
 

diff --git a/README.md b/README.md
@@ -94,13 +94,19 @@ For a direct GitHub install:
 pip install "git+https://github.com/JleviEderer/KalshiMarketMaker.git@main"
 ```
 
-### 4. Install MCP runtime dependencies manually if you are not using package install
+### 4. Install MCP runtime dependencies manually only if you plan to run from source
 
 ```bash
 pip install -r requirements.txt
 ```
 
-That installs the MCP server runtime and research/backtest dependencies.
+That installs the MCP server runtime and research/backtest dependencies, but it does not install the `kalshi-research-mcp` console script.
+
+If you choose this source-only path, run the server with:
+
+```bash
+python server.py
+```
 
 If you also want the older plotting, notebook, or legacy trading scripts:
 
@@ -132,7 +138,8 @@ That starts the MCP server over `stdio`, which is the normal local setup for Cla
 
 ## Dependency Notes
 
-- `requirements.txt` is the publishable MCP-first install path.
+- `pip install .` is the canonical public install path.
+- `requirements.txt` is a source-run fallback if you want to execute `python server.py` directly.
 - `requirements-legacy.txt` adds optional dependencies used by old plotting or live/demo scripts.
 - The original pinned `pandas==2.2.2` install path is not reliable on Windows Python 3.13 because it can fall back to a failing source build. The current version range in `requirements.txt` is chosen to allow binary wheels on modern Python versions.
 
@@ -219,6 +226,9 @@ Shows:
 
 Downloads the public daily Kalshi market archive and writes a CSV locally.
 
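+If you omit `start_date`, it defaults to the 7 days ending on `end_date`, or to the last 7 completed days (ending yesterday) when `end_date` is also omitted.
+For the full archive, pass an explicit `start_date` of `2021-06-30`, the earliest archive date.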
+
 Example parameters:
 
 - `start_date`
@@ -285,7 +295,7 @@ That wording is better because:
 If another person wants to use it, they need to:
 
 1. clone the repo
-2. run `pip install -r requirements.txt`
+2. run `pip install .`
 3. add `server.py` to their MCP client config
 4. start using the tools through Claude Code
 

diff --git a/backtest_engine.py b/backtest_engine.py
@@ -12,6 +12,8 @@
 from http_utils import build_retry_session
 from mm import AbstractTradingAPI, AvellanedaMarketMaker
 
+REQUIRED_ARCHIVE_COLUMNS = {"ticker_name", "status", "date"}
+
 
 class KalshiMarketDataClient:
     """Public read-only client for market metadata and candlesticks."""
@@ -261,8 +263,13 @@ def find_settled_markets(self, file_path: str, search_term: str | None = None) -
         self.logger.info("Searching for '%s' in %s", search_term, file_path)
         market_info: dict[str, dict[str, Any]] = {}
 
-        try:
-            for chunk in pd.read_csv(file_path, chunksize=10_000, low_memory=False):
+        with pd.read_csv(file_path, chunksize=10_000, low_memory=False) as reader:
+            for chunk in reader:
+                missing_columns = REQUIRED_ARCHIVE_COLUMNS.difference(chunk.columns)
+                if missing_columns:
+                    missing = ", ".join(sorted(missing_columns))
+                    raise ValueError(f"Archive file is missing required columns: {missing}")
+
                 settled_chunk = chunk[chunk["status"].isin(["settled", "closed", "finalized"])].copy()
                 if search_term:
                     settled_chunk = settled_chunk[
@@ -271,20 +278,21 @@ def find_settled_markets(self, file_path: str, search_term: str | None = None) -
 
                 for _, row in settled_chunk.iterrows():
                     ticker = row["ticker_name"]
-                    if ticker in market_info:
-                        continue
-                    market_info[ticker] = {
+                    candidate = {
                         "ticker": ticker,
                         "title": row["ticker_name"],
                         "series_ticker": row.get("series_ticker") or row.get("report_ticker"),
                         "report_ticker": row.get("report_ticker"),
                         "close_time": row.get("date"),
                     }
-
-            return list(market_info.values())
-        except Exception as exc:
-            self.logger.error("Failed to read or parse %s: %s", file_path, exc)
-            return []
+                    existing = market_info.get(ticker)
+                    if existing is None or (candidate["close_time"] or "") >= (existing.get("close_time") or ""):
+                        if existing:
+                            candidate["series_ticker"] = candidate["series_ticker"] or existing.get("series_ticker")
+                            candidate["report_ticker"] = candidate["report_ticker"] or existing.get("report_ticker")
+                        market_info[ticker] = candidate
+
+        return list(market_info.values())
 
     def fetch_historical_data(
         self,

diff --git a/download_market_archive.py b/download_market_archive.py
@@ -10,16 +10,25 @@
 
 from http_utils import build_retry_session
 
-DEFAULT_START_DATE = "2021-06-30"
+EARLIEST_ARCHIVE_DATE = "2021-06-30"
+DEFAULT_LOOKBACK_DAYS = 7
 DEFAULT_OUTPUT_PATH = "kalshi_all_markets_archive.csv"
 PUBLIC_ARCHIVE_URL = "https://kalshi-public-docs.s3.amazonaws.com/reporting/market_data_{day}.json"
 
 
-def build_date_range(start_date: str = DEFAULT_START_DATE, end_date: str | None = None) -> list[str]:
-    start = pd.to_datetime(start_date).date()
+def resolve_date_window(start_date: str | None = None, end_date: str | None = None) -> tuple[date, date]:
     end = pd.to_datetime(end_date).date() if end_date else date.today() - timedelta(days=1)
+    if start_date:
+        start = pd.to_datetime(start_date).date()
+    else:
+        start = end - timedelta(days=DEFAULT_LOOKBACK_DAYS - 1)
     if end < start:
         raise ValueError("end_date must be on or after start_date")
+    return start, end
+
+
+def build_date_range(start_date: str | None = None, end_date: str | None = None) -> list[str]:
+    start, end = resolve_date_window(start_date, end_date)
     return pd.date_range(start, end).strftime("%Y-%m-%d").tolist()
 
 
@@ -34,11 +43,12 @@ def fetch_market_file(day_str: str, timeout: int = 30, session: requests.Session
 
 
 def download_market_archive(
-    start_date: str = DEFAULT_START_DATE,
+    start_date: str | None = None,
     end_date: str | None = None,
     output_path: str = DEFAULT_OUTPUT_PATH,
 ) -> dict[str, Any]:
-    date_range = build_date_range(start_date, end_date)
+    resolved_start, resolved_end = resolve_date_window(start_date, end_date)
+    date_range = pd.date_range(resolved_start, resolved_end).strftime("%Y-%m-%d").tolist()
     frames: list[pd.DataFrame] = []
     downloaded_days = 0
     session = build_retry_session()
@@ -68,14 +78,14 @@ def download_market_archive(
         "rows": int(len(archive)),
         "days_requested": len(date_range),
         "days_downloaded": downloaded_days,
-        "start_date": start_date,
-        "end_date": end_date or date_range[-1],
+        "start_date": resolved_start.isoformat(),
+        "end_date": resolved_end.isoformat(),
     }
 
 
 def main() -> None:
     parser = argparse.ArgumentParser(description="Download Kalshi public market archive data")
-    parser.add_argument("--start-date", default=DEFAULT_START_DATE, help="Inclusive start date in YYYY-MM-DD format")
+    parser.add_argument("--start-date", default=None, help="Inclusive start date in YYYY-MM-DD format")
     parser.add_argument("--end-date", default=None, help="Inclusive end date in YYYY-MM-DD format")
     parser.add_argument("--output-path", default=DEFAULT_OUTPUT_PATH, help="Where to write the consolidated CSV")
     args = parser.parse_args()

diff --git a/server.py b/server.py
@@ -2,7 +2,7 @@
 
 import logging
 import os
-from datetime import datetime, timezone
+from datetime import date, datetime, timedelta, timezone
 from importlib.metadata import PackageNotFoundError, version
 from pathlib import Path
 from typing import Any
@@ -16,6 +16,7 @@
 logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
 
 DEFAULT_ARCHIVE_PATH = Path(os.getenv("KALSHI_ARCHIVE_PATH", DEFAULT_OUTPUT_PATH))
+DEFAULT_ARCHIVE_LOOKBACK_DAYS = 7
 mcp = FastMCP("Kalshi Research", json_response=True)
 
 
@@ -42,6 +43,12 @@ def _server_version() -> str:
         return "0.1.0"
 
 
+def _default_archive_window() -> tuple[str, str]:
+    resolved_end = date.today() - timedelta(days=1)
+    resolved_start = resolved_end - timedelta(days=DEFAULT_ARCHIVE_LOOKBACK_DAYS - 1)
+    return resolved_start.isoformat(), resolved_end.isoformat()
+
+
 def _to_utc_iso8601(value: datetime) -> str:
     return value.astimezone(timezone.utc).isoformat().replace("+00:00", "Z")
 
@@ -65,6 +72,7 @@ def server_info() -> dict[str, Any]:
         "version": _server_version(),
         "focus": "historical market discovery, archive download, and backtesting",
         "default_archive_path": str(DEFAULT_ARCHIVE_PATH),
+        "download_archive_default_window_days": DEFAULT_ARCHIVE_LOOKBACK_DAYS,
         "tools": [
             "server_info",
             "download_archive",
@@ -76,14 +84,19 @@ def server_info() -> dict[str, Any]:
 
 @mcp.tool()
 def download_archive(
-    start_date: str = "2021-06-30",
+    start_date: str = "",
     end_date: str = "",
     output_path: str = "",
 ) -> dict[str, Any]:
     """Download the public Kalshi archive CSV used for market discovery."""
+    resolved_start_date = start_date.strip() or None
+    resolved_end_date = end_date.strip() or None
+    if resolved_start_date is None and resolved_end_date is None:
+        resolved_start_date, resolved_end_date = _default_archive_window()
+
     summary = download_market_archive(
-        start_date=start_date,
-        end_date=end_date or None,
+        start_date=resolved_start_date,
+        end_date=resolved_end_date,
         output_path=output_path or str(DEFAULT_ARCHIVE_PATH),
     )
     return summary

diff --git a/tests/test_server_tools.py b/tests/test_server_tools.py
@@ -1,4 +1,5 @@
 import unittest
+from datetime import date, timedelta
 from pathlib import Path
 from unittest.mock import patch
 from uuid import uuid4
@@ -10,6 +11,19 @@
 
 
 class ServerToolTests(unittest.TestCase):
+    def test_download_archive_defaults_to_recent_safe_window(self):
+        expected_end = date.today() - timedelta(days=1)
+        expected_start = expected_end - timedelta(days=server.DEFAULT_ARCHIVE_LOOKBACK_DAYS - 1)
+
+        with patch.object(server, "download_market_archive", return_value={"ok": True}) as mock_download:
+            server.download_archive()
+
+        mock_download.assert_called_once_with(
+            start_date=expected_start.isoformat(),
+            end_date=expected_end.isoformat(),
+            output_path=str(server.DEFAULT_ARCHIVE_PATH),
+        )
+
     def test_run_backtest_rejects_inverted_time_window(self):
         with self.assertRaisesRegex(ValueError, "end_date must be later than start_date"):
             server.run_backtest(
@@ -43,6 +57,41 @@ def test_download_market_archive_creates_parent_directory(self):
         output_path.unlink(missing_ok=True)
         output_path.parent.rmdir()
 
+    def test_search_settled_markets_prefers_latest_close_time_per_ticker(self):
+        temp_root = Path(__file__).resolve().parents[1] / ".tmp-tests"
+        temp_root.mkdir(exist_ok=True)
+        csv_path = temp_root / f"archive-{uuid4().hex}.csv"
+        csv_path.write_text(
+            "ticker_name,status,report_ticker,date\n"
+            "GDPW-2023-A2,finalized,GDPW,2025-03-07\n"
+            "GDPW-2023-A2,finalized,GDPW,2025-03-08\n",
+            encoding="utf-8",
+        )
+
+        try:
+            matches = server.search_settled_markets(search_term="GDPW", archive_path=str(csv_path))
+        finally:
+            csv_path.unlink(missing_ok=True)
+
+        self.assertEqual(1, len(matches))
+        self.assertEqual("2025-03-08", matches[0]["close_time"])
+
+    def test_search_settled_markets_raises_for_invalid_archive_schema(self):
+        temp_root = Path(__file__).resolve().parents[1] / ".tmp-tests"
+        temp_root.mkdir(exist_ok=True)
+        csv_path = temp_root / f"archive-{uuid4().hex}.csv"
+        csv_path.write_text(
+            "ticker_name,report_ticker,date\n"
+            "GDPW-2023-A2,GDPW,2025-03-08\n",
+            encoding="utf-8",
+        )
+
+        try:
+            with self.assertRaisesRegex(ValueError, "missing required columns: status"):
+                server.search_settled_markets(search_term="GDPW", archive_path=str(csv_path))
+        finally:
+            csv_path.unlink(missing_ok=True)
+
 
 if __name__ == "__main__":
     unittest.main()