Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions api/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ class SearchResponse(BaseModel):
retrieved_at: datetime
cached: bool = False
request_id: str | None = None
stage_timings: dict | None = None # per-stage timing in ms


class CompareResponse(BaseModel):
Expand Down
1 change: 1 addition & 0 deletions api/services.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,7 @@ def search(self, request: SearchRequest) -> SearchResponse:
related_provisions=response_data.get("related_provisions", []),
response_format=request.format,
retrieved_at=datetime.now(UTC),
stage_timings=getattr(self.orchestrator, "_last_timing", None),
)

def _assess_confidence(
Expand Down
1 change: 1 addition & 0 deletions benchmark/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

1 change: 1 addition & 0 deletions benchmark/adapters/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

90 changes: 90 additions & 0 deletions benchmark/adapters/hector_adapter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
#!/usr/bin/env python3
"""
HECTOR Benchmark Adapter

Translates HECTOR-specific API responses into the format expected
by the benchmark CLI. Used for profiling stage timing breakdowns.

Usage:
python benchmark/adapters/hector_adapter.py --host localhost --port 8000 --query "What is Section 302 IPC?"
"""

import argparse
import json
import os
import sys

Check failure on line 15 in benchmark/adapters/hector_adapter.py

View workflow job for this annotation

GitHub Actions / lint

ruff (F401)

benchmark/adapters/hector_adapter.py:15:8: F401 `sys` imported but unused help: Remove unused import: `sys`
import time

import requests


def profile_query(host: str, port: int, query: str, api_key: str | None = None) -> dict:
    """Send one search query to the HECTOR API and summarise the response.

    Args:
        host: Hostname of the API server.
        port: TCP port of the API server.
        query: Query text placed in the request payload.
        api_key: Optional key; when truthy it is sent as a Bearer token in
            the ``Authorization`` header.

    Returns:
        A dict containing the query, the client-side round-trip time in
        milliseconds (``total_http_ms``), the server-reported
        ``stage_timings`` (always a dict, possibly empty), result/item/
        citation counts, and the ``route``, ``confidence_level`` and
        ``cached`` fields copied from the response body.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection failure or timeout.
    """
    url = f"http://{host}:{port}/v1/search"
    headers = {"Content-Type": "application/json"}
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"

    payload = {
        "query": query,
        "top_k": 10,
        "response_format": "detailed",
    }

    # Time only the HTTP round trip; status checking and JSON decoding are
    # deliberately kept outside the measured window.
    t0 = time.perf_counter()
    resp = requests.post(url, json=payload, headers=headers, timeout=60)
    total_ms = (time.perf_counter() - t0) * 1000
    resp.raise_for_status()
    data = resp.json()

    # ``dict.get(key, default)`` only uses the default when the key is
    # missing. The response schema allows ``stage_timings`` to be null, so an
    # explicit ``or`` fallback is needed to guarantee a dict (and, for the
    # list fields, to keep ``len()`` from failing on ``None``).
    stage_timings = data.get("stage_timings") or {}
    items = data.get("items") or []
    citations = data.get("citations") or []

    return {
        "query": query,
        "total_http_ms": round(total_ms, 1),
        "stage_timings": stage_timings,
        "num_results": data.get("total_results", 0),
        "num_items": len(items),
        "num_citations": len(citations),
        "route": data.get("route", ""),
        "confidence_level": data.get("confidence_level", ""),
        "cached": data.get("cached", False),
    }


def main():
    """Profile a single query from the command line and print the outcome.

    ``--host``, ``--port`` and ``--api-key`` default to the
    ``HECTOR_API_HOST``, ``HECTOR_API_PORT`` and ``HECTOR_API_KEY``
    environment variables; ``--query`` is required. The profiling result is
    printed as indented JSON. When the result carries stage timings, a
    per-stage breakdown and the slowest stage are printed as well.
    """
    parser = argparse.ArgumentParser(description="HECTOR Benchmark Adapter")
    parser.add_argument("--host", default=os.getenv("HECTOR_API_HOST", "localhost"))
    # The default is passed as a string so argparse runs it through
    # ``type=int`` itself: a malformed HECTOR_API_PORT then yields a normal
    # usage error instead of a ValueError while the parser is being built.
    parser.add_argument("--port", type=int, default=os.getenv("HECTOR_API_PORT", "8000"))
    parser.add_argument("--query", required=True, help="Query to profile")
    parser.add_argument("--api-key", default=os.getenv("HECTOR_API_KEY"))

    args = parser.parse_args()

    result = profile_query(args.host, args.port, args.query, args.api_key)
    print(json.dumps(result, indent=2))

    # Nothing more to report when the server sent no (or null) stage timings.
    timings = result.get("stage_timings") or {}
    if not timings:
        return

    # Single source of truth mapping each stage name to its response key, so
    # the printed breakdown and the bottleneck search cannot drift apart.
    stage_keys = {
        "routing": "route_ms",
        "normalization": "normalize_ms",
        "generation": "generate_ms",
        "verification": "verify_ms",
    }
    # ``or 0`` treats a missing or null timing as zero so formatting with
    # ``:.1f`` cannot fail on ``None``.
    stages = {name: timings.get(key) or 0 for name, key in stage_keys.items()}
    total = timings.get("total_ms") or 0

    print("\nStage Timing Breakdown:")
    print(f" Routing: {stages['routing']:.1f}ms")
    print(f" Normalization: {stages['normalization']:.1f}ms")
    print(f" Generation: {stages['generation']:.1f}ms")
    print(f" Verification: {stages['verification']:.1f}ms")
    print(f" Total: {total:.1f}ms")

    # The bottleneck is the stage with the largest reported time; ties go to
    # the first stage in declaration order.
    bottleneck = max(stages, key=stages.get)
    print(f"\n Bottleneck: {bottleneck} ({stages[bottleneck]:.1f}ms)")


# Run the CLI only when this file is executed as a script, so importing the
# module (e.g. to reuse profile_query) has no side effects.
if __name__ == "__main__":
    main()
30 changes: 30 additions & 0 deletions benchmark/configs/quick_profile.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# HECTOR Quick Profile Configuration
# Profile-only mode — no load testing. ~30s runtime.
# Use for fast iteration on retrieval/reranker tuning.

target:
url: "http://localhost:8000"
timeout_s: 30

rag:
endpoint: "/v1/search"
method: "POST"
collection_names: ["indian_law_bns"]
top_k: 10
response_format: "detailed"

profiling:
enabled: true
warmup_requests: 3
profile_requests: 10
capture_stage_timings: true

aiperf:
enabled: false

input:
file: "benchmark/queries.jsonl"

output:
dir: "benchmark/results"
experiment_name: "quick_profile"
34 changes: 34 additions & 0 deletions benchmark/configs/single_run.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# HECTOR Single Run Configuration
# One concurrency level with profiling + optional load test. ~2 min.
# Use for regression checks and baseline measurements.

target:
url: "http://localhost:8000"
timeout_s: 60

rag:
endpoint: "/v1/search"
method: "POST"
collection_names: ["indian_law_bns"]
top_k: 10
response_format: "detailed"

profiling:
enabled: true
warmup_requests: 5
profile_requests: 20
capture_stage_timings: true

aiperf:
enabled: true
concurrency: 5
iterations: 3
duration_s: 60
sleep_between_points_s: 0

input:
file: "benchmark/queries.jsonl"

output:
dir: "benchmark/results"
experiment_name: "single_run"
34 changes: 34 additions & 0 deletions benchmark/configs/sweep.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# HECTOR Sweep Configuration
# Multi-axis sweep across concurrency × top_k. For bottleneck analysis.
# Each axis accepts a list — Cartesian product of all combinations.

target:
url: "http://localhost:8000"
timeout_s: 60

rag:
endpoint: "/v1/search"
method: "POST"
collection_names: ["indian_law_bns"]
top_k: [5, 10, 20]
response_format: "detailed"

profiling:
enabled: true
warmup_requests: 3
profile_requests: 10
capture_stage_timings: true

aiperf:
enabled: true
concurrency: [1, 5, 10, 20]
iterations: 3
duration_s: 30
sleep_between_points_s: 5

input:
file: "benchmark/queries.jsonl"

output:
dir: "benchmark/results"
experiment_name: "sweep"
25 changes: 25 additions & 0 deletions benchmark/queries.jsonl
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
{"query": "What is the punishment for murder under Section 302 IPC?"}
{"query": "Which BNS section replaces IPC Section 302?"}
{"query": "Explain the difference between culpable homicide and murder"}
{"query": "What are the rights of an arrested person?"}
{"query": "What is Section 376 IPC and its BNS equivalent?"}
{"query": "Explain the concept of defamation under Indian law"}
{"query": "What are the essentials of a valid contract?"}
{"query": "IPC Section 420 corresponds to which BNS section?"}
{"query": "What is the doctrine of basic structure?"}
{"query": "What are the grounds for divorce under Hindu Marriage Act?"}
{"query": "What is Section 144 CrPC and when can it be imposed?"}
{"query": "Explain the concept of precedent in Indian legal system"}
{"query": "What are the essential elements of crime?"}
{"query": "What is the punishment for robbery under IPC?"}
{"query": "What are the fundamental rights under Part III of the Constitution?"}
{"query": "What is the concept of estoppel under Indian Evidence Act?"}
{"query": "What are the ingredients of Section 304B IPC dowry death?"}
{"query": "Explain the right to privacy under Indian Constitution"}
{"query": "What is the punishment for criminal conspiracy under IPC?"}
{"query": "What is the difference between Section 300 and Section 304 IPC?"}
{"query": "What are the provisions for anticipatory bail under CrPC?"}
{"query": "What is the role of the Attorney General of India?"}
{"query": "Explain the concept of bail in criminal cases"}
{"query": "What is the difference between IPC and BNS?"}
{"query": "What are the provisions related to anticipatory bail?"}
Loading
Loading