diff --git a/vectordb_bench/backend/runner/read_write_runner.py b/vectordb_bench/backend/runner/read_write_runner.py index d3d1df2fa..9319b62f1 100644 --- a/vectordb_bench/backend/runner/read_write_runner.py +++ b/vectordb_bench/backend/runner/read_write_runner.py @@ -98,10 +98,10 @@ def run_search(self, perc: int): log.info("Search after write - Serial search start") test_time = round(time.perf_counter(), 4) res, ssearch_dur = self.serial_search_runner.run() - recall, ndcg, p99_latency, p95_latency = res + recall, ndcg, p99_latency, p95_latency, avg_latency = res log.info( f"Search after write - Serial search - recall={recall}, ndcg={ndcg}, " - f"p99={p99_latency}, p95={p95_latency}, dur={ssearch_dur:.4f}", + f"p99={p99_latency}, p95={p95_latency}, avg={avg_latency}, dur={ssearch_dur:.4f}", ) log.info( f"Search after wirte - Conc search start, dur for each conc={self.read_dur_after_write}", @@ -125,6 +125,7 @@ def run_search(self, perc: int): ndcg, p99_latency, p95_latency, + avg_latency, conc_failed_rate, conc_num_list, conc_qps_list, @@ -181,14 +182,15 @@ def run_read_write(self) -> Metric: m.st_ndcg_list = [d[4] for d in r] m.st_serial_latency_p99_list = [d[5] for d in r] m.st_serial_latency_p95_list = [d[6] for d in r] - m.st_conc_failed_rate_list = [d[7] for d in r] + m.st_serial_latency_avg_list = [d[7] for d in r] + m.st_conc_failed_rate_list = [d[8] for d in r] # Extract concurrent latency data - m.st_conc_num_list_list = [d[8] for d in r] - m.st_conc_qps_list_list = [d[9] for d in r] - m.st_conc_latency_p99_list_list = [d[10] for d in r] - m.st_conc_latency_p95_list_list = [d[11] for d in r] - m.st_conc_latency_avg_list_list = [d[12] for d in r] + m.st_conc_num_list_list = [d[9] for d in r] + m.st_conc_qps_list_list = [d[10] for d in r] + m.st_conc_latency_p99_list_list = [d[11] for d in r] + m.st_conc_latency_p95_list_list = [d[12] for d in r] + m.st_conc_latency_avg_list_list = [d[13] for d in r] except Exception as e: log.warning(f"Read and write error: {e}") @@ -262,10 +264,10 @@ def wait_next_target(start: int, target_batch: int) -> bool: log.info(f"[{target_batch}/{total_batch}] Serial search - {perc}% start") res, ssearch_dur = self.serial_search_runner.run() ssearch_dur = round(ssearch_dur, 4) - recall, ndcg, p99_latency, p95_latency = res + recall, ndcg, p99_latency, p95_latency, avg_latency = res log.info( f"[{target_batch}/{total_batch}] Serial search - {perc}% done, " - f"recall={recall}, ndcg={ndcg}, p99={p99_latency}, p95={p95_latency}, dur={ssearch_dur}" + f"recall={recall}, ndcg={ndcg}, p99={p99_latency}, p95={p95_latency}, avg={avg_latency}, dur={ssearch_dur}" ) each_conc_search_dur = self.get_each_conc_search_dur( @@ -299,6 +301,7 @@ def wait_next_target(start: int, target_batch: int) -> bool: ndcg, p99_latency, p95_latency, + avg_latency, conc_failed_rate, conc_num_list, conc_qps_list, diff --git a/vectordb_bench/backend/runner/serial_runner.py b/vectordb_bench/backend/runner/serial_runner.py index 300553a4e..247e8cb31 100644 --- a/vectordb_bench/backend/runner/serial_runner.py +++ b/vectordb_bench/backend/runner/serial_runner.py @@ -241,7 +241,7 @@ def _get_db_search_res(self, emb: list[float], retry_idx: int = 0) -> list[int]: return results - def search(self, args: tuple[list, list[list[int]]]) -> tuple[float, float, float, float]: + def search(self, args: tuple[list, list[list[int]]]) -> tuple[float, float, float, float, float]: log.info(f"{mp.current_process().name:14} start search the entire test_data to get recall and latency") with self.db.init(): self.db.prepare_filter(self.filters) @@ -292,7 +292,7 @@ def search(self, args: tuple[list, list[list[int]]]) -> tuple[float, float, floa f"p99={p99}, " f"p95={p95}" ) - return (avg_recall, avg_ndcg, p99, p95) + return (avg_recall, avg_ndcg, p99, p95, avg_latency) def _run_in_subprocess(self) -> tuple[float, float, float, float]: with concurrent.futures.ProcessPoolExecutor(max_workers=1) as executor: @@ -300,7 +300,7 @@ def _run_in_subprocess(self) -> tuple[float, float, float, float]: return future.result() @utils.time_it - def run(self) -> tuple[float, float, float, float]: + def run(self) -> tuple[float, float, float, float, float]: log.info(f"{mp.current_process().name:14} start serial search") if self.test_data is None: msg = "empty test_data" diff --git a/vectordb_bench/backend/task_runner.py b/vectordb_bench/backend/task_runner.py index 8224a0415..79086ae29 100644 --- a/vectordb_bench/backend/task_runner.py +++ b/vectordb_bench/backend/task_runner.py @@ -216,7 +216,7 @@ def _run_perf_case(self, drop_old: bool = True) -> Metric: ) = search_results if TaskStage.SEARCH_SERIAL in self.config.stages: search_results = self._serial_search() - m.recall, m.ndcg, m.serial_latency_p99, m.serial_latency_p95 = search_results + m.recall, m.ndcg, m.serial_latency_p99, m.serial_latency_p95, m.serial_latency_avg = search_results except Exception as e: log.warning(f"Failed to run performance case, reason = {e}") @@ -256,7 +256,7 @@ def _load_train_data(self): finally: runner = None - def _serial_search(self) -> tuple[float, float, float, float]: + def _serial_search(self) -> tuple[float, float, float, float, float]: """Performance serial tests, search the entire test data once, calculate the recall, serial_latency_p99, serial_latency_p95 diff --git a/vectordb_bench/metric.py b/vectordb_bench/metric.py index 3634b2114..2501bea61 100644 --- a/vectordb_bench/metric.py +++ b/vectordb_bench/metric.py @@ -22,6 +22,7 @@ class Metric: qps: float = 0.0 serial_latency_p99: float = 0.0 serial_latency_p95: float = 0.0 + serial_latency_avg: float = 0.0 recall: float = 0.0 ndcg: float = 0.0 conc_num_list: list[int] = field(default_factory=list) @@ -39,6 +40,7 @@ class Metric: st_ndcg_list: list[float] = field(default_factory=list) st_serial_latency_p99_list: list[float] = field(default_factory=list) st_serial_latency_p95_list: list[float] = field(default_factory=list) + st_serial_latency_avg_list: list[float] = field(default_factory=list) st_conc_failed_rate_list: list[float] = field(default_factory=list) # for streaming cases - concurrent latency data per stage diff --git a/vectordb_bench/models.py b/vectordb_bench/models.py index 27be85b6e..e41005588 100644 --- a/vectordb_bench/models.py +++ b/vectordb_bench/models.py @@ -365,6 +365,11 @@ def read_file(cls, full_path: pathlib.Path, trans_unit: bool = False) -> Self: case_result["metrics"]["serial_latency_p99"] = ( cur_latency * 1000 if cur_latency > 0 else cur_latency ) + + cur_latency = case_result["metrics"]["serial_latency_avg"] + case_result["metrics"]["serial_latency_avg"] = ( + cur_latency * 1000 if cur_latency > 0 else cur_latency + ) # Handle P95 latency for backward compatibility with existing result files if "serial_latency_p95" in case_result["metrics"]: @@ -438,6 +443,7 @@ def append_return(x: any, y: any): "qps", "latency(p99)", "latency(p95)", + "latency(avg)", "recall", "max_load_count", "label", @@ -461,6 +467,7 @@ def append_return(x: any, y: any): f.metrics.qps, f.metrics.serial_latency_p99, f.metrics.serial_latency_p95, + f.metrics.serial_latency_avg, f.metrics.recall, f.metrics.max_load_count, f.label.value,