Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 11 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -471,7 +471,13 @@ To list the options for zvec, execute vectordbbench zvec --help

### Run Doris from command line

Doris supports ann index with type hnsw from version 4.0.x
Doris supports ANN indexes from version 4.0.x. VectorDBBench passes Doris index properties through with `--index-prop key=value`, so newly added Doris index properties normally do not require a VectorDBBench code change.

By default, VectorDBBench creates an HNSW index. Use `--index-prop index_type=<type>` to select another Doris ANN index type, and pass index-specific properties with additional `--index-prop` options. For example, IVF and IVF on disk indexes require `nlist`:

```shell
NUM_PER_BATCH=1000000 vectordbbench doris --http-port=8030 --port=9030 --db-name=vector_test --case-type=Performance768D1M --stream-load-rows-per-batch=500000 --index-prop index_type=ivf_on_disk --index-prop nlist=1024
```

```shell
NUM_PER_BATCH=1000000 vectordbbench doris --http-port=8030 --port=9030 --db-name=vector_test --case-type=Performance768D1M --stream-load-rows-per-batch=500000
Expand All @@ -482,11 +488,9 @@ Using flag `--session-var`, if you want to test doris with some customized sessi
NUM_PER_BATCH=1000000 vectordbbench doris --http-port=8030 --port=9030 --db-name=vector_test --case-type=Performance768D1M --stream-load-rows-per-batch=500000 --session-var enable_profile=True
```

Mote options:
More options:

```text
--m INTEGER hnsw m
--ef-construction INTEGER hnsw ef-construction
--username TEXT Username [default: root; required]
--password TEXT Password [default: ""]
--host TEXT Db host [default: 127.0.0.1; required]
Expand All @@ -496,9 +500,10 @@ Mote options:
--ssl / --no-ssl Enable or disable SSL, for Doris Serverless
SSL must be enabled [default: no-ssl]
--index-prop TEXT Extra index PROPERTY as key=value
(repeatable)
(repeatable or comma-separated, for example
index_type=ivf_on_disk,nlist=1024)
--session-var TEXT Session variable key=value applied to each
SQL session (repeatable)
SQL session (repeatable or comma-separated)
--stream-load-rows-per-batch INTEGER
Rows per single stream load request; default
uses NUM_PER_BATCH
Expand Down
8 changes: 1 addition & 7 deletions vectordb_bench/backend/clients/doris/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

from ....cli.cli import (
CommonTypedDict,
HNSWBaseTypedDict,
cli,
click_parameter_decorators_from_typed_dict,
run,
Expand Down Expand Up @@ -42,7 +41,7 @@ def _parse_kv_list(_ctx, _param, values): # noqa: ANN001
return parsed


class DorisTypedDict(CommonTypedDict, HNSWBaseTypedDict):
class DorisTypedDict(CommonTypedDict):
user_name: Annotated[
str,
click.option(
Expand Down Expand Up @@ -166,13 +165,8 @@ def Doris(
):
from .config import DorisCaseConfig, DorisConfig

# Merge explicit HNSW params into index properties using Doris naming
index_properties: dict[str, str] = {}
index_properties.update(parameters.get("index_prop", {}) or {})
if parameters.get("m") is not None:
index_properties.setdefault("max_degree", str(parameters["m"]))
if parameters.get("ef_construction") is not None:
index_properties.setdefault("ef_construction", str(parameters["ef_construction"]))

session_vars: dict[str, str] = parameters.get("session_var", {}) or {}

Expand Down
43 changes: 31 additions & 12 deletions vectordb_bench/backend/clients/doris/config.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
from typing import ClassVar

from pydantic import BaseModel, SecretStr, model_validator

Expand Down Expand Up @@ -49,6 +50,11 @@ class DorisCaseConfig(BaseModel, DBCaseConfig):
# Create table without ANN index
no_index: bool = False

REQUIRED_INDEX_PARAMS_BY_TYPE: ClassVar[dict[str, tuple[str, ...]]] = {
"ivf": ("nlist",),
"ivf_on_disk": ("nlist",),
}

def get_metric_fn(self) -> str:
if self.metric_type == MetricType.L2:
return "l2_distance_approximate"
Expand All @@ -65,30 +71,43 @@ def index_param(self) -> dict:
metric_type = self.get_metric_fn()
if metric_type.endswith("_approximate"):
metric_type = metric_type[: -len("_approximate")]
props = {"metric_type": metric_type}
props: dict[str, str] = {"metric_type": metric_type}

# Merge user provided index_properties first; convenience fields fill missing values below.
if self.index_properties:
props.update({str(k): str(v) for k, v in self.index_properties.items()})

if self.index_type is not None:
props.setdefault("index_type", self.index_type)
props["index_type"] = self.index_type
else:
props.setdefault("index_type", "hnsw")

# Merge optional HNSW params
props["index_type"] = str.lower(props["index_type"])
if props["index_type"] == "hnsw":
index_type = str(props["index_type"]).strip().lower()
if not index_type:
index_type = "hnsw"
props["index_type"] = index_type

# Map convenience fields when index type is known.
if index_type == "hnsw":
if self.m is not None:
props.setdefault("max_degree", str(self.m))
if self.ef_construction is not None:
props.setdefault("ef_construction", str(self.ef_construction))
elif props["index_type"] == "ivf":

if index_type in {"ivf", "ivf_on_disk"}:
if self.nlist is not None:
props.setdefault("nlist", str(self.nlist))
else:
msg = f"Unsupported index type: {props['index_type']}"
raise ValueError(msg)

# Merge user provided index_properties
if self.index_properties:
props.update(self.index_properties)
# Validate only known required params; unknown index types are passed through.
required_params = self.REQUIRED_INDEX_PARAMS_BY_TYPE.get(index_type, ())
for param in required_params:
value = props.get(param)
if value is None or not str(value).strip():
msg = f"{param} of ann index must be specified for {index_type} type"
raise ValueError(msg)

if index_type not in {"hnsw", "ivf", "ivf_on_disk"}:
log.info("Passing through unknown Doris index_type without local validation: %s", index_type)
return props

def search_param(self) -> dict:
Expand Down
53 changes: 42 additions & 11 deletions vectordb_bench/backend/clients/doris/doris.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,34 @@ def _build_index_options(self) -> IndexOptions | None:
else:
not_applied[key] = value

# SDK's to_ann_properties does not auto-emit nlist for ivf_on_disk.
# Ensure nlist is forwarded through ANN passthrough properties.
ann_props_to_apply: dict[str, str] = {str(k): str(v) for k, v in not_applied.items()}
if str(index_param.get("index_type", "")).lower() == "ivf_on_disk" and "nlist" in index_param:
ann_props_to_apply.setdefault("nlist", str(index_param["nlist"]))

if ann_props_to_apply:
applied_ann_key = None
for ann_key in ("ann_properties", "properties"):
if not hasattr(index_options, ann_key):
continue
try:
existing = getattr(index_options, ann_key, None)
merged_props = dict(existing) if isinstance(existing, dict) else {}
merged_props.update(ann_props_to_apply)
setattr(index_options, ann_key, merged_props)
applied[ann_key] = merged_props
applied_ann_key = ann_key
not_applied = {}
break
except Exception:
log.debug("Failed to set index_options.%s", ann_key, exc_info=True)
if applied_ann_key is None:
log.warning(
"Unable to attach ANN passthrough properties on IndexOptions: %s",
ann_props_to_apply,
)

log.info(
"Index options prepared: applied_props=%s not_applied_props=%s",
applied,
Expand Down Expand Up @@ -212,17 +240,20 @@ def _create_table_with_options(self, sample_data: pd.DataFrame, index_options: I
self.table.index_options.metric_type = "inner_product"
else:
self.table.index_options.metric_type = "l2_distance"
if (
index_options
and hasattr(index_options, "properties")
and isinstance(index_options.properties, dict)
):
for key, value in index_options.properties.items():
if hasattr(self.table.index_options, key):
try:
setattr(self.table.index_options, key, value)
except Exception:
log.debug("Skip setting index_options.%s at runtime", key)
if index_options:
runtime_props = {}
if hasattr(index_options, "ann_properties") and isinstance(index_options.ann_properties, dict):
runtime_props.update(index_options.ann_properties)
if hasattr(index_options, "properties") and isinstance(index_options.properties, dict):
runtime_props.update(index_options.properties)

if runtime_props:
for key, value in runtime_props.items():
if hasattr(self.table.index_options, key):
try:
setattr(self.table.index_options, key, value)
except Exception:
log.debug("Skip setting index_options.%s at runtime", key)
except Exception:
log.exception("Failed to adjust index options for table: %s", self.table_name)

Expand Down
Loading