Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/full-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ jobs:
strategy:
matrix:
os: [Windows, macOS] # exclude Ubuntu as it is available in pr-tests
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14"]
include:
- os: Windows
image: windows-2022
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/latest-deps-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ jobs:
strategy:
matrix:
os: [Ubuntu]
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14"]
include:
- os: Ubuntu
image: ubuntu-latest
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/pr-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
strategy:
matrix:
os: [Ubuntu]
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14"]
include:
- os: Ubuntu
image: ubuntu-latest
Expand Down
30 changes: 23 additions & 7 deletions nemoguardrails/embeddings/basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,26 +17,34 @@
import logging
from typing import Any, Dict, List, Optional, Union

from annoy import AnnoyIndex

from nemoguardrails.embeddings.cache import cache_embeddings
from nemoguardrails.embeddings.index import EmbeddingsIndex, IndexItem
from nemoguardrails.embeddings.providers import EmbeddingModel, init_embedding_model
from nemoguardrails.rails.llm.config import EmbeddingsCacheConfig

log = logging.getLogger(__name__)

try:
from annoy import AnnoyIndex
except ImportError:
AnnoyIndex = None
log.info(
"annoy is not installed; falling back to numpy-based nearest-neighbour "
"search. Install annoy for faster index lookups on large knowledge bases."
)
Comment thread
cluster2600 marked this conversation as resolved.
Outdated


class BasicEmbeddingsIndex(EmbeddingsIndex):
"""Basic implementation of an embeddings index.

It uses the `sentence-transformers/all-MiniLM-L6-v2` model to compute embeddings.
Annoy is employed for efficient nearest-neighbor search.
Annoy is employed for efficient nearest-neighbor search when available;
otherwise a numpy-based brute-force fallback is used.

Attributes:
embedding_model (str): The model for computing embeddings.
embedding_engine (str): The engine for computing embeddings.
index (AnnoyIndex): The current embedding index.
index: The current embedding index (AnnoyIndex or NumpyAnnoyIndex).
embedding_size (int): The size of the embeddings.
cache_config (EmbeddingsCacheConfig): The cache configuration.
embeddings (List[List[float]]): The computed embeddings.
Expand All @@ -48,7 +56,6 @@ class BasicEmbeddingsIndex(EmbeddingsIndex):
embedding_model: str
embedding_engine: str
embedding_params: Dict[str, Any]
index: AnnoyIndex
embedding_size: int
cache_config: EmbeddingsCacheConfig
embeddings: List[List[float]]
Expand Down Expand Up @@ -189,8 +196,17 @@ async def add_items(self, items: List[IndexItem]):
self._embedding_size = len(self._embeddings[0])

async def build(self):
    """Build the embeddings index from the accumulated embeddings.

    Prefers Annoy when the package is importable; otherwise uses the
    numpy-based brute-force fallback index, which is sufficient for the
    index sizes typically seen in guardrails configurations.
    """
    dimension = len(self._embeddings[0])

    if AnnoyIndex is None:
        # Lazy import so the numpy backend is only loaded when needed.
        from nemoguardrails.embeddings.numpy_index import NumpyAnnoyIndex

        self._index = NumpyAnnoyIndex(dimension, "angular")
    else:
        self._index = AnnoyIndex(dimension, "angular")

    for item_id, embedding in enumerate(self._embeddings):
        self._index.add_item(item_id, embedding)
    self._index.build(10)
Expand Down
145 changes: 145 additions & 0 deletions nemoguardrails/embeddings/numpy_index.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Numpy-based drop-in replacement for annoy.AnnoyIndex.

This module provides a pure-numpy alternative to the Annoy library for
nearest-neighbour search over embedding vectors. It is used as a fallback
when annoy is not installed (e.g. on Python 3.13+ where the annoy C++
extension triggers a SIGILL).

For the typical guardrails index sizes (tens to hundreds of items) the
brute-force cosine search is more than fast enough.
"""

from typing import List, Optional, Tuple

import numpy as np


class NumpyAnnoyIndex:
    """A numpy-backed nearest-neighbour index that exposes the same API surface
    as ``annoy.AnnoyIndex`` for the subset used by NeMo Guardrails.

    Supported operations:
      * ``add_item(i, vector)``
      * ``build(n_trees)`` (no-op -- kept for interface compatibility)
      * ``get_nns_by_vector(vector, n, include_distances=False)``
      * ``save(path)`` / ``load(path)``

    The metric is *angular* distance, matching Annoy's default for text
    embeddings. Angular distance is defined as
    ``sqrt(2 * (1 - cos_sim))`` so that it is ``0`` for identical vectors
    and ``2`` for diametrically opposed ones.
    """

    def __init__(self, embedding_size: int, metric: str = "angular"):
        """Create an empty index for vectors of size *embedding_size*.

        Args:
            embedding_size: Dimensionality of the stored vectors.
            metric: Kept for API compatibility with Annoy; only "angular"
                semantics are implemented.
        """
        self._embedding_size = embedding_size
        self._metric = metric
        # Sparse storage during the build phase (id -> vector).
        self._vectors_dict: dict = {}
        # Dense numpy matrix after build().
        self._vectors: Optional[np.ndarray] = None
        self._built = False

    # ------------------------------------------------------------------
    # Build interface
    # ------------------------------------------------------------------

    def add_item(self, i: int, vector) -> None:
        """Add a single vector with integer id *i*."""
        self._vectors_dict[i] = np.asarray(vector, dtype=np.float32)

    def build(self, n_trees: int = 10) -> None:
        """Finalise the index.

        The *n_trees* parameter is ignored (kept for API compatibility
        with Annoy).
        """
        if not self._vectors_dict:
            self._vectors = np.empty((0, self._embedding_size), dtype=np.float32)
        else:
            # Ids may be sparse; allocate rows up to the largest id and
            # leave any missing rows as zero vectors.
            max_id = max(self._vectors_dict.keys())
            self._vectors = np.zeros(
                (max_id + 1, self._embedding_size), dtype=np.float32
            )
            for idx, vec in self._vectors_dict.items():
                self._vectors[idx] = vec
        self._built = True

    # ------------------------------------------------------------------
    # Query interface
    # ------------------------------------------------------------------

    def get_nns_by_vector(self, vector, n: int, include_distances: bool = False):
        """Return the *n* nearest neighbours of *vector*.

        Args:
            vector: The query vector (any sequence of floats of the
                configured embedding size).
            n: Maximum number of neighbours to return.
            include_distances: When ``True`` the return value is a tuple
                ``(ids, distances)``; otherwise just the list of ids.

        Returns:
            A list of integer ids sorted by increasing angular distance,
            or ``(ids, distances)`` when *include_distances* is set.
        """
        if self._vectors is None or len(self._vectors) == 0:
            return ([], []) if include_distances else []

        query = np.asarray(vector, dtype=np.float32)

        # Cosine similarity via normalised dot product.
        norms = np.linalg.norm(self._vectors, axis=1, keepdims=True)
        # Avoid division by zero for zero-vectors.
        safe_norms = np.where(norms == 0, 1.0, norms)
        normed = self._vectors / safe_norms

        query_norm = np.linalg.norm(query)
        if query_norm == 0:
            query_normed = query
        else:
            query_normed = query / query_norm

        cos_sim = normed @ query_normed  # shape: (num_items,)

        # Angular distance (matches Annoy's definition).
        cos_sim_clipped = np.clip(cos_sim, -1.0, 1.0)
        distances = np.sqrt(2.0 * (1.0 - cos_sim_clipped))

        # Get top-n indices (lowest distance first).
        # NOTE: np.argpartition requires kth <= len(a) - 1, so clamp the
        # partition index; when n == len(distances) the slice below still
        # takes every element, so no results are lost.
        n = min(n, len(distances))
        kth = min(n, len(distances) - 1)
        top_indices = np.argpartition(distances, kth)[:n]
        top_indices = top_indices[np.argsort(distances[top_indices])]

        ids = top_indices.tolist()
        if include_distances:
            return ids, distances[top_indices].tolist()
        return ids

    # ------------------------------------------------------------------
    # Persistence
    # ------------------------------------------------------------------

    def save(self, path: str) -> None:
        """Save the index to disk as a ``.npy`` file.

        If the caller supplies a path ending in ``.ann`` (the annoy
        convention), we silently swap the extension to ``.npy`` so that
        both backends can coexist in the same cache directory.
        """
        if path.endswith(".ann"):
            path = path[:-4] + ".npy"
        if self._vectors is not None:
            np.save(path, self._vectors)

    def load(self, path: str) -> None:
        """Load a previously saved index from disk."""
        if path.endswith(".ann"):
            path = path[:-4] + ".npy"
        self._vectors = np.load(path).astype(np.float32)
        self._built = True
35 changes: 28 additions & 7 deletions nemoguardrails/kb/kb.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,14 +125,28 @@ async def build(self):
cache_file = os.path.join(CACHE_FOLDER, f"{hash_value}.ann")
embedding_size_file = os.path.join(CACHE_FOLDER, f"{hash_value}.esize")

# Determine which index backend to use
try:
from annoy import AnnoyIndex

_annoy_available = True
except ImportError:
_annoy_available = False

# When using the numpy fallback the cache file extension is .npy
# instead of .ann; check for both so that caches from either
# backend are honoured.
npy_cache_file = cache_file[:-4] + ".npy" if cache_file.endswith(".ann") else cache_file + ".npy"

has_ann_cache = os.path.exists(cache_file) and _annoy_available
has_npy_cache = os.path.exists(npy_cache_file)

# If we have already computed this before, we use it
if (
self.config.embedding_search_provider.name == "default"
and os.path.exists(cache_file)
and (has_ann_cache or has_npy_cache)
and os.path.exists(embedding_size_file)
Comment thread
cluster2600 marked this conversation as resolved.
):
from annoy import AnnoyIndex

from nemoguardrails.embeddings.basic import BasicEmbeddingsIndex

log.info(cache_file)
Comment thread
cluster2600 marked this conversation as resolved.
Outdated
Expand All @@ -146,8 +160,14 @@ async def build(self):
with open(embedding_size_file, "r") as f:
embedding_size = int(f.read())

ann_index = AnnoyIndex(embedding_size, "angular")
ann_index.load(cache_file)
if has_ann_cache and _annoy_available:
ann_index = AnnoyIndex(embedding_size, "angular")
ann_index.load(cache_file)
Comment thread
cluster2600 marked this conversation as resolved.
Outdated
else:
from nemoguardrails.embeddings.numpy_index import NumpyAnnoyIndex

ann_index = NumpyAnnoyIndex(embedding_size, "angular")
ann_index.load(npy_cache_file)

self.index.embeddings_index = ann_index

Expand All @@ -159,8 +179,9 @@ async def build(self):
await self.index.add_items(index_items)
await self.index.build()

# For the default Embedding Search provider, which uses annoy, we also
# persist the index after it's computed.
# For the default Embedding Search provider, which uses annoy
# (or the numpy fallback), we also persist the index after
# it is computed.
if self.config.embedding_search_provider.name == "default":
from nemoguardrails.embeddings.basic import BasicEmbeddingsIndex

Expand Down
Loading
Loading