Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 32 additions & 7 deletions nemoguardrails/embeddings/basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,26 +17,43 @@
import logging
from typing import Any, Dict, List, Optional, Union

from annoy import AnnoyIndex

from nemoguardrails.embeddings.cache import cache_embeddings
from nemoguardrails.embeddings.index import EmbeddingsIndex, IndexItem
from nemoguardrails.embeddings.providers import EmbeddingModel, init_embedding_model
from nemoguardrails.rails.llm.config import EmbeddingsCacheConfig

log = logging.getLogger(__name__)

# Optional dependency: import annoy if available; otherwise fall back to the
# pure-numpy index backend (selected later by checking `AnnoyIndex is None`).
try:
    from annoy import AnnoyIndex
except ImportError:
    # Sentinel value: build() uses this to pick NumpyAnnoyIndex instead.
    AnnoyIndex = None
    import sys  # only needed here, to choose the right warning message

    if sys.version_info >= (3, 13):
        # On 3.13+ the fallback is expected, so the message explains why
        # rather than suggesting an install that would not help.
        log.warning(
            "annoy is not supported on Python 3.13+ (SIGILL in the C++ extension); "
            "using numpy-based nearest-neighbour search instead."
        )
    else:
        # On older Pythons annoy is installable, so nudge the user toward it.
        log.warning(
            "annoy is not installed; falling back to numpy-based nearest-neighbour "
            "search. Install annoy (or use the [annoy] extra) for faster index "
            "lookups on large knowledge bases."
        )


class BasicEmbeddingsIndex(EmbeddingsIndex):
"""Basic implementation of an embeddings index.

It uses the `sentence-transformers/all-MiniLM-L6-v2` model to compute embeddings.
Annoy is employed for efficient nearest-neighbor search.
Annoy is employed for efficient nearest-neighbor search when available;
otherwise a numpy-based brute-force fallback is used.

Attributes:
embedding_model (str): The model for computing embeddings.
embedding_engine (str): The engine for computing embeddings.
index (AnnoyIndex): The current embedding index.
index: The current embedding index (AnnoyIndex or NumpyAnnoyIndex).
embedding_size (int): The size of the embeddings.
cache_config (EmbeddingsCacheConfig): The cache configuration.
embeddings (List[List[float]]): The computed embeddings.
Expand All @@ -48,7 +65,6 @@ class BasicEmbeddingsIndex(EmbeddingsIndex):
embedding_model: str
embedding_engine: str
embedding_params: Dict[str, Any]
index: AnnoyIndex
embedding_size: int
cache_config: EmbeddingsCacheConfig
embeddings: List[List[float]]
Expand Down Expand Up @@ -189,8 +205,17 @@ async def add_items(self, items: List[IndexItem]):
self._embedding_size = len(self._embeddings[0])

async def build(self):
"""Builds the Annoy index."""
self._index = AnnoyIndex(len(self._embeddings[0]), "angular")
"""Builds the embeddings index.

Uses Annoy when available, otherwise falls back to a numpy-based
brute-force index (sufficient for typical guardrails index sizes).
"""
if AnnoyIndex is not None:
self._index = AnnoyIndex(len(self._embeddings[0]), "angular")
else:
from nemoguardrails.embeddings.numpy_index import NumpyAnnoyIndex

self._index = NumpyAnnoyIndex(len(self._embeddings[0]), "angular")
for i in range(len(self._embeddings)):
self._index.add_item(i, self._embeddings[i])
self._index.build(10)
Expand Down
164 changes: 164 additions & 0 deletions nemoguardrails/embeddings/numpy_index.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Numpy-based drop-in replacement for annoy.AnnoyIndex.

This module provides a pure-numpy alternative to the Annoy library for
nearest-neighbour search over embedding vectors. It is used as a fallback
when annoy is not installed (e.g. on Python 3.13+ where the annoy C++
extension triggers a SIGILL).

For the typical guardrails index sizes (tens to hundreds of items) the
brute-force cosine search is more than fast enough.
"""

from typing import List, Optional, Tuple, Union

import numpy as np


class NumpyAnnoyIndex:
    """A numpy-backed nearest-neighbour index that exposes the same API surface
    as ``annoy.AnnoyIndex`` for the subset used by NeMo Guardrails.

    Supported operations:
        * ``add_item(i, vector)``
        * ``build(n_trees)`` (no-op -- kept for interface compatibility)
        * ``get_nns_by_vector(vector, n, include_distances=False)``
        * ``save(path)`` / ``load(path)``

    The metric is *angular* distance, matching Annoy's default for text
    embeddings. Angular distance is defined as
    ``sqrt(2 * (1 - cos_sim))`` so that it is ``0`` for identical vectors
    and ``2`` for diametrically opposed ones.
    """

    def __init__(self, embedding_size: int, metric: str = "angular"):
        """Create an empty index.

        Args:
            embedding_size: Dimensionality of the vectors to be indexed.
            metric: Distance metric; only ``"angular"`` is supported. The
                parameter exists solely for Annoy API compatibility.

        Raises:
            ValueError: If *metric* is anything other than ``"angular"``.
        """
        if metric != "angular":
            raise ValueError(
                f"NumpyAnnoyIndex only supports metric='angular', got {metric!r}"
            )
        self._embedding_size = embedding_size
        self._metric = metric
        # Sparse storage during the build phase (id -> vector).
        self._vectors_dict: dict = {}
        # Dense numpy matrix after build(); rows are indexed by item id.
        self._vectors: Optional[np.ndarray] = None
        self._built = False

    # ------------------------------------------------------------------
    # Build interface
    # ------------------------------------------------------------------

    def add_item(self, i: int, vector) -> None:
        """Add a single vector with integer id *i* (overwrites a prior *i*)."""
        self._vectors_dict[i] = np.asarray(vector, dtype=np.float32)

    def build(self, n_trees: int = 10) -> None:
        """Finalise the index. The *n_trees* parameter is ignored (kept
        for API compatibility with Annoy).

        Ids are treated as row indices: gaps in the id sequence become
        zero rows, mirroring how the ids map to positions in the matrix.
        """
        if not self._vectors_dict:
            self._vectors = np.empty((0, self._embedding_size), dtype=np.float32)
        else:
            max_id = max(self._vectors_dict.keys())
            self._vectors = np.zeros(
                (max_id + 1, self._embedding_size), dtype=np.float32
            )
            for idx, vec in self._vectors_dict.items():
                self._vectors[idx] = vec
        self._vectors_dict = {}  # release per-item dict memory now stored in _vectors
        self._built = True

    # ------------------------------------------------------------------
    # Query interface
    # ------------------------------------------------------------------

    def get_nns_by_vector(
        self, vector, n: int, include_distances: bool = False
    ) -> Union[List[int], Tuple[List[int], List[float]]]:
        """Return the *n* nearest neighbours of *vector*.

        Args:
            vector: Query vector of length ``embedding_size``.
            n: Maximum number of neighbours to return (capped at the
                number of indexed items).
            include_distances: When ``True`` the return value is a tuple
                ``(ids, distances)``; otherwise just ``ids``.

        Returns:
            Neighbour ids sorted by increasing angular distance, optionally
            paired with the distances. Empty when the index has no items
            (or has not been built yet).
        """
        if self._vectors is None or len(self._vectors) == 0:
            return ([], []) if include_distances else []

        query = np.asarray(vector, dtype=np.float32)

        # Cosine similarity via normalised dot product.
        norms = np.linalg.norm(self._vectors, axis=1, keepdims=True)
        # Avoid division by zero for zero-vectors (e.g. gap rows from build()).
        safe_norms = np.where(norms == 0, 1.0, norms)
        normed = self._vectors / safe_norms

        query_norm = np.linalg.norm(query)
        if query_norm == 0:
            query_normed = query
        else:
            query_normed = query / query_norm

        cos_sim = normed @ query_normed  # shape: (num_items,)

        # Angular distance (matches Annoy's definition). Clip guards against
        # float round-off pushing |cos_sim| marginally above 1.
        cos_sim_clipped = np.clip(cos_sim, -1.0, 1.0)
        distances = np.sqrt(2.0 * (1.0 - cos_sim_clipped))

        # Get top-n indices (lowest distance first).
        n = min(n, len(distances))
        if n == len(distances):
            # All items requested -- just argsort the whole array.
            top_indices = np.argsort(distances)[:n]
        else:
            # argpartition is O(num_items); sort only the selected prefix.
            top_indices = np.argpartition(distances, n)[:n]
            top_indices = top_indices[np.argsort(distances[top_indices])]

        ids = top_indices.tolist()
        if include_distances:
            return ids, distances[top_indices].tolist()
        return ids

    # ------------------------------------------------------------------
    # Persistence
    # ------------------------------------------------------------------

    def save(self, path: str) -> None:
        """Save the index to disk as a ``.npy`` file.

        If the caller supplies a path ending in ``.ann`` (the annoy
        convention), we silently swap the extension to ``.npy`` so that
        both backends can coexist in the same cache directory.

        Note: ``numpy.save`` automatically appends ``.npy`` when the
        path does not already end with that suffix, so a suffix-less
        path ``"foo"`` produces the file ``"foo.npy"`` on disk.

        Raises:
            RuntimeError: If called before ``build()``.
        """
        if not self._built:
            raise RuntimeError(
                "NumpyAnnoyIndex.save() called before build(); call build() first."
            )
        if path.endswith(".ann"):
            path = path[:-4] + ".npy"
        if self._vectors is not None:
            np.save(path, self._vectors)

    def load(self, path: str) -> None:
        """Load a previously saved index from disk.

        Accepts ``.ann`` paths (converted to ``.npy``, mirroring ``save()``),
        explicit ``.npy`` paths, and suffix-less paths.
        """
        if path.endswith(".ann"):
            path = path[:-4] + ".npy"
        elif not path.endswith(".npy"):
            # FIX: save() relies on numpy.save, which appends ".npy" when the
            # suffix is missing -- so save("foo") writes "foo.npy". Without
            # this branch, load("foo") would look for the non-existent "foo"
            # and raise FileNotFoundError, breaking save/load round-trips.
            path = path + ".npy"
        self._vectors_dict = {}  # discard any pre-build state
        self._vectors = np.load(path).astype(np.float32)
        self._built = True
42 changes: 34 additions & 8 deletions nemoguardrails/kb/kb.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,17 +125,31 @@ async def build(self):
cache_file = os.path.join(CACHE_FOLDER, f"{hash_value}.ann")
embedding_size_file = os.path.join(CACHE_FOLDER, f"{hash_value}.esize")

# Determine which index backend to use
try:
from annoy import AnnoyIndex

_annoy_available = True
except ImportError:
_annoy_available = False

# When using the numpy fallback the cache file extension is .npy
# instead of .ann; check for both so that caches from either
# backend are honoured.
npy_cache_file = cache_file[:-4] + ".npy"

has_ann_cache = os.path.exists(cache_file) and _annoy_available
has_npy_cache = os.path.exists(npy_cache_file)

# If we have already computed this before, we use it
if (
self.config.embedding_search_provider.name == "default"
and os.path.exists(cache_file)
and (has_ann_cache or has_npy_cache)
and os.path.exists(embedding_size_file)
):
from annoy import AnnoyIndex

from nemoguardrails.embeddings.basic import BasicEmbeddingsIndex

log.info(cache_file)
log.info(cache_file if has_ann_cache else npy_cache_file)
self.index = cast(
BasicEmbeddingsIndex,
self._get_embeddings_search_instance(
Expand All @@ -146,8 +160,19 @@ async def build(self):
with open(embedding_size_file, "r") as f:
embedding_size = int(f.read())

ann_index = AnnoyIndex(embedding_size, "angular")
ann_index.load(cache_file)
if has_ann_cache:
ann_index = AnnoyIndex(embedding_size, "angular")
ann_index.load(cache_file)
else:
# NOTE: if annoy is installed but only a .npy cache exists
# (e.g. first run was on Python 3.13 without annoy, then the
# user installed annoy), we load via the numpy backend rather
# than regenerating an .ann cache. The cache will be rebuilt
# automatically the next time the KB content hash changes.
from nemoguardrails.embeddings.numpy_index import NumpyAnnoyIndex

ann_index = NumpyAnnoyIndex(embedding_size, "angular")
ann_index.load(npy_cache_file)

self.index.embeddings_index = ann_index

Expand All @@ -159,8 +184,9 @@ async def build(self):
await self.index.add_items(index_items)
await self.index.build()

# For the default Embedding Search provider, which uses annoy, we also
# persist the index after it's computed.
# For the default Embedding Search provider, which uses annoy
# (or the numpy fallback), we also persist the index after
# it is computed.
if self.config.embedding_search_provider.name == "default":
from nemoguardrails.embeddings.basic import BasicEmbeddingsIndex

Expand Down
Loading
Loading