Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/full-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ jobs:
strategy:
matrix:
os: [Windows, macOS] # exclude Ubuntu as it is available in pr-tests
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14"]
include:
- os: Windows
image: windows-2022
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/latest-deps-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ jobs:
strategy:
matrix:
os: [Ubuntu]
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14"]
include:
- os: Ubuntu
image: ubuntu-latest
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/pr-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
strategy:
matrix:
os: [Ubuntu]
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14"]
include:
- os: Ubuntu
image: ubuntu-latest
Expand Down
30 changes: 23 additions & 7 deletions nemoguardrails/embeddings/basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,26 +17,34 @@
import logging
from typing import Any, Dict, List, Optional, Union

from annoy import AnnoyIndex

from nemoguardrails.embeddings.cache import cache_embeddings
from nemoguardrails.embeddings.index import EmbeddingsIndex, IndexItem
from nemoguardrails.embeddings.providers import EmbeddingModel, init_embedding_model
from nemoguardrails.rails.llm.config import EmbeddingsCacheConfig

log = logging.getLogger(__name__)

try:
from annoy import AnnoyIndex
except ImportError:
AnnoyIndex = None
log.info(
"annoy is not installed; falling back to numpy-based nearest-neighbour "
"search. Install annoy for faster index lookups on large knowledge bases."
)
Comment thread
cluster2600 marked this conversation as resolved.
Outdated


class BasicEmbeddingsIndex(EmbeddingsIndex):
"""Basic implementation of an embeddings index.

It uses the `sentence-transformers/all-MiniLM-L6-v2` model to compute embeddings.
Annoy is employed for efficient nearest-neighbor search.
Annoy is employed for efficient nearest-neighbor search when available;
otherwise a numpy-based brute-force fallback is used.

Attributes:
embedding_model (str): The model for computing embeddings.
embedding_engine (str): The engine for computing embeddings.
index (AnnoyIndex): The current embedding index.
index: The current embedding index (AnnoyIndex or NumpyAnnoyIndex).
embedding_size (int): The size of the embeddings.
cache_config (EmbeddingsCacheConfig): The cache configuration.
embeddings (List[List[float]]): The computed embeddings.
Expand All @@ -48,7 +56,6 @@ class BasicEmbeddingsIndex(EmbeddingsIndex):
embedding_model: str
embedding_engine: str
embedding_params: Dict[str, Any]
index: AnnoyIndex
embedding_size: int
cache_config: EmbeddingsCacheConfig
embeddings: List[List[float]]
Expand Down Expand Up @@ -189,8 +196,17 @@ async def add_items(self, items: List[IndexItem]):
self._embedding_size = len(self._embeddings[0])

async def build(self):
    """Build the embeddings index from the accumulated embeddings.

    Prefers Annoy when the package is importable; otherwise uses the
    numpy-based brute-force fallback index, which is sufficient for the
    index sizes typically seen in guardrails configurations.
    """
    dimension = len(self._embeddings[0])

    if AnnoyIndex is None:
        # Lazy import so the numpy backend is only loaded when needed.
        from nemoguardrails.embeddings.numpy_index import NumpyAnnoyIndex

        self._index = NumpyAnnoyIndex(dimension, "angular")
    else:
        self._index = AnnoyIndex(dimension, "angular")

    for item_id, embedding in enumerate(self._embeddings):
        self._index.add_item(item_id, embedding)
    self._index.build(10)
Expand Down
145 changes: 145 additions & 0 deletions nemoguardrails/embeddings/numpy_index.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Numpy-based drop-in replacement for annoy.AnnoyIndex.

This module provides a pure-numpy alternative to the Annoy library for
nearest-neighbour search over embedding vectors. It is used as a fallback
when annoy is not installed (e.g. on Python 3.13+ where the annoy C++
extension triggers a SIGILL).

For the typical guardrails index sizes (tens to hundreds of items) the
brute-force cosine search is more than fast enough.
"""

from typing import List, Optional, Tuple

import numpy as np


class NumpyAnnoyIndex:
    """A numpy-backed nearest-neighbour index that exposes the same API surface
    as ``annoy.AnnoyIndex`` for the subset used by NeMo Guardrails.

    Supported operations:
      * ``add_item(i, vector)``
      * ``build(n_trees)`` (no-op -- kept for interface compatibility)
      * ``get_nns_by_vector(vector, n, include_distances=False)``
      * ``save(path)`` / ``load(path)``

    The metric is *angular* distance, matching Annoy's default for text
    embeddings. Angular distance is defined as
    ``sqrt(2 * (1 - cos_sim))`` so that it is ``0`` for identical vectors
    and ``2`` for diametrically opposed ones.
    """

    def __init__(self, embedding_size: int, metric: str = "angular"):
        """Create an empty index for vectors of size *embedding_size*.

        Args:
            embedding_size: Dimensionality of the stored vectors.
            metric: Kept for API compatibility with Annoy; only "angular"
                semantics are implemented.
        """
        self._embedding_size = embedding_size
        self._metric = metric
        # Sparse storage during the build phase (id -> vector).
        self._vectors_dict: dict = {}
        # Dense numpy matrix after build().
        self._vectors: Optional[np.ndarray] = None
        self._built = False

    # ------------------------------------------------------------------
    # Build interface
    # ------------------------------------------------------------------

    def add_item(self, i: int, vector) -> None:
        """Add a single vector with integer id *i*."""
        self._vectors_dict[i] = np.asarray(vector, dtype=np.float32)

    def build(self, n_trees: int = 10) -> None:
        """Finalise the index.

        The *n_trees* parameter is ignored (kept for API compatibility
        with Annoy).
        """
        if not self._vectors_dict:
            self._vectors = np.empty((0, self._embedding_size), dtype=np.float32)
        else:
            # Ids may be sparse; allocate rows up to the largest id and
            # leave any missing rows as zero vectors.
            max_id = max(self._vectors_dict.keys())
            self._vectors = np.zeros(
                (max_id + 1, self._embedding_size), dtype=np.float32
            )
            for idx, vec in self._vectors_dict.items():
                self._vectors[idx] = vec
        self._built = True

    # ------------------------------------------------------------------
    # Query interface
    # ------------------------------------------------------------------

    def get_nns_by_vector(self, vector, n: int, include_distances: bool = False):
        """Return the *n* nearest neighbours of *vector*.

        Args:
            vector: The query vector (any sequence of floats of the
                configured embedding size).
            n: Maximum number of neighbours to return.
            include_distances: When ``True`` the return value is a tuple
                ``(ids, distances)``; otherwise just the list of ids.

        Returns:
            A list of integer ids sorted by increasing angular distance,
            or ``(ids, distances)`` when *include_distances* is set.
        """
        if self._vectors is None or len(self._vectors) == 0:
            return ([], []) if include_distances else []

        query = np.asarray(vector, dtype=np.float32)

        # Cosine similarity via normalised dot product.
        norms = np.linalg.norm(self._vectors, axis=1, keepdims=True)
        # Avoid division by zero for zero-vectors.
        safe_norms = np.where(norms == 0, 1.0, norms)
        normed = self._vectors / safe_norms

        query_norm = np.linalg.norm(query)
        if query_norm == 0:
            query_normed = query
        else:
            query_normed = query / query_norm

        cos_sim = normed @ query_normed  # shape: (num_items,)

        # Angular distance (matches Annoy's definition).
        cos_sim_clipped = np.clip(cos_sim, -1.0, 1.0)
        distances = np.sqrt(2.0 * (1.0 - cos_sim_clipped))

        # Get top-n indices (lowest distance first).
        # NOTE: np.argpartition requires kth <= len(a) - 1, so clamp the
        # partition index; when n == len(distances) the slice below still
        # takes every element, so no results are lost.
        n = min(n, len(distances))
        kth = min(n, len(distances) - 1)
        top_indices = np.argpartition(distances, kth)[:n]
        top_indices = top_indices[np.argsort(distances[top_indices])]

        ids = top_indices.tolist()
        if include_distances:
            return ids, distances[top_indices].tolist()
        return ids

    # ------------------------------------------------------------------
    # Persistence
    # ------------------------------------------------------------------

    def save(self, path: str) -> None:
        """Save the index to disk as a ``.npy`` file.

        If the caller supplies a path ending in ``.ann`` (the annoy
        convention), we silently swap the extension to ``.npy`` so that
        both backends can coexist in the same cache directory.
        """
        if path.endswith(".ann"):
            path = path[:-4] + ".npy"
        if self._vectors is not None:
            np.save(path, self._vectors)

    def load(self, path: str) -> None:
        """Load a previously saved index from disk."""
        if path.endswith(".ann"):
            path = path[:-4] + ".npy"
        self._vectors = np.load(path).astype(np.float32)
        self._built = True
35 changes: 28 additions & 7 deletions nemoguardrails/kb/kb.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,14 +125,28 @@ async def build(self):
cache_file = os.path.join(CACHE_FOLDER, f"{hash_value}.ann")
embedding_size_file = os.path.join(CACHE_FOLDER, f"{hash_value}.esize")

# Determine which index backend to use
try:
from annoy import AnnoyIndex

_annoy_available = True
except ImportError:
_annoy_available = False

# When using the numpy fallback the cache file extension is .npy
# instead of .ann; check for both so that caches from either
# backend are honoured.
npy_cache_file = cache_file[:-4] + ".npy" if cache_file.endswith(".ann") else cache_file + ".npy"

has_ann_cache = os.path.exists(cache_file) and _annoy_available
has_npy_cache = os.path.exists(npy_cache_file)

# If we have already computed this before, we use it
if (
self.config.embedding_search_provider.name == "default"
and os.path.exists(cache_file)
and (has_ann_cache or has_npy_cache)
and os.path.exists(embedding_size_file)
Comment thread
cluster2600 marked this conversation as resolved.
):
from annoy import AnnoyIndex

from nemoguardrails.embeddings.basic import BasicEmbeddingsIndex

log.info(cache_file)
Comment thread
cluster2600 marked this conversation as resolved.
Outdated
Expand All @@ -146,8 +160,14 @@ async def build(self):
with open(embedding_size_file, "r") as f:
embedding_size = int(f.read())

ann_index = AnnoyIndex(embedding_size, "angular")
ann_index.load(cache_file)
if has_ann_cache and _annoy_available:
ann_index = AnnoyIndex(embedding_size, "angular")
ann_index.load(cache_file)
Comment thread
cluster2600 marked this conversation as resolved.
Outdated
else:
from nemoguardrails.embeddings.numpy_index import NumpyAnnoyIndex

ann_index = NumpyAnnoyIndex(embedding_size, "angular")
ann_index.load(npy_cache_file)

self.index.embeddings_index = ann_index

Expand All @@ -159,8 +179,9 @@ async def build(self):
await self.index.add_items(index_items)
await self.index.build()

# For the default Embedding Search provider, which uses annoy, we also
# persist the index after it's computed.
# For the default Embedding Search provider, which uses annoy
# (or the numpy fallback), we also persist the index after
# it is computed.
if self.config.embedding_search_provider.name == "default":
from nemoguardrails.embeddings.basic import BasicEmbeddingsIndex

Expand Down
Loading
Loading